Commit cdd972fd authored by Florian Schäfer

Add validator to check if Wikipedia article redirects

parent 4e22aa03
......@@ -23,6 +23,7 @@ import org.wikipedia.gui.WikidataTagCellRenderer;
import org.wikipedia.gui.WikipediaToggleDialog;
import org.wikipedia.validator.WikidataItemExists;
import org.wikipedia.validator.WikipediaAgainstWikidata;
import org.wikipedia.validator.WikipediaRedirect;
public final class WikipediaPlugin extends Plugin {
public static final ImageIcon LOGO = ImageProvider.get("dialogs/wikipedia");
......@@ -46,6 +47,7 @@ public final class WikipediaPlugin extends Plugin {
OsmValidator.addTest(WikidataItemExists.class);
OsmValidator.addTest(WikipediaAgainstWikidata.class);
OsmValidator.addTest(WikipediaRedirect.class);
}
public static String getVersionInfo() {
......
......@@ -57,7 +57,7 @@ public final class ApiQueryClient {
Logging.log(Level.INFO, "Failed to update the cached API response. Falling back to the cached response.", e);
}
}
Logging.info("API request is served from cache: {0}", query.getCacheKey());
Logging.debug("API request is served from cache: {0}", query.getCacheKey());
stream = new ByteArrayInputStream(cachedValue.getBytes(StandardCharsets.UTF_8));
} else {
stream = getInputStreamForQuery(query);
......@@ -70,7 +70,7 @@ public final class ApiQueryClient {
}
}
private static InputStream getInputStreamForQuery(final ApiQuery query) throws IOException {
private static InputStream getInputStreamForQuery(final ApiQuery<?> query) throws IOException {
final HttpClient.Response response;
try {
response = query.getHttpClient().connect();
......
......@@ -16,6 +16,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import org.wikipedia.api.SerializationSchema;
import org.wikipedia.tools.RegexUtil;
......@@ -97,13 +98,16 @@ public final class SitematrixResult {
@JsonProperty("name") final String name,
@JsonProperty("site") final Collection<Site> sites
) {
this.code = code;
this.code = Objects.requireNonNull(code);
this.name = name;
if (sites != null) {
this.sites.addAll(sites);
}
}
/**
 * Returns the code stored for this entry.
 *
 * @return the code representing the language of the Wikimedia site
 */
public String getCode() {
return code;
}
......@@ -135,10 +139,17 @@ public final class SitematrixResult {
this.url = url;
}
/**
 * Returns the code stored for this site entry.
 *
 * @return the code representing the type of the Wikimedia site (NOT the language).
 *     Values can be e.g. "wiki", "wikibooks", "wiktionary", "wikidata", …
 */
public String getCode() {
return code;
}
/**
 * Returns the database name stored for this site entry.
 *
 * @return a unique string representing the Wikimedia site
 */
public String getDbName() {
return dbName;
}
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia.tools;
import java.io.IOException;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.tools.Pair;
import org.wikipedia.data.IWikipediaSite;
import org.wikipedia.data.WikipediaSite;
/**
 * Static helpers for reading Wikipedia-related tags from OSM primitives.
 */
public final class OsmPrimitiveUtil {
    /**
     * Splits a wikipedia=* value into "language" (group 1) and "article title" (group 2).
     * The first group is reluctant so the value is split at the FIRST colon: article titles may
     * themselves contain colons (e.g. {@code en:Star Wars: Episode I}), and a greedy first group
     * would swallow part of the title into the language code.
     */
    private static final Pattern WIKIPEDIA_PATTERN = Pattern.compile("(.+?):(.+)");

    /** The key of the tag that holds the Wikipedia article reference. */
    public static final String TAG_NAME_WIKIPEDIA = "wikipedia";

    private OsmPrimitiveUtil() {
        // Private constructor to avoid instantiation
    }

    /**
     * Returns the language and article title iff the given primitive has a wikipedia=* tag of
     * the form {@code language:title} and the part before the first colon is an existent Wikipedia language.
     * Everything after the first colon is treated as the article title, so titles containing colons are supported.
     * @param primitive the primitive for which the Wikipedia site and title of the Wikipedia article will be returned
     * @return A pair of the Wikipedia site as the first component, the article title as second component.
     *     Or an empty optional if there is either no wikipedia=* tag, or if the tag value does not match
     *     {@code (.+?):(.+)}, or if the Wikipedia language does not exist or is closed
     */
    public static Optional<Pair<IWikipediaSite, String>> getWikipediaValue(final OsmPrimitive primitive) {
        final String tagValue = primitive.get(TAG_NAME_WIKIPEDIA);
        if (tagValue == null) {
            return Optional.empty();
        }
        final Matcher matcher = WIKIPEDIA_PATTERN.matcher(tagValue);
        if (!matcher.matches()) {
            return Optional.empty();
        }
        try {
            final WikipediaSite site = new WikipediaSite(matcher.group(1));
            if (!site.getSite().isClosed()) {
                return Optional.of(Pair.create(site, matcher.group(2)));
            }
        } catch (IOException | IllegalArgumentException e) {
            // Deliberately not propagated: a site that cannot be resolved is reported to the
            // caller the same way as a malformed tag value, i.e. as an empty result.
            return Optional.empty();
        }
        // The site exists but is closed.
        return Optional.empty();
    }
}
......@@ -14,6 +14,8 @@ class AllValidationTests {
static final ValidationTest<WikidataItemExists> WIKIDATA_ITEM_DOES_NOT_EXIST = new ValidationTest<>(Severity.ERROR, 30_002);
static final ValidationTest<WikidataItemExists> WIKIDATA_ITEM_IS_REDIRECT = new ValidationTest<>(Severity.WARNING, 30_003);
static final ValidationTest<WikipediaAgainstWikidata> WIKIDATA_ITEM_NOT_MATCHING_WIKIPEDIA = new ValidationTest<>(Severity.WARNING, 30_004);
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_ARTICLE_REDIRECTS = new ValidationTest<>(Severity.WARNING, 30_005);
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_TAG_INVALID = new ValidationTest<>(Severity.ERROR, 30_006);
// i18n: Prefix for the validator messages. Note the space at the end!
static final String VALIDATOR_MESSAGE_MARKER = I18n.tr("[Wiki] ");
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia.validator;
import static org.wikipedia.validator.AllValidationTests.SEE_OTHER_CATEGORY_VALIDATOR_ERRORS;
import static org.wikipedia.validator.AllValidationTests.VALIDATOR_MESSAGE_MARKER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.swing.JOptionPane;
import org.openstreetmap.josm.command.ChangePropertyCommand;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.gui.Notification;
import org.openstreetmap.josm.tools.I18n;
import org.openstreetmap.josm.tools.Pair;
import org.wikipedia.WikipediaPlugin;
import org.wikipedia.api.ApiQueryClient;
import org.wikipedia.api.wikipedia_action.WikipediaActionApiQuery;
import org.wikipedia.api.wikipedia_action.json.QueryResult;
import org.wikipedia.data.IWikipediaSite;
import org.wikipedia.tools.ListUtil;
import org.wikipedia.tools.OsmPrimitiveUtil;
/**
 * Validator test that looks at the wikipedia=* tag of primitives and reports
 * <ul>
 *   <li>tag values for which {@link OsmPrimitiveUtil#getWikipediaValue(OsmPrimitive)} yields no result, and</li>
 *   <li>article titles which the Wikipedia API resolves to a different title (redirects).</li>
 * </ul>
 * Titles are queried per language in batches of at most 50.
 */
public class WikipediaRedirect extends BatchProcessedTagTest<WikipediaRedirect.TestCompanion> {

    /** Stored as the final notification as soon as the API request for one batch fails. */
    private static final Notification NETWORK_FAILED_NOTIFICATION = new Notification(
        I18n.tr("Could not check for all wikipedia=* tags if they redirect to another lemma.")
            + "\n"
            + SEE_OTHER_CATEGORY_VALIDATOR_ERRORS
    ).setIcon(WikipediaPlugin.LOGO);

    /**
     * Creates the test with its translated name and description.
     */
    public WikipediaRedirect() {
        super(
            I18n.tr("Check wikipedia=* is not a redirect"),
            I18n.tr("Make sure that the wikipedia=* article is not redirecting to another lemma")
        );
    }

    /**
     * Builds the companion object for one primitive. When the primitive carries a wikipedia=* tag
     * that cannot be turned into a site/title pair, an "invalid format" error is recorded instead.
     * @param primitive the primitive to inspect
     * @return the companion holding site and title, or {@code null} if no site/title pair is available
     */
    @Override
    protected TestCompanion prepareTestCompanion(OsmPrimitive primitive) {
        final String rawTagValue = primitive.get(OsmPrimitiveUtil.TAG_NAME_WIKIPEDIA);
        final Optional<Pair<IWikipediaSite, String>> siteAndTitle = OsmPrimitiveUtil.getWikipediaValue(primitive);

        if (siteAndTitle.isPresent()) {
            final Pair<IWikipediaSite, String> pair = siteAndTitle.get();
            return new TestCompanion(primitive, pair.a, pair.b);
        }

        // A tag is present, but it could not be interpreted.
        if (rawTagValue != null) {
            errors.add(
                AllValidationTests.WIKIPEDIA_TAG_INVALID.getBuilder(this)
                    .message(
                        VALIDATOR_MESSAGE_MARKER + I18n.tr("Wikipedia tag has invalid format!"),
                        I18n.marktr("The value ''{0}'' is not allowed for the wikipedia=* tag"),
                        rawTagValue
                    )
                    .primitives(primitive)
                    .build()
            );
        }
        return null;
    }

    /**
     * Groups the collected companions by language code and checks each language group on its own.
     * @param allPrimitives the companions of all primitives that should be checked
     */
    @Override
    protected void check(List<TestCompanion> allPrimitives) {
        for (final List<TestCompanion> sameLanguage
            : allPrimitives.stream().collect(Collectors.groupingBy(it -> it.site.getLanguageCode())).values()
        ) {
            checkLanguageGroup(sameLanguage);
        }
    }

    /**
     * Checks all companions that share one language code: the primitives are grouped by article title
     * and the resulting title entries are processed in batches of at most 50.
     * @param sameLanguage the companions of one language group
     */
    private void checkLanguageGroup(final List<TestCompanion> sameLanguage) {
        final List<Map.Entry<String, List<OsmPrimitive>>> primitivesPerTitle = new ArrayList<>(
            sameLanguage.stream()
                .collect(Collectors.groupingBy(
                    companion -> companion.title,
                    Collectors.mapping(BatchProcessedTagTest.TestCompanion::getPrimitive, Collectors.toList())
                ))
                .entrySet()
        );
        ListUtil.processInBatches(
            primitivesPerTitle,
            50,
            // The site is taken from an arbitrary member of the language group.
            batch -> sameLanguage.stream().findAny().ifPresent(any -> this.checkBatch(any.site, batch)),
            this::updateBatchProgress
        );
    }

    /**
     * Check one batch containing only article titles for one Wikipedia
     * @param site the Wikimedia site for which the titles should be checked
     * @param batch a list of map entries, which map the title of an article to the list of primitives
     *     whose wikipedia=* tag points to that lemma.
     */
    private void checkBatch(final IWikipediaSite site, final List<Map.Entry<String, List<OsmPrimitive>>> batch) {
        final List<String> titles = batch.stream().map(Map.Entry::getKey).collect(Collectors.toList());
        try {
            final QueryResult queryResult = ApiQueryClient.query(WikipediaActionApiQuery.query(site, titles));
            for (final Map.Entry<String, List<OsmPrimitive>> entry : batch) {
                final String title = entry.getKey();
                final String target = queryResult.getQuery().getRedirects().resolveRedirect(title);
                if (target == null || target.equals(title)) {
                    // Not redirected, nothing to report for this title.
                    continue;
                }
                reportRedirect(site, title, target, entry.getValue());
            }
        } catch (IOException e) {
            // The whole batch could not be checked: attach the failure to every primitive of the batch.
            errors.add(
                AllValidationTests.API_REQUEST_FAILED.getBuilder(this)
                    .primitives(batch.stream().flatMap(it -> it.getValue().stream()).collect(Collectors.toList()))
                    .message(VALIDATOR_MESSAGE_MARKER + e.getMessage())
                    .build()
            );
            finalNotification = NETWORK_FAILED_NOTIFICATION;
        }
    }

    /**
     * Records a "Wikipedia article is a redirect" error, including a fix that retags the primitives.
     * @param site the site the article belongs to
     * @param title the title currently used in the wikipedia=* tag
     * @param target the title that {@code title} redirects to
     * @param primitives the primitives whose tag points to {@code title}
     */
    private void reportRedirect(
        final IWikipediaSite site,
        final String title,
        final String target,
        final List<OsmPrimitive> primitives
    ) {
        errors.add(
            AllValidationTests.WIKIPEDIA_ARTICLE_REDIRECTS.getBuilder(this)
                .primitives(primitives)
                .message(
                    VALIDATOR_MESSAGE_MARKER + I18n.tr("Wikipedia article is a redirect"),
                    I18n.marktr("Wikipedia article ''{0}'' redirects to ''{1}''"),
                    title,
                    target
                )
                .fix(() -> confirmReplacement(site, title, target, primitives))
                .build()
        );
    }

    /**
     * Asks the user whether the tag should be changed to the redirect target.
     * @param site the site the article belongs to
     * @param title the title currently used in the wikipedia=* tag
     * @param target the title that {@code title} redirects to
     * @param primitives the primitives that would be retagged
     * @return the command changing the wikipedia=* tag, or {@code null} if the user did not choose "yes"
     */
    private ChangePropertyCommand confirmReplacement(
        final IWikipediaSite site,
        final String title,
        final String target,
        final List<OsmPrimitive> primitives
    ) {
        // TODO: Allow the user to view either Wikipedia article
        final int answer = JOptionPane.showConfirmDialog(
            null,
            I18n.tr("Should the wikipedia tag be replaced with the redirect target? Make sure the meaning of the tag remains the same!\n\nBefore: wikipedia={0}:{1}\nAfter: wikipedia={0}:{2}", site.getLanguageCode(), title, target),
            I18n.tr("Change wikipedia tag?"),
            JOptionPane.YES_NO_OPTION,
            JOptionPane.QUESTION_MESSAGE
        );
        if (answer != JOptionPane.YES_OPTION) {
            return null;
        }
        return new ChangePropertyCommand(
            primitives,
            OsmPrimitiveUtil.TAG_NAME_WIKIPEDIA,
            site.getLanguageCode() + ':' + target
        );
    }

    /**
     * Companion that remembers, next to the primitive itself, the site and the article title
     * taken from the primitive's wikipedia=* tag. Neither of the two may be {@code null}.
     */
    static class TestCompanion extends BatchProcessedTagTest.TestCompanion {
        final IWikipediaSite site;
        final String title;

        TestCompanion(OsmPrimitive primitive, final IWikipediaSite site, final String title) {
            super(primitive);
            this.site = Objects.requireNonNull(site);
            this.title = Objects.requireNonNull(title);
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment