Commit 5659dca1 authored by Florian Schäfer

Add first version of validator check for unusual types (humans, fictional items, …)

Needs rework: the check currently times out fairly often, probably because it hits some API rate limit (?). The queries for the different unwanted classes could probably be combined into one.
parent a1c2402f
Pipeline #26306564 passed with stages
in 7 minutes and 33 seconds
......@@ -21,6 +21,7 @@ import org.wikipedia.gui.WikiPreferences;
import org.wikipedia.gui.WikidataItemSearchDialog;
import org.wikipedia.gui.WikidataTagCellRenderer;
import org.wikipedia.gui.WikipediaToggleDialog;
import org.wikipedia.validator.UnusualWikidataClasses;
import org.wikipedia.validator.WikidataItemExists;
import org.wikipedia.validator.WikipediaAgainstWikidata;
import org.wikipedia.validator.WikipediaRedirect;
......@@ -49,6 +50,7 @@ public final class WikipediaPlugin extends Plugin {
OsmValidator.addTest(WikidataItemExists.class);
OsmValidator.addTest(WikipediaAgainstWikidata.class);
OsmValidator.addTest(WikipediaRedirect.class);
OsmValidator.addTest(UnusualWikidataClasses.class);
}
public static String getVersionInfo() {
......
......@@ -2,7 +2,9 @@
package org.wikipedia.tools;
import java.util.Arrays;
import org.openstreetmap.josm.data.preferences.DoubleProperty;
import org.openstreetmap.josm.data.preferences.ListProperty;
import org.openstreetmap.josm.data.preferences.StringProperty;
import org.openstreetmap.josm.tools.LanguageInfo;
......@@ -10,7 +12,23 @@ public final class WikiProperties {
public static final DoubleProperty WIKI_LAYER_MARKER_HEIGHT = new DoubleProperty("wikipedia.layer.marker_height", 30.0);
public static final StringProperty WIKIPEDIA_LANGUAGE = new StringProperty("wikipedia.lang", LanguageInfo.getJOSMLocaleCode().substring(0, 2));
private static final String JOSM_LOCALE = LanguageInfo.getJOSMLocaleCode();
public static final StringProperty WIKIPEDIA_LANGUAGE = new StringProperty(
"wikipedia.lang",
JOSM_LOCALE.substring(0, JOSM_LOCALE.indexOf('_') >= 1 ? JOSM_LOCALE.indexOf('_') : JOSM_LOCALE.length())
);
public static final ListProperty WIKIDATA_VALIDATOR_UNUSUAL_CLASSES = new ListProperty(
"wikipedia.validator.wikidata.unusual-classes",
Arrays.asList(
"Q36774", /* web page (includes e.g. disambiguation pages) */
"Q215627", /* person (included by "abstract object") */
"Q729", /* animal */
"Q8253", /* fiction */
"Q7184903" /* abstract object (includes e.g. taxons or brands) */
)
);
private WikiProperties() {
// Private constructor to avoid instantiation
......
......@@ -16,6 +16,7 @@ class AllValidationTests {
static final ValidationTest<WikipediaAgainstWikidata> WIKIDATA_ITEM_NOT_MATCHING_WIKIPEDIA = new ValidationTest<>(Severity.WARNING, 30_004);
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_ARTICLE_REDIRECTS = new ValidationTest<>(Severity.WARNING, 30_005);
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_TAG_INVALID = new ValidationTest<>(Severity.ERROR, 30_006);
static final ValidationTest<UnusualWikidataClasses> WIKIDATA_TAG_HAS_UNUSUAL_TYPE = new ValidationTest<>(Severity.WARNING, 30_007);
// i18n: Prefix for the validator messages. Note the space at the end!
static final String VALIDATOR_MESSAGE_MARKER = I18n.tr("[Wiki] ");
......
package org.wikipedia.validator;
import static org.wikipedia.validator.AllValidationTests.SEE_OTHER_CATEGORY_VALIDATOR_ERRORS;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.gui.Notification;
import org.openstreetmap.josm.tools.I18n;
import org.wikipedia.WikipediaPlugin;
import org.wikipedia.api.ApiQueryClient;
import org.wikipedia.api.wdq.WdqApiQuery;
import org.wikipedia.api.wdq.json.SparqlResult;
import org.wikipedia.tools.ListUtil;
import org.wikipedia.tools.RegexUtil;
import org.wikipedia.tools.WikiProperties;
/**
 * Validator test that flags OSM primitives whose {@code wikidata=*} tag refers to a Wikidata item
 * which the SPARQL query built by {@link WdqApiQuery#findInstancesOfXOrOfSubclass} reports for one of the
 * classes listed in {@link WikiProperties#WIKIDATA_VALIDATOR_UNUSUAL_CLASSES}.
 *
 * <p>Primitives are collected first and then checked in batches; one query is sent per batch and
 * per configured class.</p>
 */
public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidataClasses.TestCompanion> {

    /** Maximum number of primitives whose Q-IDs are sent to the query service in a single request. */
    private static final int BATCH_SIZE = 50;

    // NOTE(review): this notification is not referenced anywhere in this class. Presumably it is meant
    // to be shown to the user when a query fails — confirm against the sibling validator tests.
    private static final Notification NETWORK_FAILED_NOTIFICATION = new Notification(
        I18n.tr("Could not check for unusual classes in wikidata=* tags.") +
        "\n" + SEE_OTHER_CATEGORY_VALIDATOR_ERRORS
    ).setIcon(WikipediaPlugin.LOGO);

    /**
     * Creates the test with its (translated) name and description.
     */
    public UnusualWikidataClasses() {
        super(
            I18n.tr("Find OSM objects linked with wikidata items of a class that is untypical for OSM"),
            I18n.tr("This check queries Wikidata to find those OSM objects that are linked to wikidata items of a type, which should not occur in OSM data (at least not as the main wikidata tag)")
        );
    }

    /**
     * Wraps the primitive together with the value of its {@code wikidata} tag.
     *
     * @param primitive the primitive that is about to be tested
     * @return a companion holding the primitive and its Q-ID, or {@code null} if the {@code wikidata}
     *         value is not accepted by {@link RegexUtil#isValidQId}
     */
    @Override
    protected TestCompanion prepareTestCompanion(OsmPrimitive primitive) {
        final String wikidataValue = primitive.get("wikidata");
        if (RegexUtil.isValidQId(wikidataValue)) {
            return new TestCompanion(primitive, wikidataValue);
        }
        return null;
    }

    /**
     * Checks all collected primitives in batches of {@link #BATCH_SIZE}, running one query per batch
     * for every configured unusual class.
     *
     * <p>If a query fails with an {@link IOException}, an {@code API_REQUEST_FAILED} error is recorded
     * for all primitives of the affected batch and the remaining classes are still checked.</p>
     *
     * @param allPrimitives the companions of all primitives to check
     */
    @Override
    protected void check(List<TestCompanion> allPrimitives) {
        ListUtil.processInBatches(allPrimitives, BATCH_SIZE, batch -> {
            for (final String forbiddenType : WikiProperties.WIKIDATA_VALIDATOR_UNUSUAL_CLASSES.get()) {
                try {
                    checkBatch(batch, forbiddenType);
                } catch (IOException e) {
                    errors.add(
                        AllValidationTests.API_REQUEST_FAILED.getBuilder(this)
                            .primitives(batch.stream().map(BatchProcessedTagTest.TestCompanion::getPrimitive).collect(Collectors.toList()))
                            .message(AllValidationTests.VALIDATOR_MESSAGE_MARKER + e.getMessage())
                            .build()
                    );
                }
            }
        });
    }

    /**
     * Queries which Q-IDs of the given batch are reported for {@code forbiddenType} and adds one
     * validator error per result row that matches at least one primitive of the batch.
     *
     * @param batch the companions whose Q-IDs are sent with the query
     * @param forbiddenType the Q-ID of the class that is considered unusual
     * @throws IOException if the query fails
     */
    private void checkBatch(final Collection<TestCompanion> batch, final String forbiddenType) throws IOException {
        final List<String> qIds = batch.stream().map(it -> it.wikidataValue).collect(Collectors.toList());
        final SparqlResult result = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(qIds, forbiddenType));
        for (final List<SparqlResult.Results.Entry> row : result.getRows()) {
            // The first column of each row is treated as the entity URL; the Q-ID is whatever follows the last slash.
            // lastIndexOf returns -1 when there is no slash, so "+ 1" then yields 0 and the whole value is used.
            final String entityUrl = row.get(0).getValue();
            final String qId = entityUrl.substring(entityUrl.lastIndexOf('/') + 1);
            final Collection<OsmPrimitive> primitives = batch.stream()
                .filter(it -> qId.equals(it.wikidataValue))
                .map(BatchProcessedTagTest.TestCompanion::getPrimitive)
                .collect(Collectors.toList());
            if (!primitives.isEmpty()) {
                errors.add(
                    AllValidationTests.WIKIDATA_TAG_HAS_UNUSUAL_TYPE.getBuilder(this)
                        .primitives(primitives)
                        .message(
                            // Translated and prefixed with the marker, like the other message built in this class.
                            AllValidationTests.VALIDATOR_MESSAGE_MARKER
                                + I18n.tr("Wikidata value is of unusual type for the wikidata=* tag on OSM objects"),
                            I18n.marktr("{0} is an instance of {1} (or any subclass thereof)"),
                            qId,
                            forbiddenType
                        )
                        .build()
                );
            }
        }
    }

    /**
     * Companion that remembers the value of the {@code wikidata} tag alongside the primitive.
     */
    static class TestCompanion extends BatchProcessedTagTest.TestCompanion {
        /** Value of the primitive's {@code wikidata} tag at the time the companion was created. */
        private final String wikidataValue;

        /**
         * @param primitive the primitive under test
         * @param wikidataValue the value of the primitive's {@code wikidata} tag
         */
        TestCompanion(final OsmPrimitive primitive, final String wikidataValue) {
            super(primitive);
            this.wikidataValue = wikidataValue;
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment