...
 
Commits (1)
......@@ -4,6 +4,7 @@ import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Objects;
import java.util.stream.Stream;
import org.openstreetmap.josm.tools.HttpClient;
import org.openstreetmap.josm.tools.Utils;
import org.wikipedia.api.ApiQuery;
......@@ -11,6 +12,7 @@ import org.wikipedia.api.ApiUrl;
import org.wikipedia.api.SerializationSchema;
import org.wikipedia.api.wdq.json.SparqlResult;
import org.wikipedia.tools.RegexUtil;
import org.wikipedia.tools.WikiProperties;
public class WdqApiQuery<T> extends ApiQuery<T> {
private static final String[] TICKET_KEYWORDS = {"wikidata", "QueryService"};
......@@ -37,22 +39,28 @@ public class WdqApiQuery<T> extends ApiQuery<T> {
}
/**
* @param items the items for which we check if they are instances of {@code x}
* or instances of any subclass of {@code x}.
* @param x the Q-ID of an item, for which the query checks if the provided items are instances of it,
* @param items the items for which we check if they are instances of one of the items provided by {@code classes}
* or instances of any subclass of the items provided by {@code classes}.
* @param classes the Q-IDs of items, for which the query checks if the provided items are instances of it,
* or instances of subclasses of it.
* @return the API query
*/
public static WdqApiQuery<SparqlResult> findInstancesOfXOrOfSubclass(final Collection<String> items, final String x) {
public static WdqApiQuery<SparqlResult> findInstancesOfClassesOrTheirSubclasses(final Collection<String> items, final Collection<String> classes) {
Objects.requireNonNull(items);
Objects.requireNonNull(x);
if (items.size() <= 0 || !items.stream().allMatch(RegexUtil::isValidQId) || !RegexUtil.isValidQId(x)) {
throw new IllegalArgumentException("All arguments for the 'is instance of X or of subclass' check must be valid Q-IDs!");
Objects.requireNonNull(classes);
if (items.size() >= 1 && classes.size() >= 1 && Stream.concat(items.stream(), classes.stream()).allMatch(RegexUtil::isValidQId)) {
return new WdqApiQuery<>(
ApiUrl.url("https://query.wikidata.org/sparql"),
"format=json&query=" + Utils.encodeUrl(String.format(
"SELECT DISTINCT ?item ?itemLabel ?classes ?classesLabel WHERE { VALUES ?item { wd:%s }. VALUES ?classes { wd:%s }. ?item wdt:P31/wdt:P279* ?supertype. ?supertype wdt:P279* ?classes. SERVICE wikibase:label { bd:serviceParam wikibase:language \"%s\" }. }",
String.join(" wd:", items),
String.join(" wd:", classes),
WikiProperties.WIKIPEDIA_LANGUAGE.get()
)),
SparqlResult.SCHEMA
);
} else {
throw new IllegalArgumentException("All arguments for the 'is instance of classes or their subclasses' check must be one or more valid Q-IDs!");
}
return new WdqApiQuery<>(
ApiUrl.url("https://query.wikidata.org/sparql"),
"format=json&query=" + Utils.encodeUrl(String.format("SELECT DISTINCT ?item WHERE { VALUES ?item { wd:%s } ?item wdt:P31/wdt:P279* wd:%s. }", String.join(" wd:", items), x)),
SparqlResult.SCHEMA
);
}
}
package org.wikipedia.validator;
import static org.wikipedia.validator.AllValidationTests.SEE_OTHER_CATEGORY_VALIDATOR_ERRORS;
import static org.wikipedia.validator.AllValidationTests.VALIDATOR_MESSAGE_MARKER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.gui.Notification;
......@@ -41,45 +44,50 @@ public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidat
@Override
protected void check(List<TestCompanion> allPrimitives) {
ListUtil.processInBatches(allPrimitives, 50, batch -> {
for (final String forbiddenType : WikiProperties.WIKIDATA_VALIDATOR_UNUSUAL_CLASSES.get()) {
ListUtil.processInBatches(
// group the same wikidata values to reduce number of queries
new ArrayList<>(allPrimitives.stream().collect(Collectors.groupingBy(it -> it.wikidataValue)).entrySet()),
50,
batch -> {
try {
checkBatch(batch, forbiddenType);
final SparqlResult result = ApiQueryClient.query(WdqApiQuery.findInstancesOfClassesOrTheirSubclasses(
batch.stream().map(Map.Entry::getKey).collect(Collectors.toList()),
WikiProperties.WIKIDATA_VALIDATOR_UNUSUAL_CLASSES.get()
));
for (final List<SparqlResult.Results.Entry> row : result.getRows()) {
final String itemUrl = row.get(0).getValue();
final String itemQId = itemUrl.substring(itemUrl.lastIndexOf('/') >= 0 ? itemUrl.lastIndexOf('/') + 1 : 0);
final String classUrl = row.get(2).getValue();
final String classQId = classUrl.substring(itemUrl.lastIndexOf('/') >= 0 ? itemUrl.lastIndexOf('/') + 1 : 0);
final Collection<OsmPrimitive> primitives = batch.stream()
.filter(it -> itemQId.equals(it.getKey()))
.flatMap(it -> it.getValue().stream().map(BatchProcessedTagTest.TestCompanion::getPrimitive))
.collect(Collectors.toList());
if (primitives.size() >= 1) {
errors.add(
AllValidationTests.WIKIDATA_TAG_HAS_UNUSUAL_TYPE.getBuilder(this)
.message(AllValidationTests.VALIDATOR_MESSAGE_MARKER + I18n.tr("Wikidata value is of unusual type for the wikidata=* tag on OSM objects"),
I18n.marktr("{0} is an instance of {1} (or any subclass thereof)"),
row.get(1).getValue() + " (" + itemQId + ")",
row.get(3).getValue() + " (" + classQId + ")"
)
.primitives(primitives)
.build()
);
}
}
} catch (IOException e) {
errors.add(
AllValidationTests.API_REQUEST_FAILED.getBuilder(this)
.primitives(batch.stream().map(BatchProcessedTagTest.TestCompanion::getPrimitive).collect(Collectors.toList()))
.message(AllValidationTests.VALIDATOR_MESSAGE_MARKER + e.getMessage())
.primitives(batch.stream().flatMap(it -> it.getValue().stream().map(BatchProcessedTagTest.TestCompanion::getPrimitive)).collect(Collectors.toList()))
.message(VALIDATOR_MESSAGE_MARKER + e.getMessage())
.build()
);
finalNotification = NETWORK_FAILED_NOTIFICATION;
}
}
});
}
private void checkBatch(final Collection<TestCompanion> batch, final String forbiddenType) throws IOException {
final SparqlResult result = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(batch.stream().map(it -> it.wikidataValue).collect(Collectors.toList()), forbiddenType));
for (List<SparqlResult.Results.Entry> row : result.getRows()) {
final String entityURL = row.get(0).getValue();
final String qID = entityURL.substring(entityURL.lastIndexOf('/') >= 0 ? entityURL.lastIndexOf('/') + 1 : 0);
final Collection<OsmPrimitive> primitives = batch.stream()
.filter(it -> qID.equals(it.wikidataValue))
.map(BatchProcessedTagTest.TestCompanion::getPrimitive)
.collect(Collectors.toList());
if (primitives.size() >= 1) {
errors.add(
AllValidationTests.WIKIDATA_TAG_HAS_UNUSUAL_TYPE.getBuilder(this)
.primitives(primitives)
.message(
"Wikidata value is of unusual type for the wikidata=* tag on OSM objects",
I18n.marktr("{0} is an instance of {1} (or any subclass thereof)"),
qID,
forbiddenType
)
.build()
);
}
}
);
}
static class TestCompanion extends BatchProcessedTagTest.TestCompanion {
......
......@@ -6,7 +6,9 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.testutils.JOSMTestRules;
......@@ -63,24 +65,16 @@ public class WdqApiQueryTest {
@Test
public void test() throws IOException {
final SparqlResult bridgeResult = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(MIXED_LIST, BRIDGE_CLASS));
bridgeResult.getRows().forEach(row -> assertEquals(1, row.size()));
for (final String bridge : BRIDGE_LIST) {
assertEquals("Bridge " + bridge + " not found in the result!", 1, bridgeResult.getRows().stream().filter(it -> ("http://www.wikidata.org/entity/" + bridge).equals(it.get(0).getValue())).count());
}
assertEquals(BRIDGE_LIST.size(), bridgeResult.size());
assertTrue(bridgeResult.getRows().stream().allMatch(row -> "uri".equals(row.get(0).getType())));
testFindInstancesOfClassesOrTheirSubclasses(MIXED_LIST, Arrays.asList(BRIDGE_CLASS, BUILDING_CLASS), MIXED_LIST);
testFindInstancesOfClassesOrTheirSubclasses(MIXED_LIST, Collections.singletonList(BRIDGE_CLASS), BRIDGE_LIST);
testFindInstancesOfClassesOrTheirSubclasses(MIXED_LIST, Collections.singletonList(BUILDING_CLASS), BUILDING_LIST);
}
final SparqlResult buildingResult = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(MIXED_LIST, BUILDING_CLASS));
buildingResult.getRows().forEach(row -> assertEquals(1, row.size()));
for (final String building : BUILDING_LIST) {
assertEquals(
"Building " + building + " not found in the result!",
1,
buildingResult.getRows().stream().filter(row -> ("http://www.wikidata.org/entity/" + building).equals(row.get(0).getValue())).count()
);
private void testFindInstancesOfClassesOrTheirSubclasses(final Collection<String> itemList, final Collection<String> classesList, final Collection<String> expectedResultList) throws IOException {
final SparqlResult result = ApiQueryClient.query(WdqApiQuery.findInstancesOfClassesOrTheirSubclasses(itemList, classesList));
for (final String expectedEntry : expectedResultList) {
assertEquals("Entry " + expectedEntry + " not found in the result!", 1, result.getRows().stream().filter(row -> ("http://www.wikidata.org/entity/" + expectedEntry).equals(row.get(0).getValue())).count());
}
assertEquals(BUILDING_LIST.size(), buildingResult.size());
assertTrue(buildingResult.getRows().stream().allMatch(row -> "uri".equals(row.get(0).getType())));
assertEquals(expectedResultList.size(), result.size());
}
}