Commit 1cad5e6e authored by Florian Schäfer

Add API query for Wikidata Query Service

It filters a given list of Wikidata items so that only instances of a certain Wikidata item, or instances of any subclass of that item, remain after filtering.
parent 34609344
package org.wikipedia.api.wdq;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Objects;
import org.openstreetmap.josm.tools.HttpClient;
import org.openstreetmap.josm.tools.Utils;
import org.wikipedia.api.ApiQuery;
import org.wikipedia.api.ApiUrl;
import org.wikipedia.api.SerializationSchema;
import org.wikipedia.api.wdq.json.SparqlResult;
import org.wikipedia.tools.RegexUtil;
/**
 * An {@link ApiQuery} that is sent as a form-encoded {@code POST} request and asks for
 * a response in the {@code application/sparql-results+json} format.
 *
 * @param <T> the type handled by the {@link SerializationSchema} given to the constructor
 */
public class WdqApiQuery<T> extends ApiQuery<T> {
  /** Keywords handed to {@code getUserAgent} when the User-Agent header is built. */
  private static final String[] TICKET_KEYWORDS = {"wikidata", "QueryService"};

  /** The request body, already in {@code application/x-www-form-urlencoded} form. */
  private final String queryString;

  /**
   * Creates a query against the given endpoint.
   *
   * @param url the URL the request is sent to
   * @param queryString the form-encoded request body, must not be {@code null}
   * @param schema the schema forwarded to the {@link ApiQuery} superclass
   * @throws NullPointerException if {@code queryString} is {@code null}
   */
  public WdqApiQuery(final URL url, final String queryString, final SerializationSchema<T> schema) {
    // NOTE(review): -1 is presumably the cache expiry setting of ApiQuery — confirm against ApiQuery.
    super(url, schema, -1);
    this.queryString = Objects.requireNonNull(queryString);
  }

  /**
   * @return the human readable name of the API that this query talks to
   */
  public String getApiName() {
    return "Wikidata Query Service API";
  }

  /**
   * Builds the HTTP client for this query: a {@code POST} to {@link #getUrl()} carrying
   * the query string (encoded as UTF-8) as request body.
   *
   * @return the configured HTTP client
   */
  @Override
  public HttpClient getHttpClient() {
    // The reason string is the query string with '&' replaced by a blank.
    final String reasonForRequest = queryString.replace('&', ' ');
    final byte[] requestBody = queryString.getBytes(StandardCharsets.UTF_8);
    return HttpClient.create(getUrl(), "POST")
      .setAccept("application/sparql-results+json")
      .setHeader("Content-Type", "application/x-www-form-urlencoded")
      .setHeader("User-Agent", getUserAgent(TICKET_KEYWORDS))
      .setReasonForRequest(reasonForRequest)
      .setRequestBody(requestBody);
  }

  /**
   * Creates a query that selects those of the given items which are instances of {@code x}
   * or instances of any subclass of {@code x}.
   *
   * @param items the items for which we check if they are instances of {@code x}
   *     or instances of any subclass of {@code x}. Must be non-empty and contain only valid Q-IDs.
   * @param x the Q-ID of an item, for which the query checks if the provided items are instances of it,
   *     or instances of subclasses of it.
   * @return the API query
   * @throws NullPointerException if {@code items} or {@code x} is {@code null}
   * @throws IllegalArgumentException if {@code items} is empty, or if any of the items or {@code x}
   *     is rejected by {@link RegexUtil#isValidQId}
   */
  public static WdqApiQuery<SparqlResult> findInstancesOfXOrOfSubclass(final Collection<String> items, final String x) {
    Objects.requireNonNull(items);
    Objects.requireNonNull(x);

    final boolean argumentsValid =
      !items.isEmpty()
      && items.stream().allMatch(RegexUtil::isValidQId)
      && RegexUtil.isValidQId(x);
    if (!argumentsValid) {
      throw new IllegalArgumentException("All arguments for the 'is instance of X or of subclass' check must be valid Q-IDs!");
    }

    final URL endpoint = ApiUrl.url("https://query.wikidata.org/sparql");
    // The template supplies the first "wd:" prefix, the join separator supplies the remaining ones.
    final String sparql = String.format(
      "SELECT DISTINCT ?item WHERE { VALUES ?item { wd:%s } ?item wdt:P31/wdt:P279* wd:%s. }",
      String.join(" wd:", items),
      x
    );
    final String formBody = "format=json&query=" + Utils.encodeUrl(sparql);
    return new WdqApiQuery<>(endpoint, formBody, SparqlResult.SCHEMA);
  }
}
package org.wikipedia.api.wdq.json;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.DeserializationFeature;
import java.security.cert.CollectionCertStoreParameters;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import org.wikipedia.api.SerializationSchema;
/**
 * Immutable representation of a SPARQL JSON result: a {@code head} naming the columns
 * ({@code vars}) and {@code results} holding one binding map per row.
 */
public class SparqlResult {
  /** Schema for this class; unknown JSON properties do not make deserialization fail. */
  public static final SerializationSchema<SparqlResult> SCHEMA = new SerializationSchema<>(
    SparqlResult.class,
    mapper -> mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
  );

  private final Head head;
  private final Results results;

  /**
   * @param head the {@code head} property of the JSON document, must not be {@code null}
   * @param results the {@code results} property of the JSON document, must not be {@code null}
   * @throws NullPointerException if one of the arguments is {@code null}
   */
  @JsonCreator
  public SparqlResult(@JsonProperty("head") final Head head, @JsonProperty("results") final Results results) {
    this.head = Objects.requireNonNull(head);
    this.results = Objects.requireNonNull(results);
  }

  /**
   * @return an unmodifiable view of the column labels ({@code head.vars})
   */
  public Collection<String> getColumnLabels() {
    final List<String> labels = head.vars;
    return Collections.unmodifiableCollection(labels);
  }

  /**
   * @return the number of columns, i.e. the number of entries in {@code head.vars}
   */
  public int getNumColumns() {
    final List<String> labels = head.vars;
    return labels.size();
  }

  /**
   * @return the number of rows, i.e. the number of entries in {@code results.bindings}
   */
  public int size() {
    final List<Map<String, Results.Entry>> rows = results.bindings;
    return rows.size();
  }

  /**
   * Looks up one cell of the result table.
   *
   * @param rowIndex the index of the row in {@code results.bindings}
   * @param columnIndex the index of the column label in {@code head.vars}
   * @return the entry that the row maps to the column label; whatever {@link Map#get} yields for that label
   */
  public Results.Entry getEntry(final int rowIndex, final int columnIndex) {
    final Map<String, Results.Entry> binding = results.bindings.get(rowIndex);
    final String columnLabel = head.vars.get(columnIndex);
    return binding.get(columnLabel);
  }

  /**
   * Builds the result table row by row.
   *
   * @return an unmodifiable list with one list per binding; each inner list holds the entries
   *     of that binding in the order of the column labels
   */
  public List<List<Results.Entry>> getRows() {
    final List<List<Results.Entry>> table = results.bindings.stream()
      .map(this::toRow)
      .collect(Collectors.toList());
    return Collections.unmodifiableList(table);
  }

  /**
   * Arranges the entries of a single binding in the order of the column labels.
   */
  private List<Results.Entry> toRow(final Map<String, Results.Entry> binding) {
    return head.vars.stream().map(binding::get).collect(Collectors.toList());
  }

  /** The {@code head} part of the JSON document, holding the column labels. */
  private static class Head {
    private final List<String> vars;

    /**
     * @param vars the {@code vars} property, must not be {@code null}
     */
    @JsonCreator
    public Head(@JsonProperty("vars") final List<String> vars) {
      this.vars = Objects.requireNonNull(vars);
    }
  }

  /** The {@code results} part of the JSON document, holding the bindings (rows). */
  public static class Results {
    private final List<Map<String, Entry>> bindings;

    /**
     * @param bindings the {@code bindings} property, must not be {@code null}
     */
    @JsonCreator
    public Results(@JsonProperty("bindings") final List<Map<String, Entry>> bindings) {
      this.bindings = Objects.requireNonNull(bindings);
    }

    /** A single cell of the result table, consisting of a type and a value. */
    public static class Entry {
      private final String type;
      private final String value;

      /**
       * @param type the {@code type} property, must not be {@code null}
       * @param value the {@code value} property, must not be {@code null}
       */
      @JsonCreator
      public Entry(@JsonProperty("type") final String type, @JsonProperty("value") final String value) {
        this.type = Objects.requireNonNull(type);
        this.value = Objects.requireNonNull(value);
      }

      /**
       * @return the type of this entry
       */
      public String getType() {
        return this.type;
      }

      /**
       * @return the value of this entry
       */
      public String getValue() {
        return this.value;
      }
    }
  }
}
package org.wikipedia.api.wdq;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import com.github.tomakehurst.wiremock.junit.WireMockRule;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.testutils.JOSMTestRules;
import org.openstreetmap.josm.tools.HttpClient;
import org.wikipedia.api.ApiQuery;
import org.wikipedia.api.ApiQueryClient;
import org.wikipedia.api.wdq.json.SparqlResult;
public class WdqApiQueryTest {
// TODO: Mock API responses with WireMock
@Rule
public JOSMTestRules josmRules = new JOSMTestRules().preferences().timeout(30_000);
private static final List<String> BRIDGE_LIST = Arrays.asList(
"Q99236", /* Millau viaduct */
"Q44440", /* Golden Gate Bridge */
"Q83125", /* Tower Bridge */
"Q54495", /* Sydney Harbour Bridge */
"Q459086", /* Jungfern Bridge */
"Q52505", /* Rialto Bridge */
"Q18109819", /* Duge Bridge */
"Q805835", /* Baluarte Bridge */
"Q5459867" /* Floating Bridge */
);
private static final List<String> BUILDING_LIST = Arrays.asList(
"Q48435", /* Sagrada Família */
"Q18712428", /* Makkah Clock Royal Tower Hotel */
"Q494895", /* Lotte World Tower */
"Q507939", /* World One */
"Q201013", /* Svalbard Seed Vault */
"Q379080", /* Fort Jesus */
"Q3368242" /* Dom Tower of Utrecht */
);
private static final Collection<String> MIXED_LIST = Arrays.asList(
BRIDGE_LIST.get(0),
BUILDING_LIST.get(0),
BRIDGE_LIST.get(1),
BUILDING_LIST.get(1),
BUILDING_LIST.get(2),
BRIDGE_LIST.get(2),
BRIDGE_LIST.get(3),
BRIDGE_LIST.get(4),
BUILDING_LIST.get(3),
BRIDGE_LIST.get(5),
BUILDING_LIST.get(4),
BRIDGE_LIST.get(6),
BRIDGE_LIST.get(7),
BRIDGE_LIST.get(8),
BUILDING_LIST.get(5),
BUILDING_LIST.get(6)
);
private static final String BRIDGE_CLASS = "Q12280";
private static final String BUILDING_CLASS = "Q41176";
@Test
public void test() throws IOException {
final SparqlResult bridgeResult = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(MIXED_LIST, BRIDGE_CLASS));
bridgeResult.getRows().forEach(row -> assertEquals(1, row.size()));
for (final String bridge : BRIDGE_LIST) {
assertEquals("Bridge " + bridge + " not found in the result!", 1, bridgeResult.getRows().stream().filter(it -> ("http://www.wikidata.org/entity/" + bridge).equals(it.get(0).getValue())).count());
}
assertEquals(BRIDGE_LIST.size(), bridgeResult.size());
assertTrue(bridgeResult.getRows().stream().allMatch(row -> "uri".equals(row.get(0).getType())));
final SparqlResult buildingResult = ApiQueryClient.query(WdqApiQuery.findInstancesOfXOrOfSubclass(MIXED_LIST, BUILDING_CLASS));
buildingResult.getRows().forEach(row -> assertEquals(1, row.size()));
for (final String building : BUILDING_LIST) {
assertEquals(
"Building " + building + " not found in the result!",
1,
buildingResult.getRows().stream().filter(row -> ("http://www.wikidata.org/entity/" + building).equals(row.get(0).getValue())).count()
);
}
assertEquals(BUILDING_LIST.size(), buildingResult.size());
assertTrue(buildingResult.getRows().stream().allMatch(row -> "uri".equals(row.get(0).getType())));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment