Commit c5a37b77 authored by Florian Schäfer

Prepare fetching Wikidata item for specific Wikipedia article in batches

Adds a new API URL that finds the associated Wikidata items for multiple Wikipedia articles (in one language).
The JSON schema for deserializing the JSON answer has also been extended for the new API query.
parent f9fa6829
......@@ -16,6 +16,7 @@ import org.openstreetmap.josm.tools.Logging;
import org.openstreetmap.josm.tools.bugreport.BugReport;
import org.wikipedia.WikipediaPlugin;
import org.wikipedia.api.InvalidApiQueryException;
import org.wikipedia.api.wikidata_action.json.SerializationSchema;
public final class ApiQueryClient {
private static final ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper();
......@@ -36,7 +37,7 @@ public final class ApiQueryClient {
* @return the deserialized object
* @throws IOException if any error occurs while executing the query, with a translated message that can be shown to the user.
*/
public static <T> T query(final URL url, final Class<T> klass) throws IOException {
public static <T> T query(final URL url, final SerializationSchema<T> schema) throws IOException {
final HttpClient.Response response;
try {
response = HttpClient.create(url)
......@@ -63,8 +64,7 @@ public final class ApiQueryClient {
throw new IOException(I18n.tr("The Wikidata Action API reported that the query was invalid! Please report as bug to the Wikipedia plugin!"));
}
try {
return JSON_OBJECT_MAPPER.readValue(response.getContent(), klass);
return schema.getMapper().readValue(response.getContent(), schema.getSchemaClass());
} catch (JsonMappingException | JsonParseException e) {
throw new IOException(I18n.tr("The JSON response from the Wikidata Action API can't be read!"), e);
} catch (IOException e) {
......
......@@ -9,7 +9,8 @@ import org.wikipedia.api.ApiUrl;
import org.wikipedia.tools.RegexUtil;
public class WikidataActionApiUrl {
private static final String BASE_URL = "https://www.wikidata.org/w/api.php";
private static final String BASE_URL = "https://www.wikidata.org/w/api.php?";
private static final String FORMAT_PARAMS = "format=json&utf8=1&formatversion=1";
private WikidataActionApiUrl() {
// Private constructor to avoid instantiation
......@@ -24,9 +25,27 @@ public class WikidataActionApiUrl {
}
return ApiUrl.url(
BASE_URL,
"?action=wbgetentities&format=json&sites=&props=&ids=",
FORMAT_PARAMS,
"&action=wbgetentities&sites=&props=&ids=",
Utils.encodeUrl(String.join("|", qIds))
);
}
/**
 * Builds the URL of a {@code wbgetentities} query that requests the entities (with their sitelinks)
 * belonging to the given titles on the given site.
 * @param siteId the ID of the site the titles belong to, must pass {@link RegexUtil#isValidSiteId(String)}
 * @param titles the titles to look up, they are joined with {@code |} and URL-encoded; must not be empty
 * @return the URL for the query
 * @throws IllegalArgumentException if the site ID or the titles are {@code null}, the titles are empty,
 *     or the site ID does not have the expected format
 */
public static URL getEntityForSitelink(final String siteId, final Collection<String> titles) {
    final boolean argumentsPresent = siteId != null && titles != null && !titles.isEmpty();
    if (!argumentsPresent) {
        throw new IllegalArgumentException("The site ID and titles must be present!");
    }
    final boolean siteIdWellFormed = RegexUtil.isValidSiteId(siteId);
    if (!siteIdWellFormed) {
        throw new IllegalArgumentException("The site ID is not given in the expected format!");
    }
    final String encodedTitles = Utils.encodeUrl(String.join("|", titles));
    return ApiUrl.url(
        BASE_URL,
        FORMAT_PARAMS,
        "&action=wbgetentities&props=sitelinks",
        "&sites=", siteId, // the site on which the given titles are looked up
        "&sitefilter=", siteId, // restricts the returned sitelinks to this site
        "&titles=", encodedTitles
    );
}
}
// License: GPL. For details, see LICENSE file.
package org.wikipedia.api.wikidata_action.json;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
public final class CheckEntityExistsResult {
private final int success;
private final Map<String, Entity> entities;
private final Map<String, Entity> entities = new HashMap<>();
private final List<MissingEntity> missingEntities = new ArrayList<>();
@JsonCreator
public CheckEntityExistsResult(@JsonProperty("success") final int success, @JsonProperty("entities") final Map<String, Entity> entities) {
public CheckEntityExistsResult(@JsonProperty("success") final int success, @JsonProperty("entities") final Map<String, AbstractEntity> entities) {
this.success = success;
this.entities = entities;
entities.entrySet().stream().filter(it -> it.getValue() instanceof Entity).forEach(it -> {
this.entities.put(it.getKey(), (Entity) it.getValue());
});
entities.entrySet().stream().filter(it -> it.getValue() instanceof MissingEntity).forEach(it -> {
this.missingEntities.add((MissingEntity) it.getValue());
});
}
/**
 * Returns the success value that was deserialized from the result.
 * @return the success value, 1 means success, any other value means failure
 */
public int getSuccess() {
    return this.success;
}
/**
 * Returns all entities that were found.
 * @return an unmodifiable view of the found entities: the values are the entities themselves,
 *     the keys are the Q-IDs of the entities.
 *     Better rely on the Q-ID provided by the {@link Entity} object than on the key:
 *     it is not confirmed, but the key might sometimes be a redirect to the Q-ID that the entity provides.
 */
public Map<String, Entity> getEntities() {
    final Map<String, Entity> readOnlyView = Collections.unmodifiableMap(this.entities);
    return readOnlyView;
}
public static final class Entity {
/**
 * Returns the entities that the result reports as missing.
 * @return an unmodifiable view of all entities that are reported as missing
 */
public Collection<MissingEntity> getMissingEntities() {
    final Collection<MissingEntity> readOnlyView = Collections.unmodifiableCollection(this.missingEntities);
    return readOnlyView;
}
/**
 * Supertype for {@link MissingEntity} and {@link Entity}
 */
interface AbstractEntity {
    /**
     * Deserializer that decides per JSON object which implementation of {@link AbstractEntity} is created:
     * objects that have a field {@code missing} become a {@link MissingEntity}, all others an {@link Entity}.
     */
    class Deserializer extends StdDeserializer<AbstractEntity> {
        private final ObjectMapper mapper;

        /**
         * @param mapper the mapper that is used to convert the JSON tree to the concrete entity class
         */
        Deserializer(final ObjectMapper mapper) {
            // Report the handled type instead of null, so that handledType() returns something meaningful
            super(AbstractEntity.class);
            this.mapper = mapper;
        }

        /**
         * Reads the current JSON value as a tree and converts it to the matching entity class.
         * @param p the parser from which the JSON is read
         * @param ctxt the deserialization context (not used by this implementation)
         * @return a {@link MissingEntity} if the JSON object has a field {@code missing}, otherwise an {@link Entity}
         * @throws IOException if reading the tree or converting it to the entity class fails
         */
        @Override
        public AbstractEntity deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException, JsonProcessingException {
            final JsonNode node = p.getCodec().readTree(p);
            if (node.has("missing")) {
                return mapper.treeToValue(node, MissingEntity.class);
            }
            return mapper.treeToValue(node, Entity.class);
        }
    }
}
/**
 * An entity that the result reports as missing.
 * Holds the values of the JSON properties {@code id}, {@code site} and {@code title}.
 */
public static final class MissingEntity implements AbstractEntity {
    private final String id;
    private final String site;
    private final String title;

    /**
     * @param id the value of the JSON property {@code id}
     * @param site the value of the JSON property {@code site}
     * @param title the value of the JSON property {@code title}
     */
    @JsonCreator
    public MissingEntity(
        @JsonProperty("id") final String id,
        @JsonProperty("site") final String site,
        @JsonProperty("title") final String title
    ) {
        this.id = id;
        this.site = site;
        this.title = title;
    }

    /**
     * @return the value of the JSON property {@code id}
     */
    public String getId() {
        return this.id;
    }

    /**
     * @return the value of the JSON property {@code site}
     */
    public String getSite() {
        return this.site;
    }

    /**
     * @return the value of the JSON property {@code title}
     */
    public String getTitle() {
        return this.title;
    }
}
public static final class Entity implements AbstractEntity {
private final String id;
private final String type;
private final Map<String, Sitelink> sitelinks;
@JsonCreator
public Entity(@JsonProperty("id") final String id, @JsonProperty("type") final String type) {
public Entity(
@JsonProperty("id") final String id,
@JsonProperty("type") final String type,
@JsonProperty("sitelinks") final Map<String, Sitelink> sitelinks
) {
this.id = id;
this.type = type;
if (sitelinks == null) {
this.sitelinks = null;
} else {
this.sitelinks = new HashMap<>(sitelinks);
}
}
public String getId() {
......@@ -42,5 +132,31 @@ public final class CheckEntityExistsResult {
public String getType() {
return type;
}
/**
 * Returns the sitelinks of this entity, if any were given when the entity was created.
 * @return an empty {@link Optional} if no sitelinks map is present,
 *     otherwise an unmodifiable view of the values of the sitelinks map
 */
public Optional<Collection<Sitelink>> getSitelinks() {
    return Optional.ofNullable(sitelinks)
        .map(links -> Collections.unmodifiableCollection(links.values()));
}
/**
 * A single sitelink of an entity.
 * Holds the values of the JSON properties {@code site} and {@code title}.
 */
public static final class Sitelink {
    private final String site;
    private final String title;

    /**
     * @param site the value of the JSON property {@code site}
     * @param title the value of the JSON property {@code title}
     */
    @JsonCreator
    public Sitelink(
        @JsonProperty("site") final String site,
        @JsonProperty("title") final String title
    ) {
        this.site = site;
        this.title = title;
    }

    /**
     * @return the value of the JSON property {@code site}
     */
    public String getSite() {
        return this.site;
    }

    /**
     * @return the value of the JSON property {@code title}
     */
    public String getTitle() {
        return this.title;
    }
}
}
}
\ No newline at end of file
// License: GPL. For details, see LICENSE file.
package org.wikipedia.api.wikidata_action.json;
import java.util.function.Consumer;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
/**
 * Bundles an {@link ObjectMapper} with the class to which that mapper should deserialize.
 * @param <T> The type which represents the JSON on the Java side.
 */
public class SerializationSchema<T> {
    /**
     * Schema for deserializing to {@link CheckEntityExistsResult}.
     */
    public static final SerializationSchema<CheckEntityExistsResult> WBGETENTITIES = new SerializationSchema<>(
        CheckEntityExistsResult.class,
        SerializationSchema::configureForWbgetentities
    );

    private final ObjectMapper mapper;
    private final Class<T> schemaClass;

    /**
     * Creates a schema with a fresh {@link ObjectMapper}, which is handed to the given configuration.
     * @param schemaClass the class to which the JSON is deserialized
     * @param mapperConfig the configuration that is applied to the newly created mapper
     */
    private SerializationSchema(final Class<T> schemaClass, final Consumer<ObjectMapper> mapperConfig) {
        final ObjectMapper configuredMapper = new ObjectMapper();
        mapperConfig.accept(configuredMapper);
        this.mapper = configuredMapper;
        this.schemaClass = schemaClass;
    }

    /**
     * Configures the mapper of {@link #WBGETENTITIES}: unknown JSON properties do not fail the
     * deserialization and {@link CheckEntityExistsResult.AbstractEntity} gets its custom deserializer.
     * @param objectMapper the mapper to configure
     */
    private static void configureForWbgetentities(final ObjectMapper objectMapper) {
        objectMapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
        final SimpleModule entityModule = new SimpleModule();
        entityModule.addDeserializer(
            CheckEntityExistsResult.AbstractEntity.class,
            new CheckEntityExistsResult.AbstractEntity.Deserializer(objectMapper)
        );
        objectMapper.registerModule(entityModule);
    }

    /**
     * @return the mapper that is configured for this schema
     */
    public ObjectMapper getMapper() {
        return this.mapper;
    }

    /**
     * @return the class to which the JSON should be deserialized
     */
    public Class<T> getSchemaClass() {
        return this.schemaClass;
    }
}
// License: GPL. For details, see LICENSE file.
package org.wikipedia.data;
import java.util.Comparator;
import java.util.Optional;
import org.openstreetmap.josm.data.coor.LatLon;
import org.openstreetmap.josm.data.osm.Tag;
import org.openstreetmap.josm.tools.AlphanumComparator;
import org.openstreetmap.josm.tools.CheckParameterUtil;
import org.openstreetmap.josm.tools.Utils;
import org.wikipedia.WikipediaApp;
import org.wikipedia.tools.RegexUtil;
import java.util.Comparator;
import java.util.Optional;
public class WikidataEntry extends WikipediaEntry {
public final String label;
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia.data;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openstreetmap.josm.data.coor.LatLon;
import org.openstreetmap.josm.data.osm.Tag;
import org.openstreetmap.josm.tools.AlphanumComparator;
import org.openstreetmap.josm.tools.Utils;
import org.wikipedia.WikipediaApp;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class WikipediaEntry implements Comparable<WikipediaEntry> {
public final String lang;
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia.gui;
import static org.openstreetmap.josm.tools.I18n.tr;
import static org.openstreetmap.josm.tools.I18n.trn;
import java.util.Collection;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import javax.swing.JOptionPane;
import org.openstreetmap.josm.Main;
import org.openstreetmap.josm.data.osm.DefaultNameFormatter;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
......@@ -9,15 +18,6 @@ import org.openstreetmap.josm.gui.util.GuiHelper;
import org.openstreetmap.josm.tools.AlphanumComparator;
import org.openstreetmap.josm.tools.Utils;
import javax.swing.JOptionPane;
import java.util.Collection;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import static org.openstreetmap.josm.tools.I18n.tr;
import static org.openstreetmap.josm.tools.I18n.trn;
public class GuiUtils {
private GuiUtils() {
......
......@@ -5,6 +5,7 @@ import java.util.regex.Pattern;
public class RegexUtil {
private static final Pattern Q_ID_PATTERN = Pattern.compile("^Q[1-9][0-9]*$");
private static final Pattern SITE_ID_PATTERN = Pattern.compile("^[a-z][a-z][a-z]?wiki");
private RegexUtil() {
// Private constructor to avoid instantiation
......@@ -13,4 +14,15 @@ public class RegexUtil {
/**
 * Checks if the given string as a whole matches the pattern for Q-IDs.
 * @param value the potential Q-ID to check
 * @return {@code true} if the value is not {@code null} and matches the pattern, {@code false} otherwise
 */
public static boolean isValidQId(final String value) {
    if (value == null) {
        return false;
    }
    return Q_ID_PATTERN.matcher(value).matches();
}
/**
 * Checks that the given string as a whole matches "[a-z]{2,3}wiki".
 * This is only an approximation: in the future it should accept exactly the existing site IDs,
 * including wikimedia sites other than wikipedias. For now it is good enough.
 * @param value the potential site ID to check
 * @return {@code true} if the site ID is valid, {@code false} otherwise (also for {@code null})
 */
public static boolean isValidSiteId(final String value) {
    if (value == null) {
        return false;
    }
    return SITE_ID_PATTERN.matcher(value).matches();
}
}
// License: GPL. For details, see LICENSE file.
package org.wikipedia.tools;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import java.util.AbstractList;
import java.util.Collection;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.xpath.XPathConstants;
......@@ -13,8 +11,10 @@ import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFunctionResolver;
import javax.xml.xpath.XPathVariableResolver;
import java.util.AbstractList;
import java.util.Collection;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
public final class XPath implements javax.xml.xpath.XPath {
private final javax.xml.xpath.XPath xPath;
......
......@@ -20,6 +20,7 @@ import org.openstreetmap.josm.tools.ImageProvider;
import org.wikipedia.api.wikidata_action.ApiQueryClient;
import org.wikipedia.api.wikidata_action.WikidataActionApiUrl;
import org.wikipedia.api.wikidata_action.json.CheckEntityExistsResult;
import org.wikipedia.api.wikidata_action.json.SerializationSchema;
import org.wikipedia.tools.RegexUtil;
/**
......@@ -52,7 +53,7 @@ public class WikidataItemExists extends Test.TagTest {
if (qIds.stream().anyMatch(Objects::nonNull)) {
try {
final URL url = WikidataActionApiUrl.checkEntityExistsUrl(qIds.stream().filter(Objects::nonNull).collect(Collectors.toList()));
final CheckEntityExistsResult entityQueryResult = ApiQueryClient.query(url, CheckEntityExistsResult.class);
final CheckEntityExistsResult entityQueryResult = ApiQueryClient.query(url, SerializationSchema.WBGETENTITIES);
if (entityQueryResult.getSuccess() != 1) {
errors.add(AllValidationTests.API_REQUEST_FAILED.getBuilder(this).primitives(new ArrayList<>(primitives)).message(VALIDATOR_MESSAGE_MARKER + I18n.tr("The Wikidata Action API reports a failed query!")).build());
} else {
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.data.coor.LatLon;
import org.openstreetmap.josm.testutils.JOSMTestRules;
import org.wikipedia.data.WikidataEntry;
import org.wikipedia.data.WikipediaEntry;
import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.Collection;
......@@ -18,12 +18,12 @@ import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.data.coor.LatLon;
import org.openstreetmap.josm.testutils.JOSMTestRules;
import org.wikipedia.data.WikidataEntry;
import org.wikipedia.data.WikipediaEntry;
public class WikipediaAppTest {
......
......@@ -32,19 +32,19 @@ public class WikidataActionApiUrlTest {
@Test
public void testCheckEntityExistsUrl() {
assertEquals(
"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&sites=&props=&ids=Q1",
"https://www.wikidata.org/w/api.php?format=json&utf8=1&formatversion=1&action=wbgetentities&sites=&props=&ids=Q1",
WikidataActionApiUrl.checkEntityExistsUrl(Collections.singletonList("Q1")).toString()
);
assertEquals(
"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&sites=&props=&ids=Q1%7CQ42",
"https://www.wikidata.org/w/api.php?format=json&utf8=1&formatversion=1&action=wbgetentities&sites=&props=&ids=Q1%7CQ42",
WikidataActionApiUrl.checkEntityExistsUrl(Arrays.asList("Q1", "Q42")).toString()
);
assertEquals(
"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&sites=&props=&ids=Q1%7CQ42%7CQ12345",
"https://www.wikidata.org/w/api.php?format=json&utf8=1&formatversion=1&action=wbgetentities&sites=&props=&ids=Q1%7CQ42%7CQ12345",
WikidataActionApiUrl.checkEntityExistsUrl(Arrays.asList("Q1", "Q42", "Q12345")).toString()
);
assertEquals(
"https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&sites=&props=&ids=Q1%7CQ13%7CQ24%7CQ20150617%7CQ42%7CQ12345",
"https://www.wikidata.org/w/api.php?format=json&utf8=1&formatversion=1&action=wbgetentities&sites=&props=&ids=Q1%7CQ13%7CQ24%7CQ20150617%7CQ42%7CQ12345",
WikidataActionApiUrl.checkEntityExistsUrl(Arrays.asList("Q1", "Q13", "Q24", "Q20150617", "Q42", "Q12345")).toString()
);
}
......
......@@ -2,29 +2,23 @@
package org.wikipedia.api.wikidata_action.json;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.io.IOException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Test;
public class CheckEntityExistsResultTest {
@Test
public void test() throws IOException {
final ObjectMapper mapper = new ObjectMapper();
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
final CheckEntityExistsResult enitityQueryResult = mapper.readValue(
final CheckEntityExistsResult enitityQueryResult = SerializationSchema.WBGETENTITIES.getMapper().readValue(
CheckEntityExistsResultTest.class.getResourceAsStream("response-wbgetentities-checkExists-Q1_Q1234567.json"),
CheckEntityExistsResult.class
SerializationSchema.WBGETENTITIES.getSchemaClass()
);
assertEquals(1, enitityQueryResult.getSuccess());
assertEquals(2, enitityQueryResult.getEntities().size());
assertEquals(1, enitityQueryResult.getEntities().size());
assertEquals(1, enitityQueryResult.getMissingEntities().size());
assertEquals("Q1", enitityQueryResult.getEntities().get("Q1").getId());
assertEquals("item", enitityQueryResult.getEntities().get("Q1").getType());
assertEquals("Q1234567", enitityQueryResult.getEntities().get("Q1234567").getId());
assertNull(enitityQueryResult.getEntities().get("Q1234567").getType());
assertEquals("Q1234567", enitityQueryResult.getMissingEntities().iterator().next().getId());
}
}
......@@ -7,7 +7,6 @@ import static org.junit.Assert.assertThat;
import java.util.Arrays;
import java.util.List;
import javax.swing.JLabel;
import javax.swing.JTable;
......
// License: GPL. For details, see LICENSE file.
package org.wikipedia.io;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.data.osm.OsmPrimitiveType;
import org.openstreetmap.josm.data.osm.PrimitiveId;
import org.openstreetmap.josm.data.osm.SimplePrimitiveId;
import org.openstreetmap.josm.testutils.JOSMTestRules;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
......@@ -16,8 +11,13 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.junit.Rule;
import org.junit.Test;
import org.openstreetmap.josm.data.osm.OsmPrimitiveType;
import org.openstreetmap.josm.data.osm.PrimitiveId;
import org.openstreetmap.josm.data.osm.SimplePrimitiveId;
import org.openstreetmap.josm.testutils.JOSMTestRules;
/**
* Unit tests of {@link SophoxDownloadReader} class.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment