Commit 50072358 authored by Florian Schäfer

Provide an auto-fix for wikidata=* tags with a brand as value

If the wikidata=* tag contains an item about a brand, there is now an auto-fix that adds the value to the brand:wikidata=* tag. If the value is already present in the latter tag, the wikidata=* tag is simply dropped.
This fixes #2, where this was proposed.
parent f45d42d5
package org.wikipedia.tools;
/**
 * Holder for string constants that name OSM tags used by this plugin.
 *
 * <p>The class only groups constants and is never instantiated.
 */
public class OsmTagConstants {

    private OsmTagConstants() {
        // Not instantiable: this class is a pure namespace for constants.
    }

    /**
     * Keys of OSM tags.
     */
    public static class Key {

        private Key() {
            // Not instantiable: this class is a pure namespace for constants.
        }

        /** Key of the {@code wikidata=*} tag. */
        public static final String WIKIDATA = "wikidata";

        /** Key of the {@code brand:wikidata=*} tag. */
        public static final String BRAND_WIKIDATA = "brand:wikidata";
    }
}
......@@ -6,6 +6,7 @@ import java.util.regex.Pattern;
public class RegexUtil {
/** Matches the letter {@code P} followed by a positive decimal number without leading zeros (e.g. {@code P31}). */
private static final Pattern PROPERTY_ID_PATTERN = Pattern.compile("^P[1-9][0-9]*$");
/** Matches the letter {@code Q} followed by a positive decimal number without leading zeros (e.g. {@code Q42}). */
private static final Pattern Q_ID_PATTERN = Pattern.compile("^Q[1-9][0-9]*$");
/** Matches one or more Q-IDs (same form as {@link #Q_ID_PATTERN}) separated by single semicolons, e.g. {@code Q1;Q23}. No whitespace and no leading or trailing semicolon is accepted. */
private static final Pattern MULTI_Q_ID_PATTERN = Pattern.compile("^Q[1-9][0-9]*(;Q[1-9][0-9]*)*$");
/** Matches two or three lowercase ASCII letters followed by the literal {@code wiki} (e.g. {@code enwiki}). */
private static final Pattern SITE_ID_PATTERN = Pattern.compile("^[a-z][a-z][a-z]?wiki$");
/** Matches two or three lowercase ASCII letters (group 1), a colon, and then at least one further character (group 2), e.g. {@code en:Berlin}. */
public static final Pattern WIKIPEDIA_TAG_VALUE_PATTERN = Pattern.compile("([a-z][a-z][a-z]?):(.+)");
......@@ -23,6 +24,10 @@ public class RegexUtil {
return value != null && Q_ID_PATTERN.matcher(value).matches();
}
/**
 * Checks whether the given string is a semicolon-separated list of one or more Q-IDs.
 *
 * @param value the string to check, may be {@code null}
 * @return {@code true} if {@code value} is non-null and matches {@code MULTI_Q_ID_PATTERN} in full,
 *     {@code false} otherwise (including for {@code null})
 */
public static boolean isValidMultiQId(final String value) {
    if (value == null) {
        return false;
    }
    return MULTI_Q_ID_PATTERN.matcher(value).matches();
}
public static void requireValidQId(final String value) {
if (!isValidQId(value)) {
throw new IllegalArgumentException("Q-ID is invalid!");
......
......@@ -17,6 +17,7 @@ class AllValidationTests {
// Each ValidationTest below pairs a severity with a distinct numeric code
// (presumably the JOSM validator error code; confirm against ValidationTest).
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_ARTICLE_REDIRECTS = new ValidationTest<>(Severity.WARNING, 30_005);
static final ValidationTest<WikipediaRedirect> WIKIPEDIA_TAG_INVALID = new ValidationTest<>(Severity.ERROR, 30_006);
// Used by UnusualWikidataClasses when the item in wikidata=* is an instance of an unusual class.
static final ValidationTest<UnusualWikidataClasses> WIKIDATA_TAG_HAS_UNUSUAL_TYPE = new ValidationTest<>(Severity.WARNING, 30_007);
// Used by UnusualWikidataClasses when an existing brand:wikidata=* value is not a semicolon-separated list of Q-IDs.
static final ValidationTest<UnusualWikidataClasses> INVALID_BRAND_WIKIDATA_TAG_FORMAT = new ValidationTest<>(Severity.ERROR, 30_008);
// i18n: Prefix for the validator messages. Note the space at the end!
static final String VALIDATOR_MESSAGE_MARKER = I18n.tr("[Wiki] ");
......
......@@ -5,11 +5,18 @@ import static org.wikipedia.validator.AllValidationTests.VALIDATOR_MESSAGE_MARKE
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.openstreetmap.josm.command.ChangePropertyCommand;
import org.openstreetmap.josm.command.Command;
import org.openstreetmap.josm.command.SequenceCommand;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.data.validation.TestError;
import org.openstreetmap.josm.gui.Notification;
import org.openstreetmap.josm.tools.I18n;
import org.wikipedia.WikipediaPlugin;
......@@ -17,10 +24,13 @@ import org.wikipedia.api.ApiQueryClient;
import org.wikipedia.api.wdq.WdqApiQuery;
import org.wikipedia.api.wdq.json.SparqlResult;
import org.wikipedia.tools.ListUtil;
import org.wikipedia.tools.OsmTagConstants;
import org.wikipedia.tools.RegexUtil;
import org.wikipedia.tools.WikiProperties;
public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidataClasses.TestCompanion> {
/**
 * Q-ID of the Wikidata class that this test treats as "brand". When the class reported for an
 * item equals this value, the validation error gets an auto-fix that moves the value from
 * wikidata=* to brand:wikidata=*.
 */
private static final String BRAND_QID = "Q167270";
private static final Notification NETWORK_FAILED_NOTIFICATION = new Notification(
I18n.tr("Could not check for unusual classes in wikidata=* tags.") +
"\n" + SEE_OTHER_CATEGORY_VALIDATOR_ERRORS
......@@ -35,7 +45,7 @@ public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidat
@Override
protected TestCompanion prepareTestCompanion(OsmPrimitive primitive) {
final String wikidataValue = primitive.get("wikidata");
final String wikidataValue = primitive.get(OsmTagConstants.Key.WIKIDATA);
if (RegexUtil.isValidQId(wikidataValue)) {
return new TestCompanion(primitive, wikidataValue);
}
......@@ -58,23 +68,14 @@ public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidat
final String itemUrl = row.get(0).getValue();
final String itemQId = itemUrl.substring(itemUrl.lastIndexOf('/') >= 0 ? itemUrl.lastIndexOf('/') + 1 : 0);
final String classUrl = row.get(2).getValue();
final String classQId = classUrl.substring(itemUrl.lastIndexOf('/') >= 0 ? itemUrl.lastIndexOf('/') + 1 : 0);
final String classQId = classUrl.substring(classUrl.lastIndexOf('/') >= 0 ? classUrl.lastIndexOf('/') + 1 : 0);
final Collection<OsmPrimitive> primitives = batch.stream()
.filter(it -> itemQId.equals(it.getKey()))
.flatMap(it -> it.getValue().stream().map(BatchProcessedTagTest.TestCompanion::getPrimitive))
.collect(Collectors.toList());
if (primitives.size() >= 1) {
errors.add(
AllValidationTests.WIKIDATA_TAG_HAS_UNUSUAL_TYPE.getBuilder(this)
.message(AllValidationTests.VALIDATOR_MESSAGE_MARKER + I18n.tr("Wikidata value is of unusual type for the wikidata=* tag on OSM objects"),
I18n.marktr("{0} is an instance of {1} (or any subclass thereof)"),
row.get(1).getValue() + " (" + itemQId + ")",
row.get(3).getValue() + " (" + classQId + ")"
)
.primitives(primitives)
.build()
);
if (!primitives.isEmpty()) {
buildTestError(primitives, itemQId, classQId, row);
}
}
} catch (IOException e) {
......@@ -90,6 +91,60 @@ public class UnusualWikidataClasses extends BatchProcessedTagTest<UnusualWikidat
);
}
/**
 * Adds a {@code WIKIDATA_TAG_HAS_UNUSUAL_TYPE} error for the given primitives to {@code errors}.
 *
 * <p>If {@code classQId} equals {@link #BRAND_QID}, an auto-fix is attached where possible. The
 * primitives are grouped by their current brand:wikidata=* value, and for every group whose value
 * is empty or a valid semicolon-separated Q-ID list, the fix removes wikidata=* and makes sure
 * {@code itemQId} is contained in brand:wikidata=*. Groups whose brand:wikidata=* value has an
 * invalid format get no fix; an {@code INVALID_BRAND_WIKIDATA_TAG_FORMAT} error is added for them
 * instead.
 *
 * @param primitives the primitives the unusual-type error is reported for
 * @param itemQId the Q-ID found in the wikidata=* tag of the primitives
 * @param classQId the Q-ID of the class that the item is an instance of
 * @param row the query result row; the values at index 1 and 3 are shown in the error message next
 *     to the item Q-ID and the class Q-ID respectively
 */
private void buildTestError(final Collection<OsmPrimitive> primitives, final String itemQId, final String classQId, final List<SparqlResult.Results.Entry> row) {
    final String itemDescription = row.get(1).getValue() + " (" + itemQId + ")";
    final String classDescription = row.get(3).getValue() + " (" + classQId + ")";
    final TestError.Builder unusualTypeError = AllValidationTests.WIKIDATA_TAG_HAS_UNUSUAL_TYPE.getBuilder(this)
        .message(
            AllValidationTests.VALIDATOR_MESSAGE_MARKER + I18n.tr("Wikidata value is of unusual type for the wikidata=* tag on OSM objects"),
            I18n.marktr("{0} is an instance of {1} (or any subclass thereof)"),
            itemDescription,
            classDescription
        )
        .primitives(primitives);

    // Only brands get an auto-fix; everything else is reported as-is.
    if (!BRAND_QID.equals(classQId)) {
        errors.add(unusualTypeError.build());
        return;
    }

    // Primitives sharing the same current brand:wikidata=* value can be changed by one command.
    // A missing tag is mapped to the empty string, so the grouping key is never null.
    final Map<String, List<OsmPrimitive>> byCurrentBrand = primitives.stream()
        .collect(Collectors.groupingBy(primitive -> {
            final String currentBrand = primitive.get(OsmTagConstants.Key.BRAND_WIKIDATA);
            return currentBrand == null ? "" : currentBrand;
        }));

    final List<Command> fixCommands = new ArrayList<>();
    for (final Map.Entry<String, List<OsmPrimitive>> group : byCurrentBrand.entrySet()) {
        final String currentBrand = group.getKey();
        final List<OsmPrimitive> affected = group.getValue();

        final String updatedBrand;
        if (currentBrand == null || currentBrand.isEmpty()) {
            // No brand:wikidata=* yet: the item becomes its sole value.
            updatedBrand = itemQId;
        } else if (!RegexUtil.isValidMultiQId(currentBrand)) {
            // The existing value cannot be extended safely: report it and offer no fix for this group.
            errors.add(
                AllValidationTests.INVALID_BRAND_WIKIDATA_TAG_FORMAT.getBuilder(this)
                    .message(VALIDATOR_MESSAGE_MARKER + I18n.tr("The value of tag brand:wikidata=* has an invalid format!"))
                    .primitives(affected)
                    .build()
            );
            continue;
        } else if (Arrays.asList(currentBrand.split(";")).contains(itemQId)) {
            // The item is already listed: keep the value unchanged.
            updatedBrand = currentBrand;
        } else {
            updatedBrand = currentBrand + ";" + itemQId;
        }
        if (updatedBrand == null) {
            continue;
        }

        final Map<String, String> tagChanges = new HashMap<>();
        // A null value makes the command remove the wikidata=* tag.
        tagChanges.put(OsmTagConstants.Key.WIKIDATA, null);
        if (!updatedBrand.equals(currentBrand)) {
            tagChanges.put(OsmTagConstants.Key.BRAND_WIKIDATA, updatedBrand);
        }
        fixCommands.add(new ChangePropertyCommand(affected, tagChanges));
    }

    if (fixCommands.size() > 1) {
        unusualTypeError.fix(() -> new SequenceCommand(I18n.tr("Move wikidata=* tags to tag brand:wikidata=*, when the value points to a brand"), fixCommands));
    } else if (fixCommands.size() == 1) {
        unusualTypeError.fix(() -> fixCommands.get(0));
    }
    errors.add(unusualTypeError.build());
}
static class TestCompanion extends BatchProcessedTagTest.TestCompanion {
private final String wikidataValue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment