Commit 28adcdc0 authored by jhargrave

add simplifier test cases and update util code to handle simplification

parent 80401861
......@@ -24,11 +24,18 @@ import net.sf.okapi.lib.tkit.step.OriginalDocumentXliffMergerStep;
import net.sf.okapi.steps.common.FilterEventsWriterStep;
import net.sf.okapi.steps.common.RawDocumentToFilterEventsStep;
import net.sf.okapi.steps.common.RawDocumentWriterStep;
import net.sf.okapi.steps.common.codesimplifier.PostSegmentationCodeSimplifierStep;
import net.sf.okapi.steps.segmentation.Parameters;
import net.sf.okapi.steps.segmentation.SegmentationStep;
public final class RoundTripUtils {
public static void extract(LocaleId source, LocaleId target, String originalPath, String outputPath, String filterConfig, String customConfigPath, boolean segment) throws URISyntaxException {
/**
 * Seven-argument convenience form of {@code extract}.
 * <p>
 * Forwards every argument unchanged to the eight-argument overload and
 * supplies {@code false} for its trailing {@code simplify} flag.
 *
 * @param english          source locale, forwarded as-is
 * @param french           target locale, forwarded as-is
 * @param original         path of the document to extract, forwarded as-is
 * @param xliff            path of the extraction output, forwarded as-is
 * @param configName       filter configuration identifier, forwarded as-is
 * @param customConfigPath directory of custom configurations, may be {@code null}
 * @param segment          segmentation switch, forwarded as-is
 * @throws URISyntaxException propagated from the eight-argument overload
 */
public static void extract(LocaleId english, LocaleId french, String original, String xliff,
        String configName, String customConfigPath, boolean segment) throws URISyntaxException {
    // Callers of this shorter form never request code simplification.
    final boolean simplify = false;
    extract(english, french, original, xliff, configName, customConfigPath, segment, simplify);
}
public static void extract(LocaleId source, LocaleId target, String originalPath, String outputPath, String filterConfig, String customConfigPath, boolean segment, boolean simplify) throws URISyntaxException {
FilterConfigurationMapper mapper = new FilterConfigurationMapper();
DefaultFilters.setMappings(mapper, false, true);
if (customConfigPath != null) {
......@@ -61,8 +68,13 @@ public final class RoundTripUtils {
params.setSegmentTarget(true);
params.setSourceSrxPath(RoundTripUtils.class.getClassLoader().getResource("default.srx").getPath());
params.setTargetSrxPath(RoundTripUtils.class.getClassLoader().getResource("default.srx").getPath());
params.setCopySource(false);
driver.addStep(ss);
}
if (simplify) {
driver.addStep(new PostSegmentationCodeSimplifierStep());
}
// Filter events to raw document final step (using the XLIFF writer)
FilterEventsWriterStep fewStep = new FilterEventsWriterStep();
......@@ -80,7 +92,7 @@ public final class RoundTripUtils {
params.setToolId("okapi");
params.setToolName("okapi-tests");
params.setToolCompany("okapi");
params.setToolVersion("M28");
params.setToolVersion("M29");
fewStep.setDocumentRoots(Util.getDirectoryName(originalPath));
driver.addStep(fewStep);
......
......@@ -21,6 +21,7 @@ import net.sf.okapi.common.filters.RoundTripComparison;
import net.sf.okapi.filters.html.HtmlFilter;
import net.sf.okapi.filters.idml.IDMLFilter;
import net.sf.okapi.filters.xliff.XLIFFFilter;
import net.sf.okapi.filters.xmlstream.XmlStreamFilter;
import net.sf.okapi.steps.common.codesimplifier.PostSegmentationCodeSimplifierStep;
import net.sf.okapi.steps.segmentation.Parameters;
import net.sf.okapi.steps.segmentation.SegmentationStep;
......@@ -47,6 +48,7 @@ public class PostSegmentationSimplifierIT {
params.setSegmentTarget(true);
params.setSourceSrxPath(PostSegmentationSimplifierIT.class.getClassLoader().getResource("default.srx").getPath());
params.setTargetSrxPath(PostSegmentationSimplifierIT.class.getClassLoader().getResource("default.srx").getPath());
params.setCopySource(false);
segmentationStep.handleEvent(Event.START_BATCH_ITEM_EVENT);
}
......@@ -80,6 +82,48 @@ public class PostSegmentationSimplifierIT {
new PostSegmentationCodeSimplifierStep()));
}
/**
 * Passes {@code references_as_codes.html} to
 * {@link RoundTripComparison#executeCompare} using a new {@link HtmlFilter},
 * the shared segmentation step and a new
 * {@link PostSegmentationCodeSimplifierStep}, and requires the comparison
 * to report {@code true}.
 */
@SuppressWarnings("resource")
@Test
public void testDoubleExtractionReferences() {
    ArrayList<InputDocument> docs = new ArrayList<InputDocument>();
    InputDocument referencesDoc = new InputDocument(pathBase + "references_as_codes.html", null);
    docs.add(referencesDoc);

    RoundTripComparison comparison = new RoundTripComparison();
    HtmlFilter filter = new HtmlFilter();
    boolean identical = comparison.executeCompare(filter, docs, "UTF-8", EN, ESES, "out",
            segmentationStep,
            new PostSegmentationCodeSimplifierStep());
    assertTrue(identical);
}
/**
 * Passes {@code dita.xml} to {@link RoundTripComparison#executeCompare}
 * using an {@link XmlStreamFilter} whose parameters are loaded from the
 * {@code dita.yml} resource, together with the shared segmentation step and
 * a new {@link PostSegmentationCodeSimplifierStep}; the comparison must
 * report {@code true}.
 */
@SuppressWarnings("resource")
@Test
public void testDoubleExtractionDita() {
    ArrayList<InputDocument> docs = new ArrayList<InputDocument>();
    InputDocument ditaDoc = new InputDocument(pathBase + "dita.xml", null);
    docs.add(ditaDoc);

    RoundTripComparison comparison = new RoundTripComparison();

    // The DITA rules come from the dita.yml resource located next to XmlStreamFilter.
    XmlStreamFilter ditaFilter = new XmlStreamFilter();
    ditaFilter.setParametersFromURL(XmlStreamFilter.class.getResource("dita.yml"));

    boolean identical = comparison.executeCompare(ditaFilter, docs, "UTF-8", EN, ESES, "out",
            segmentationStep,
            new PostSegmentationCodeSimplifierStep());
    assertTrue(identical);
}
/**
 * Passes {@code merged_codes.html} to
 * {@link RoundTripComparison#executeCompare} using a new {@link HtmlFilter},
 * the shared segmentation step and a new
 * {@link PostSegmentationCodeSimplifierStep}, and requires the comparison
 * to report {@code true}.
 */
@SuppressWarnings("resource")
@Test
public void testDoubleExtractionMergedCodes() {
    ArrayList<InputDocument> docs = new ArrayList<InputDocument>();
    InputDocument mergedCodesDoc = new InputDocument(pathBase + "merged_codes.html", null);
    docs.add(mergedCodesDoc);

    RoundTripComparison comparison = new RoundTripComparison();
    HtmlFilter filter = new HtmlFilter();
    boolean identical = comparison.executeCompare(filter, docs, "UTF-8", EN, ESES, "out",
            segmentationStep,
            new PostSegmentationCodeSimplifierStep());
    assertTrue(identical);
}
@SuppressWarnings("resource")
@Test
public void testDoubleExtraction3() {
......
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.Arrays;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.XMLFileCompare;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.its.html5.HTML5Filter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for HTML5/ITS documents with code
 * simplification switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged result is compared with the original document. Failures are
 * gathered by an {@link ErrorCollector} so that one bad file does not hide
 * the results for the remaining files.
 */
public class RoundTripSimplifierHtmlItsTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifierHtmlItsTkitsIT.class);

    // Created and closed around each test; runTest itself selects the filter
    // by configuration name and does not hand this instance to RoundTripUtils.
    private HTML5Filter html5Filter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        html5Filter = new HTML5Filter();
    }

    @After
    public void tearDown() throws Exception {
        html5Filter.close();
    }

    /**
     * Round-trips the top-level HTML/ITS test files with the stock
     * {@code okf_itshtml5} configuration, then every file of each
     * sub-directory once per configuration file found in that sub-directory.
     */
    @Test
    public void itsHtmlFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/htmlIts/dummy.txt", Arrays.asList(".html", ".html5"))) {
            runTest(true, file, "okf_itshtml5", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/htmlIts/dummy.txt")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".html", ".html5"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged document differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        // Progress information only; this is not an error condition.
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        XMLFileCompare compare = new XMLFileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold) - confirm.
            assertTrue("Compare Lines: " + f, compare.compareFilesPerLines(tkitMerged, original));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.XMLFileCompare;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.xmlstream.XmlStreamFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for DITA documents with code simplification
 * switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged result is compared with the original document. Failures are
 * gathered by an {@link ErrorCollector} so that one bad file does not hide
 * the results for the remaining files.
 */
public class RoundTripSimplifyDitaTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifyDitaTkitsIT.class);

    // Created and closed around each test; runTest itself selects the filter
    // by configuration name and does not hand this instance to RoundTripUtils.
    private XmlStreamFilter xmlStreamFilter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        xmlStreamFilter = new XmlStreamFilter();
    }

    @After
    public void tearDown() throws Exception {
        xmlStreamFilter.close();
    }

    /**
     * Round-trips the {@code .xml} files returned for the
     * {@code /dita/dita2.xml} resource with the {@code okf_xmlstream-dita}
     * configuration.
     */
    @Test
    public void ditaSingleFile() throws FileNotFoundException, URISyntaxException {
        for (File file : IntegrationtestUtils.getTestFiles("/dita/dita2.xml", Arrays.asList(".xml"))) {
            runTest(true, file, "okf_xmlstream-dita", null);
        }
    }

    /**
     * Round-trips the top-level {@code .dita}/{@code .ditamap} files with the
     * {@code okf_xmlstream-dita} configuration, then every file of each
     * sub-directory once per configuration file found in that sub-directory.
     */
    @Test
    public void ditaFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/dita/cadminadvtops.dita", Arrays.asList(".dita", ".ditamap"))) {
            runTest(true, file, "okf_xmlstream-dita", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/dita/cadminadvtops.dita")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".dita", ".ditamap"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged document differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        // Progress information only; this is not an error condition.
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        XMLFileCompare compare = new XMLFileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold) - confirm.
            assertTrue("Compare Lines: " + f, compare.compareFilesPerLines(tkitMerged, original));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.FileCompare;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.html.HtmlFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for HTML documents with code simplification
 * switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged result is compared line by line (UTF-8) with the original
 * document. Failures are gathered by an {@link ErrorCollector} so that one
 * bad file does not hide the results for the remaining files.
 */
public class RoundTripSimplifyHtmlTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifyHtmlTkitsIT.class);

    // Created, configured and closed around each test; runTest itself selects
    // the filter by configuration name and does not hand this instance to
    // RoundTripUtils.
    private HtmlFilter htmlFilter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        htmlFilter = new HtmlFilter();
        htmlFilter.setParametersFromURL(HtmlFilter.class.getResource("nonwellformedConfiguration.yml"));
    }

    @After
    public void tearDown() throws Exception {
        htmlFilter.close();
    }

    /**
     * Round-trips the top-level HTML test files with the stock
     * {@code okf_html} configuration, then every file of each sub-directory
     * once per configuration file found in that sub-directory.
     */
    @Test
    public void htmlFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/html/324.html", Arrays.asList(".html", ".htm"))) {
            htmlFilter.setParametersFromURL(HtmlFilter.class.getResource("nonwellformedConfiguration.yml"));
            runTest(true, file, "okf_html", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/html/324.html")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".html", ".htm"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged document differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".simplify_xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        FileCompare compare = new FileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold, encoding) - confirm.
            assertTrue("Compare Lines: " + f,
                    compare.compareFilesPerLines(tkitMerged, original, StandardCharsets.UTF_8.name()));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.Arrays;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.XMLFileCompare;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.icml.ICMLFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for ICML/WCML documents with code
 * simplification switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged result is compared with the original document. Failures are
 * gathered by an {@link ErrorCollector} so that one bad file does not hide
 * the results for the remaining files.
 */
public class RoundTripSimplifyIcmlTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifyIcmlTkitsIT.class);

    // Created and closed around each test; runTest itself selects the filter
    // by configuration name and does not hand this instance to RoundTripUtils.
    private ICMLFilter icmlFilter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        icmlFilter = new ICMLFilter();
    }

    @After
    public void tearDown() throws Exception {
        icmlFilter.close();
    }

    /**
     * Round-trips the top-level ICML/WCML test files with the stock
     * {@code okf_icml} configuration, then every file of each sub-directory
     * once per configuration file found in that sub-directory.
     */
    @Test
    public void icmlTkitTestFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/icml/valid.icml", Arrays.asList(".icml", ".wcml"))) {
            runTest(true, file, "okf_icml", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/icml/valid.icml")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".icml", ".wcml"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged document differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        XMLFileCompare compare = new XMLFileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold) - confirm.
            assertTrue("Compare Lines: " + f, compare.compareFilesPerLines(tkitMerged, original));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.Arrays;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.ZipXMLFileCompare;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.idml.IDMLFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for IDML documents with code simplification
 * switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged package is compared with the original one using
 * {@link ZipXMLFileCompare}. Failures are gathered by an
 * {@link ErrorCollector} so that one bad file does not hide the results for
 * the remaining files.
 */
public class RoundTripSimplifyIdmlTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifyIdmlTkitsIT.class);

    // Created and closed around each test; runTest itself selects the filter
    // by configuration name and does not hand this instance to RoundTripUtils.
    private IDMLFilter idmlFilter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        idmlFilter = new IDMLFilter();
    }

    @After
    public void tearDown() throws Exception {
        idmlFilter.close();
    }

    /**
     * Round-trips the top-level IDML test files with the stock
     * {@code okf_idml} configuration, then every file of each sub-directory
     * once per configuration file found in that sub-directory.
     */
    @Test
    public void idmlFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/idml/Test00.idml", Arrays.asList(".idml"))) {
            runTest(true, file, "okf_idml", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/idml/Test00.idml")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".idml"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged package differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".simplify_xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        ZipXMLFileCompare compare = new ZipXMLFileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold) - confirm.
            assertTrue("Compare Lines: " + f, compare.compareFiles(tkitMerged, original));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import net.sf.okapi.common.ClassUtil;
import net.sf.okapi.common.FileCompare;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.integration.IntegrationtestUtils;
import net.sf.okapi.common.integration.RoundTripUtils;
import net.sf.okapi.filters.json.JSONFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Round-trip integration test for JSON documents with code simplification
 * switched on.
 * <p>
 * Every test file is extracted through {@link RoundTripUtils#extract} with
 * the simplify flag set, merged back through {@link RoundTripUtils#merge},
 * and the merged result is compared line by line (UTF-8) with the original
 * document. Failures are gathered by an {@link ErrorCollector} so that one
 * bad file does not hide the results for the remaining files.
 */
public class RoundTripSimplifyJsonTkitsIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(RoundTripSimplifyJsonTkitsIT.class);

    // Created and closed around each test; runTest itself selects the filter
    // by configuration name and does not hand this instance to RoundTripUtils.
    private JSONFilter jsonFilter;

    @Rule
    public ErrorCollector errCol = new ErrorCollector();

    @Before
    public void setUp() throws Exception {
        jsonFilter = new JSONFilter();
    }

    @After
    public void tearDown() throws Exception {
        jsonFilter.close();
    }

    /**
     * Round-trips the top-level JSON test files with the stock
     * {@code okf_json} configuration, then every file of each sub-directory
     * once per configuration file found in that sub-directory.
     */
    @Test
    public void jsonFiles() throws FileNotFoundException, URISyntaxException {
        // run top level files (without config)
        for (File file : IntegrationtestUtils.getTestFiles("/json/test01.json", Arrays.asList(".json"))) {
            runTest(true, file, "okf_json", null);
        }

        // run each subdirectory (where we assume there is a custom config)
        for (File d : IntegrationtestUtils.getSubDirs(ClassUtil.getResourceParent(IntegrationtestUtils.class, "/json/test01.json")))
        {
            for (File c : IntegrationtestUtils.getConfigFile(d.getPath()))
            {
                for (File file : IntegrationtestUtils.getTestFiles(d.getPath(), Arrays.asList(".json"), true))
                {
                    String configName = Util.getFilename(c.getAbsolutePath(), false);
                    String customConfigPath = c.getParent();
                    runTest(true, file, configName, customConfigPath);
                }
            }
        }
    }

    /**
     * Extracts one file with simplification, merges it back and records an
     * error if the merged document differs from the original.
     *
     * @param segment          forwarded to {@link RoundTripUtils#extract} as its segment flag
     * @param file             document to round-trip
     * @param configName       filter configuration identifier
     * @param customConfigPath directory holding a custom configuration, or {@code null}
     */
    private void runTest(boolean segment, File file, String configName, String customConfigPath)
            throws FileNotFoundException, URISyntaxException {
        String f = file.getName();
        LOGGER.info(f);
        String root = file.getParent() + File.separator;
        String xliff = root + f + ".xliff";
        String original = root + f;
        String tkitMerged = root + f + ".tkitMerged";

        RoundTripUtils.extract(LocaleId.ENGLISH, LocaleId.FRENCH, original, xliff, configName, customConfigPath, segment, true);
        RoundTripUtils.merge(LocaleId.ENGLISH, LocaleId.FRENCH, false, original, xliff, tkitMerged, configName, customConfigPath);

        FileCompare compare = new FileCompare();
        try {
            // Compare the merged output with the original input; comparing the
            // merged file with itself could never reveal a broken round trip.
            // NOTE(review): argument order assumed to be (output, gold, encoding) - confirm.
            assertTrue("Compare Lines: " + f,
                    compare.compareFilesPerLines(tkitMerged, original, StandardCharsets.UTF_8.name()));
        } catch (Throwable e) {
            // Throwable on purpose: AssertionError must be collected, not thrown,
            // so the remaining files are still exercised.
            errCol.addError(e);
        }
    }
}
package net.sf.okapi.simplifier.integration;
import static org.junit.Assert.assertTrue;