Commit 9633550c authored by Jos van den Oever's avatar Jos van den Oever

Fix a bug in span merging and increase the normalization of font information.

parent 0411eb82
......@@ -6,6 +6,7 @@ import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
......@@ -243,6 +244,77 @@ public class OdfNormalizer {
}
};
/**
 * Deletes one attribute from {@code ref} when its value duplicates the value
 * of an attribute on {@code src}.
 *
 * <p>Nothing happens when {@code ref} does not carry the attribute at all.
 * Note that the DOM API reports a missing attribute on {@code src} as the
 * empty string, so an attribute on {@code ref} whose value is empty is
 * treated as equal to an attribute that is absent from {@code src}.
 *
 * @param src     element supplying the value to compare against
 * @param srcns   namespace URI of the attribute read from {@code src}
 * @param srcname local name of the attribute read from {@code src}
 * @param ref     element from which the attribute may be removed
 * @param refns   namespace URI of the attribute inspected on {@code ref}
 * @param refname local name of the attribute inspected on {@code ref}
 */
static private void removeIfEqual(Element src, String srcns,
        String srcname, Element ref, String refns, String refname) {
    Attr candidate = ref.getAttributeNodeNS(refns, refname);
    if (candidate == null) {
        // ref has no such attribute, so there is nothing to remove.
        return;
    }
    String expected = src.getAttributeNS(srcns, srcname);
    if (candidate.getNodeValue().equals(expected)) {
        ref.removeAttributeNode(candidate);
    }
}
/**
 * Strips font attributes from elements when they merely repeat what the
 * referenced font face already declares.
 *
 * <p>All {@code style:font-face} elements under
 * {@code /*&#47;office:font-face-decls} are indexed by their
 * {@code style:name}. Then, for every {@code style:font-name},
 * {@code style:font-name-asian} and {@code style:font-name-complex}
 * attribute in the document, the owning element loses each font attribute
 * of the same script variant whose value equals the corresponding attribute
 * on the named font face. Font names that do not resolve to a declared font
 * face are skipped.
 *
 * @param doc the document to normalize in place
 */
static private void removeUnneededFontAttributes(Document doc) {
    Map<String, Element> fonts = new HashMap<String, Element>();
    for (Element f : XPath.elementIterator(doc,
            "/*/office:font-face-decls/style:font-face")) {
        fonts.put(f.getAttributeNS(NC.style, "name"), f);
    }
    // The western family attribute lives in the fo namespace; the asian and
    // complex variants live in the style namespace.
    removeFontAttributesMatchingFace(doc, fonts, "", NC.fo);
    removeFontAttributesMatchingFace(doc, fonts, "-asian", NC.style);
    removeFontAttributesMatchingFace(doc, fonts, "-complex", NC.style);
}

/**
 * Handles one script variant: for each {@code style:font-name<suffix>}
 * attribute, removes the variant's font attributes from the owning element
 * when they equal the values declared on the referenced font face.
 *
 * @param doc      the document to process in place
 * @param fonts    declared font faces keyed by their {@code style:name}
 * @param suffix   attribute-name suffix of the variant ({@code ""},
 *                 {@code "-asian"} or {@code "-complex"})
 * @param familyNs namespace URI of the variant's {@code font-family}
 *                 attribute on the owning element
 */
static private void removeFontAttributesMatchingFace(Document doc,
        Map<String, Element> fonts, String suffix, String familyNs) {
    for (Attr name : XPath.attrIterator(doc, "//@style:font-name" + suffix)) {
        Element font = fonts.get(name.getNodeValue());
        if (font == null) {
            // The name does not refer to a declared font face.
            continue;
        }
        Element style = name.getOwnerElement();
        removeIfEqual(font, NC.svg, "font-family", style, familyNs,
                "font-family" + suffix);
        removeIfEqual(font, NC.svg, "font-style", style, NC.style,
                "font-style-name" + suffix);
        removeIfEqual(font, NC.style, "font-family-generic", style,
                NC.style, "font-family-generic" + suffix);
        removeIfEqual(font, NC.style, "font-pitch", style, NC.style,
                "font-pitch" + suffix);
        removeIfEqual(font, NC.style, "font-charset", style, NC.style,
                "font-charset" + suffix);
    }
}
static private void removeUnusedListIdsAndReferences(Document doc) {
XPathResult<Attr> ids = XPath.attrIterator(doc, "//text:list/@xml:id");
XPathResult<Attr> refs = XPath.attrIterator(doc,
......@@ -308,6 +380,8 @@ public class OdfNormalizer {
"//text:list/@text:continue-list[parent::text:list/text:list-item[1][@text:start-value='1']]");
removeUnusedListIdsAndReferences(doc);
removeUnneededFontAttributes(doc);
IdAndReferenceNormalizer n = new IdAndReferenceNormalizer(doc);
// fonts
n.addIdsAndReferences("F", "//style:font-face/@style:name",
......@@ -660,10 +734,10 @@ public class OdfNormalizer {
|| !span.getLocalName().equals(e.getLocalName())) {
return false;
}
if (!span.getAttributeNodeNS(NC.text, "style-name").equals(
e.getAttributeNodeNS(NC.text, "style-name"))
|| !span.getAttributeNodeNS(NC.text, "class-names").equals(
e.getAttributeNodeNS(NC.text, "class-names"))) {
if (!span.getAttributeNS(NC.text, "style-name").equals(
e.getAttributeNS(NC.text, "style-name"))
|| !span.getAttributeNS(NC.text, "class-names").equals(
e.getAttributeNS(NC.text, "class-names"))) {
return false;
}
return true;
......@@ -672,21 +746,16 @@ public class OdfNormalizer {
static private void mergeSpans(Document document) {
XPathResult<Element> spans = XPath.elementIterator(document,
"//text:span");
List<Element> tomerge = new LinkedList<Element>();
for (Element s : spans) {
Node n = s.getNextSibling();
if (compareSpans(s, n)) {
tomerge.add((Element) n);
}
}
for (int i = tomerge.size() - 1; i >= 0; i--) {
Element e = tomerge.get(i);
Node p = e.getPreviousSibling();
e.getParentNode().removeChild(e);
Node n = e.getFirstChild();
while (n != null) {
p.appendChild(n);
n = e.getFirstChild();
for (Element span : spans) {
Node n = span.getNextSibling();
while (compareSpans(span, n)) {
n.getParentNode().removeChild(n);
Node c = n.getFirstChild();
while (c != null) {
span.appendChild(c);
c = n.getFirstChild();
}
n = span.getNextSibling();
}
}
}
......
<?xml version="1.0" encoding="UTF-8"?>
<office:document-content xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" office:version="1.2">
<office:automatic-styles>
<style:style style:family="text" style:name="T25116d">
<style:text-properties fo:font-size="13pt"/>
</style:style>
<style:style style:family="text" style:name="T3480c5">
<style:text-properties fo:font-size="12pt"/>
</style:style>
</office:automatic-styles>
<office:body>
<office:text>
<text:p>A<text:span text:style-name="T3480c5">BCD</text:span><text:span text:style-name="T25116d">ABC</text:span>D</text:p>
</office:text>
</office:body>
</office:document-content>
<?xml version="1.0" encoding="UTF-8"?>
<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" office:version="1.2">
<office:automatic-styles>
<style:style style:name="T1" style:family="text">
<style:text-properties fo:font-size="12pt"/>
</style:style>
<style:style style:name="T2" style:family="text">
<style:text-properties fo:font-size="13pt"/>
</style:style>
</office:automatic-styles>
<office:body>
<office:text>
<text:p>A<text:span text:style-name="T1">B</text:span><text:span text:style-name="T1">C</text:span><text:span text:style-name="T1">D</text:span><text:span text:style-name="T2">A</text:span><text:span text:style-name="T2">B</text:span><text:span text:style-name="T2">C</text:span>D</text:p>
</office:text>
</office:body>
</office:document-content>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment