Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
See what's new at GitLab
4
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Switch to GitLab Next
Sign in / Register
Toggle navigation
O
odfhistory
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
2
Issues
2
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
2
Merge Requests
2
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
odfplugfest
odfhistory
Commits
9633550c
Commit
9633550c
authored
Oct 08, 2015
by
Jos van den Oever
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix bug in span merging and increased normalization of font information.
parent
0411eb82
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
120 additions
and
19 deletions
+120
-19
src/odf/OdfNormalizer.java
src/odf/OdfNormalizer.java
+88
-19
tests/006after.xml
tests/006after.xml
+16
-0
tests/006before.xml
tests/006before.xml
+16
-0
No files found.
src/odf/OdfNormalizer.java
View file @
9633550c
...
...
@@ -6,6 +6,7 @@ import java.security.NoSuchAlgorithmException;
import
java.util.Arrays
;
import
java.util.Collections
;
import
java.util.Comparator
;
import
java.util.HashMap
;
import
java.util.HashSet
;
import
java.util.Iterator
;
import
java.util.LinkedList
;
...
...
@@ -243,6 +244,77 @@ public class OdfNormalizer {
}
};
/**
 * Removes one attribute from {@code ref} when its value is identical to the
 * value of a (possibly differently named) attribute on {@code src}.
 *
 * Nothing happens when {@code ref} does not carry the attribute at all.
 * The value on {@code src} is read with {@code getAttributeNS}, which per
 * the DOM specification yields an empty string for a missing attribute, so
 * an empty-valued attribute on {@code ref} matches a missing one on
 * {@code src}.
 *
 * @param src     element providing the value to compare against; never modified
 * @param srcns   namespace URI of the attribute to read from {@code src}
 * @param srcname local name of the attribute to read from {@code src}
 * @param ref     element from which the attribute is removed on a match
 * @param refns   namespace URI of the attribute on {@code ref}
 * @param refname local name of the attribute on {@code ref}
 */
static private void removeIfEqual(Element src, String srcns, String srcname,
        Element ref, String refns, String refname) {
    Attr candidate = ref.getAttributeNodeNS(refns, refname);
    if (candidate == null) {
        // ref has no such attribute, so there is nothing to remove
        return;
    }
    String expected = src.getAttributeNS(srcns, srcname);
    if (candidate.getNodeValue().equals(expected)) {
        ref.removeAttributeNode(candidate);
    }
}
/**
 * Removes font attributes that only repeat what the referenced
 * {@code style:font-face} declaration already says.
 *
 * All {@code style:font-face} elements under
 * {@code /*}{@code /office:font-face-decls} are indexed by their
 * {@code style:name}. Then, for every {@code style:font-name},
 * {@code style:font-name-asian} and {@code style:font-name-complex}
 * attribute in the document, the element carrying that attribute has its
 * family, style-name, generic family, pitch and charset attributes removed
 * wherever they equal the corresponding attribute of the font face.
 * References to undeclared fonts are left untouched.
 *
 * @param doc document to normalize in place
 */
static private void removeUnneededFontAttributes(Document doc) {
    Map<String, Element> fonts = new HashMap<String, Element>();
    for (Element f : XPath.elementIterator(doc,
            "/*/office:font-face-decls/style:font-face")) {
        fonts.put(f.getAttributeNS(NC.style, "name"), f);
    }
    // The unsuffixed family attribute is fo:font-family; the -asian and
    // -complex variants live in the style namespace.
    removeFontAttributesForScript(doc, fonts, "", NC.fo);
    removeFontAttributesForScript(doc, fonts, "-asian", NC.style);
    removeFontAttributesForScript(doc, fonts, "-complex", NC.style);
}

/**
 * Handles one family of font attributes, selected by {@code suffix}
 * ({@code ""}, {@code "-asian"} or {@code "-complex"}).
 *
 * @param doc      document to search and modify
 * @param fonts    font-face elements keyed by their {@code style:name}
 * @param suffix   suffix appended to every attribute name on the referencing element
 * @param familyns namespace URI of the {@code font-family + suffix} attribute
 */
static private void removeFontAttributesForScript(Document doc,
        Map<String, Element> fonts, String suffix, String familyns) {
    XPathResult<Attr> names = XPath.attrIterator(doc,
            "//@style:font-name" + suffix);
    for (Attr name : names) {
        Element font = fonts.get(name.getNodeValue());
        if (font == null) {
            // reference to a font that is not declared: nothing to compare with
            continue;
        }
        Element style = name.getOwnerElement();
        removeIfEqual(font, NC.svg, "font-family",
                style, familyns, "font-family" + suffix);
        removeIfEqual(font, NC.svg, "font-style",
                style, NC.style, "font-style-name" + suffix);
        removeIfEqual(font, NC.style, "font-family-generic",
                style, NC.style, "font-family-generic" + suffix);
        removeIfEqual(font, NC.style, "font-pitch",
                style, NC.style, "font-pitch" + suffix);
        removeIfEqual(font, NC.style, "font-charset",
                style, NC.style, "font-charset" + suffix);
    }
}
static
private
void
removeUnusedListIdsAndReferences
(
Document
doc
)
{
XPathResult
<
Attr
>
ids
=
XPath
.
attrIterator
(
doc
,
"//text:list/@xml:id"
);
XPathResult
<
Attr
>
refs
=
XPath
.
attrIterator
(
doc
,
...
...
@@ -308,6 +380,8 @@ public class OdfNormalizer {
"//text:list/@text:continue-list[parent::text:list/text:list-item[1][@text:start-value='1']]"
);
removeUnusedListIdsAndReferences
(
doc
);
removeUnneededFontAttributes
(
doc
);
IdAndReferenceNormalizer
n
=
new
IdAndReferenceNormalizer
(
doc
);
// fonts
n
.
addIdsAndReferences
(
"F"
,
"//style:font-face/@style:name"
,
...
...
@@ -660,10 +734,10 @@ public class OdfNormalizer {
||
!
span
.
getLocalName
().
equals
(
e
.
getLocalName
()))
{
return
false
;
}
if
(!
span
.
getAttributeN
odeN
S
(
NC
.
text
,
"style-name"
).
equals
(
e
.
getAttributeN
odeN
S
(
NC
.
text
,
"style-name"
))
||
!
span
.
getAttributeN
odeN
S
(
NC
.
text
,
"class-names"
).
equals
(
e
.
getAttributeN
odeN
S
(
NC
.
text
,
"class-names"
)))
{
if
(!
span
.
getAttributeNS
(
NC
.
text
,
"style-name"
).
equals
(
e
.
getAttributeNS
(
NC
.
text
,
"style-name"
))
||
!
span
.
getAttributeNS
(
NC
.
text
,
"class-names"
).
equals
(
e
.
getAttributeNS
(
NC
.
text
,
"class-names"
)))
{
return
false
;
}
return
true
;
...
...
@@ -672,21 +746,16 @@ public class OdfNormalizer {
static
private
void
mergeSpans
(
Document
document
)
{
XPathResult
<
Element
>
spans
=
XPath
.
elementIterator
(
document
,
"//text:span"
);
List
<
Element
>
tomerge
=
new
LinkedList
<
Element
>();
for
(
Element
s
:
spans
)
{
Node
n
=
s
.
getNextSibling
();
if
(
compareSpans
(
s
,
n
))
{
tomerge
.
add
((
Element
)
n
);
}
}
for
(
int
i
=
tomerge
.
size
()
-
1
;
i
>=
0
;
i
--)
{
Element
e
=
tomerge
.
get
(
i
);
Node
p
=
e
.
getPreviousSibling
();
e
.
getParentNode
().
removeChild
(
e
);
Node
n
=
e
.
getFirstChild
();
while
(
n
!=
null
)
{
p
.
appendChild
(
n
);
n
=
e
.
getFirstChild
();
for
(
Element
span
:
spans
)
{
Node
n
=
span
.
getNextSibling
();
while
(
compareSpans
(
span
,
n
))
{
n
.
getParentNode
().
removeChild
(
n
);
Node
c
=
n
.
getFirstChild
();
while
(
c
!=
null
)
{
span
.
appendChild
(
c
);
c
=
n
.
getFirstChild
();
}
n
=
span
.
getNextSibling
();
}
}
}
...
...
tests/006after.xml
0 → 100644
View file @
9633550c
<?xml version="1.0" encoding="UTF-8"?>
<office:document-content
xmlns:fo=
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
xmlns:office=
"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:style=
"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
xmlns:text=
"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
office:version=
"1.2"
>
<office:automatic-styles>
<style:style
style:family=
"text"
style:name=
"T25116d"
>
<style:text-properties
fo:font-size=
"13pt"
/>
</style:style>
<style:style
style:family=
"text"
style:name=
"T3480c5"
>
<style:text-properties
fo:font-size=
"12pt"
/>
</style:style>
</office:automatic-styles>
<office:body>
<office:text>
<text:p>
A
<text:span
text:style-name=
"T3480c5"
>
BCD
</text:span><text:span
text:style-name=
"T25116d"
>
ABC
</text:span>
D
</text:p>
</office:text>
</office:body>
</office:document-content>
tests/006before.xml
0 → 100644
View file @
9633550c
<?xml version="1.0" encoding="UTF-8"?>
<office:document-content
xmlns:office=
"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:style=
"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
xmlns:text=
"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:fo=
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
office:version=
"1.2"
>
<office:automatic-styles>
<style:style
style:name=
"T1"
style:family=
"text"
>
<style:text-properties
fo:font-size=
"12pt"
/>
</style:style>
<style:style
style:name=
"T2"
style:family=
"text"
>
<style:text-properties
fo:font-size=
"13pt"
/>
</style:style>
</office:automatic-styles>
<office:body>
<office:text>
<text:p>
A
<text:span
text:style-name=
"T1"
>
B
</text:span><text:span
text:style-name=
"T1"
>
C
</text:span><text:span
text:style-name=
"T1"
>
D
</text:span><text:span
text:style-name=
"T2"
>
A
</text:span><text:span
text:style-name=
"T2"
>
B
</text:span><text:span
text:style-name=
"T2"
>
C
</text:span>
D
</text:p>
</office:text>
</office:body>
</office:document-content>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment