Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Switch to GitLab Next
Sign in / Register
Toggle navigation
Menu
Open sidebar
Inkscape
inkscape
Commits
88f9ed77
Commit
88f9ed77
authored
Dec 24, 2018
by
Thomas Holder
Browse files
rebase_hrefs: store IRI (UTF-8), not URI (ASCII)
parent
bc1972f0
Pipeline
#41202050
passed with stages
in 56 minutes and 24 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/object/uri.cpp
View file @
88f9ed77
...
...
@@ -365,6 +365,85 @@ bool URI::hasScheme(const char *scheme) const
return
s
&&
g_ascii_strcasecmp
(
s
,
scheme
)
==
0
;
}
/**
 * Decode one "%XX" percent-escape located at the very start of \c s.
 *
 * @param s NUL-terminated string to inspect
 * @return the byte value spelled by the two hex digits, or 0 when \c s does
 *         not begin with '%' followed by two hexadecimal digits (note that
 *         a literal "%00" therefore also yields 0)
 */
static int uri_unescape_triplet(const char *s)
{
    if (s[0] != '%') {
        return 0;
    }

    // The high digit is tested first: a NUL terminator is not a hex digit,
    // so s[2] is only ever read when s[1] is a real character.
    int const high_nibble = g_ascii_xdigit_value(s[1]);
    if (high_nibble == -1) {
        return 0;
    }

    int const low_nibble = g_ascii_xdigit_value(s[2]);
    if (low_nibble == -1) {
        return 0;
    }

    return (high_nibble << 4) | low_nibble;
}
/**
 * If \c s starts with a percent-escaped, well-formed multi-byte UTF-8
 * sequence, unescape that one code point and store it NUL-terminated in
 * \c out. Otherwise leave \c out untouched and return 0.
 *
 * Only multi-byte sequences (2 to 4 bytes) are handled; escaped ASCII is never
 * decoded here. Sequences that are structurally UTF-8 but not well-formed are
 * rejected as well, so the caller never emits invalid UTF-8:
 *  - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 0x80..0x9F, 0xF0 0x80..0x8F)
 *  - UTF-16 surrogates U+D800..U+DFFF (0xED 0xA0..0xBF)
 *  - values above U+10FFFF (0xF4 0x90..0xBF, lead bytes 0xF5..0xF7)
 *
 * @param[in] s percent-escaped string, NUL-terminated
 * @param[out] out out-buffer, must have at least size 5
 * @return number of bytes read from \c s (6, 9 or 12), or 0 if nothing was
 *         decoded
 */
static int uri_unescape_utf8_codepoint(const char *s, char *out)
{
    int const lead = uri_unescape_triplet(s);
    int n = 0;

    if ((lead >> 5) == /* 0b110 */ 0x6) {
        // 110xxxxx 10xxxxxx
        n = 2;
    } else if ((lead >> 4) == /* 0b1110 */ 0xE) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        n = 3;
    } else if ((lead >> 3) == /* 0b11110 */ 0x1E) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        n = 4;
    } else {
        return 0;
    }

    // 0xC0 and 0xC1 can only start overlong two-byte forms, 0xF5..0xF7 can
    // only start code points beyond U+10FFFF.
    if (lead < 0xC2 || lead > 0xF4) {
        return 0;
    }

    // Decode into a scratch buffer so that `out` stays untouched on failure.
    char decoded[4];
    decoded[0] = static_cast<char>(lead);

    for (int i = 1; i < n; ++i) {
        // Safe to look at s + i * 3: every previously accepted triplet
        // consisted of three non-NUL characters, so this is at worst the
        // terminator, for which uri_unescape_triplet() returns 0.
        int const cont = uri_unescape_triplet(s + (i * 3));

        if ((cont >> 6) != /* 0b10 */ 0x2) {
            return 0;
        }

        // For a few lead bytes the first continuation byte has a narrower
        // valid range than the generic 0x80..0xBF.
        if (i == 1) {
            bool const overlong3 = (lead == 0xE0 && cont < 0xA0);
            bool const surrogate = (lead == 0xED && cont > 0x9F);
            bool const overlong4 = (lead == 0xF0 && cont < 0x90);
            bool const too_large = (lead == 0xF4 && cont > 0x8F);

            if (overlong3 || surrogate || overlong4 || too_large) {
                return 0;
            }
        }

        decoded[i] = static_cast<char>(cont);
    }

    for (int i = 0; i < n; ++i) {
        out[i] = decoded[i];
    }
    out[n] = 0;

    return n * 3;
}
/**
 * Build the IRI form of \c uri by replacing every percent-escaped multi-byte
 * UTF-8 sequence with the raw bytes it encodes. All other characters,
 * including any remaining percent-escapes, are copied through verbatim.
 *
 * @param uri NUL-terminated URI or URI fragment
 * @return the converted string
 */
std::string uri_to_iri(const char *uri)
{
    std::string result;
    char codepoint[5]; // up to 4 UTF-8 bytes plus the terminator

    const char *cursor = uri;
    while (*cursor != '\0') {
        int const consumed = uri_unescape_utf8_codepoint(cursor, codepoint);

        if (consumed == 0) {
            // Not an escaped UTF-8 sequence: keep this character as it is.
            result.push_back(*cursor);
            ++cursor;
            continue;
        }

        result += codepoint;
        cursor += consumed;
    }

    return result;
}
}
// namespace Inkscape
...
...
src/object/uri.h
View file @
88f9ed77
...
...
@@ -189,6 +189,17 @@ private:
xmlURI
*
_xmlURIPtr
()
const
{
return
m_shared
.
get
();
}
};
/**
 * Unescape the UTF-8 parts of the given URI.
 *
 * Percent-escaped multi-byte UTF-8 sequences (e.g. "%C3%96") are replaced by
 * the raw bytes they encode; every other character of the input, including
 * percent-escapes that are not part of such a sequence, is copied through
 * unchanged.
 *
 * Does not decode non-UTF-8 escape sequences (e.g. reserved ASCII characters).
 * Does not do any IDN (internationalized domain name) decoding.
 *
 * @param uri URI or part of a URI, as a NUL-terminated string
 * @return IRI equivalent of \c uri
 */
std
::
string
uri_to_iri
(
const
char
*
uri
);
}
/* namespace Inkscape */
#endif
...
...
src/xml/rebase-hrefs.cpp
View file @
88f9ed77
...
...
@@ -201,6 +201,8 @@ void Inkscape::XML::rebase_hrefs(SPDocument *const doc, gchar const *const new_b
}
auto
href_str
=
url
.
str
(
new_base_url_str
.
c_str
());
href_str
=
Inkscape
::
uri_to_iri
(
href_str
.
c_str
());
ir
->
setAttribute
(
"xlink:href"
,
href_str
);
}
...
...
testfiles/src/uri-test.cpp
View file @
88f9ed77
...
...
@@ -282,6 +282,16 @@ TEST(UriTest, from_native_filename)
#endif
}
/**
 * Checks Inkscape::uri_to_iri(): escaped multi-byte UTF-8 is decoded, while
 * escaped ASCII and anything that is not a complete escaped UTF-8 sequence is
 * passed through unchanged.
 */
TEST(UriTest, uri_to_iri)
{
    // unescape UTF-8 (U+00D6)
    ASSERT_EQ(Inkscape::uri_to_iri("data:,umlaut-%C3%96"), "data:,umlaut-\xC3\x96");

    // don't unescape ASCII (U+003A)
    ASSERT_EQ(Inkscape::uri_to_iri("foo%3Abar"), "foo%3Abar");

    // sequence (U+00D6 U+1F37A U+003A)
    ASSERT_EQ(Inkscape::uri_to_iri("%C3%96%F0%9F%8D%BA%3A"), "\xC3\x96\xF0\x9F\x8D\xBA%3A");

    // empty input stays empty
    ASSERT_EQ(Inkscape::uri_to_iri(""), "");

    // truncated sequences (string ends before all continuation bytes)
    ASSERT_EQ(Inkscape::uri_to_iri("%C3"), "%C3");
    ASSERT_EQ(Inkscape::uri_to_iri("%F0%9F%8D"), "%F0%9F%8D");

    // lead byte followed by something that is not a continuation byte
    ASSERT_EQ(Inkscape::uri_to_iri("%C3%28"), "%C3%28");

    // continuation byte without a lead byte
    ASSERT_EQ(Inkscape::uri_to_iri("%96"), "%96");

    // second triplet is not valid hex
    ASSERT_EQ(Inkscape::uri_to_iri("%C3%9G"), "%C3%9G");
}
/*
Local Variables:
mode:c++
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment