Commit 88f9ed77 authored by Thomas Holder

rebase_hrefs: store IRI (UTF-8), not URI (ASCII)

parent bc1972f0
Pipeline #41202050 passed with stages
in 56 minutes and 24 seconds
...@@ -365,6 +365,85 @@ bool URI::hasScheme(const char *scheme) const ...@@ -365,6 +365,85 @@ bool URI::hasScheme(const char *scheme) const
return s && g_ascii_strcasecmp(s, scheme) == 0; return s && g_ascii_strcasecmp(s, scheme) == 0;
} }
/**
 * Decode a single leading "%XX" percent-escape.
 *
 * @param s string to inspect; characters are examined left to right and
 *          inspection stops at the first one that does not fit the pattern
 * @return the byte value (0x00..0xFF) encoded by the triplet, or 0 if \c s
 *         does not start with '%' followed by two hexadecimal digits. Note
 *         that "%00" is therefore indistinguishable from "no triplet".
 */
static int uri_unescape_triplet(const char *s)
{
    if (s[0] != '%') {
        return 0;
    }

    // g_ascii_xdigit_value() yields -1 for anything that is not a hex digit.
    int const high_nibble = g_ascii_xdigit_value(s[1]);
    if (high_nibble == -1) {
        return 0;
    }

    int const low_nibble = g_ascii_xdigit_value(s[2]);
    if (low_nibble == -1) {
        return 0;
    }

    return (high_nibble << 4) | low_nibble;
}
/**
 * If \c s starts with a percent-escaped, well-formed multi-byte UTF-8
 * sequence (two to four bytes), unescape that one code point and store it,
 * NUL-terminated, in \c out.
 *
 * Only well-formed UTF-8 is accepted. The following are rejected, so that the
 * caller never emits invalid UTF-8:
 *  - overlong encodings (e.g. "%C0%80", "%E0%80%80", "%F0%80%80%80"),
 *  - UTF-16 surrogates U+D800..U+DFFF (e.g. "%ED%A0%80"),
 *  - values above U+10FFFF (e.g. "%F5%80%80%80").
 *
 * Escaped ASCII bytes ("%00".."%7F") are never decoded by this function.
 *
 * On rejection \c out is left untouched and 0 is returned.
 *
 * @param[in] s percent-escaped string
 * @param[out] out out-buffer, must have at least size 5
 * @return number of bytes read from \c s (6, 9 or 12), or 0 if \c s doesn't
 *         start with an escaped UTF-8 sequence
 */
static int uri_unescape_utf8_codepoint(const char *s, char *out)
{
    int const lead = uri_unescape_triplet(s);

    int n = 0;                  // sequence length in bytes
    unsigned codepoint = 0;     // payload bits accumulated so far
    unsigned min_codepoint = 0; // smallest value that needs n bytes

    if ((lead >> 5) == /* 0b110 */ 0x6) {
        // 110xxxxx 10xxxxxx
        n = 2;
        codepoint = static_cast<unsigned>(lead & 0x1F);
        min_codepoint = 0x80;
    } else if ((lead >> 4) == /* 0b1110 */ 0xE) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        n = 3;
        codepoint = static_cast<unsigned>(lead & 0x0F);
        min_codepoint = 0x800;
    } else if ((lead >> 3) == /* 0b11110 */ 0x1E) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        n = 4;
        codepoint = static_cast<unsigned>(lead & 0x07);
        min_codepoint = 0x10000;
    } else {
        return 0;
    }

    // Decode into a scratch buffer so that `out` is only written on success.
    char decoded[4];
    decoded[0] = static_cast<char>(lead);

    for (int i = 1; i < n; ++i) {
        int const continuation = uri_unescape_triplet(s + (i * 3));

        if ((continuation >> 6) != /* 0b10 */ 0x2) {
            return 0;
        }

        decoded[i] = static_cast<char>(continuation);
        codepoint = (codepoint << 6) | static_cast<unsigned>(continuation & 0x3F);
    }

    // Matching bit patterns are not enough: reject overlong forms, surrogates
    // and values outside the Unicode range.
    bool const is_overlong = codepoint < min_codepoint;
    bool const is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
    bool const is_out_of_range = codepoint > 0x10FFFF;

    if (is_overlong || is_surrogate || is_out_of_range) {
        return 0;
    }

    for (int i = 0; i < n; ++i) {
        out[i] = decoded[i];
    }
    out[n] = 0;

    return n * 3;
}
/**
 * Build the IRI form of \c uri by replacing percent-escaped multi-byte UTF-8
 * sequences with their raw bytes. Every other character, including escapes
 * of ASCII characters, is copied through unchanged.
 *
 * @param uri NUL-terminated URI or part of a URI
 * @return IRI equivalent of \c uri
 */
std::string uri_to_iri(const char *uri)
{
    std::string result;

    // One UTF-8 code point (at most 4 bytes) plus the terminating NUL.
    char codepoint[5];

    const char *cursor = uri;
    while (*cursor != '\0') {
        int const consumed = uri_unescape_utf8_codepoint(cursor, codepoint);

        if (consumed == 0) {
            // Not an escaped UTF-8 sequence: copy the character verbatim.
            result.push_back(*cursor);
            ++cursor;
            continue;
        }

        result.append(codepoint);
        cursor += consumed;
    }

    return result;
}
} // namespace Inkscape } // namespace Inkscape
......
...@@ -189,6 +189,17 @@ private: ...@@ -189,6 +189,17 @@ private:
xmlURI *_xmlURIPtr() const { return m_shared.get(); } xmlURI *_xmlURIPtr() const { return m_shared.get(); }
}; };
/**
 * Unescape the UTF-8 parts of the given URI.
 *
 * Percent-escaped sequences that encode a multi-byte (two to four byte) UTF-8
 * code point are replaced by the raw bytes, e.g. "%C3%96" becomes U+00D6.
 * All other characters are copied to the result unchanged.
 *
 * Does not decode non-UTF-8 escape sequences (e.g. reserved ASCII characters,
 * so "%3A" stays "%3A").
 * Does not do any IDN (internationalized domain name) decoding.
 *
 * @param uri URI or part of a URI; must be a valid NUL-terminated string
 *            (it is dereferenced without a null check)
 * @return IRI equivalent of \c uri
 */
std::string uri_to_iri(const char *uri);
} /* namespace Inkscape */ } /* namespace Inkscape */
#endif #endif
......
...@@ -201,6 +201,8 @@ void Inkscape::XML::rebase_hrefs(SPDocument *const doc, gchar const *const new_b ...@@ -201,6 +201,8 @@ void Inkscape::XML::rebase_hrefs(SPDocument *const doc, gchar const *const new_b
} }
auto href_str = url.str(new_base_url_str.c_str()); auto href_str = url.str(new_base_url_str.c_str());
href_str = Inkscape::uri_to_iri(href_str.c_str());
ir->setAttribute("xlink:href", href_str); ir->setAttribute("xlink:href", href_str);
} }
......
...@@ -282,6 +282,16 @@ TEST(UriTest, from_native_filename) ...@@ -282,6 +282,16 @@ TEST(UriTest, from_native_filename)
#endif #endif
} }
TEST(UriTest, uri_to_iri)
{
    // A two-byte UTF-8 escape (U+00D6) is turned into raw bytes.
    auto const umlaut = Inkscape::uri_to_iri("data:,umlaut-%C3%96");
    ASSERT_EQ(umlaut, "data:,umlaut-\xC3\x96");

    // An escaped ASCII character (U+003A) must stay escaped.
    auto const ascii_only = Inkscape::uri_to_iri("foo%3Abar");
    ASSERT_EQ(ascii_only, "foo%3Abar");

    // Mixed sequence (U+00D6 U+1F37A U+003A): only the non-ASCII code points
    // are unescaped.
    auto const mixed = Inkscape::uri_to_iri("%C3%96%F0%9F%8D%BA%3A");
    ASSERT_EQ(mixed, "\xC3\x96\xF0\x9F\x8D\xBA%3A");
}
/* /*
Local Variables: Local Variables:
mode:c++ mode:c++
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment