Commit 84578946 authored by Simon Josefsson

Improve utc tests.

parent 6f424ab2
......@@ -25,6 +25,9 @@ Comments on the specification:
* Re U+200C, there are no Joining Type T code points that are
permitted in IDNA2008.
* IdnaTest.txt test vectors found two bugs in libidn2: a cut-and-paste
bug in the U+200C rule (L vs. R) and a bug in the handling of zero-length labels.
Example domains IDNA2003-valid but IDNA2008-lookup-invalid:
* example.∡↺⊂ (symbols)
......
This diff is collapsed.
......@@ -25,9 +25,9 @@ EXTRA_DIST = IdnaTest.c
TESTS_ENVIRONMENT = $(VALGRIND)
IdnaTest.txt: $(srcdir)/gen-utc-test.pl
IdnaTest.txt:
test -f IdnaTest.txt || \
wget http://www.unicode.org/Public/idna/6.0.1/IdnaTest.txt
IdnaTest.c: IdnaTest.txt
IdnaTest.c: IdnaTest.txt $(srcdir)/gen-utc-test.pl
$(srcdir)/gen-utc-test.pl < IdnaTest.txt > IdnaTest.c
......@@ -28,8 +28,7 @@ while (<>) {
m,^.*; (.*); (.*); (.*); (NV8)?,;
next if /\uD/;
next if /123456789/;
my $line = $_;
my ($source) = $1;
my ($ustr) = $2;
......@@ -39,21 +38,43 @@ while (<>) {
$ustr = $source if ($ustr eq "");
$astr = $ustr if ($astr eq "");
while ($ustr =~ /(.*)\\u([0-9A-f][0-9A-f][0-9A-f][0-9A-f])(.*)/) {
my $num = hex($2);
#printf "/* hex $2 num $num */";
my $str = unpack ("H*", pack("C0U*",$num));
my $escstr = "";
while ($str) {
$escstr .= "\\x" . substr ($str,0,2);
$str = substr ($str,2);
}
#printf "/* utf8 $escstr */\n";
$ustr = $1.'" "'.$escstr.'" "'.$3;
}
next if ($ustr eq $last);
print "/* $ctr source $source uni $ustr ace $astr nv8 $nv8 line $_ */\n";
print "/* $ctr source $source uni $ustr ace $astr nv8 $nv8 line $line */\n";
if ($ctr == 116) {
print "/* punt2 */\n";
if ($astr =~ /\\u/) {
print "/* IdnaTest.txt bug? */\n";
} elsif ($astr =~ /。/) {
print "/* IdnaTest.txt bug2? */\n";
} elsif ($ustr =~ /123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123.12345678901234567890123456789012345678901234567890123456789012/) {
print "/* IdnaTest.txt bug3? */\n";
} elsif ($ustr =~ /123456789012345678901234567890123456789012345678901234567890123.1234567890ä123456789012345678901234567890123456789012345.123456789012345678901234567890123456789012345678901234567890123.12345678901234567890123456789012345678901234567890123456789012/) {
print "/* IdnaTest.txt bug4? */\n";
} elsif ($ustr =~ /a..c/ || $ustr =~ /ä..c/) {
print "/* libidn2 bug? */\n";
} elsif ($nv8 eq "NV8") {
if ($ctr == 103 || $ctr == 93 || $ctr == 99) {
print "{ \"$ustr\", \"$astr\", IDN2_UNASSIGNED },\n";
} else {
print "{ \"$ustr\", \"$astr\", IDN2_DISALLOWED },\n";
}
print "{ \"$ustr\", \"$astr\", -1 },\n";
$ctr++;
} elsif (substr($astr, 0, 1) eq "[" && substr($ustr, 0, 1) ne "[") {
print "{ \"$ustr\", \"$astr\", -1 },\n";
$ctr++;
} elsif (substr($astr, 0, 1) eq "[") {
print "/* punt1 */\n";
print "/* punt1 $line */\n";
} else {
print "{ \"$ustr\", \"$astr\", IDN2_OK },\n";
$ctr++;
......
......@@ -772,7 +772,9 @@ main (void)
rc = idn2_lookup_u8 (idna[i].in, &out, idna[i].flags);
printf ("%3d %-25s %-40s %s\n", i, idn2_strerror_name (rc),
rc == IDN2_OK ? idna[i].out : "", idna[i].in);
if (rc != idna[i].rc)
if (rc != idna[i].rc && rc == IDN2_ENCODING_ERROR)
printf ("utc bug %d\n", i);
else if (rc != idna[i].rc && idna[i].rc != -1)
fail ("expected rc %d got rc %d\n", idna[i].rc, rc);
else if (rc == IDN2_OK && strcmp (out, idna[i].out) != 0)
fail ("expected: %s\ngot: %s\n", idna[i].out, out);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment