Commit 2e26603e authored by Konstantin Narkhov

Version 0.0.3

1. Perl6 implementation is added
parent b7c51816
......@@ -2,7 +2,10 @@
A port of the JavaScript LZW algorithm implementation to Perl 6.
## Contact
## Author
Please contact us via [feedback form](https://pheix.org/feedback.html) at pheix.org
Please contact me via [LinkedIn](https://www.linkedin.com/in/knarkhov/) or [Twitter](https://twitter.com/CondemnedCell). Your feedback is welcome at [narkhov.pro](https://narkhov.pro/contact-information.html).
## See also
[lzw_encoder.js](https://gist.github.com/revolunet/843889)
......@@ -5,6 +5,7 @@
src="https://code.jquery.com/jquery-3.4.1.min.js"
integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo="
crossorigin="anonymous"></script>
<script src="../js/lzw.js"></script>
</head>
<body>
<input type="text" id="input">&nbsp;<button onclick="javascript:gist()">Gist</button>
......@@ -33,5 +34,8 @@
$('#result_unuri').text(unescape(encodeURIComponent($('#input').val())) );
$('#result_deuriesc').text( decodeURIComponent(escape( unescape(encodeURIComponent($('#input').val())) )) );
}
var text = 'Привет, мир! Здравствуй, мир!';
//console.log(LZW.compress(text));
console.log(LZW.decompress(LZW.compress(text)));
</script>
</html>
......@@ -45,6 +45,7 @@ LZW = {
}
else {
phrase = dict['_'+currCode] ? dict['_'+currCode] : (oldPhrase + currChar);
//console.log(phrase);
}
out.push(phrase);
currChar = phrase.charAt(0);
......
unit class LZW::Revolunet;
has Int $!dictsize;
has Int $!dictsize = 57344;
method set_dictsize( Int $dsz ) returns Int {
$!dictsize = $dsz if $dsz;
......@@ -10,7 +10,7 @@ method get_dictsize returns Int {
$!dictsize;
}
method escape( Str $s ) returns Str {
method encode_utf8( Str $s ) returns Str {
my $_rs;
if ( $s ) {
$_rs = $s.encode.decode("windows-1252");
......@@ -18,10 +18,73 @@ method escape( Str $s ) returns Str {
$_rs;
}
method unescape( Str $s ) returns Str {
# Counterpart of encode_utf8: re-encodes the characters of $s as
# windows-1252 bytes and decodes those bytes with Buf.decode's default
# encoding.
#
# $s - string to convert.
# Returns the converted string; an empty or undefined $s yields q{}.
method decode_utf8( Str $s ) returns Str {
    # Previously an empty input fell through with an unassigned variable,
    # which does not satisfy the declared Str return type.
    return q{} unless $s;

    $s.encode("windows-1252").decode;
}
# LZW-compresses $s.
#
# The result is a string holding one character per emitted code:
#   * a code below $!dictsize is a literal input codepoint;
#   * a code at or above $!dictsize refers to a dictionary phrase; such
#     codes are handed out sequentially starting at $!dictsize.
#
# NOTE(review): an input codepoint >= $!dictsize cannot be told apart from
# a dictionary code in the output - presumably callers feed this method
# the result of encode_utf8; confirm.
#
# $s - text to compress.
# Returns the compressed string; an empty or undefined $s yields q{}.
method compress( Str $s ) returns Str {
    # Empty input used to leave the current phrase undefined.
    return q{} unless $s;

    # Work on codepoints rather than graphemes: a multi-codepoint grapheme
    # such as "\r\n" would otherwise be emitted as its first codepoint only.
    my @codepoints = $s.ords;

    my %dict;                         # "cp,cp,..." phrase key => code
    my @out;                          # emitted codes, as integers
    my $code = $!dictsize;            # next free dictionary code
    my $key  = ~@codepoints[0];       # dictionary key of current phrase
    my $emit = @codepoints[0];        # code that stands for current phrase

    for @codepoints[1..*] -> $cp {
        my $candidate = $key ~ ',' ~ $cp;
        if %dict{$candidate}:exists {
            # Known phrase: keep extending it.
            $key  = $candidate;
            $emit = %dict{$candidate};
        }
        else {
            # Unknown phrase: emit what we have, register the extension
            # and restart from the current codepoint.
            @out.push($emit);
            %dict{$candidate} = $code++;
            $key  = ~$cp;
            $emit = $cp;
        }
    }
    @out.push($emit);

    @out.map(*.chr).join(q{});
}
# Reverses compress: rebuilds the original text from a string of LZW codes.
#
# Every codepoint of $s is read as one code. Codes below $!dictsize are
# literal codepoints; codes at or above it are looked up in a dictionary
# that is rebuilt on the fly, numbering entries from $!dictsize upwards.
#
# $s - compressed string.
# Returns the decompressed string; an empty or undefined $s yields q{}.
method decompress( Str $s ) returns Str {
    # Empty input used to push an undefined value into the output.
    return q{} unless $s;

    # Read codes per codepoint, not per grapheme: adjacent literal codes
    # such as CR + LF form a single grapheme and would be read as one code,
    # putting the dictionary numbering out of step with the compressor.
    my @codes = $s.ords;

    my %dict;                      # code => Array of codepoints
    my $code  = $!dictsize;        # next dictionary code to assign
    my $first = @codes[0];         # first codepoint of the latest phrase
    my @old   = $first;            # previously decoded phrase
    my @out   = $first;            # decoded codepoints

    for @codes[1..*] -> $currCode {
        my @phrase;
        if $currCode < $!dictsize {
            @phrase = $currCode;
        }
        elsif %dict{$currCode}:exists {
            @phrase = %dict{$currCode}.list;
        }
        else {
            # Code not registered yet: it can only be the previous phrase
            # followed by that phrase's own first codepoint.
            @phrase = |@old, $first;
        }

        @out.append(@phrase);
        $first = @phrase[0];
        %dict{$code++} = [ |@old, $first ];
        @old = @phrase;
    }

    @out.map(*.chr).join(q{});
}
......@@ -6,14 +6,37 @@ plan 1;
use LZW::Revolunet;
my $phrase = (
'Тексты - это не энциклопедические и не лингвистические ' ~
' системы. Тексты сужают бесконечные или неопределенные ' ~
'возможности систем и создают закрытый универсум. Системы ' ~
'редельны, но бесконечны. Тексты - предельны и конечны, хотя ' ~
'интерпретаций может быть очень много. Источник - Умберто Эко ' ~
'От интернета к Гуттенбергу: текст и гипертекст - URL: ' ~
'http://kiev.philosophy.ru/library/eco/internet.html Точность ' ~
'цитирования - почти дословно'
) x 255;
subtest {
plan 2;
plan 5;
my $obj = LZW::Revolunet.new;
my $wrd = 'привет, мир!';
my $dsz = 255;
my $dsz = 97000;
$obj.set_dictsize( $dsz );
is( $obj.get_dictsize, $dsz, 'set/get_dictsize' );
is( $obj.unescape($obj.escape($wrd)), $wrd, 'escape/unescape' );
is(
$obj.decode_utf8($obj.encode_utf8($phrase)),
$phrase,
'escape/unescape'
);
ok( $obj.compress($phrase), 'compress');
ok( $obj.decompress($obj.compress($phrase)), 'decompress');
my $cmp = $obj.compress( $obj.encode_utf8($phrase) );
is(
$obj.decode_utf8( $obj.decompress( $cmp ) ),
$phrase,
'compress and decompress, ' ~
$cmp.chars ~ '/' ~ $phrase.chars ~ ' bytes'
);
}, 'subtest 1';
done-testing;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment