# Build the cumulative-frequency table for a symbol => count hash.
#
# Symbols are visited in sorted key order; each one is mapped to the sum
# of the counts of all symbols that sort before it (so the first symbol
# always maps to 0).
#
# Returns a hash of symbol => cumulative count.
sub cumulative_freq(%freq) {
    my $running = 0;
    my %cf = %freq.keys.sort.map(-> $symbol {
        # Remember where this symbol's range starts, then advance the total.
        my $start = $running;
        $running += %freq{$symbol};
        $symbol => $start;
    });
    return %cf;
}
# Encode $str as one big integer, using the string's own character
# frequencies as a mixed radix.
#
# Parameters:
#   $str   - text to encode; split into characters with .comb
#   $radix - base in which the result is expressed as
#            $enc * $radix ** $pow; must be at least 2
#
# Returns the list ($enc, $pow, %freq): the shortened code value, the
# exponent of $radix that was divided out of it, and the per-character
# counts (which arithmethic_decoding takes as its last argument).
#
# Dies when $radix is smaller than 2.
sub arithmethic_coding($str, $radix) {
    # With a radix of 1 the frequency product can never be divided down
    # to zero (the exponent search would spin forever), and a radix of 0
    # cannot be divided by at all, so reject such values up front.
    die "arithmethic_coding: radix must be >= 2, got $radix"
        if $radix < 2;

    my @chars = $str.comb;

    # Count how often each character occurs.
    my %freq;
    %freq{$_}++ for @chars;

    my %cf   = cumulative_freq(%freq);
    my $base = @chars.elems;

    # $lower accumulates the lower bound of the code interval;
    # $pf is the running product of the frequencies seen so far.
    my $lower = 0;
    my $pf    = 1;
    for @chars -> $c {
        $lower = $lower * $base + %cf{$c} * $pf;
        $pf   *= %freq{$c};
    }
    my $upper = $lower + $pf;

    # Count how many times $radix can be divided out of the interval
    # width while leaving a non-zero quotient. Work on a copy so that
    # $pf keeps its meaning.
    my $pow   = 0;
    my $scale = $pf div $radix;
    while $scale > 0 {
        ++$pow;
        $scale div= $radix;
    }

    # Take the top of the interval and drop its $pow low-order digits.
    my $enc = ($upper - 1) div ($radix ** $pow);
    return ($enc, $pow, %freq);
}
# Rebuild the original text from the values produced by
# arithmethic_coding.
#
# Parameters:
#   $encoding - the shortened code value
#   $radix    - base that was used when shortening the code
#   $pow      - exponent of $radix to multiply back in
#   %freq     - character => occurrence count for the original text
#
# Returns the decoded string; its length is the sum of the counts in
# %freq.
sub arithmethic_decoding($encoding, $radix, $pow, %freq) {
    my $value  = $encoding * $radix ** $pow;
    my $length = %freq.values.sum;
    my %cf     = cumulative_freq(%freq);

    # Invert the cumulative table: starting slot => symbol.
    my %symbol_at;
    %symbol_at{.value} = .key for %cf.pairs;

    # Fill every slot between two starting slots with the symbol that
    # owns the preceding start, so any slot number resolves to a symbol.
    my $owner;
    for ^$length -> $slot {
        if %symbol_at{$slot}:exists {
            $owner = %symbol_at{$slot};
        }
        elsif $owner.defined {
            %symbol_at{$slot} = $owner;
        }
    }

    # Peel symbols off from the most significant position downwards.
    my @symbols;
    for (^$length).reverse -> $exponent {
        my $place  = $length ** $exponent;
        my $symbol = %symbol_at{$value div $place};
        $value = ($value - $place * %cf{$symbol}) div %freq{$symbol};
        @symbols.push: $symbol;
    }
    return @symbols.join;
}
# Demo driver: encode each sample word in base $radix, decode it again,
# print the encoding, and abort if the round trip does not reproduce the
# input.
my $radix = 10;
for <DABDDB DABDDBBDDBA ABRACADABRA TOBEORNOTTOBEORTOBEORNOT> -> $text {
    my ($code, $exponent, %counts) = arithmethic_coding($text, $radix);
    my $roundtrip = arithmethic_decoding($code, $radix, $exponent, %counts);
    print sprintf("%-25s=> %19s * %d^%s\n", $text, $code, $radix, $exponent);
    die "\tHowever that is incorrect!" unless $text eq $roundtrip;
}
Output:
DABDDB                   =>                 251 * 10^2
DABDDBBDDBA              =>              167351 * 10^6
ABRACADABRA              =>             7954170 * 10^4
TOBEORNOTTOBEORTOBEORNOT => 1150764267498783364 * 10^15