#!/usr/bin/perl -w use strict; use constant SEPARATOR => 'Z'; sub replace_all($$$) { my ($text, $key, $replacement) = @_; for(my $i = 0; ($i = index($text, $key, $i)) >= 0;) { $text = substr($text, 0, $i) . $replacement . substr($text, $i + length($key)); } return $text; } my $text = join '', <>; length($text) >= 3 or die "Input too small: " . length($text) . "\n"; if( index($text, '$') >= 0 ) { die "Input must not contain \$\n"; } if( index($text, SEPARATOR) >= 0 ) { die "Input must not contain " . SEPARATOR . "\n"; } my @replacements = (); my $replacement_overhead = 0; for(;;) { # Count all substring sequences until there are no more duplicates to # be found. my %count = (); for(my $x = 3;; $x++) { my $found_duplicates = 0; for(my $i = 0; $i < length($text) - $x; $i++) { my $k = substr($text, $i, $x); # Remove substrings that were inserted by previous round of # replacements. if( $k =~ /[A-F][A-F0-9]/ || ($i > 0 && substr($text, $i - 1, 2) =~ /[A-F][A-F0-9]/) || substr($text, $i + $x - 1, 2) =~ /[A-F][A-F0-9]/ ) { next; } # If the prefix of current string wasn't a duplicate, then full # string won't be a duplicate either. if( $x > 3 && (!(exists $count{substr($k, 0, $x - 1)}) || $count{substr($k, 0, $x - 1)} < 2) ) { next; } if( exists $count{$k} ) { $found_duplicates = 1; $count{$k}++; } else { $count{$k} = 1; } } last unless $found_duplicates; } # Remove non-duplicates. my @unique = (); foreach my $k (keys %count) { if( $count{$k} <= 1 ) { push @unique, $k; } } delete $count{$_} foreach @unique; # Try each replacement and keep the one that provided the best # value. We can't quite do this in a single greedy pass because # replacements might overlap. my $best_key = undef; my $best_length = length($text) + $replacement_overhead; my $replacement = sprintf '%X', (scalar(@replacements) + 160); foreach my $k (sort {length($b) * $count{$b} <=> length($a) * $count{$a} or $a cmp $b} keys %count) { my $output = replace_all($text, $k, $replacement); my $output_length = length($output) + $replacement_overhead + length($k) + 1; if( $output_length < $best_length ) { $best_key = $k; $best_length = $output_length; } } last unless defined $best_key; # Apply the replacement. print STDERR "[$best_key] -> [$replacement] ($count{$best_key}): ", length($text) + $replacement_overhead; $text = replace_all($text, $best_key, $replacement); print STDERR " -> ", length($text) + $replacement_overhead, "\n"; push @replacements, $best_key; $replacement_overhead += length($best_key) + 1; } print '%q$', $text, '$.gsub(/[A-F][0-9A-F]/){|x|%q$', (join 'Z', @replacements), '$.split(/Z/)[x.to_i(16)-160]}', "\n";