#!/usr/bin/perl -w use strict; use Digest; # Combined dictionary of (high byte, low byte, offset) words my @d = qw{ black white red cyan green purple blue yellow transparent gold silver bronze platinum diamond glass stone apples bananas cherries grapes lemons mangoes melons oranges papayas peaches pears persimmons pineapples raspberries strawberries watermelons sweet fresh tasty large small heavy light soft hard solid precious rare vintage expensive spicy bitter }; # Globals my (@l, @g, %u, %U); my $e = 0; my ($a, $c, $h, $i, $s, $w, $x, $y, $z); # Compute full digest for a single opened file sub D { $x = undef; if( open($h, "<$_[0]") ) { binmode $h; eval { $a = Digest->new("MD5"); $x = $a->addfile($h)->digest }; } $x = "ERROR: $!" unless defined $x; } # Convert digest string into list of (offset, byte) items. # Returns empty list if digest can't be parsed. sub P { @l = (); map { ($x, $y, $z) = (undef, undef, undef); map { $w = lc $_; for($i = 0; $i <= $#d && $d[$i] ne $w; $i++) {} if( $i < 16 ) { $x = $i; } elsif( $i < 32 ) { $y = $i - 16; } elsif( $i <= $#d ) { $z = $i - 32; } } split /\s+/, $_; if( defined($x) && defined($y) && defined($z) ) { push @l, [$z, ($x << 4) | $y]; } } split /\+/, $a; } # Check digest for a single opened file sub Q { D($z = $_[0]); if( $x =~ /^ERROR: / ) { print "$z: $x\n"; return 1; } foreach (@l) { if( ord(substr($x, $$_[0], 1)) != $$_[1] ) { print "$z: FAILED\n"; return 1; } } print "$z: OK\n"; return 0; } # Output usage message if nothing is specified on command line or stdin my @n = @ARGV; if( $#n < 0 && -t STDIN ) { die <<"EOT"; To compute digest: $0 files... > digest.txt To check digest: $0 -c digest.txt $0 -c 'digest_string' file EOT } if( @n && $n[0] eq "-c" ) { # Check digests for a set of files shift @n; $a = $#n < 0 ? "-" : shift @n; P(); if( @l ) { # Checking digest using readable digest data specified on # command line. push @n, "-" if $#n < 0; map { $e += Q($_) } @n; } else { # Checking digests using data listed in digest file. open $c, "<$a" or die "Can not open $a: $!\n"; map { chomp $_; $s = index($_, '*'); if( $s > 0 ) { $a = substr($_, 0, $s); P(); $e += Q(substr($_, $s + 1)) if @l; } } <$c>; } } else { # Compute unique digests for a set of files # Get checksum for all inputs @g = (); push @n, "-" if $#n < 0; foreach (@n) { D($_); if( $x =~ /^ERROR: (.*)/ ) { print STDERR "Error reading $_: $1\n"; $e++; next; } push @g, [$x, $_]; } if( @g ) { # Find unique digests %u = map {$$_[0] => ""} @g; $y = scalar keys %u; # Select subset of bytes that is unique to each digest for($a = 1; $y > 0 && $a <= 16; $a++) { for($z = 0; $z < 16 - $a; $z++) { %U = (); map { if( exists $U{$w = substr($_, $z, $a)} ) { # Two or more digests share the same substring, so we # can't use it to tell digests apart. $U{$w} = undef; } else { # We might be able to identify this key using only a # substring, tentatively keep track of it now. $U{$w} = $_; } } keys %u; map { $x = $U{$_}; if( defined $x && $u{$x} eq "" ) { @l = (); for($i = $z; $i < $z + $a; $i++) { $w = ord(substr($x, $i, 1)); push @l, $d[$i + 32] . ' ' . $d[$w >> 4] . ' ' . $d[($w & 15) + 16]; } $u{$x} = join ' + ', @l; $y--; } } keys %U; } } # Output digests $y = 0; map { $y = $_ if $y < $_ } map {length $u{$_}} keys %u; map { $x = $u{$$_[0]}; print $x, " " x ($y + 1 - length($x)), "*$$_[1]\n"; } @g; } } die "$e errors\n" if $e;