#!/usr/bin/perl -w use strict; use Digest; # Combined dictionary of (high byte, low byte, offset) words my @dictionary = qw{ black white red cyan green purple blue yellow transparent gold silver bronze platinum diamond glass stone apples bananas cherries grapes lemons mangoes melons oranges papayas peaches pears persimmons pineapples raspberries strawberries watermelons sweet fresh tasty large small heavy light soft hard solid precious rare vintage expensive spicy bitter }; # Convert a binary digest to readable string sub GenerateLabel { my ($raw_digest, $offset, $length) = @_; my @tokens = (); for(my $i = $offset; $i < $offset + $length; $i++) { my $byte = ord(substr($raw_digest, $i, 1)); push @tokens, $dictionary[$i + 32] . ' ' . $dictionary[$byte >> 4] . ' ' . $dictionary[($byte & 15) + 16]; } return join ' + ', @tokens; } # Compute full digest for a single opened file sub ComputeDigestForFile { my ($file) = @_; my $handle; if( open($handle, "<$file") ) { binmode $handle; my $md5 = Digest->new("MD5"); my $raw_digest; eval { $raw_digest = $md5->addfile($handle)->digest }; unless( defined $raw_digest ) { return "ERROR: $!"; } close $handle; return $raw_digest; } return "ERROR: $!"; } # Compute unique digests for a set of files sub ComputeDigest { # Get checksum for all inputs my $exit_status = 0; my @digests = (); if( $#ARGV < 0 ) { push @ARGV, "-"; } foreach my $a (@ARGV) { my $raw_digest = ComputeDigestForFile($a); if( $raw_digest =~ /^ERROR: (.*)/ ) { print STDERR "Error reading $a: $1\n"; $exit_status = 1; next; } push @digests, [$raw_digest, $a]; } if( $#digests < 0 ) { return $exit_status; } my $digest_length = length($digests[0][0]); # Find unique digests my %unique_digests = (); foreach my $d (@digests) { $unique_digests{$$d[0]} = ""; } my $unassigned = scalar keys %unique_digests; # Select subset of bytes that is unique to each digest for(my $length = 1; $unassigned > 0 && $length <= $digest_length; $length++) { for(my $offset = 0; $offset < $digest_length - $length; $offset++) { my %unique_substrings = (); foreach my $key (keys %unique_digests) { my $substring = substr($key, $offset, $length); if( exists $unique_substrings{$substring} ) { # Two or more digests share the same substring, so we # can't use it to tell digests apart. $unique_substrings{$substring} = undef; } else { # We might be able to identify this key using only a # substring, tentatively keep track of it now. $unique_substrings{$substring} = $key; } } foreach my $substring (keys %unique_substrings) { my $key = $unique_substrings{$substring}; next unless defined $key; next if $unique_digests{$key} ne ""; $unique_digests{$key} = GenerateLabel($key, $offset, $length); $unassigned--; } } } # Output digests my $max_label_length = 0; foreach my $key (keys %unique_digests) { if( $max_label_length < length($unique_digests{$key}) ) { $max_label_length = length($unique_digests{$key}); } } foreach my $p (@digests) { my $label = $unique_digests{$$p[0]}; print $label, " " x ($max_label_length + 1 - length($label)), "*", $$p[1], "\n"; } return $exit_status; } # Look for index of a word in global dictionary sub GetIndex { my ($word) = @_; $word = lc $word; for(my $i = 0; $i <= $#dictionary; $i++) { if( $dictionary[$i] eq $word ) { return $i; } } return undef; } # Convert digest label string to (offset, byte) pair sub ParseDigestElement { my ($label) = @_; my ($high, $low, $offset) = (undef, undef, undef); my @words = split /\s+/, $label; foreach my $t (@words) { my $i = GetIndex($t); if( defined $i ) { if( $i < 16 ) { $high = $i; } elsif( $i < 32 ) { $low = $i - 16; } else { $offset = $i - 32; } } } if( defined($offset) && defined($high) && defined($low) ) { return ($offset, ($high << 4) | $low); } return (undef, undef); } # Convert digest string into list of (offset, byte) items. # Returns empty list if digest can't be parsed. sub ParseDigest { my ($readable_digest) = @_; my @elements = (); foreach my $e (split /\+/, $readable_digest) { my ($offset, $byte) = ParseDigestElement($e); return () unless defined $offset; push @elements, [$offset, $byte]; } return @elements; } # Check digest for a single opened file sub CheckDigestForFile { my ($digest_elements, $file) = @_; my $raw_digest = ComputeDigestForFile($file); if( $raw_digest =~ /^ERROR: / ) { print "$file: $raw_digest\n"; return 1; } foreach my $e (@$digest_elements) { if( ord(substr($raw_digest, $$e[0], 1)) != $$e[1] ) { print "$file: FAILED\n"; return 1; } } print "$file: OK\n"; return 0; } # Check digests for a set of files sub CheckDigest { my $errors = 0; my $a0; if( $#ARGV < 0 ) { $a0 = "-"; } else { $a0 = $ARGV[0]; shift @ARGV; } my @elements = ParseDigest($a0); if( scalar @elements ) { # Checking digest using readable digest data specified on # command line. if( $#ARGV < 0 ) { push @ARGV, "-"; } foreach my $a (@ARGV) { if( CheckDigestForFile(\@elements, $a) ) { $errors++; } } return $errors; } # Checking digests using data listed in digest file. open my $handle, "<$a0" or die "Can not open $a0: $!\n"; while( my $line = <$handle> ) { chomp $line; my $separator = index($line, '*'); next if $separator < 0; @elements = ParseDigest(substr($line, 0, $separator)); next unless scalar @elements; my $file = substr($line, $separator + 1); if( CheckDigestForFile(\@elements, $file) ) { $errors++; } } close $handle; return $errors; } # Output usage message if nothing is specified on command line or stdin if( $#ARGV < 0 && -t STDIN ) { die <<"EOT"; To compute digest: $0 files... > digest.txt To check digest: $0 -c digest.txt $0 -c 'digest_string' file EOT } my $errors; if( $#ARGV >= 0 && $ARGV[0] eq "-c" ) { shift @ARGV; $errors = CheckDigest(); } else { $errors = ComputeDigest(); } if( $errors > 0 ) { die "$errors errors\n"; }