#!/usr/bin/perl -w use strict; use Digest; # Translation from byte offsets to words. Length of this list is 20 # to match the byte length of MD5 digests. # # This list has a particular sort order in that the items near the front # will be used more often, so we want to keep the preferred adjectives # near the front. This reduces the probability of being called bitter # (we should just avoid those adjectives to start with, really). my @offset_dictionary = ( "sweet", "fresh", "tasty", "large", "small", "heavy", "light", "soft", "hard", "solid", "precious", "rare", "vintage", "expensive", "spicy", "bitter", ); # Translation from nibbles to words. First list encodes the upper 4 # bits, second list encodes the lower 4 bits. my @bit_dictionary = ( [ "black", "white", "red", "cyan", "green", "purple", "blue", "yellow", "transparent", "gold", "silver", "bronze", "platinum", "diamond", "glass", "stone", ], [ "apples", "bananas", "cherries", "grapes", "lemons", "mangoes", "melons", "oranges", "papayas", "peaches", "pears", "persimmons", "pineapples", "raspberries", "strawberries", "watermelons", ] ); # Convert a binary digest to readable string sub GenerateLabel { my ($raw_digest, $offset, $length) = @_; my @tokens = (); for(my $i = $offset; $i < $offset + $length; $i++) { my $byte = ord(substr($raw_digest, $i, 1)); push @tokens, $offset_dictionary[$i] . ' ' . $bit_dictionary[0][$byte >> 4] . ' ' . $bit_dictionary[1][$byte & 15]; } return join ' + ', @tokens; } # Compute full digest for a single opened file sub ComputeDigestForFile { my ($file) = @_; my $handle; if( open($handle, "<$file") ) { binmode $handle; my $md5 = Digest->new("MD5"); my $raw_digest; eval { $raw_digest = $md5->addfile($handle)->digest }; unless( defined $raw_digest ) { return "ERROR: $!"; } close $handle; return $raw_digest; } return "ERROR: $!"; } # Compute unique digests for a set of files sub ComputeDigest { # Get checksum for all inputs my $exit_status = 0; my @digests = (); if( $#ARGV < 0 ) { push @ARGV, "-"; } foreach my $a (@ARGV) { my $raw_digest = ComputeDigestForFile($a); if( $raw_digest =~ /^ERROR: (.*)/ ) { print STDERR "Error reading $a: $1\n"; $exit_status = 1; next; } push @digests, [$raw_digest, $a]; } if( $#digests < 0 ) { return $exit_status; } my $digest_length = length($digests[0][0]); # Find unique digests my %unique_digests = (); foreach my $d (@digests) { $unique_digests{$$d[0]} = ""; } my $unassigned = scalar keys %unique_digests; # Select subset of bytes that is unique to each digest for(my $length = 1; $unassigned > 0 && $length <= $digest_length; $length++) { for(my $offset = 0; $offset < $digest_length - $length; $offset++) { my %unique_substrings = (); foreach my $key (keys %unique_digests) { my $substring = substr($key, $offset, $length); if( exists $unique_substrings{$substring} ) { # Two or more digests share the same substring, so we # can't use it to tell digests apart. $unique_substrings{$substring} = undef; } else { # We might be able to identify this key using only a # substring, tentatively keep track of it now. $unique_substrings{$substring} = $key; } } foreach my $substring (keys %unique_substrings) { my $key = $unique_substrings{$substring}; next unless defined $key; next if $unique_digests{$key} ne ""; $unique_digests{$key} = GenerateLabel($key, $offset, $length); $unassigned--; } } } # Output digests my $max_label_length = 0; foreach my $key (keys %unique_digests) { if( $max_label_length < length($unique_digests{$key}) ) { $max_label_length = length($unique_digests{$key}); } } foreach my $p (@digests) { my $label = $unique_digests{$$p[0]}; print $label, " " x ($max_label_length + 1 - length($label)), "*", $$p[1], "\n"; } return $exit_status; } # Look for index of a word in a list sub GetIndex { my ($word, $list) = @_; $word = lc $word; for(my $i = 0; $i <= $#$list; $i++) { if( $$list[$i] eq $word ) { return $i; } } return undef; } # Convert digest label string to (offset, byte) pair sub ParseDigestElement { my ($label) = @_; my ($high, $low, $offset) = (undef, undef, undef); my @words = split /\s+/, $label; foreach my $t (@words) { my $i = GetIndex($t, \@offset_dictionary); if( defined $i ) { $offset = $i; } $i = GetIndex($t, $bit_dictionary[0]); if( defined $i ) { $high = $i; } $i = GetIndex($t, $bit_dictionary[1]); if( defined $i ) { $low = $i; } } if( defined($offset) && defined($high) && defined($low) ) { return ($offset, ($high << 4) | $low); } return (undef, undef); } # Convert digest string into list of (offset, byte) items. # Returns empty list if digest can't be parsed. sub ParseDigest { my ($readable_digest) = @_; my @elements = (); foreach my $e (split /\+/, $readable_digest) { my ($offset, $byte) = ParseDigestElement($e); return () unless defined $offset; push @elements, [$offset, $byte]; } return @elements; } # Check digest for a single opened file sub CheckDigestForFile { my ($digest_elements, $file) = @_; my $raw_digest = ComputeDigestForFile($file); if( $raw_digest =~ /^ERROR: / ) { print "$file: $raw_digest\n"; return 1; } foreach my $e (@$digest_elements) { if( ord(substr($raw_digest, $$e[0], 1)) != $$e[1] ) { print "$file: FAILED\n"; return 1; } } print "$file: OK\n"; return 0; } # Check digests for a set of files sub CheckDigest { my $errors = 0; my $a0; if( $#ARGV < 0 ) { $a0 = "-"; } else { $a0 = $ARGV[0]; shift @ARGV; } my @elements = ParseDigest($a0); if( scalar @elements ) { # Checking digest using readable digest data specified on # command line. if( $#ARGV < 0 ) { push @ARGV, "-"; } foreach my $a (@ARGV) { if( CheckDigestForFile(\@elements, $a) ) { $errors++; } } return $errors; } # Checking digests using data listed in digest file. open my $handle, "<$a0" or die "Can not open $a0: $!\n"; while( my $line = <$handle> ) { chomp $line; my $separator = index($line, '*'); next if $separator < 0; @elements = ParseDigest(substr($line, 0, $separator)); next unless scalar @elements; my $file = substr($line, $separator + 1); if( CheckDigestForFile(\@elements, $file) ) { $errors++; } } close $handle; return $errors; } # Output usage message if nothing is specified on command line or stdin if( $#ARGV < 0 && -t STDIN ) { die <<"EOT"; To compute digest: $0 files... > digest.txt To check digest: $0 -c digest.txt $0 -c 'digest_string' file EOT } my $errors; if( $#ARGV >= 0 && $ARGV[0] eq "-c" ) { shift @ARGV; $errors = CheckDigest(); } else { $errors = ComputeDigest(); } if( $errors > 0 ) { die "$errors errors\n"; }