#!/usr/bin/perl -w

use strict;
use constant MAX_KEY_SIZE => 64;

# Read input from stdin, or files specified on command line
my $input = join '', <>;

# Convert all input characters in string to bytes in [0..25] range, and
# remove all other characters.
$input = lc $input;
$input =~ y/a-z//cd;
$input =~ y/a-z/\0-\31/;

# Expected character probabilities, from "Alice in Wonderland".
# http://www.gutenberg.org/ebooks/11
my @expected_distribution = (
   .07969,
   .01419,
   .02439,
   .04446,
   .12517,
   .01936,
   .02392,
   .06414,
   .07019,
   .00191,
   .01048,
   .04236,
   .02005,
   .06546,
   .07705,
   .01600,
   .00178,
   .05374,
   .05909,
   .09919,
   .03234,
   .00782,
   .02400,
   .00143,
   .02101,
   .00065,
);

# Find best shift offset for a single distribution.
#
# Returns (offset, confidence) pair, where "confidence" is some
# measure of how likely this offset is correct, higher is better.
sub GetShiftOffset($)
{
   my ($data) = @_;

   # Compute character frequency distribution from byte array
   my @freq = ();
   $freq[$_] = 0 foreach (0..25);
   $freq[$_]++ foreach unpack 'C*', $data;
   my @input_dist = map {$_ / length($data)} @freq;

   my @offsets = ();
   for(my $o = 0; $o < 26; $o++)
   {
      # Apply shift offset and compute error amount
      my $error = 0;
      for(my $i = 0; $i < 26; $i++)
      {
         my $diff = $expected_distribution[$i] - $input_dist[($i + $o) % 26];
         $error += $diff * $diff;
      }

      # Record (error, shift offset) pair
      push @offsets, [$error, $o];
   }

   # Sort offsets by error amount
   @offsets = sort {$$a[0] <=> $$b[0]} @offsets;

   # If we guessed the right key size, the best fitting shift amount
   # usually results in significantly less error than the next best
   # fit.  This is due to how most people choose keys with distinct
   # characters, such when an incorrect key size is used, each stride
   # contains samples of characters that are shifted by different
   # amounts instead of a single shift amount, and the resulting mix
   # of character frequencies means no single shift amount is
   # significantly better than other shift amounts.
   #
   # Thus, this scheme doesn't work well when people use keys with
   # repeated characters, such as "banana".
   #
   # This difference in error amount is returned in units of standard
   # deviation, which the caller will make use of after having
   # collected all key candidates.
   my $average_error = 0;
   foreach my $p (@offsets)
   {
      $average_error += $$p[0];
   }
   $average_error /= 26;
   my $error_deviation = 0;
   foreach my $p (@offsets)
   {
      my $delta = $$p[0] - $average_error;
      $error_deviation += $delta * $delta;
   }
   $error_deviation = sqrt($error_deviation / 26);

   # If error_deviation is close to zero, it means the characters were
   # pretty much uniformly distributed, and it's not possible to crack.
   if( $error_deviation < 1e-6 )
   {
      return (0, 0);
   }

   my $margin = $offsets[1][0] - $offsets[0][0];

   # Compute confidence in log units of standard deviation.  Anything
   # in the same threshold bucket seem equally good or bad:
   #  - 1 < confidence
   #  - 0.5 < confidence <= 1
   #  - 0.25 < confidence <= 0.5
   #  - confidence <= 0.25
   #
   # We bucketize these confidence values since they aren't really
   # that precise, and if there are two key candidates with comparable
   # confidence, we want to keep both of them.
   my $confidence = $margin / $error_deviation;
   $confidence = $confidence < 0.25 ? 0 :
                 $confidence < 0.5 ? 1 :
                 $confidence < 1 ? 2 : 3;
   return ($offsets[0][1], $confidence);
}

# Find the best key that would minimize error for a particular key length.
# Returns (key, confidence), where confidence is a value that indicates the
# likelihood of this key being the right one, higher is better.
#
# If no key with confidence above $threshold is found, return (undef, 0)
sub FindKeyWithLength($$)
{
   my ($key_size, $threshold) = @_;

   # Split input to strides by key size.  Because key size is much
   # smaller than input, this guarantees that each stride will always
   # have a few characters in it.
   my @strides;
   $#strides = $key_size - 1;

   my $index = 0;
   foreach my $c (unpack 'C*', $input)
   {
      $strides[$index++] .= chr($c);
      $index %= $key_size;
   }

   # Get candidate shift amounts for each stride
   my $key = "";
   my $min_confidence = undef;
   for(my $i = 0; $i < $key_size; $i++)
   {
      my ($offset, $confidence) = GetShiftOffset($strides[$i]);

      $key .= chr($offset);
      if( !defined($min_confidence) || $min_confidence > $confidence )
      {
         $min_confidence = $confidence;
         if( $min_confidence < $threshold )
         {
            return (undef, 0);
         }
      }
   }
   return ($key, $min_confidence);
}

# Find candidate key in string of bytes.  Output possible keys to stdout.
sub Crack()
{
   # Try key lengths up to 1/5 of original input size.  This is done
   # by simply searching through all key sizes linearly.
   #
   # A more efficient way to do this is to try common divisors for all
   # offsets of repeated strings, but that doesn't work so well with
   # short input strings and we end up having to try all sizes
   # linearly anyways.
   my $max_key_size = length($input) / 5;
   if( $max_key_size > MAX_KEY_SIZE )
   {
      $max_key_size = MAX_KEY_SIZE;
   }

   # Minimum accepted confidence threshold.  Because we sort keys by
   # confidence later, if we already found keys with higher confidence,
   # there is no need to examine partial keys with lower confidence.
   my $min_confidence = 0;

   my %found_keys = ();
   for(my $key_size = 1; $key_size < $max_key_size; $key_size++)
   {
      my ($key, $confidence) = FindKeyWithLength($key_size, $min_confidence);
      next unless defined $key;
      next if exists $found_keys{$key};

      # Remember confidence value for this key
      $found_keys{$key} = $confidence;

      # Remove other keys that are multiples of this key.  This is so that
      # if we see a key candidate in the form of "XX", we would return just
      # "X" as the canonical key.
      my $base_key = $key;
      for(my $j = $key_size; $j < $max_key_size; $j += $key_size)
      {
         $key .= $base_key;
         $found_keys{$key} = 0;
      }

      # Update minimum confidence level.  This new key must be at equal or
      # higher confidence, otherwise it would have been rejected already.
      $min_confidence = $confidence;
   }

   # Output key with the highest confidence, break ties by preferring
   # longer keys.
   foreach my $key (sort {$found_keys{$b} <=> $found_keys{$a} ||
                          length($b) <=> length($a)}
                    keys %found_keys)
   {
      last if $found_keys{$key} == 0;

      # Convert shift amounts to uppercase and lowercase keys
      print
      (
         (join '', map {chr(ord('A') + $_)} unpack 'C*', $key),
         "\n",
         (join '', map {chr(ord('a') + (26 - $_) % 26)} unpack 'C*', $key),
         "\n"
      );
      last;
   }
}


Crack();