#!/usr/bin/perl -w
# benchmark_crack.pl - Don Yang (uguu.org)
#
# Benchmark accuracy of crack script.
#
# 2015-06-07

use strict;
use File::Temp;
use Time::HiRes;

# The selection of input text has more effect on cracking accuracy
# than the key size, so we run more tests over a larger range of input
# text variations and only a small range of key variations.
use constant SAMPLES_PER_TEXT_SIZE => 25;
use constant ITERATIONS_PER_KEY_SIZE => 4;

# Maximum key size to test.  Note that the crack script has some
# built-in limits on what maximum key sizes to try, so small input
# size is guaranteed to fail with large key size.
use constant MAX_KEY_SIZE => 32;

# Generate a key string of a particular length.  Generated key is
# deterministic as long as seed is constant.
#
# Parameters:
#   $size = number of lowercase letters wanted in the key.
#
# Returns the key, or an empty string if $size is less than 1.  No
# random numbers are consumed in that case.
sub GenerateKey($)
{
    my ($size) = @_;

    # A key of length zero has no letters at all.  Without this check
    # the first letter would be emitted unconditionally.
    return '' if $size < 1;

    # Select letter to be used in the first key letter.  This is
    # different from all other letters in the key, which guarantees
    # that the key is not a multiple of a shorter key.
    my $first_char = chr(ord('a') + int(rand 26));

    # Generate remaining characters, redrawing whenever a draw collides
    # with the first letter.
    my $key = $first_char;
    while( length($key) < $size )
    {
        my $c;
        do
        {
            $c = chr(ord('a') + int(rand 26));
        } while( $c eq $first_char );
        $key .= $c;
    }
    return $key;
}

# Run benchmark for a single key length.
#
# Each of the ITERATIONS_PER_KEY_SIZE iterations generates a fresh key,
# runs "$encode KEY $plaintext > $ciphertext" through the shell, and then
# times "$decode $ciphertext".  A crack counts as successful only when
# the decoder printed exactly two lines and the leading run of uppercase
# letters on the first line, lowercased, equals the key.
#
# Parameters:
#   $encode     = shell command prefix of the encoder.
#   $decode     = shell command prefix of the crack script.
#   $plaintext  = path of the file holding the text to encrypt.
#   $ciphertext = path the encoder output is redirected to.
#   $text_size  = plaintext length, used only for the progress message.
#   $key_size   = length of the keys to generate.
#
# Returns (number of successful cracks, sum of all crack times)
sub TestKeyLength($$$$$$)
{
    my ($encode, $decode, $plaintext, $ciphertext, $text_size, $key_size) = @_;

    print "Input = $text_size, key = $key_size";
    my $correct = 0;
    my $total_time = 0;
    for my $iteration (1 .. ITERATIONS_PER_KEY_SIZE)
    {
        my $key = GenerateKey($key_size);

        # The commands are deliberately run through the shell: the output
        # redirection needs it, and $encode / $decode are used as command
        # prefixes rather than single program paths.
        my $cmd = "$encode $key $plaintext > $ciphertext";
        if( system($cmd) != 0 )
        {
            # $! is only meaningful when the command could not be started
            # at all ($? == -1); otherwise report the wait status.
            die $? == -1
                ? "Failed to run \"$cmd\": $!\n"
                : "\"$cmd\" failed with wait status $?\n";
        }

        # Only the crack step is timed.
        my $start_time = Time::HiRes::gettimeofday();
        my @output = `$decode $ciphertext`;
        $total_time += Time::HiRes::gettimeofday() - $start_time;
        if( $? == -1 )
        {
            die "Failed to run \"$decode $ciphertext\": $!\n";
        }

        if( $#output == 1 &&
            $output[0] =~ /^([A-Z]+)/ &&
            lc($1) eq $key )
        {
            $correct++;
        }
    }
    print ", accuracy = $correct, time = $total_time\n";
    return ($correct, $total_time);
}

# Run benchmark for a single input size
#
# For every key size from 1 to MAX_KEY_SIZE, SAMPLES_PER_TEXT_SIZE random
# windows of $text_size letters are cut from $original_text, written to
# $plaintext and passed to TestKeyLength.  The fraction of successful
# cracks and the mean crack time are stored in
# $stats->{$text_size}{$key_size}{"accuracy"} and {"latency"}.
#
# Dies if $original_text holds fewer than $text_size characters, since no
# sample of the requested size can be cut from it.
sub TestInputLength($$$$$$$)
{
    my ($encode, $decode, $original_text, $plaintext, $ciphertext,
        $text_size, $stats) = @_;

    # Without this check rand() below would receive a negative argument
    # and substr() would silently produce a sample of the wrong length.
    my $available = length($original_text);
    if( $available < $text_size )
    {
        die "Input text has $available letters, " .
            "cannot sample $text_size of them\n";
    }

    my $runs_per_key_size = ITERATIONS_PER_KEY_SIZE * SAMPLES_PER_TEXT_SIZE;
    for my $key_size (1 .. MAX_KEY_SIZE)
    {
        my $correct = 0;
        my $total_time = 0;
        for my $sample (1 .. SAMPLES_PER_TEXT_SIZE)
        {
            # Generate input plaintext from a random sample of the
            # original text.  The largest possible start position is
            # never drawn; the expression is kept unchanged so that the
            # random sequence, and with it earlier results, stays
            # reproducible.
            my $offset = int(rand($available - $text_size));
            open my $file, '>', $plaintext
                or die "Cannot write $plaintext: $!\n";
            print $file substr($original_text, $offset, $text_size);
            close $file or die "Cannot finish writing $plaintext: $!\n";

            # Run tests for this key length
            my ($sample_correct, $sample_time) = TestKeyLength(
                $encode, $decode, $plaintext, $ciphertext,
                $text_size, $key_size);
            $correct += $sample_correct;
            $total_time += $sample_time;
        }

        # Update stats
        $stats->{$text_size}{$key_size}{"accuracy"} =
            $correct / $runs_per_key_size;
        $stats->{$text_size}{$key_size}{"latency"} =
            $total_time / $runs_per_key_size;
    }
}

# Create an empty temporary file and return its name.  The file is
# registered with File::Temp for removal when the program exits, so it
# does not linger if the benchmark dies half way through.
sub CreateTempFile
{
    my ($handle, $name) = File::Temp::tempfile(UNLINK => 1);
    close $handle or die "Cannot close $name: $!\n";
    return $name;
}

# Run benchmark across different input lengths
#
# Parameters:
#   $encode        = shell command prefix of the encoder.
#   $decode        = shell command prefix of the crack script.
#   $original_file = text file that plaintext samples are drawn from.
#   $output_file   = CSV file that receives the collected stats.
#
# Text sizes larger than the number of letters in $original_file are
# skipped with a warning.
sub RunTests($$$$)
{
    my ($encode, $decode, $original_file, $output_file) = @_;

    # Load input text to memory and strip all non-alpha characters
    open my $input, '<', $original_file
        or die "Cannot read $original_file: $!\n";
    my $original_text = join '', <$input>;
    close $input;
    $original_text =~ tr/a-zA-Z//cd;

    # Create temporary files
    my $plaintext = CreateTempFile();
    my $ciphertext = CreateTempFile();

    # List of input lengths to test: 100..500 in steps of 10, 600..3000 in
    # steps of 100, and 4000..10000 in steps of 1000.
    my @lengths = (
        (map { $_ * 10 } 10 .. 50),
        (map { $_ * 100 } 6 .. 30),
        (map { $_ * 1000 } 4 .. 10),
    );

    # Drop lengths that the input text cannot supply.
    my $available = length($original_text);
    my @usable_lengths = grep { $_ <= $available } @lengths;
    if( @usable_lengths < @lengths )
    {
        warn "$original_file contains only $available letters, " .
             "skipping larger text sizes\n";
    }

    # Collect stats
    my %stats = ();
    foreach my $text_size (@usable_lengths)
    {
        TestInputLength($encode, $decode, $original_text,
                        $plaintext, $ciphertext, $text_size, \%stats);
    }

    # Write stats to CSV
    open my $csv, '>', $output_file
        or die "Cannot write $output_file: $!\n";
    print $csv "text_size,key_size,accuracy,latency\n";
    foreach my $text_size (sort {$a <=> $b} keys %stats)
    {
        my $by_key_size = $stats{$text_size};
        foreach my $key_size (sort {$a <=> $b} keys %$by_key_size)
        {
            print $csv join(",",
                            $text_size,
                            $key_size,
                            $by_key_size->{$key_size}{"accuracy"},
                            $by_key_size->{$key_size}{"latency"}), "\n";
        }
    }
    close $csv or die "Cannot finish writing $output_file: $!\n";

    # Cleanup
    unlink $plaintext;
    unlink $ciphertext;
}

# Check parameters
unless( @ARGV == 4 )
{
    die "Usage: $0 <encode command> <decode command> " .
        "<input text> <output csv>\n";
}

# Make random number sequence deterministic across runs
srand 1;

# Collect stats
RunTests($ARGV[0], $ARGV[1], $ARGV[2], $ARGV[3]);