#!/usr/bin/perl -w # dfa.pl - Don Yang (uguu.org) # # DFA for downsizing PPM or ASCII art. # # Generate DFA string: # ./dfa.pl # # Simulate DFA: # ./dfa <-ppm|-pgm|-txt> [output] # # Each state is encoded in 3 characters: # # # 96 + action bitmask + test bitmask # 96 + state index # # 11/24/11 use strict; use constant BASE_OFFSET => 96; # {{{ DFA data # Actions executed at each state my %state = ( "ppm_header0" => "", "ppm_header1" => "x--", "ppm_even0" => "x=width", "ppm_even1" => "", "ppm_even2" => "x--", "ppm_even3" => "", "ppm_odd0" => "x=width", "ppm_odd1" => "", "ppm_odd2" => "x--", "ppm_odd3" => "flush", "ppm_odd4" => "flush", "ppm_odd5" => "flush;x--", "ppm_odd6" => "", "pgm_header0" => "", "pgm_header1" => "x--", "pgm_even0" => "x=width;x--", "pgm_even1" => "x--", "pgm_odd0" => "x=width;x--", "pgm_odd1" => "flush;x--", "pgm_odd2" => "x--", "txt_even0" => "flush", "txt_even1" => "", "txt_odd0" => "", "txt_odd1" => "", "txt_odd2" => "flush", ); my $PPM_INIT = "ppm_header0"; my $PGM_INIT = "pgm_header0"; my $TXT_INIT = "txt_even1"; # Transitions between states. Each state executes up to one conditional # on exit, and determines the next state based on that conditional. my %edge = ( "ppm_header0" => "newline ? ppm_header1 : ppm_header0", "ppm_header1" => "x!=0 ? ppm_header0 : ppm_even0", "ppm_even0" => "ppm_even1", "ppm_even1" => "ppm_even2", "ppm_even2" => "x!=0 ? ppm_even3 : ppm_odd0", "ppm_even3" => "ppm_even1", "ppm_odd0" => "ppm_odd1", "ppm_odd1" => "ppm_odd2", "ppm_odd2" => "x!=0 ? ppm_odd3 : ppm_even0", "ppm_odd3" => "ppm_odd4", "ppm_odd4" => "ppm_odd5", "ppm_odd5" => "x!=0 ? ppm_odd6 : ppm_even0", "ppm_odd6" => "ppm_odd1", "pgm_header0" => "newline ? pgm_header1 : pgm_header0", "pgm_header1" => "x!=0 ? pgm_header0 : pgm_even0", "pgm_even0" => "x!=0 ? pgm_even1 : pgm_odd0", "pgm_even1" => "x!=0 ? pgm_even1 : pgm_odd0", "pgm_odd0" => "x!=0 ? pgm_odd1 : pgm_even0", "pgm_odd1" => "x!=0 ? pgm_odd2 : pgm_even0", "pgm_odd2" => "x!=0 ? pgm_odd1 : pgm_even0", "txt_even0" => "newline ? txt_odd0 : txt_even1", "txt_even1" => "newline ? txt_odd0 : txt_even1", "txt_odd0" => "newline ? txt_even0 : txt_odd1", "txt_odd1" => "newline ? txt_even0 : txt_odd2", "txt_odd2" => "newline ? txt_even0 : txt_odd1", ); # }}} # {{{ DFA string encoder # Bitmask of accepted actions my %action = ( "" => 0, "flush" => 1, "x--" => 2, "x=width" => 16, ); $action{"flush;x--"} = $action{"flush"} | $action{"x--"}; $action{"x=width;x--"} = $action{"x=width"} | $action{"x--"}; # Bitmask of accepted tests my %test = ( "x!=0" => 4, "newline" => 8, ); # Validate states and edges foreach my $i (keys %state) { exists $edge{$i} or die "Missing edge for $i\n"; unless( exists $action{$state{$i}} ) { die "Bad action for $i: $state{$i}\n"; } } foreach my $i (keys %edge) { exists $state{$i} or die "Missing state for $i\n"; if( $edge{$i} =~ /^(\S+) \? (\S+) : (\S+)$/ && exists $test{$1} && exists $state{$2} && exists $state{$3} ) { next; } if( $edge{$i} =~ /^(\S+)$/ && exists $state{$1} ) { next; } die "Bad edge for $i: $edge{$i}\n"; } # Count number of inbound edges for each state my %inbound; foreach my $i (keys %state) { $inbound{$i} = 0; } foreach my $i (keys %edge) { if( $edge{$i} =~ /^\S+ \? (\S+) : (\S+)$/ ) { $inbound{$1}++; $inbound{$2}++; } else { $inbound{$edge{$i}}++; } } # Assign indices to each state, prefer states with higher inbound edges first my @sorted_state = sort {$inbound{$b} <=> $inbound{$a} || $a cmp $b} keys %state; my %index; for(my $i = 0; $i <= $#sorted_state; $i++) { $index{$sorted_state[$i]} = $i; } # Encode states my $dfa = ""; foreach my $i (@sorted_state) { if( $edge{$i} =~ /^(\S+) \? (\S+) : (\S+)$/ ) { $dfa .= chr(BASE_OFFSET + $action{$state{$i}} + $test{$1}) . chr(BASE_OFFSET + $index{$2}) . chr(BASE_OFFSET + $index{$3}); } else { $dfa .= chr(BASE_OFFSET + $action{$state{$i}}) . chr(BASE_OFFSET + $index{$edge{$i}}) . 'X'; } } # }}} # {{{ Output DFA if( $#ARGV == -1 ) { print "dfa = $dfa\n", "txt_init = ", $index{$TXT_INIT}, "\n", "pgm_init = ", $index{$PGM_INIT}, "\n", "ppm_init = ", $index{$PPM_INIT}, "\n"; exit 0; } if( $#ARGV < 1 || $ARGV[0] !~ /txt|ppm|pgm/ ) { die "$0 <-ppm> [output.ppm]\n" . "$0 <-pgm> [output.pgm]\n" . "$0 <-txt> [output.txt]\n"; } # }}} # {{{ Simulate DFA # Load input my $file; open $file, "< $ARGV[1]" or die $!; my $data = join '', <$file>; close $file; # Check header my $width = 0; if( $ARGV[0] =~ /ppm|pgm/ ) { if( $data =~ /^P[65]\n(\d+) \d+\n\d+\n/s ) { $width = $1; } else { die "Error parsing header from $ARGV[1]\n"; } } # Run DFA and write output if( $#ARGV >= 2 && $ARGV[2] ne "-" ) { open $file, "> $ARGV[2]" or die $!; } else { $file = *STDOUT; } my $ptr; if( $ARGV[0] =~ /txt/ ) { $ptr = $index{$TXT_INIT}; } elsif( $ARGV[0] =~ /ppm/ ) { $ptr = $index{$PPM_INIT}; } elsif( $ARGV[0] =~ /pgm/ ) { $ptr = $index{$PGM_INIT}; } else { die; } my $x = 3; my $c = undef; my @input = map {chr} unpack 'C*', $data; for(;;) { $ptr *= 3; # Execute commands at current state my $cmd = ord(substr($dfa, $ptr, 1)); if( $cmd & 1 ) { print $file $c; } if( $cmd & 16 ) { $x = $width; } if( $cmd & 2 ) { $x--; } # Read input last unless scalar @input; $c = shift @input; # Transition to next state $ptr += ($cmd & 8) ? ($c eq "\n" ? 1 : 2) : ($cmd & 4) ? ($x != 0 ? 1 : 2) : 1; $ptr = ord(substr($dfa, $ptr, 1)) - BASE_OFFSET; } close $file; # }}}