#!/usr/bin/perl -w
# dfa.pl - Don Yang (uguu.org)
#
# DFA for downsizing PPM or ASCII art.
#
# Generate DFA string:
# ./dfa.pl
#
# Simulate DFA:
# ./dfa <-ppm|-pgm|-txt> [output]
#
# Each state is encoded in 3 characters:
#
#
# 96 + action bitmask + test bitmask
# 96 + state index
#
# 11/24/11
use strict;
use constant BASE_OFFSET => 96;
# {{{ DFA data
# Actions executed at each state
my %state =
(
"ppm_header0" => "",
"ppm_header1" => "x--",
"ppm_even0" => "x=width",
"ppm_even1" => "",
"ppm_even2" => "x--",
"ppm_even3" => "",
"ppm_odd0" => "x=width",
"ppm_odd1" => "",
"ppm_odd2" => "x--",
"ppm_odd3" => "flush",
"ppm_odd4" => "flush",
"ppm_odd5" => "flush;x--",
"ppm_odd6" => "",
"pgm_header0" => "",
"pgm_header1" => "x--",
"pgm_even0" => "x=width;x--",
"pgm_even1" => "x--",
"pgm_odd0" => "x=width;x--",
"pgm_odd1" => "flush;x--",
"pgm_odd2" => "x--",
"txt_even0" => "flush",
"txt_even1" => "",
"txt_odd0" => "",
"txt_odd1" => "",
"txt_odd2" => "flush",
);
my $PPM_INIT = "ppm_header0";
my $PGM_INIT = "pgm_header0";
my $TXT_INIT = "txt_even1";
# Transitions between states. Each state executes up to one conditional
# on exit, and determines the next state based on that conditional.
my %edge =
(
"ppm_header0" => "newline ? ppm_header1 : ppm_header0",
"ppm_header1" => "x!=0 ? ppm_header0 : ppm_even0",
"ppm_even0" => "ppm_even1",
"ppm_even1" => "ppm_even2",
"ppm_even2" => "x!=0 ? ppm_even3 : ppm_odd0",
"ppm_even3" => "ppm_even1",
"ppm_odd0" => "ppm_odd1",
"ppm_odd1" => "ppm_odd2",
"ppm_odd2" => "x!=0 ? ppm_odd3 : ppm_even0",
"ppm_odd3" => "ppm_odd4",
"ppm_odd4" => "ppm_odd5",
"ppm_odd5" => "x!=0 ? ppm_odd6 : ppm_even0",
"ppm_odd6" => "ppm_odd1",
"pgm_header0" => "newline ? pgm_header1 : pgm_header0",
"pgm_header1" => "x!=0 ? pgm_header0 : pgm_even0",
"pgm_even0" => "x!=0 ? pgm_even1 : pgm_odd0",
"pgm_even1" => "x!=0 ? pgm_even1 : pgm_odd0",
"pgm_odd0" => "x!=0 ? pgm_odd1 : pgm_even0",
"pgm_odd1" => "x!=0 ? pgm_odd2 : pgm_even0",
"pgm_odd2" => "x!=0 ? pgm_odd1 : pgm_even0",
"txt_even0" => "newline ? txt_odd0 : txt_even1",
"txt_even1" => "newline ? txt_odd0 : txt_even1",
"txt_odd0" => "newline ? txt_even0 : txt_odd1",
"txt_odd1" => "newline ? txt_even0 : txt_odd2",
"txt_odd2" => "newline ? txt_even0 : txt_odd1",
);
# }}}
# {{{ DFA string encoder
# Bitmask of accepted actions
my %action =
(
"" => 0,
"flush" => 1,
"x--" => 2,
"x=width" => 16,
);
$action{"flush;x--"} = $action{"flush"} | $action{"x--"};
$action{"x=width;x--"} = $action{"x=width"} | $action{"x--"};
# Bitmask of accepted tests
my %test =
(
"x!=0" => 4,
"newline" => 8,
);
# Validate states and edges
foreach my $i (keys %state)
{
exists $edge{$i} or die "Missing edge for $i\n";
unless( exists $action{$state{$i}} )
{
die "Bad action for $i: $state{$i}\n";
}
}
foreach my $i (keys %edge)
{
exists $state{$i} or die "Missing state for $i\n";
if( $edge{$i} =~ /^(\S+) \? (\S+) : (\S+)$/ &&
exists $test{$1} &&
exists $state{$2} &&
exists $state{$3} )
{
next;
}
if( $edge{$i} =~ /^(\S+)$/ && exists $state{$1} )
{
next;
}
die "Bad edge for $i: $edge{$i}\n";
}
# Count number of inbound edges for each state
my %inbound;
foreach my $i (keys %state)
{
$inbound{$i} = 0;
}
foreach my $i (keys %edge)
{
if( $edge{$i} =~ /^\S+ \? (\S+) : (\S+)$/ )
{
$inbound{$1}++;
$inbound{$2}++;
}
else
{
$inbound{$edge{$i}}++;
}
}
# Assign indices to each state, prefer states with higher inbound edges first
my @sorted_state = sort {$inbound{$b} <=> $inbound{$a} || $a cmp $b}
keys %state;
my %index;
for(my $i = 0; $i <= $#sorted_state; $i++)
{
$index{$sorted_state[$i]} = $i;
}
# Encode states
my $dfa = "";
foreach my $i (@sorted_state)
{
if( $edge{$i} =~ /^(\S+) \? (\S+) : (\S+)$/ )
{
$dfa .= chr(BASE_OFFSET + $action{$state{$i}} + $test{$1}) .
chr(BASE_OFFSET + $index{$2}) .
chr(BASE_OFFSET + $index{$3});
}
else
{
$dfa .= chr(BASE_OFFSET + $action{$state{$i}}) .
chr(BASE_OFFSET + $index{$edge{$i}}) .
'X';
}
}
# }}}
# {{{ Output DFA
if( $#ARGV == -1 )
{
print "dfa = $dfa\n",
"txt_init = ", $index{$TXT_INIT}, "\n",
"pgm_init = ", $index{$PGM_INIT}, "\n",
"ppm_init = ", $index{$PPM_INIT}, "\n";
exit 0;
}
if( $#ARGV < 1 || $ARGV[0] !~ /txt|ppm|pgm/ )
{
die "$0 <-ppm> [output.ppm]\n" .
"$0 <-pgm> [output.pgm]\n" .
"$0 <-txt> [output.txt]\n";
}
# }}}
# {{{ Simulate DFA
# Load input
my $file;
open $file, "< $ARGV[1]" or die $!;
my $data = join '', <$file>;
close $file;
# Check header
my $width = 0;
if( $ARGV[0] =~ /ppm|pgm/ )
{
if( $data =~ /^P[65]\n(\d+) \d+\n\d+\n/s )
{
$width = $1;
}
else
{
die "Error parsing header from $ARGV[1]\n";
}
}
# Run DFA and write output
if( $#ARGV >= 2 && $ARGV[2] ne "-" )
{
open $file, "> $ARGV[2]" or die $!;
}
else
{
$file = *STDOUT;
}
my $ptr;
if( $ARGV[0] =~ /txt/ ) { $ptr = $index{$TXT_INIT}; }
elsif( $ARGV[0] =~ /ppm/ ) { $ptr = $index{$PPM_INIT}; }
elsif( $ARGV[0] =~ /pgm/ ) { $ptr = $index{$PGM_INIT}; }
else { die; }
my $x = 3;
my $c = undef;
my @input = map {chr} unpack 'C*', $data;
for(;;)
{
$ptr *= 3;
# Execute commands at current state
my $cmd = ord(substr($dfa, $ptr, 1));
if( $cmd & 1 ) { print $file $c; }
if( $cmd & 16 ) { $x = $width; }
if( $cmd & 2 ) { $x--; }
# Read input
last unless scalar @input;
$c = shift @input;
# Transition to next state
$ptr += ($cmd & 8) ? ($c eq "\n" ? 1 : 2) :
($cmd & 4) ? ($x != 0 ? 1 : 2) :
1;
$ptr = ord(substr($dfa, $ptr, 1)) - BASE_OFFSET;
}
close $file;
# }}}