#!/usr/bin/perl
# cs-dfa2.pl - Don Yang (uguu.org)
# Compress whitespace and remove comments.
#
# The filter is a table-driven DFA.  Each input byte is classified into an
# input class (%InputMap) and then looked up in two parallel tables:
#
#   $DFA{state}{input}    = next state
#   $Output{state}{input} = space-separated list of tokens to print
#
# In both tables the '' input key is the per-state default, used when the
# state has no entry for the specific input class.  An output token is
# either 'char' (print the current input byte) or a key of %OutputMap.
#
# 04/27/05

use strict;
use warnings;

my (%DFA, %Output);
my (%InputMap, %OutputMap);

# Initial state: ordinary text is copied through; whitespace and '/' are
# held back until the following byte shows what they belong to.
$DFA{'init'}{'slash'} = 'slash';
$DFA{'init'}{'single-quote'} = 'single-quote';
$DFA{'init'}{'double-quote'} = 'double-quote';
$DFA{'init'}{'space'} = 'space';
$DFA{'init'}{'newline'} = 'newline';
$DFA{'init'}{''} = 'init';
$Output{'init'}{'slash'} = '';
$Output{'init'}{'single-quote'} = 'char';
$Output{'init'}{'double-quote'} = 'char';
$Output{'init'}{'space'} = '';
$Output{'init'}{'newline'} = '';
$Output{'init'}{''} = 'char';

# '
$DFA{'single-quote'}{'single-quote'} = 'init';
$DFA{'single-quote'}{'backslash'} = 'single-quote-escape';
$DFA{'single-quote'}{''} = 'single-quote';
$Output{'single-quote'}{''} = 'char';

# ' \
$DFA{'single-quote-escape'}{''} = 'single-quote';
$Output{'single-quote-escape'}{''} = 'char';

# "
$DFA{'double-quote'}{'double-quote'} = 'init';
$DFA{'double-quote'}{'backslash'} = 'double-quote-escape';
$DFA{'double-quote'}{''} = 'double-quote';
$Output{'double-quote'}{''} = 'char';

# " \
$DFA{'double-quote-escape'}{''} = 'double-quote';
$Output{'double-quote-escape'}{''} = 'char';

# _   (one or more pending spaces/tabs)
$DFA{'space'}{'space'} = 'space';
$DFA{'space'}{'newline'} = 'newline';
$DFA{'space'}{'single-quote'} = 'single-quote';
$DFA{'space'}{'double-quote'} = 'double-quote';
$DFA{'space'}{'slash'} = 'space-slash';
$DFA{'space'}{''} = 'init';
$Output{'space'}{'space'} = '';
$Output{'space'}{'newline'} = '';
$Output{'space'}{'slash'} = '';
$Output{'space'}{''} = 'space char';

# \n  (pending whitespace that included a line break)
$DFA{'newline'}{'space'} = 'newline';
$DFA{'newline'}{'newline'} = 'newline';
$DFA{'newline'}{'single-quote'} = 'single-quote';
$DFA{'newline'}{'double-quote'} = 'double-quote';
$DFA{'newline'}{'slash'} = 'newline-slash';
$DFA{'newline'}{''} = 'init';
$Output{'newline'}{'space'} = '';
$Output{'newline'}{'newline'} = '';
$Output{'newline'}{'slash'} = '';
$Output{'newline'}{''} = 'newline char';

# /
$DFA{'slash'}{'star'} = 'space-c-comment';
$DFA{'slash'}{'slash'} = 'space-cpp-comment';
$DFA{'slash'}{'single-quote'} = 'single-quote';
$DFA{'slash'}{'double-quote'} = 'double-quote';
$DFA{'slash'}{''} = 'init';
$Output{'slash'}{'star'} = '';
$Output{'slash'}{'slash'} = '';
$Output{'slash'}{''} = 'slash char';

# _ /
$DFA{'space-slash'}{'star'} = 'space-c-comment';
$DFA{'space-slash'}{'slash'} = 'space-cpp-comment';
# A quote right after the slash must open a literal, exactly as in the
# plain 'slash' state; falling back to 'init' here would make the closing
# quote look like an opening one.
$DFA{'space-slash'}{'single-quote'} = 'single-quote';
$DFA{'space-slash'}{'double-quote'} = 'double-quote';
$DFA{'space-slash'}{''} = 'init';
$Output{'space-slash'}{'star'} = '';
$Output{'space-slash'}{'slash'} = '';
$Output{'space-slash'}{''} = 'space slash char';

# / *
$DFA{'space-c-comment'}{'star'} = 'space-end-comment';
$DFA{'space-c-comment'}{''} = 'space-c-comment';
$Output{'space-c-comment'}{''} = '';

# / * *
$DFA{'space-end-comment'}{'star'} = 'space-end-comment';
$DFA{'space-end-comment'}{'slash'} = 'space';
$DFA{'space-end-comment'}{''} = 'space-c-comment';
$Output{'space-end-comment'}{''} = '';

# / /
$DFA{'space-cpp-comment'}{'newline'} = 'newline';
$DFA{'space-cpp-comment'}{''} = 'space-cpp-comment';
$Output{'space-cpp-comment'}{''} = '';

# \n /
$DFA{'newline-slash'}{'star'} = 'newline-c-comment';
$DFA{'newline-slash'}{'slash'} = 'newline-cpp-comment';
# Same quote handling as the 'slash' and 'space-slash' states.
$DFA{'newline-slash'}{'single-quote'} = 'single-quote';
$DFA{'newline-slash'}{'double-quote'} = 'double-quote';
$DFA{'newline-slash'}{''} = 'init';
$Output{'newline-slash'}{'star'} = '';
$Output{'newline-slash'}{'slash'} = '';
$Output{'newline-slash'}{''} = 'newline slash char';

# \n / *
$DFA{'newline-c-comment'}{'star'} = 'newline-end-comment';
$DFA{'newline-c-comment'}{''} = 'newline-c-comment';
$Output{'newline-c-comment'}{''} = '';

# \n / * *
$DFA{'newline-end-comment'}{'star'} = 'newline-end-comment';
$DFA{'newline-end-comment'}{'slash'} = 'newline';
$DFA{'newline-end-comment'}{''} = 'newline-c-comment';
$Output{'newline-end-comment'}{''} = '';

# \n / /
$DFA{'newline-cpp-comment'}{'newline'} = 'newline';
$DFA{'newline-cpp-comment'}{''} = 'newline-cpp-comment';
$Output{'newline-cpp-comment'}{''} = '';

# Input byte -> input class.  Bytes not listed here use the '' class.
%InputMap = (
    " "  => 'space',
    "\t" => 'space',
    "\n" => 'newline',
    "\r" => 'newline',
    "/"  => 'slash',
    "\\" => 'backslash',
    "'"  => 'single-quote',
    "\"" => 'double-quote',
    "*"  => 'star',
);

# Output token -> text to print.  ('char' is handled separately.)
%OutputMap = (
    'slash'   => "/",
    'space'   => " ",
    'newline' => "\n",
);

# Lookup(\%table, $state, $class)
# Return $table{$state}{$class} if that entry exists, otherwise the state's
# default entry $table{$state}{''}.
sub Lookup {
    my ($table, $state, $class) = @_;
    my $row = $table->{$state};
    return exists $row->{$class} ? $row->{$class} : $row->{''};
}

# CheckDFA()
# Print a consistency report for the tables: transitions that lead to a
# state without a row in %DFA, the number of states and input classes, how
# many (state, input) pairs lead to each target state, and how often each
# output token is used.
sub CheckDFA {
    my %input;

    # Collect every input class mentioned by any state and flag dangling
    # transitions along the way.
    foreach my $state (keys %DFA) {
        foreach my $class (keys %{ $DFA{$state} }) {
            $input{$class}++;
            my $dest = $DFA{$state}{$class};
            unless (exists $DFA{$dest}) {
                print "($state, $class) leads to undefined state ",
                      $dest, "\n";
            }
        }
    }
    print STDOUT scalar(keys %DFA), " states, ",
                 scalar(keys %input), " inputs\n";

    # Expand the defaults: count over the full state x input matrix.
    my (%target, %output);
    foreach my $state (keys %DFA) {
        foreach my $class (keys %input) {
            $target{ Lookup(\%DFA, $state, $class) }++;
            $output{$_}++
                foreach split /\s+/, Lookup(\%Output, $state, $class);
        }
    }

    print "targets:\n";
    foreach my $name (sort keys %target) {
        print "\t$name = ", $target{$name}, "\n";
    }

    # Sorted so the report is stable from run to run.
    print "outputs:\n";
    foreach my $name (sort keys %output) {
        print "\t$name = ", $output{$name}, "\n";
    }
}

# RunDFA()
# Filter the files named in @ARGV (or standard input) through the DFA,
# byte by byte, printing the result to the currently selected handle.
# The state carries over from one line to the next.  Whatever is still
# pending at end of input (held-back whitespace or a lone '/') is not
# printed; a single "\n" is printed instead.
sub RunDFA {
    my $state = 'init';

    while (my $line = <>) {
        foreach my $byte (unpack 'C*', $line) {
            my $char  = chr $byte;
            my $class = $InputMap{$char} || '';

            # Both lookups use the state we are leaving.
            my $emit = Lookup(\%Output, $state, $class);
            foreach my $token (split /\s+/, $emit) {
                my $text = $token eq 'char' ? $char : $OutputMap{$token};
                print $text;
            }
            $state = Lookup(\%DFA, $state, $class);
        }
    }
    print "\n";
}

# CompileDFA()
# Print the tables packed into one double-quoted string literal, followed
# by the literal's length and the number of plain spaces it contains.
# There is one byte per (state, input) pair, ordered by %StateMap number
# and then by %EdgeMap number.  Each byte is
#   (number of next state << $Shift) | OR of the %PrintMap output flags
sub CompileDFA {
    my $Shift = 3;
    my %PrintMap = (
        'space'   => 0x80,
        'newline' => 0x04,
        'slash'   => 0x02,
        'char'    => 0x01,
    );
    my %StateMap = (
        'init'                => 5,
        'single-quote-escape' => 2,
        'double-quote-escape' => 1,
        'slash'               => 0,
        'space-c-comment'     => 4,
        'newline-slash'       => 3,
        'single-quote'        => 9,
        'double-quote'        => 6,
        'space'               => 15,
        'newline'             => 11,
        'space-slash'         => 12,
        'space-cpp-comment'   => 8,
        'space-end-comment'   => 14,
        'newline-c-comment'   => 7,
        'newline-cpp-comment' => 13,
        'newline-end-comment' => 10,
    );
    my %EdgeMap = (
        'space'        => 1,
        'newline'      => 0,
        'slash'        => 4,
        'backslash'    => 6,
        'single-quote' => 5,
        'double-quote' => 3,
        'star'         => 7,
        ''             => 2,
    );

    # Bytes that get a two-character escape inside the literal.  The
    # backslash itself must be doubled, otherwise it would swallow the
    # byte that follows it.
    my %Escape = (
        9  => '\t',
        10 => '\n',
        13 => '\r',
        34 => '\"',
        92 => '\\\\',
    );

    my @states = sort { $StateMap{$a} <=> $StateMap{$b} } keys %StateMap;
    my @edges  = sort { $EdgeMap{$a}  <=> $EdgeMap{$b}  } keys %EdgeMap;

    my $str   = "\"";
    my $space = 0;
    foreach my $state (@states) {
        foreach my $edge (@edges) {
            my $code = $StateMap{ Lookup(\%DFA, $state, $edge) } << $Shift;
            $code |= $PrintMap{$_}
                foreach split /\s+/, Lookup(\%Output, $state, $edge);

            if (exists $Escape{$code}) {
                $str .= $Escape{$code};
            }
            elsif ($code == 32) {
                $str .= ' ';
                $space++;
            }
            elsif ($code < 33 || $code > 126) {
                # NOTE(review): if this literal is consumed by a C
                # compiler, a \xNN escape directly followed by a hex-digit
                # character is read as one longer escape -- confirm the
                # consumer copes with that.
                $str .= sprintf '\x%02x', $code;
            }
            else {
                $str .= chr $code;
            }
        }
    }
    $str .= "\"\n";
    print "$str\n", (length $str), " - $space\n";
}

# Mode switch: true runs the filter over the input; false prints the table
# report and the packed table instead.
use constant RUN_FILTER => 1;

# Run only when executed as a script, so the file can also be loaded
# (require/do) without consuming any input.
unless (caller) {
    if (RUN_FILTER) {
        RunDFA();
    }
    else {
        CheckDFA();
        CompileDFA();
    }
}