#!/usr/bin/perl
# cs-dfa1.pl - Don Yang (uguu.org)
# Compress whitespace and remove comments.
#
# The filter is a table-driven state machine.  Input is read one byte at a
# time, each byte is mapped to an input class through %InputMap, and the
# (state, class) pair selects a next state from %DFA and a list of output
# actions from %Output.
#
# 04/27/05

use strict;
use warnings;

my (%DFA, %Output);
my (%InputMap, %OutputMap);     # %OutputMap is declared but not used below

# $DFA{state}{input} = nextstate
# $Output{state}{input} = output set
#
# The '' input is the per-state default: it is used whenever a state has no
# explicit entry for the current input class.
#
# An output set is a whitespace-separated list of actions (see RunDFA):
#   char         emit the current input character
#   set-newline  remember that the whitespace being skipped held a newline
#   space        emit the pending separator: "\n" if set-newline was seen,
#                otherwise " "
#   slash        emit a "/" that was held back in case it started a comment

# Ordinary code, no pending whitespace.
$DFA{'init'}{'slash'}           = 'slash0';
$DFA{'init'}{'single-quote'}    = 'single-quote';
$DFA{'init'}{'double-quote'}    = 'double-quote';
$DFA{'init'}{'space'}           = 'space';
$DFA{'init'}{'newline'}         = 'space';
$DFA{'init'}{''}                = 'init';
$Output{'init'}{'slash'}        = '';
$Output{'init'}{'single-quote'} = 'char';
$Output{'init'}{'double-quote'} = 'char';
$Output{'init'}{'space'}        = '';
$Output{'init'}{'newline'}      = 'set-newline';
$Output{'init'}{''}             = 'char';

# '   (inside a single-quoted literal: everything is copied verbatim)
$DFA{'single-quote'}{'single-quote'} = 'init';
$DFA{'single-quote'}{'backslash'}    = 'single-quote-escape';
$DFA{'single-quote'}{''}             = 'single-quote';
$Output{'single-quote'}{''}          = 'char';

# ' \   (character following a backslash inside a single-quoted literal)
$DFA{'single-quote-escape'}{''}    = 'single-quote';
$Output{'single-quote-escape'}{''} = 'char';

# "   (inside a double-quoted literal: everything is copied verbatim)
$DFA{'double-quote'}{'double-quote'} = 'init';
$DFA{'double-quote'}{'backslash'}    = 'double-quote-escape';
$DFA{'double-quote'}{''}             = 'double-quote';
$Output{'double-quote'}{''}          = 'char';

# " \   (character following a backslash inside a double-quoted literal)
$DFA{'double-quote-escape'}{''}    = 'double-quote';
$Output{'double-quote-escape'}{''} = 'char';

# _   (whitespace seen but not yet emitted)
$DFA{'space'}{'space'}        = 'space';
$DFA{'space'}{'newline'}      = 'space';
$DFA{'space'}{'single-quote'} = 'single-quote';
$DFA{'space'}{'double-quote'} = 'double-quote';
$DFA{'space'}{'slash'}        = 'slash';
$DFA{'space'}{''}             = 'init';
$Output{'space'}{'space'}     = '';
$Output{'space'}{'newline'}   = 'set-newline';
$Output{'space'}{'slash'}     = '';
$Output{'space'}{''}          = 'space char';

# /   (slash held back, no pending whitespace before it)
$DFA{'slash0'}{'star'}         = 'c-comment';
$DFA{'slash0'}{'slash'}        = 'cpp-comment';
$DFA{'slash0'}{'single-quote'} = 'single-quote';
$DFA{'slash0'}{'double-quote'} = 'double-quote';
$DFA{'slash0'}{''}             = 'init';
$Output{'slash0'}{'star'}      = '';
$Output{'slash0'}{'slash'}     = '';
$Output{'slash0'}{''}          = 'slash char';

# _ /   (slash held back, with pending whitespace before it)
$DFA{'slash'}{'star'}         = 'c-comment';
$DFA{'slash'}{'slash'}        = 'cpp-comment';
$DFA{'slash'}{'single-quote'} = 'single-quote';
$DFA{'slash'}{'double-quote'} = 'double-quote';
$DFA{'slash'}{''}             = 'init';
$Output{'slash'}{'star'}      = '';
$Output{'slash'}{'slash'}     = '';
$Output{'slash'}{''}          = 'space slash char';

# / *   (inside a block comment: nothing is emitted)
$DFA{'c-comment'}{'star'} = 'end-comment';
$DFA{'c-comment'}{''}     = 'c-comment';
$Output{'c-comment'}{''}  = '';

# / * *   (star seen inside a block comment; a slash now ends the comment)
$DFA{'end-comment'}{'star'}  = 'end-comment';
$DFA{'end-comment'}{'slash'} = 'space';
$DFA{'end-comment'}{''}      = 'c-comment';
$Output{'end-comment'}{''}   = '';

# / /   (inside a line comment: nothing is emitted until the newline)
$DFA{'cpp-comment'}{'newline'}    = 'space';
$DFA{'cpp-comment'}{''}           = 'cpp-comment';
$Output{'cpp-comment'}{'newline'} = 'set-newline';
$Output{'cpp-comment'}{''}        = '';

# Input character -> input class.  Characters not listed here use the ''
# (default) class.
%InputMap = (
    " "  => 'space',
    "\t" => 'space',
    "\n" => 'newline',
    "\r" => 'newline',
    "/"  => 'slash',
    "\\" => 'backslash',
    "'"  => 'single-quote',
    "\"" => 'double-quote',
    "*"  => 'star'
);

# Resolve a (state, input class) pair to (next state, output action list),
# falling back to the state's '' entry when there is no explicit one.
sub _Lookup {
    my ($state, $input) = @_;

    my $next = exists $DFA{$state}{$input}
        ? $DFA{$state}{$input}
        : $DFA{$state}{''};
    my $actions = exists $Output{$state}{$input}
        ? $Output{$state}{$input}
        : $Output{$state}{''};

    return ($next, $actions);
}

# Sanity-check the tables and print usage statistics.
#
# Reports every transition whose target is not itself a key of %DFA, then
# prints the number of states and distinct input classes, how many
# (state, input) pairs lead to each target state, and how many pairs trigger
# each output action.  All sections are printed in sorted order so that the
# report is the same from run to run.
sub CheckDFA {
    my (%input, %target, %output);

    foreach my $state (sort keys %DFA) {
        foreach my $in (sort keys %{ $DFA{$state} }) {
            $input{$in}++;
            unless (exists $DFA{ $DFA{$state}{$in} }) {
                print "($state, $in) leads to undefined state ",
                      $DFA{$state}{$in}, "\n";
            }
        }
    }
    print STDOUT scalar(keys %DFA), " states, ",
                 scalar(keys %input), " inputs\n";

    # Expand every state over the full set of input classes, applying the
    # '' default where a state has no explicit entry.
    foreach my $state (keys %DFA) {
        foreach my $in (keys %input) {
            my ($next, $actions) = _Lookup($state, $in);
            $target{$next}++;
            foreach my $action (split /\s+/, $actions) {
                $output{$action}++;
            }
        }
    }

    print "targets:\n";
    foreach my $name (sort keys %target) {
        print "\t$name = ", $target{$name}, "\n";
    }

    print "outputs:\n";
    foreach my $name (sort keys %output) {
        print "\t$name = ", $output{$name}, "\n";
    }

    return;
}

# Filter the input (files named in @ARGV, or standard input) through the
# state machine and print the result, followed by one final newline.
#
# Dies if the tables contain an output action this interpreter does not know.
sub RunDFA {
    my $state   = 'init';
    my $newline = 0;    # true once the whitespace being skipped held a newline

    while (my $line = <>) {
        foreach my $byte (unpack 'C*', $line) {
            my $char  = chr $byte;
            my $input = $InputMap{$char} || '';
            my ($next, $actions) = _Lookup($state, $input);

            foreach my $action (split /\s+/, $actions) {
                if ($action eq 'char') {
                    print $char;
                }
                elsif ($action eq 'set-newline') {
                    $newline = 1;
                }
                elsif ($action eq 'space') {
                    my $separator = $newline ? "\n" : " ";
                    print $separator;
                }
                elsif ($action eq 'slash') {
                    print "/";
                }
                else {
                    die "RunDFA: unknown output action '$action'"
                      . " in state '$state'\n";
                }
            }

            $state   = $next;
            $newline = 0 if $state eq 'init';
        }
    }

    # A slash at the very end of the input is still being held back in case
    # it started a comment.  Emit it (and the separator pending before it)
    # rather than silently dropping it.
    if ($state eq 'slash') {
        my $separator = $newline ? "\n" : " ";
        print $separator, "/";
    }
    elsif ($state eq 'slash0') {
        print "/";
    }

    print "\n";
    return;
}

# Encode the whole machine as one quoted string literal and print it.
#
# The literal holds one character per (state, input class) pair, states in
# %StateMap order and, within a state, input classes in %EdgeMap order.  Each
# character code is
#     (StateMap{next state} << $Shift) | OR of PrintMap{action}
# After the literal, its length (quotes and trailing newline included) and
# the number of entries that were emitted as a plain space are printed.
sub CompileDFA {
    my $Shift = 3;

    my %PrintMap = (
        'space'       => 0x04,
        'set-newline' => 0x80,
        'char'        => 0x01,
        'slash'       => 0x02,
    );
    my %StateMap = (
        'init'                => 9,
        'c-comment'           => 4,
        'single-quote'        => 7,
        'double-quote'        => 8,
        'cpp-comment'         => 5,
        'space'               => 10,
        'end-comment'         => 6,
        'single-quote-escape' => 1,
        'double-quote-escape' => 3,
        'slash'               => 0,
        'slash0'              => 2
    );
    my %EdgeMap = (
        'space'        => 1,
        'newline'      => 0,
        'slash'        => 3,
        'backslash'    => 4,
        'single-quote' => 5,
        'double-quote' => 6,
        'star'         => 7,
        ''             => 2
    );

    my @states = sort { $StateMap{$a} <=> $StateMap{$b} } keys %StateMap;
    my @edges  = sort { $EdgeMap{$a} <=> $EdgeMap{$b} } keys %EdgeMap;

    my $str   = "\"";
    my $space = 0;

    foreach my $state (@states) {
        foreach my $edge (@edges) {
            my ($next, $actions) = _Lookup($state, $edge);

            my $code = $StateMap{$next} << $Shift;
            foreach my $action (split /\s+/, $actions) {
                $code |= $PrintMap{$action};
            }

            if ($code == 9) {
                $str .= '\t';
            }
            elsif ($code == 10) {
                $str .= '\n';
            }
            elsif ($code == 13) {
                $str .= '\r';
            }
            elsif ($code == 34) {
                $str .= '\"';
            }
            elsif ($code == 92) {
                # Two backslashes: a lone one would escape whatever
                # character follows it in the generated literal.
                $str .= '\\\\';
            }
            elsif ($code == 32) {
                $str .= ' ';
                $space++;
            }
            elsif ($code < 33 || $code > 126) {
                # NOTE(review): if this literal is meant for a C compiler, a
                # \xNN escape directly followed by a hex-digit character is
                # read as one longer escape -- confirm the target language.
                $str .= sprintf '\x%02x', $code;
            }
            else {
                $str .= chr $code;
            }
        }
    }
    $str .= "\"\n";

    print "$str\n", length($str), " - $space\n";
    return;
}

# Change the constant to 0 to check and compile the tables instead of
# filtering input.
if (1) {
    RunDFA();
}
else {
    CheckDFA();
    CompileDFA();
}