#!/usr/bin/perl -w # C pre-preprocessor. The main purpose of this is to process output # generated by our encoders to make them more palatable to real # preprocessors, and also to help with debugging. It can evaluation # of all preprocessor directives. But no macro expansion, no comment # parsing, no string parsing, and really pretty much nothing else # other than evaluating preprocessor directives. # # Besides all the missing bits, it does have these extra features not # typically found in other preprocessors: # # - Entire source file is read only once into memory before all # preprocessing is done. This means "#include __FILE__" works even if # input is fed from a pipe. # # - "#dump regex" will dump all currently defined macros matching "regex". # # - Running with "-trace" will trace input lines. use strict; use constant MAX_INCLUDE_DEPTH => 20; # Global stats and options. my $include_count = -1; my $max_include_depth = 0; my $max_conditional_depth = 0; my $trace = 0; # Push state to conditional stack. sub Push($$$) { my ($stack, $line_num, $item) = @_; push @$stack, [$line_num, $item]; if( $max_conditional_depth < scalar @$stack ) { $max_conditional_depth = scalar @$stack; } } # Evaluate a constant expression. sub Evaluate($$) { my ($macros, $expr) = @_; # Apply macro substitutions. my $original_expr = $expr; my %used_macros = (); while( $expr =~ /^(.*?)([[:alpha:]]\w*)(.*)$/ ) { if( exists $used_macros{$2} ) { die "Infinite loop while substituting macros inside expression: " . $original_expr . "\n"; } $expr = $1 . (exists $$macros{$2} ? $$macros{$2} : "") . $3; $used_macros{$2} = 1; } return eval $expr; } # Process a single file recursively and write output to stdout. sub Process($$$); sub Process($$$) { my ($depth, $macros, $input) = @_; # Update global stats. $include_count++; if( $max_include_depth < $depth ) { $max_include_depth = $depth; if( $max_include_depth > MAX_INCLUDE_DEPTH ) { die "Include stack too deep: $depth\n"; } } # Conditional stack status for handling #if-#endif blocks. my @stack = (); my @line_num_stack = (); use constant SKIP_SECTION => 1; use constant SKIP_TO_ENDIF => 2; use constant PROCESS_SECTION => 3; for(my $i = 0; $i < scalar @$input; $i++) { my $line = $$input[$i]; my $line_num = $i + 1; my $current_section = (scalar @stack ? $stack[$#stack][1] : PROCESS_SECTION); if( $trace ) { printf '%s%s%4d:%s', " " x $depth, ($current_section == PROCESS_SECTION ? "|" : " "), $line_num, $line; } if( $line =~ /^\s*#.*\\\s*$/s ) { die "$line_num: This preprocessor does not support " . "line continuations:\n$line"; } if( $line =~ /^\s*#\s*(ifdef|ifndef|if|elif|else|endif)(.*)/ ) { # All directives that operate on the conditional stack. my ($directive, $expr) = ($1, $2); if( $expr =~ /\S/ ) { $expr =~ s/^\s*(\S.*)/$1/s; $expr =~ s/(.*\S)\s*$/$1/s; } if( $directive eq "if" ) { my $nested_section = $current_section == PROCESS_SECTION ? Evaluate($macros, $expr) ? PROCESS_SECTION : SKIP_SECTION : SKIP_TO_ENDIF; Push(\@stack, $line_num, $nested_section); } elsif( $directive eq "elif" ) { die "$line_num: Unmatched #elif\n" unless scalar @stack; my $nested_section = $current_section == SKIP_SECTION ? Evaluate($macros, $expr) ? PROCESS_SECTION : SKIP_SECTION : SKIP_TO_ENDIF; $stack[$#stack][1] = $nested_section; } elsif( $directive eq "else" ) { die "$line_num: Unmatched #elif\n" unless scalar @stack; my $nested_section = $current_section == SKIP_SECTION ? PROCESS_SECTION : SKIP_TO_ENDIF; $stack[$#stack][1] = $nested_section; } elsif( $directive eq "endif" ) { die "$line_num: Unmatched #endif\n" unless scalar @stack; pop @stack; } else # ifdef or ifndef { my $nested_section = $current_section == PROCESS_SECTION ? ($directive eq "ifdef" ? exists $$macros{$expr} : !exists $$macros{$expr}) ? PROCESS_SECTION : SKIP_SECTION : SKIP_TO_ENDIF; Push(\@stack, $line_num, $nested_section); } } elsif( $current_section == PROCESS_SECTION ) { # Other directives that do not involve the conditional stack. # These are only processed if current section is in a true branch. if( $line =~ /^\s*#\s*define\s+(\S.*)/ ) { my $expr = $1; $expr =~ s/(.*\S)\s*$/$1/s; if( $expr =~ /^\S+\(/ ) { die "$line_num: This preprocessor does not support " . "macros with arguments:\n$line"; } if( $expr =~ /^(\S+)\s+(\S.*)/ ) { $$macros{$1} = $2; } else { $$macros{$expr} = ""; } } elsif( $line =~ /^\s*#\s*undef\s+(\S+)/ ) { delete $$macros{$1}; } elsif( $line =~ /^\s*#\s*include\s*(\S+)/ ) { my $file = $1; my %used_macros = (); while( exists $$macros{$file} ) { if( $used_macros{$file}++ ) { die "$line_num: Recursive macro reference:\n$line"; } $file = $$macros{$file}; } if( $file eq "__FILE__" ) { Process($depth + 1, $macros, $input); } else { print $line; } } elsif( $line =~ /^\s*#\s*warning\b/ ) { print STDERR $line; } elsif( $line =~ /^\s*#\s*error\b/ ) { die "$line_num: Stopped processing due to #error:\n$line"; } elsif( $line =~ /^\s*#\s*dump\s+(\S.*)/ ) { # "dump" directive is our custom extension. my $pattern = $1; $pattern =~ s/^(.*\S)\s*$/$1/; print "# macros:"; foreach my $m (sort keys %$macros) { if( $m =~ /$pattern/ ) { print " $m"; } } print "\n"; } else # Non-preprocessor lines, or directives that we don't care about { print $line; } } } if( scalar @stack ) { my $error = ""; foreach my $s (@stack) { my @item = @$s; $error .= "$item[0]: Unmatched conditional\n"; } die $error; } } # Process command line options macro definitions. my %macros = (); while( $#ARGV >= 0 && $ARGV[0] =~ /^-+(\S+)/ ) { my $option = $1; if( $option =~ /^D(\S+)=(.*)/ ) { $macros{$1} = $2; } elsif( $option =~ /^D(\S+)/ ) { $macros{$1} = ""; } elsif( $option =~ /t(race)?/ ) { $trace = 1; } shift @ARGV; } # Load input lines. my @input_lines = <>; # Run perprocessor on loaded lines. Process(0, \%macros, \@input_lines); print "# /* include_count = $include_count */\n", "# /* max_include_depth = $max_include_depth */\n", "# /* max_conditional_depth = $max_conditional_depth */\n"; exit 0;