#include #define TAB_SIZE 8 /* Convert hexadecimal digit. */ static int Convert(char d) { return d >= '0' && d <= '9' ? d - '0' : (d >= 'a' && d <= 'f') || (d >= 'A' && d <= 'F') ? (d & 15) + 9 : -1; } /* Rewrite escape sequences in string. */ static void Unescape(char *r) { char *w = r; for(; *r; r++) { *w++ = *r != '\\' ? *r : *++r != 'n' ? *r != 'r' ? *r != 't' ? *r != 'b' ? *r != 'a' ? *r != 'v' ? *r != 'x' ? *r : Convert(r[1]) >= 0 ? Convert(r[2]) >= 0 ? (r += 2), Convert(r[-1]) * 16 + Convert(*r) : Convert(*++r) : 'x' : '\v' : '\a' : '\b' : '\t' : '\r' : '\n'; } *w = '\0'; } /* Write text to stdout. */ static void Output(const char *t) { int c; for(; (c = *t); t++) { c > 32 || c == 9 ? c >= 'S' && c <= 'Z' ? printf("$%c", c + 32) : putchar(c != 'P' ? c != 'Q' ? c != 'R' ? c : ',' : '\n' : ' ') : 0; } } int main(int argc, char **argv) { int n, s, length, bit, target; char *p, *q; if( argc != 2 && argc != 3 ) return printf("%s {msg1} [msg2]\n", *argv); /* Output header. */ #define Q(q) #q Output("#include/*Q" "S=\"*/Q" Q(static P int P l = 1 R I R i R o; int P main(int P O R char **s) {O=O?O: Q)); p = q = argv[1]; Unescape(p); if( argc > 2 ) { q = argv[2]; Unescape(q); } for(; *p || *q; p++, q++) { int c_tab = 1; /* Column number with tabs expanded. */ int c_space = 1; /* Column number with only character counts. */ /* Encode a single byte. First message is encoded using tab-expanded column numbers, second message is encoded using unexpanded column numbers. If bytes for both messages were identical, we only need to insert spaces until we get the desired column number. If they are different, we first brute-force search for the right mix of space and tabs to get the delta between the two bytes, then insert spaces to get the column numbers we wanted. We aim for the right delta first before getting the desired column numbers as opposed to brute-forcing both column numbers in one go, since doing the latter would require trying significantly more combinations. (We can also avoid brute-force by doing a bit of dynamic programming to pre-generate a table of tab+space mix, but the expectation is that the messages will be short anyways). To reduce brute-force search space size and line length, each byte is encoded in two nibbles. */ for(n = 4; n >= 0; n -= 4) { const int delta = ((*p >> n) - (*q >> n)) & 0x0f; /* Encoded sequence of space+tabs to be appended to achieve the desired delta. First character is the least significant bit and last character is the most significant "1" bit. Each "0" bit encodes a space and each "1" bit encodes a tab. We don't need a separate variable to store the sequence length since the highest bit is always "1". This is because all non-empty sequences will always end with a trailing tab, since trailing spaces do not affect deltas. */ int append = 0; if( delta != ((c_tab - c_space) & 15) ) { for(length = 0; !append; length++) { for(s = 0; s < (1 << length); s++) { int ct = c_tab; int cs; for(bit = 0; bit < length; bit++) { if( (s & (1 << bit)) != 0 ) ct += TAB_SIZE - ((ct - 1) % TAB_SIZE); else ct++; } cs = c_space + length; if( ((ct - cs) & 0x0f) == delta ) append = s; } } } /* Output adjustment sequence. */ for(; append != 0; append /= 2) { if( (append & 1) != 0 ) { Output("\t"); c_tab += TAB_SIZE - ((c_tab - 1) % TAB_SIZE); } else { Output("P"); c_tab++; } c_space++; } /* Increment column numbers until we get the desired bits. The -1 is to account for the expression prefix. */ target = ((*p >> n) - 1) & 0x0f; while( (c_tab & 0x0f) != target ) { Output("P"); c_tab++; c_space++; } /* Output expression. */ Output("O/0-"); c_tab += 4; c_space += 4; } Output("Q"); /* Once the shorter string has reached the end, pad all the remaining bytes with the longer string, so that all characters in the suffix do not have any deltas. */ if( *p == '\0' ) p = q; else if( *q == '\0' ) q = p; } /* Output footer. */ Output( Q( **s; /* Consume input until EOF. We need to consume all bytes even if we have already observed a NUL character, otherwise we might get a pipe error if the decoder returns before the compiler is done. */ while( (O = getchar()) - EOF ) { l = l ? l - 100 ? l - 105 ? O - 58 ? O > 47 && O < 58 ? (i = i * 10 + O - 48) R l : 10 - O ? 100 - O || l - 5 ? l : O : 1 : 2 - l ? l + 1 : (i = 0) + 3 : O - 118 ? 6 : o ? (I |= i & 15) ? (o = !putchar(I)) + 8 : 0 : 11 + (I = (i & 15) << (o = 4)) : O - 'i' ? 6 : O : 0; } return P 0; )); /* Output the footer in two pieces avoid having string literals longer than 509 bytes (for C90 compatibility). */ Output( "}/*\";" Q( W = X = Y = Z = 1; /* Ruby decoder entry point (enclosed by #{}). Note that ruby decoder expands tabs while perl does not. It would cost the same number of bytes to swap the interpretation of perl and ruby decoders, but we went with this setup because it seems more consistent lexicographically: gcc expands tabs while clang does not, and "gcc > clang" seems consistent with "ruby > perl". */ V = "#{W = 8; S.each_byte{|i| U = i; %q["; /* Perl decoder entry point (enclosed by brackets). */ if( V ) { foreach U (unpack "C*" R S) { ) "#];Q" Q( U == 47 && Z == 79 && print(0, ":" R Y + 1 R ":" R X R ":: div\n"); X += U == 9 ? W - ~-X % W : 1; Y += U == 10 ? X = 1 : 0; Z = U; } } ) "#\"#*/Q"); return 0; }