#include #include #define ENCODE_BUFFER_SIZE 0x10000 #define INPUT_BUFFER_SIZE (ENCODE_BUFFER_SIZE * 2) #define MAX_OFFSET 256 #define MIN_REPEAT 3 #define MAX_REPEAT (255 + MIN_REPEAT) #define MAX_STACK 999 /* Input byte. */ static int c; /* Ring buffer of input bytes. */ static char input[INPUT_BUFFER_SIZE]; static int i_history, i_begin, i_end; /* Ring buffer of output code offsets (measured in number of instructions). */ static int code_offset[ENCODE_BUFFER_SIZE]; /* Ring buffer of runtime stack sizes. */ static int stack_size[ENCODE_BUFFER_SIZE]; /* Output code size (number of instructions). */ static int code_size; /* Define a single header dictionary entry. */ #define D(d) ,#d /* Header text: - Uppercase letters causes a dictionary entry to be expanded recursively. - Tilde (~) causes the next letter to be copied as is. - Everything else is copied as is. */ static char *header_data[] = { /*M*/ ",", /*N*/ "Pzoltraak" /*O*/ D( ZifWUU Qjmp_bufPj;XmMIMi=1MlMk[999]={1<<30}M*o=k; Xmain(XdMchar**a){switch(S(j)){default:Yi;I=14; VP_P__LI~NE__VPpPT(I=_+1)VPqP(i=3M--R(jM*o--)):VNPcaseP_:i%2?YiMp:(_-4) %9?T(m|=1<<(_-I)):_-I-17?(putchar(m^32)MY*o)?p:q(l=m%256+3Mm=m/512+3Mi) ?(*o-=l)>0?m=i=0Mp:q((o[2]=*o 32 ) c >= 'M' && c <= 'Z' ? Expand(header_data[c - 'M']) : (void)putchar(c != '~' ? c : *++s); } } /* Output a set of lines. */ static void OutputInstruction(int i) { for(; i; i /= 2) puts(header_data[i & 1] + 1); } /* Count number of bits in a 8-bit value. */ static int BitCount(int i) { assert(i >= 0); assert(i < 256); i = (i & 0x55) + ((i >> 1) & 0x55); i = (i & 0x33) + ((i >> 2) & 0x33); i = (i & 0x0f) + ((i >> 4) & 0x0f); return i; } int main(int argc, char **argv) { /* Number of already-encoded bytes that are available for matching. */ int history_size; /* Offset of matched substring that will be referenced to replace buffered bytes. */ int matched_input_offset, tentative_input_offset; /* Offset of code that will generate the matched substring. */ int matched_code_offset, tentative_code_offset; /* Number of bytes in longest match. */ int matched_length; /* Number of bytes from start of matched string that overlaps with start of input string. */ int overlap; /* Number of bytes from start of match to current cursor position. */ int prefix_length; /* Number of times the prefix is repeated as part of a match. */ int depth; int i, j; Expand(header_data[2]); for(;;) { /* Fill ring buffer with enough bytes for the encoding process to consume, without overwriting old history entries. */ while( i_end - i_begin < ENCODE_BUFFER_SIZE ) { if( (c = getchar()) == EOF ) break; input[i_end++ % INPUT_BUFFER_SIZE] = c; } if( i_begin == i_end ) break; assert(i_history <= i_begin); assert(i_begin <= i_end); /* Expire old code entries that are too far away to be matched. */ while( i_history < i_begin ) { if( code_offset[i_history % ENCODE_BUFFER_SIZE] >= 0 && code_size - code_offset[i_history % ENCODE_BUFFER_SIZE] <= MAX_OFFSET ) { break; } i_history++; } /* Match input against already-encoded bytes. */ history_size = i_begin - i_history; assert(history_size >= 0); matched_code_offset = matched_input_offset = matched_length = -1; for(i = 0; i < history_size; i++) { /* Do not match against regions that was covered by repeat code, since there is no address to branch to. */ tentative_input_offset = i_history + i; tentative_code_offset = code_offset[tentative_input_offset % ENCODE_BUFFER_SIZE]; if( tentative_code_offset < 0 ) continue; for(j = 0; j < MAX_REPEAT; j++) { if( i_begin + j >= i_end ) break; if( input[(tentative_input_offset + j) % INPUT_BUFFER_SIZE] != input[(i_begin + j) % INPUT_BUFFER_SIZE] ) { break; } /* Verify that referencing the previous string here will not overflow the runtime stack. */ overlap = tentative_input_offset + j - i_begin; if( overlap < 0 ) { /* Matching some string before the current read position. Stack size is guaranteed to be defined. */ if( stack_size[(tentative_input_offset + j) % ENCODE_BUFFER_SIZE] + 2 >= MAX_STACK ) { break; } } else { /* Matching some string at or after the current read position. Stack size depends on how many times we have repeated the prefix. */ prefix_length = i_begin - tentative_input_offset; assert(prefix_length > 0); depth = overlap / prefix_length + 1; assert(depth > 0); if( stack_size[(tentative_input_offset + overlap % prefix_length) % ENCODE_BUFFER_SIZE] + 2 * depth >= MAX_STACK ) { break; } } } if( j > matched_length ) { /* Newly matched substring is better than previous match. */ matched_input_offset = tentative_input_offset; matched_code_offset = tentative_code_offset; matched_length = j; } else if( j == matched_length && BitCount(code_size - tentative_code_offset - 1) < BitCount(code_size - matched_code_offset - 1) ) { /* Newly matched substring is same length as the previous match. Prefer this new match if it costs fewer bits to encode it. */ matched_input_offset = tentative_input_offset; matched_code_offset = tentative_code_offset; matched_length = j; } } if( matched_length >= MIN_REPEAT ) { /* Use previously generated code to cover incoming bytes. */ assert(matched_length - MIN_REPEAT >= 0); assert(matched_length - MIN_REPEAT < 256); assert(code_size - matched_code_offset - 1 >= 0); assert(code_size - matched_code_offset - 1 < 256); OutputInstruction((matched_length - MIN_REPEAT) | ((code_size - matched_code_offset - 1) << 9) | (1 << 17)); /* Advance ring buffer past encoded region. Also record address for start of region so that it can be used for future matches, while removing addresses for all subsequent bytes. */ for(i = 0; i < matched_length; i++, i_begin++) { stack_size[i_begin % ENCODE_BUFFER_SIZE] = stack_size[(matched_input_offset + i) % ENCODE_BUFFER_SIZE] + 2; assert(stack_size[i_begin % ENCODE_BUFFER_SIZE] >= 3); assert(stack_size[i_begin % ENCODE_BUFFER_SIZE] % 2 == 1); code_offset[i_begin % ENCODE_BUFFER_SIZE] = i > 0 ? -1 : code_size; } code_size += 2; } else { /* Did not find a sufficiently long entry in history, output a literal byte. */ code_offset[i_begin % ENCODE_BUFFER_SIZE] = code_size++; stack_size[i_begin % ENCODE_BUFFER_SIZE] = 1; OutputInstruction( (input[i_begin++ % INPUT_BUFFER_SIZE] & 0xff) ^ 0x120); } } return 0; }