/* encode.c - Don Yang (uguu.org) 2013-09-07

   Reads a file, builds a dictionary of the distinct byte values it contains,
   and writes a run-length/quote encoded form of the file to stdout using only
   printable characters.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct
{
   char *data;       /* File contents */
   int size;         /* File size */
   int dict[256];    /* Character to translated offset mappings */
   int dict_size;    /* Number of unique characters */
} InputData;

/* Load file to memory, returns NULL on error.

   On success the caller owns the returned structure and must free both
   data->data and data.  Besides loading the bytes, this fills in
   dict[]/dict_size (each byte value that occurs in the file is assigned a
   consecutive offset in increasing byte order) and prints the file size and
   dictionary to stdout.
*/
static InputData *LoadFile(FILE *infile)
{
   enum { READ_BATCH_SIZE = 0x10000 };
   int buffer_size, read_size, i;
   char *buffer;
   InputData *data;

   data = calloc(1, sizeof(InputData));
   if( data == NULL )
   {
      puts("Out of memory");
      return NULL;
   }

   /* Load file to memory, growing the buffer one batch at a time */
   buffer_size = 0;
   for(;;)
   {
      /* Sizes are tracked as int, refuse input that would overflow it */
      if( buffer_size > INT_MAX - READ_BATCH_SIZE )
      {
         puts("File too large");
         free(data->data);
         free(data);
         return NULL;
      }
      buffer_size += READ_BATCH_SIZE;

      /* Keep the old pointer until realloc is known to have succeeded */
      buffer = realloc(data->data, (size_t)buffer_size);
      if( buffer == NULL )
      {
         puts("Out of memory");
         free(data->data);
         free(data);
         return NULL;
      }
      data->data = buffer;

      read_size = (int)fread(data->data + data->size,
                             1, READ_BATCH_SIZE, infile);
      data->size += read_size;

      /* A short read means end of file or a read error */
      if( read_size < READ_BATCH_SIZE )
         break;
   }
   if( ferror(infile) )
   {
      puts("Error reading input");
      free(data->data);
      free(data);
      return NULL;
   }
   printf("size = %d\n", data->size);

   /* Find all characters that are used */
   for(i = 0; i < data->size; i++)
      data->dict[data->data[i] & 0xff] = 1;

   /* Build dictionary offsets */
   printf("dictionary = [");
   for(i = 0; i < 256; i++)
   {
      if( data->dict[i] != 0 )
      {
         data->dict[i] = data->dict_size++;
         if( i >= 32 && i < 127 )
         {
            printf("%c", i);
         }
         else
         {
            if( i == '\n' )
               printf("\\n");
            else
               printf("\\x%02x", i);
         }
      }
   }
   printf("]\ndictionary_size = %d\n", data->dict_size);
   return data;
}

/* Encode file using preset parameters.

   Output contains two kinds of codes:

      x <chars...> = quote next (x - x1) + 1 chars
      y <char>     = repeat next char (y - y1) + 3 times

   Where x is in a range of [x1..x2] and y is in a range of [y1..y2], and
   both ranges do not overlap.  Characters that follow a code are written as
   x1 + (offset of the character in the dictionary).

   At most max_output_size bytes are written to output.  After encoding, a
   single key in [0..127] is searched for such that XORing every output byte
   with it leaves only characters in the range 33..126 other than '"' and
   '\\'.  If one is found, output is rewritten with that key applied,
   *output_size is set to the encoded length, *xor is set to the key, and a
   summary line is printed to stdout.

   On failure (encoded data would exceed max_output_size, or no suitable key
   exists), both *output_size and *xor are set to zero.
*/
static void EncodeFileWithParameters(const InputData *input,
                                     int max_output_size,
                                     int x1, int x2,
                                     int y1, int y2,
                                     char *output,
                                     int *output_size,
                                     int *xor)
{
   const int max_quote = x2 - x1 + 1;
   const int max_repeat = y2 - y1 + 3;
   char *encoded = output;
   int i, j, c;

   *output_size = *xor = 0;
   for(i = 0; i < input->size;)
   {
      /* Get length of repeated span */
      for(j = i + 1; j < input->size && j - i < max_repeat; j++)
      {
         if( input->data[i] != input->data[j] )
            break;
      }

      /* j = one character beyond end of current repeated span.
         Encode this as a repeated span if it's long enough. */
      if( j - i >= 3 )
      {
         /* Overflow check.  Due to this overflow check and a similar one
            few lines below, encoded size must not be larger than
            max_output_size, otherwise this encoder outputs nothing. */
         max_output_size -= 2;
         if( max_output_size < 0 )
            return;

         /* Add code for repeated span */
         *(encoded++) = (char)(y1 + j - i - 3);
         *(encoded++) = (char)(x1 + input->dict[input->data[i] & 0xff]);
         i = j;
         continue;
      }

      /* Quote the next few characters until we get 3 characters in a row.
         Only positions that are actually inside the input are compared, so
         no byte value can be mistaken for an end-of-input marker. */
      for(j = i + 1; j < input->size && j - i < max_quote; j++)
      {
         if( j + 2 < input->size &&
             input->data[j] == input->data[j + 1] &&
             input->data[j] == input->data[j + 2] )
         {
            break;
         }
      }

      /* Overflow check */
      max_output_size -= 1 + j - i;
      if( max_output_size < 0 )
         return;

      /* Add code for quoted span */
      *(encoded++) = (char)(x1 + j - i - 1);
      for(; i < j; i++)
         *(encoded++) = (char)(x1 + input->dict[input->data[i] & 0xff]);
   }

   /* output now contains encoded bytes.  Check if there is some xor value
      that will keep all encoded characters within printable range */
   *output_size = (int)(encoded - output);
   for(i = 0; i < 128; i++)
   {
      for(j = 0; j < *output_size; j++)
      {
         c = (int)(output[j]) ^ i;
         if( c <= 32 || c >= 127 || c == '\"' || c == '\\' )
            break;
      }
      if( j == *output_size )
      {
         /* Every byte is acceptable with this key, apply it in place */
         for(j = 0; j < *output_size; j++)
            output[j] = (char)((int)output[j] ^ i);
         *xor = i;
         printf("%d -> %d: x1=%d, x2=%d, y1=%d, y2=%d, xor=%d\n",
                input->size, *output_size, x1, x2, y1, y2, *xor);
         return;
      }
   }

   /* No suitable xor value found */
   *output_size = *xor = 0;
}

#if 0
/* Find optimal encoding */
static void BruteForce(const InputData *input)
{
   int x1, x2, y1, y2;
   int max_encoded_size, encoded_size, xor;
   char *encoded;

   if( (encoded = (char*)malloc(input->size)) == NULL )
   {
      puts("Out of memory");
      return;
   }

   max_encoded_size = input->size;
   for(x1 = 0; x1 < 128 - (input->dict_size - 1); x1++)
   {
      for(x2 = x1 + input->dict_size - 1; x2 < 128; x2++)
      {
         /* Quote codes < repeat codes */
         for(y1 = x2 + 1; y1 < 128 - (input->dict_size - 1); y1++)
         {
            for(y2 = y1 + input->dict_size - 1; y2 < 128; y2++)
            {
               EncodeFileWithParameters(input, max_encoded_size,
                                        x1, x2, y1, y2,
                                        encoded, &encoded_size, &xor);
               if( encoded_size > 0 && encoded_size < max_encoded_size )
               {
                  printf("%d %d %d %d %d -> %d\n",
                         x1, x2, y1, y2, xor, encoded_size);
                  max_encoded_size = encoded_size;
               }
            }
         }

         /* Repeat codes < quote codes */
         for(y1 = 0; y1 < x1 - (input->dict_size - 1); y1++)
         {
            for(y2 = y1 + input->dict_size - 1; y2 < x1; y2++)
            {
               EncodeFileWithParameters(input, max_encoded_size,
                                        x1, x2, y1, y2,
                                        encoded, &encoded_size, &xor);
               if( encoded_size > 0 && encoded_size < max_encoded_size )
               {
                  printf("%d %d %d %d %d -> %d\n",
                         x1, x2, y1, y2, xor, encoded_size);
                  max_encoded_size = encoded_size;
               }
            }
         }
      }
   }
   free(encoded);
}
#endif

/* Encode file to stdout.

   Encodes input with a fixed set of code ranges and writes the encoded bytes
   followed by a newline.  If encoding fails, only the newline is written to
   stdout and a diagnostic goes to stderr.
*/
static void EncodeFile(const InputData *input)
{
   int encoded_size, xor;
   char *encoded;

   /* Encoded output is never allowed to exceed the input size.  Allocate at
      least one byte because malloc(0) may legitimately return NULL. */
   encoded = malloc(input->size > 0 ? (size_t)input->size : 1);
   if( encoded == NULL )
   {
      puts("Out of memory");
      return;
   }

   #if 0
   EncodeFileWithParameters(input, input->size,
                            'a', 'z', 'A', 'Z',
                            encoded, &encoded_size, &xor);
   EncodeFileWithParameters(input, input->size,
                            'A', 'Z', 'a', 'z',
                            encoded, &encoded_size, &xor);
   EncodeFileWithParameters(input, input->size,
                            '\"' + 1, '\\' - 1, '\\' + 1, 126,
                            encoded, &encoded_size, &xor);
   EncodeFileWithParameters(input, input->size,
                            '\\' + 1, 126, '\"' + 1, '\\' - 1,
                            encoded, &encoded_size, &xor);
   #endif
   EncodeFileWithParameters(input, input->size,
                            '_' + 2, 126, '\"' + 1, '\\' - 1,
                            encoded, &encoded_size, &xor);

   /* A zero size for non-empty input means the encoder gave up */
   if( encoded_size == 0 && input->size > 0 )
      fputs("Unable to encode input with the selected parameters\n", stderr);

   fwrite(encoded, encoded_size, 1, stdout);
   putchar('\n');
   free(encoded);
}

/* Program entry */
int main(int argc, char **argv)
{
   FILE *infile;
   InputData *data;

   /* Load input to memory */
   if( argc != 2 )
   {
      printf("%s <input_file>\n", *argv);
      return 0;
   }
   if( (infile = fopen(argv[1], "rb")) == NULL )
   {
      printf("Error opening %s\n", argv[1]);
      return 1;
   }
   data = LoadFile(infile);
   fclose(infile);
   if( data == NULL )
      return 1;

   /* Encode file */
   EncodeFile(data);
   free(data->data);
   free(data);
   return 0;
}