// Tool to insert a specific CRC into a text file. // // ./force_crc {target_crc} {input.txt} {input.map} > {output.txt} // // Where {input.map} if of the exact same size as {input.txt}, with // "X" characters marking where adjustments will be made. // // Default behavior is to stop after finding the first match. When // compiled with LIST_ALL_VARIATIONS, patch characters for all working // variations will be printed to stderr, and the last match is returned. #include #include #include #include #include #include #include #include namespace { // Special character in input map to mark where adjustments are made. static constexpr char kReplacementKey = 'X'; // Table for updating CRC one byte at a time. static std::array crc_table; // Mapping from upper 8 bits of a CRC to an index in crc_table, // such that (crc_table[prefix_table[byte]] >> 24) == byte. static std::array prefix_table; // Load file to memory. static std::string LoadInput(const char *name) { std::string data; FILE *infile = fopen(name, "rb"); if( infile == nullptr ) { fprintf(stderr, "%s: read error\n", name); return data; } for(std::array buffer;;) { const size_t read_size = fread(buffer.data(), 1, buffer.size(), infile); if( read_size == 0 ) break; data.append(buffer.data(), read_size); if( read_size < buffer.size() ) break; } fclose(infile); return data; } // Insert target CRC into string if there is a sufficiently long run of // placeholder characters. static void InsertCrc(std::string *text, std::string *input_map, uint32_t target_crc) { const std::string insert_key(8, kReplacementKey); const size_t offset = input_map->find(insert_key); if( offset == std::string::npos ) return; std::array buffer; sprintf(buffer.data(), "%08x", target_crc); for(int i = 0; i < 8; i++) { // Write CRC character. (*text)[offset + i] = buffer[i]; // Remove the corresponding placeholder location from input map. (*input_map)[offset + i] = '_'; } } // Initialize crc_table and prefix_table. static void InitCrcTables() { static constexpr uint32_t kPoly = 0xedb88320U; for(int i = 0; i < 256; i++) { uint32_t crc = i; for(int j = 8; j > 0; j--) { if( (crc & 1) != 0 ) crc = (crc >> 1) ^ kPoly; else crc >>= 1; } crc_table[i] = crc; } for(int i = 0; i < 256; i++) prefix_table[(crc_table[i] >> 24) & 0xff] = i; } // Compute CRC for a single byte. static inline uint32_t ByteCrc(uint8_t byte, uint32_t crc) { return ((crc >> 8) & 0xffffff) ^ crc_table[(crc ^ byte) & 0xff]; } // Compute partial CRC by scanning forward one byte at a time. static uint32_t ForwardCrc(const std::string_view &data, uint32_t crc) { for(uint8_t byte : data) crc = ByteCrc(byte, crc); return crc; } // Extend CRC with a run of zero bytes. This is functionally identical to // ForwardCrc(std::string(length, '\0'), crc) // // This function exists so that we can implement a trick to combine two // CRCs, where // // CRC(AB) = CRC(A . 0) ^ CRC(0 . B) = CRC(A . 0) ^ CRC(B) // // This function is used to compute the "0" extension part of CRC(A.0). By // using this combination trick, we are able to change "A" and "B" and compute // CRCs for each segment independently, without having to recompute "CRC(B)" // due to changes in "A". This is useful since it makes the number of total // CRC calculations proportional to the sum of all segment variations, and not // the product of all variations. We still need to do O(product) number of // operations to combine the CRCs from all segment variations, but that is a // cheaper operation than recomputing all the CRCs if the segments are long. // // This essentially implements "trick 1" of what's described here: // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768#23126768 // // "Trick 2" involves converting the CRC operations into matrix transformations // and doing exponentiation to reduce number of operations. zlib has done it // but it's rather complicated, so we do the straightforward thing here. static uint32_t ExtendZeroCrc(int length, uint32_t crc) { for(int i = 0; i < length; i++) crc = ByteCrc(0, crc); return crc; } // Compute reverse partial CRC by scanning backward one byte at a time. static uint32_t ReverseCrc(const std::string_view &data, uint32_t crc) { for(int i = static_cast(data.size()); i-- > 0;) { // a1.b1.c1.d1 = (a0.b0.c0) ^ crc_table[d0 ^ byte] // // There is exactly one crc_table entry with the prefix a1, so we can // get the right side of the XOR with a single lookup. const int r_index = prefix_table[(crc >> 24) & 0xff]; const uint32_t r_value = crc_table[r_index]; // r_index = d0^byte -> d0 = r_index^byte. const uint8_t byte = data[i]; const uint32_t d0 = r_index ^ byte; // b1.c1.d1 = (a0.b0.c0) ^ (r_value & 0xffffff) -> // a0.b0.c0 = b1.c1.d1 ^ (r_value & 0xffffff) const uint32_t abc0 = r_value ^ crc; // Reassemble the combined CRC bytes. crc = ((abc0 & 0xffffff) << 8) | d0; } return crc; } // Brute-force update characters to reach target CRC. // Returns updated text on success, or empty string on error. static std::string BruteForceCrc(std::string text, const std::string &input_map, uint32_t init_crc, uint32_t target_crc) { assert(!text.empty()); assert(input_map.size() == text.size()); // Record for tracking adjustment location and status. using AdjustmentFrame = struct { // Offset where the replacement can be made. int offset; // Index of current replacement character. int char_index; // Ordered list of replacement character candidates. Original input // character is always listed first. // // It seems like maintaining an ordered list would be inefficient // when we can just use the input string itself to track progress // (e.g. by incrementing the characters directly), but that turned // out to make things run slightly slower. std::string replacements; // Partial CRC of current segment plus trailing zeroes, similar // to this expression: // // text.substr(offset, next_offset - offset) + // string(text.size() - next_offset, '\0') // // Array elements are indexed by the first byte in the segment. // Some entries might be left uninitialized, since we only // compute checksums for the characters in "replacements" list. std::array crc_table; // Combined CRC for all segments from start to current segment. uint32_t combined_crc; }; using AdjustmentStack = std::vector; AdjustmentStack a_stack; // Preallocate some space to avoid resizing. 8 entries seems like a // good start, especially since inputs with more than 5 adjustment // spots will probably take too long to run anyway. a_stack.reserve(8); // Find all positions where adjustments can be made. for(int i = 0; i < static_cast(input_map.size()); i++) { if( input_map[i] != kReplacementKey ) continue; a_stack.push_back(AdjustmentFrame()); AdjustmentFrame &entry = a_stack.back(); entry.offset = i; entry.char_index = 0; entry.replacements.push_back(text[i]); for(int j = 33; j < 127; j++) { if( j == '/' || j == text[i] ) continue; entry.replacements.push_back(j); } } #ifdef LIST_ALL_VARIATIONS std::string result; #endif // Compute partial CRCs for each segment. assert(!a_stack.empty()); for(AdjustmentStack::iterator i = a_stack.begin();;) { AdjustmentStack::iterator next = i; ++next; // Set initial CRC for this segment. // // + The first segment gets initial CRC from file prefix. We apply // adjustments after the file prefix, but what we are really doing // is to compute the CRC of file prefix plus first segment. // // + All subsequent segments get an initial CRC of zero. This is // because the CRCs for all subsequent segments will be computed by // combining CRCs with earlier segments. See comments near // ExtendZeroCrc for how this is done. const uint32_t c0 = i == a_stack.begin() ? init_crc : 0; if( next == a_stack.end() ) { assert(i->offset + 1 == static_cast(text.size())); for(uint8_t c : i->replacements) i->crc_table[c] = ByteCrc(c, c0); break; } std::string_view s = std::string_view(text).substr( i->offset, next->offset - i->offset); for(char c : i->replacements) { text[i->offset] = c; i->crc_table[c & 0xff] = ExtendZeroCrc(text.size() - next->offset, ForwardCrc(s, c0)); } i = next; } // Find the combination of partial CRCs that would produce the target. for(int a_index = 0; a_index >= 0;) { // Assemble CRC across all segments. uint32_t last_crc = a_index == 0 ? 0 : a_stack[a_index - 1].combined_crc; for(; a_index < static_cast(a_stack.size()); a_index++) { AdjustmentFrame &frame = a_stack[a_index]; last_crc ^= frame.crc_table[frame.replacements[frame.char_index] & 0xff]; frame.combined_crc = last_crc; } if( last_crc == target_crc ) { // Successfully matched target CRC. #if LIST_ALL_VARIATIONS result = text; for(const AdjustmentFrame &f : a_stack) { fputc(f.replacements[f.char_index], stderr); result[f.offset] = f.replacements[f.char_index]; } fputc('\n', stderr); #else // Patch in all the adjustment bytes and return. for(const AdjustmentFrame &f : a_stack) text[f.offset] = f.replacements[f.char_index]; return text; #endif } // Didn't work. Adjust trailing variations. for(a_index--; a_index >= 0; a_index--) { AdjustmentFrame &frame = a_stack[a_index]; frame.char_index++; if( frame.char_index < static_cast(frame.replacements.size()) ) break; frame.char_index = 0; } } // Exhausted all search space. #ifdef LIST_ALL_VARIATIONS return result; #else text.clear(); return text; #endif } } // namespace int main(int argc, char **argv) { uint32_t target_crc; if( argc != 4 || sscanf(argv[1], "%x", &target_crc) != 1 ) { return fprintf(stderr, "%s {target_crc} {input.txt} {input.map}\n", *argv); } // Load input to memory. // // Here we don't differentiate between read error versus reading an // empty file. Read errors will get an error message printed to stderr, // but empty files will just silently fail. std::string text = LoadInput(argv[2]); if( text.empty() ) return 1; std::string input_map = LoadInput(argv[3]); if( input_map.empty() ) return 1; if( text.size() != input_map.size() ) { fprintf(stderr, "%s and %s must have the same size\n", argv[2], argv[3]); return 1; } InitCrcTables(); // Replace substring with desired CRC if there is room for it. InsertCrc(&text, &input_map, target_crc); // Break input into three sections: prefix + middle + suffix. const size_t adjustment_offset = input_map.find(kReplacementKey); if( adjustment_offset == std::string::npos ) { // If there are no adjustment placeholders available, we just need // to check that the whole file matches the expected CRC. const uint32_t full_crc = ~ForwardCrc(text, ~0); if( full_crc != target_crc ) { fprintf(stderr, "%s: can not attain %08x, " "and there are no placeholder bytes to make adjustments.\n", argv[2], target_crc); return 1; } fwrite(text.data(), text.size(), 1, stdout); return 0; } const std::string_view prefix(text.data(), adjustment_offset); const std::string_view suffix(text.data() + input_map.rfind(kReplacementKey) + 1); // Compute CRC of prefix and suffix. const uint32_t prefix_crc_end = ForwardCrc(prefix, ~0); const uint32_t suffix_crc_start = ReverseCrc(suffix, ~target_crc); // Brute-force the middle part. const size_t adjustment_size = text.size() - adjustment_offset - suffix.size(); const std::string middle = BruteForceCrc( text.substr(adjustment_offset, adjustment_size), input_map.substr(adjustment_offset, adjustment_size), prefix_crc_end, suffix_crc_start); if( middle.empty() ) { fprintf(stderr, "%s: can not attain %08x\n", argv[2], target_crc); return 1; } // Write output. assert(prefix.size() + middle.size() + suffix.size() == text.size()); fwrite(prefix.data(), prefix.size(), 1, stdout); fwrite(middle.data(), middle.size(), 1, stdout); fwrite(suffix.data(), suffix.size(), 1, stdout); return 0; }