// crack.cc - Don Yang (uguu.org) // // Read input from files or stdin, output candidate passwords in CSV // format to stdout. // // 2015-06-21 #include #include #include #include #include #include #include namespace { using namespace std; // Minimum number of input alpha characters need for crack operation to proceed static const int kMinInputSize = 100; // Maximum key size to try static const int kMaxKeySize = 64; static_assert(kMinInputSize > kMaxKeySize, "Need to increase kMinInputSize"); // Expected character distributions from "Alice In Wonderland" // http://www.gutenberg.org/ebooks/11 static const int kCharCount = 26; static const array kCharDistribution = { 0.079698184390474, // a 0.0141963915472115, // b 0.0243924253388514, // c 0.0444673913927262, // d 0.125173796030539, // e 0.019367585719048, // f 0.0239289692574133, // g 0.064143947832733, // h 0.0701932693167682, // i 0.0019107399848767, // j 0.0104887428957061, // k 0.042369642813585, // l 0.0200587044369822, // m 0.065461138801031, // n 0.0770556716454317, // o 0.0160014310222865, // p 0.00178877785818244, // q 0.053744643829936, // r 0.0590947157875908, // s 0.0991958630446625, // t 0.032344355999317, // u 0.00782996853377131, // v 0.0240021465334298, // w 0.00143102228654595, // x 0.0210100090251974, // y 0.000650464675702705 // z }; struct ShiftInfo { // Shift amount int offset; // Error margin in standard deviation units double margin; }; // Split input string into strides static vector SplitToStrides(const string &input, int strides) { vector s(strides, string()); for(int i = 0; i < strides; i++) s.reserve(input.size() / strides + 1); for(int i = 0, x = 0; i < static_cast(input.size()); i++) { s[x++].push_back(input[i]); x %= strides; } return s; } // Get shift amounts for a single input stride static ShiftInfo GetShiftAmount(const string &input) { // Get character frequencies array freq; freq.fill(0); for(int c : input) freq[c]++; // Convert frequencies to probabilities const double denominator = static_cast(input.size()); array prob; for(int i = 0; i < kCharCount; i++) prob[i] = static_cast(freq[i]) / denominator; // Compute error values for each of the shift amounts vector> shift_list; shift_list.reserve(kCharCount); for(int offset = 0; offset < kCharCount; offset++) { double error = 0.0; for(int i = 0; i < kCharCount; i++) { const double delta = prob[(i + offset) % kCharCount] - kCharDistribution[i]; error += delta * delta; } shift_list.push_back(make_pair(error, offset)); } // Find minimum error sort(shift_list.begin(), shift_list.end()); ShiftInfo shift_info; shift_info.offset = shift_list[0].second; // Compute standard deviation double average_error = 0.0; for(const auto &p : shift_list) average_error += p.first; average_error /= kCharCount; double standard_deviation = 0.0; for(const auto &p : shift_list) { const double delta = p.first - average_error; standard_deviation += delta * delta; } standard_deviation = sqrt(standard_deviation / kCharCount); if( standard_deviation < 1e-6 ) { shift_info.margin = 0.0; return shift_info; } // Compute error margin in standard deviation units const double margin = shift_list[1].first - shift_list[0].first; shift_info.margin = margin / standard_deviation; return shift_info; } // Append input from opened file handle to string, converting alpha // characters to binary. static void LoadInput(FILE *infile, string *data) { static const int kBlockSize = 4096; char buffer[kBlockSize]; int read_size = kBlockSize; while( read_size == kBlockSize && !feof(infile) ) { read_size = fread(buffer, 1, kBlockSize, infile); for(int i = 0; i < read_size; i++) { if( buffer[i] >= 'a' && buffer[i] <= 'z' ) data->push_back(buffer[i] - 'a'); else if( buffer[i] >= 'A' && buffer[i] <= 'Z' ) data->push_back(buffer[i] - 'A'); } } } // Find candidate keys to decrypt input static void Crack(const string &data) { // Find best keys for each key size vector> keys; keys.reserve(kMaxKeySize); unordered_set redundant_keys; for(int key_size = 1; key_size <= kMaxKeySize; key_size++) { const vector strides = SplitToStrides(data, key_size); // Get offsets and confidence amount vector offsets; offsets.reserve(key_size); double confidence = kCharCount * kCharCount; for(int i = 0; i < key_size; i++) { const ShiftInfo shift = GetShiftAmount(strides[i]); if( confidence > shift.margin ) confidence = shift.margin; offsets.push_back(shift.offset); } // Build uppercase key string upper_key; upper_key.reserve(kMaxKeySize * 2); for(int i : offsets) upper_key.push_back('A' + i); // Ignore key if a shorter canonical form exists if( redundant_keys.find(upper_key) != redundant_keys.end() ) continue; // Remember margin and key keys.push_back(make_pair(confidence, upper_key)); // Remove non-canonical keys const string base_key = upper_key; while( upper_key.size() <= kMaxKeySize ) { redundant_keys.insert(upper_key); upper_key.append(base_key); } } // Output keys in sorted order, highest confidence last sort(keys.begin(), keys.end()); for(const pair &p : keys) { // Generate lowercase key string lower_key = p.second; for(char &c : lower_key) c = (kCharCount - (c - 'A')) % kCharCount + 'a'; printf("%s,%s,%f\n", p.second.c_str(), lower_key.c_str(), p.first); } } } // namespace int main(int argc, char **argv) { // Load input text from stdin or files specified on command line string data; if( argc == 1 ) { LoadInput(stdin, &data); } else { for(int i = 1; i < argc; i++) { if( string(argv[i]) == "-" ) { LoadInput(stdin, &data); continue; } FILE *infile = fopen(argv[i], "rb"); if( infile == nullptr ) { fprintf(stderr, "Can not open %s\n", argv[i]); return 1; } LoadInput(infile, &data); fclose(infile); } } if( data.size() < kMinInputSize ) { fputs("Not enough alpha characters in input\n", stderr); return 1; } Crack(data); return 0; }