/* l3.c - Don Yang (uguu.org) 04/17/05 */ #include #include #include #include typedef union { char *w; int i; } Element; typedef struct { int size, max; Element *d; } List; List Index[26], AllWords; int SLen(char *d) { int r; for(r = 0; *d != '\0'; d++, r++); return r; } void Reset(List *l) { l->d = NULL; l->size = l->max = 0; } void MemCpy(Element *a, Element *b, int size) { for(; size-- > 0; a++->i = b++->i); } void AddToList(List *l, char *d) { Element *tmp; if( l->d == NULL ) { if( (l->d = (Element*)malloc((l->max = 32) * sizeof(Element))) == NULL ) return; } if( l->size + 1 > l->max ) { if( (tmp = (Element*)malloc((l->max * 2) * sizeof(Element))) == NULL ) return; MemCpy(tmp, l->d, l->size); free(l->d); l->d = tmp; l->max <<= 1; } l->d[l->size++].w = d; } void FreeList(List *l) { if( l->d != NULL ) free(l->d); } void Tokenize(char *text, int size) { char *p; for(p = text; size-- > 0; *(p++) = '\0') { if( isalpha(*p) ) { *p = tolower(*p); AddToList(&Index[*p - 'a'], (char*)AllWords.size); AddToList(&AllWords, p); for(; size-- > 0 ? isalnum(*p) : 0; p++) *p = tolower(*p); size++; } } } void PrettyPrint(int width, List *l) { int i, column; column = width; for(i = 0; i < l->size; i++) { if( (column + 1 + SLen(l->d[i].w)) >= width ) { putchar('\n'); column = 0; } else { putchar(' '); column++; } printf(l->d[i].w); column += SLen(l->d[i].w); } } int PartialMatch(char *a, char *b) { int c; for(c = 0; *a != '\0' && *b != '\0' && *a == *b; a++, b++, c++); return c; } void Encode(int width, int argc, char **argv) { List output; int a, c0, column, margin, i, j, k, k0, k1; char *token; Reset(&output); for(a = argc; a-- > 3;) { token = argv[a]; column = SLen(token); for(i = 0; i < column; i++) token[i] = tolower(token[i]); AddToList(&output, token); margin = width - 1 - SLen(argv[a - 1]); while( column < margin ) { do { c0 = *token - 'a'; do { i = rand() % AllWords.size; if( c0 < 0 || c0 >= 26 ) break; if( Index[c0].size == 0 ) break; i = Index[c0].d[rand() % Index[c0].size].i; k0 = PartialMatch(token, AllWords.d[i].w); for(j = 0; j < Index[c0].size / 2; j++) { k = Index[c0].d[rand() % Index[c0].size].i; if( (k1 = PartialMatch(token, AllWords.d[k].w)) > k0 ) { i = k; k0 = k1; } } i = (i + 1) % AllWords.size; } while(0); } while( column + 1 + SLen(AllWords.d[i].w) >= width ); AddToList(&output, token = AllWords.d[i].w); column += 1 + SLen(token); } } PrettyPrint(width, &output); FreeList(&output); } int main(int argc, char **argv) { FILE *infile; char *text; int size, i; if( argc < 2 ) return printf("%s [width] [...]\n", *argv); if( (infile = fopen(argv[1], "rb")) == NULL ) return printf("Can not open %s\n", argv[1]); fseek(infile, 0, SEEK_END); if( (size = ftell(infile)) < 1 ) { fclose(infile); return printf("%s too small\n", argv[1]); } fseek(infile, 0, SEEK_SET); if( (text = (char*)malloc(size + 1)) == NULL ) { fclose(infile); return puts("Out of memory"); } fread(text, size, 1, infile); text[size] = '\0'; fclose(infile); for(i = 0; i < 26; Reset(&Index[i++])); Reset(&AllWords); Tokenize(text, size); if( (size = (argc > 2) ? atoi(argv[2]) : 80) < 1 ) size = 80; srand(time(NULL)); if( argc > 3 ) Encode(size, argc, argv); else PrettyPrint(size, &AllWords); puts("\n"); for(i = 0; i < 26; FreeList(&Index[i++])); FreeList(&AllWords); free(text); return 0; }