/* l0.c - Don Yang (uguu.org) 04/15/05 */ /*@ -compdef -compdestroy -nullstate -temptrans -usereleased @*/ #include #include #include #include #include #include typedef union { char *w; int i; } Element; typedef struct { int size, max; Element *d; } List; static List Index[26], AllWords; static void AddToList(List *l, char *d); static void Encode(int width, int argc, char **argv); static void FreeList(List *l); static int PartialMatch(char *a, char *b); static void PrettyPrint(int width, List *l); static void Tokenize(char *text, int size); int main(int argc, char **argv) { FILE *infile; char *text; int size, i; if( argc < 2 ) return printf("%s [width] [...]\n", *argv); /* Load data */ if( (infile = fopen(argv[1], "rb")) == NULL ) return printf("Can not open %s\n", argv[1]); (void)fseek(infile, 0, SEEK_END); if( (size = (int)ftell(infile)) < 1 ) { (void)fclose(infile); return printf("%s too small\n", argv[1]); } (void)fseek(infile, 0, SEEK_SET); if( (text = (char*)malloc((size_t)size + 1)) == NULL ) { (void)fclose(infile); return puts("Out of memory"); } (void)fread(text, (size_t)size, 1, infile); text[size] = '\0'; (void)fclose(infile); /* Tokenize words */ memset(&Index, 0, 26 * sizeof(List)); memset(&AllWords, 0, sizeof(List)); Tokenize(text, size); /* Set screen width */ if( (size = (argc > 2) ? atoi(argv[2]) : 80) < 1 ) size = 80; /* Process */ srand((unsigned)time(NULL)); if( argc > 3 ) Encode(size, argc, argv); else PrettyPrint(size, &AllWords); (void)puts("\n"); /* Cleanup */ for(i = 0; i < 26; FreeList(&Index[i++])); FreeList(&AllWords); free(text); return 0; } static void AddToList(List *l, char *d) { Element *tmp; if( l->d == NULL ) { if( (l->d = (Element*)malloc((l->max = 32) * sizeof(Element))) == NULL ) return; assert(l->size == 0); } if( l->size + 1 > l->max ) { if( (tmp = (Element*)malloc((l->max * 2) * sizeof(Element))) == NULL ) return; memcpy(tmp, l->d, l->size * sizeof(Element)); free(l->d); l->d = tmp; /*@i1@*/l->max <<= 1; } assert(l->size < l->max); l->d[l->size++].w = d; } static void Encode(int width, int argc, char **argv) { List output; int a, c0, column, margin, i, j, k, k0, k1; char *token; memset(&output, 0, sizeof(List)); /* Process arguments backwards */ for(a = argc; a-- > 3;) { /* Add first word (message text) */ token = argv[a]; column = (int)strlen(token); for(i = 0; i < column; i++) token[i] = tolower(token[i]); AddToList(&output, token); /* Set margin */ margin = width - 1 - (int)strlen(argv[a - 1]); /* Add words following current word until margin is overflowed */ while( column < margin ) { do { assert(token != NULL); assert(*token != '\0'); c0 = (int)*token - (int)'a'; do { /* Choose words that are more likely to follow current word */ i = rand() % AllWords.size; if( c0 < 0 || c0 >= 26 ) break; if( Index[c0].size == 0 ) break; i = Index[c0].d[rand() % Index[c0].size].i; k0 = PartialMatch(token, AllWords.d[i].w); assert(k0 > 0); for(j = 0; j < Index[c0].size / 2; j++) { k = Index[c0].d[rand() % Index[c0].size].i; if( (k1 = PartialMatch(token, AllWords.d[k].w)) > k0 ) { assert(k1 > 0); i = k; k0 = k1; } } i = (i + 1) % AllWords.size; } while(0); /* If the distribution of the length of the words are too even, this part could take a long time, or even loop forever. No resolutions here, we will rely on good input data. */ } while( column + 1 + (int)strlen(AllWords.d[i].w) >= width ); /* Here we should adjust the last word so that it flows better into the next word... but backward searching is trouble so bleh */ AddToList(&output, token = AllWords.d[i].w); column += 1 + strlen(token); } } PrettyPrint(width, &output); FreeList(&output); } static void FreeList(List *l) { if( l->d != NULL ) free(l->d); } static int PartialMatch(char *a, char *b) { int c; for(c = 0; *a != '\0' && *b != '\0' && *a == *b; a++, b++, c++); return c; } static void PrettyPrint(int width, List *l) { int i, column; column = width; for(i = 0; i < l->size; i++) { if( (column + 1 + (int)strlen(l->d[i].w)) >= width ) { (void)putchar('\n'); column = 0; } else { (void)putchar(' '); column++; } assert(strchr(l->d[i].w, '%') == NULL); /*@i1@*/printf(l->d[i].w); column += strlen(l->d[i].w); } } static void Tokenize(char *text, int size) { char *p; for(p = text; size-- > 0; *(p++) = '\0') { if( isalpha(*p) ) { *p = tolower(*p); AddToList(&Index[(int)*p - (int)'a'], (char*)AllWords.size); AddToList(&AllWords, p); /* "size-- > 0 && isalnum(*p)" does the right thing in most places, except for compilers like MSVC which doesn't short circuit properly */ for(; size-- > 0 ? isalnum(*p) : 0; p++) *p = tolower(*p); size++; } } }