/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. */

/*
 * string.c: String table manipulation routines. Can be used to compute
 * the dictionary as well as uncompress files.
 */

#include "defs.h"

extern int MAX_WORDS;
extern int RESERVED_CHARS;

/*
 * Shared storage pool used by build_string() when usemalloc is 0:
 * free_strtable is allocated on first use, and next_free_strtable is the
 * offset within it at which the next word will be stored.
 */
int next_free_strtable = 0;
char *free_strtable = NULL; /*[DEF_MAX_WORDS * AVG_WORD_LEN]; */

/* When nonzero, build_string() mallocs every word separately instead of using the pool. */
extern int usemalloc;

/* debugging only */
/*
 * dump_string: opens string_file for writing and index_file for reading.
 * If either open fails it reports the failure on stderr and returns 0
 * (closing stringfp first when the second open is the one that failed).
 *
 * NOTE(review): the text of this file is damaged from the `for` loop below
 * onwards.  It looks as if everything between a '<' that is directly
 * followed by a non-blank character and the next '>' has been stripped.
 * The rest of dump_string() and the head of the following function are
 * therefore not visible.  The surviving tail (stores a NUL at str[numread],
 * returns -1 when c == EOF, otherwise returns numread) is presumably the
 * end of mystringread(), which build_string() calls -- confirm against a
 * pristine copy before editing this region.
 */
int dump_string(string_table, string_file, index_file)
	char **string_table;
	unsigned char *string_file, *index_file;
{
	FILE *stringfp;
	FILE *indexfp;
	int i;

	if ((stringfp = fopen(string_file, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", string_file);
		return 0;
	}
	if ((indexfp = fopen(index_file, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", index_file);
		fclose(stringfp);
		return 0;
	}
	/* NOTE(review): damaged text starts on the next line; it is kept verbatim. */
	for(i=0; i in padded area */
			c = '\0';
			str[numread++] = c;
			return numread;
		}
		else str[numread++] = c;
	}
	str[numread] = '\0';
	if (c == EOF) return -1;
	return numread;
}

/*
 * build_string: reads words from stringfp with mystringread() and stores a
 * pointer to each one in string_table[], starting at slot initialwordindex
 * and never filling a slot at or beyond MAX_WORDS.
 *
 *   bytestoread == -1   keep reading until mystringread() returns -1;
 *                       a return value of 0 is skipped without using a slot.
 *   any other value     keep reading while the sum of the mystringread()
 *                       return values is below bytestoread; stop as soon
 *                       as mystringread() returns 0 or less.
 *
 * Each stored word lives either in its own malloc(ret + 2) block (usemalloc
 * nonzero; the word is first read into dummybuf and then copied) or inside
 * the shared free_strtable pool (usemalloc zero; the pool offset advances by
 * ret + 2 per word).  A failed allocation simply ends the loop.
 *
 * Returns the next unused slot index, i.e. initialwordindex plus the number
 * of words stored by this call.
 *
 * NOTE(review): next_free_strtable is never compared against the pool size
 * (AVG_WORD_LEN * DEF_MAX_WORDS) before mystringread() writes into the pool.
 */
int build_string(string_table, stringfp, bytestoread, initialwordindex)
	char *string_table[DEF_MAX_WORDS]; /*[MAX_WORD_LEN+2]; */
	FILE *stringfp;
	int bytestoread;
	int initialwordindex;
{
	int wordindex = initialwordindex;
	int numread = 0;
	int ret;
	char dummybuf[MAX_WORD_BUF];
	char *word;

	if (bytestoread == -1) { /* read until end of file */
		while (wordindex < MAX_WORDS) {
			/* Pick the destination buffer for the next word. */
			if (usemalloc) word = dummybuf;
			else {
				/* The pool is created lazily, on the first word read. */
				if (free_strtable == NULL) free_strtable = (char *)malloc(AVG_WORD_LEN * DEF_MAX_WORDS);
				if (free_strtable == NULL) break;
				word = &free_strtable[next_free_strtable];
			}
			/* 0 means nothing to store this time round; -1 ends the read. */
			if ((ret = mystringread(stringfp, word)) == 0) continue;
			if (ret == -1) break;
			if (usemalloc) {
				if ((word = (char *)malloc(ret + 2)) == NULL) break;
				strcpy(word, dummybuf);
			}
			else next_free_strtable += ret + 2;
			string_table[wordindex] = word;
#if 0
			printf("word=%s index=%d\n", string_table[wordindex], wordindex);
#endif /*0*/
			wordindex ++;
		}
	}
	else { /* read only the specified number of bytes */
		while((wordindex < MAX_WORDS) && (bytestoread > numread)) {
			/* Same buffer selection as in the read-to-EOF branch. */
			if (usemalloc) word = dummybuf;
			else {
				if (free_strtable == NULL) free_strtable = (char *)malloc(AVG_WORD_LEN * DEF_MAX_WORDS);
				if (free_strtable == NULL) break;
				word = &free_strtable[next_free_strtable];
			}
			if ((ret = mystringread(stringfp, word)) <= 0) break; /* quit if EOF OR if padded area */
			if (usemalloc) {
				if ((word = (char *)malloc(ret + 2)) == NULL) break;
				strcpy(word, dummybuf);
			}
			else next_free_strtable += ret + 2;
			string_table[wordindex] = word;
#if 0
			printf("word=%s index=%d\n", string_table[wordindex], wordindex);
#endif /*0*/
			wordindex ++;
			/* Progress is measured with the values mystringread() returns. */
			numread += ret;
		}
	}
	return wordindex;
}

/*
 * Interprets srcbuf as a set of srclen/2 short integers. It looks for all the
 * short-integers encoding words in the matched line and loads only those blocks
 * of the string table. Note: srcbuf must be aligned on a short-int boundary.
 *
 * What is still readable of the body: a block that has been selected and is
 * not yet flagged in loaded_string_table[] gets flagged, stringfp is
 * positioned at (blockindex-1)*blocksize, and build_string() is asked to read
 * blocksize bytes with srcinit as the first word index.  Returns 0.
 *
 * NOTE(review): the middle of this function is damaged (see the marker in
 * the body); the code that collects wordnums[] and computes srcinit/srcend
 * is not visible.
 */
int build_partial_string(string_table, stringfp, srcbuf, srclen, linebuf, linelen, blocksize, loaded_string_table)
	char *string_table[DEF_MAX_WORDS]; /* [MAX_WORD_LEN+2]; */
	FILE *stringfp;
	unsigned char *srcbuf;
	int srclen;
	unsigned char *linebuf;
	int linelen;
	int blocksize;
	char loaded_string_table[STRING_FILE_BLOCKS];
{
	unsigned char *srcpos;
	int blockindex = 0;
	unsigned short srcinit, srcend;
	unsigned short wordnums[MAX_NAME_LEN]; /* maximum pattern length */
	int numwordnums = 0;
	int i;

	/*
	 * Find all the relevant wordnums in the line.
	 */
	i = 0;
	/* NOTE(review): damaged text on the next line; it is kept verbatim. */
	while(i= srcinit) && (wordnums[i] <= srcend)) goto include_page;
		/* No word number fell inside this block: move on to the next one. */
		blockindex++;
		continue;

	include_page: /* Include it if any of the word-indices fit within this range */
		/* Already loaded earlier: nothing to do (blockindex still advances). */
		if (loaded_string_table[blockindex++]) continue;
#if 0
		printf("build_partial_string: hashing words in page# %d\n", blockindex);
#endif /*0*/
		/* blockindex was advanced above, hence the "- 1" from here on. */
		loaded_string_table[blockindex - 1] = 1;
		/* whence is the literal 0 -- presumably SEEK_SET; confirm for the target libc. */
		fseek(stringfp, (blockindex-1)*blocksize, 0);
		build_string(string_table, stringfp, blocksize, srcinit);
	}
	return 0;
}

/*
 * pad_string_file: declared without a return type (implicit int).
 *
 * Visible part: opens filename for reading, "<filename>.index" for writing
 * and "<filename>.<pid>" for writing; on any failure it prints a message,
 * closes whatever was already opened and calls exit(2).  A FILEBLOCKSIZE
 * that is not a multiple of MIN_BLOCKSIZE is replaced by MIN_BLOCKSIZE after
 * a warning.  The block size is written as the first line of the index file;
 * then the first input line is copied to the output file, word index 0 is
 * written to the index file, and offset is advanced by that line's length.
 *
 * NOTE(review): this chunk ends in the middle of the copy loop, so the
 * padding logic, the end_of_input label and the return path are not visible.
 * NOTE(review): both sprintf() calls write into buf[MAX_NAME_LEN] without a
 * length check on filename.
 */
pad_string_file(filename, FILEBLOCKSIZE)
	unsigned char *filename;
	int FILEBLOCKSIZE;
{
	FILE *outfp, *infp, *indexfp;
	int offset = 0, len;
	unsigned char buf[MAX_NAME_LEN];
	int pid = getpid();
	int i;
	unsigned short wordindex = 0;

	if ((infp = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", filename);
		exit(2);
	}
	/* buf is used as scratch space for the two derived file names first... */
	sprintf(buf, "%s.index", filename);
	if ((indexfp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", buf);
		fclose(infp);
		exit(2);
	}
	sprintf(buf, "%s.%d", filename, pid);
	if ((outfp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", buf);
		fclose(infp);
		fclose(indexfp);
		exit(2);
	}

	if ((FILEBLOCKSIZE % MIN_BLOCKSIZE) != 0) {
		fprintf(stderr, "invalid block size %d: changing to %d\n", FILEBLOCKSIZE, MIN_BLOCKSIZE);
		FILEBLOCKSIZE = MIN_BLOCKSIZE;
	}
	fprintf(indexfp, "%d\n", FILEBLOCKSIZE);

	/* ...and from here on as the line buffer for the input file. */
	buf[0] = '\0';
	if ((char *)buf != fgets(buf, MAX_NAME_LEN, infp)) goto end_of_input;
	len = strlen((char *)buf);
	fputs(buf, outfp);
	fprintf(indexfp, "%d\n", wordindex);
	offset += len;
	wordindex ++;

	while(fgets(buf, MAX_NAME_LEN, infp) == (char *)buf) {
		len = strlen((char *)buf);
		/* The next line would not fit in the current block. */
		if (offset + len > FILEBLOCKSIZE) {
			/* NOTE(review): the visible text stops inside this statement. */
			for (i=0; i