#include "glimpse.h"

/*
 * Partition-number codecs.
 *
 * Every encodeNb() below remaps the values '\0' and '\n' onto the two codes
 * MaxNumNbPartition and MaxNumNbPartition+1, which lie just above the range
 * the caller is allowed to pass in; the matching decodeNb() reverses that.
 * The MaxNum*Partition limits are not defined in this file (they are
 * expected from glimpse.h).
 * NOTE(review): presumably the remapping keeps encoded numbers free of
 * string terminators and line separators -- confirm against the callers.
 */

/*
 * encode4b: remap a 4-bit partition number.
 * n is guaranteed to be < MaxNum4bPartition
 * Returns MaxNum4bPartition for '\0', MaxNum4bPartition+1 for '\n',
 * and n itself for every other value.
 */
int
encode4b(n)
    int n;
{
    if (n=='\0') return MaxNum4bPartition;
    if (n=='\n') return MaxNum4bPartition+1;
    return n;
}

/*
 * decode4b: inverse of encode4b.
 * Returns '\0' for MaxNum4bPartition, '\n' for MaxNum4bPartition+1,
 * and n itself for every other value.
 */
int
decode4b(n)
    int n;
{
    if (n==MaxNum4bPartition) return '\0';
    if (n==MaxNum4bPartition+1) return '\n';
    return n;
}

/*
 * encode8b: remap an 8-bit partition number.
 * n is guaranteed to be < MaxNum8bPartition
 * Returns MaxNum8bPartition for '\0', MaxNum8bPartition+1 for '\n',
 * and n itself for every other value.
 */
int
encode8b(n)
    int n;
{
    if (n=='\0') return MaxNum8bPartition;
    if (n=='\n') return MaxNum8bPartition+1;
    return n;
}

/*
 * decode8b: inverse of encode8b.
 * Returns '\0' for MaxNum8bPartition, '\n' for MaxNum8bPartition+1,
 * and n itself for every other value.
 */
int
decode8b(n)
    int n;
{
    if (n==MaxNum8bPartition) return '\0';
    if (n==MaxNum8bPartition+1) return '\n';
    return n;
}

/*
 * encode12b: encode a partition number as a 4-bit digit over an 8-bit digit.
 * n is guaranteed to be < MaxNum12bPartition
 * Returns (high digit << 8) | low digit, each digit already remapped.
 */
int
encode12b(n)
    int n;
{
    unsigned char msb, lsb;

    /* Split n into base-MaxNum8bPartition digits. */
    msb = (n / MaxNum8bPartition);
    lsb = (n % MaxNum8bPartition);
    /* High digit goes through the 4-bit remap, low digit through the 8-bit one. */
    msb = encode4b(msb);
    lsb = encode8b(lsb);
    return (msb<<8)|lsb;
}

/*
 * decode12b: inverse of encode12b.
 * Takes the high digit from bits 8..11 and the low digit from bits 0..7,
 * undoes the remap on each and returns high * MaxNum8bPartition + low.
 */
int
decode12b(n)
    int n;
{
    unsigned char msb, lsb;

    msb = ((n&0x00000f00) >> 8);
    lsb = (n&0x000000ff);
    msb = decode4b(msb);
    lsb = decode8b(lsb);
    return (msb * MaxNum8bPartition) + lsb;
}

/*
 * encode16b: encode a partition number as two 8-bit digits.
 * n is guaranteed to be < MaxNum16bPartition
 * Returns (high digit << 8) | low digit, each digit remapped by encode8b.
 */
int
encode16b(n)
    int n;
{
    unsigned char msb, lsb;

    /* Split n into base-MaxNum8bPartition digits. */
    msb = (n / MaxNum8bPartition);
    lsb = (n % MaxNum8bPartition);
    msb = encode8b(msb);
    lsb = encode8b(lsb);
    return (msb<<8)|lsb;
}

/*
 * decode16b: inverse of encode16b.
 * Takes the high digit from bits 8..15 and the low digit from bits 0..7,
 * undoes the remap on each and returns high * MaxNum8bPartition + low.
 */
int
decode16b(n)
    int n;
{
    unsigned char msb, lsb;

    msb = ((n&0x0000ff00) >> 8);
    lsb = (n&0x000000ff);
    msb = decode8b(msb);
    lsb = decode8b(lsb);
    return (msb * MaxNum8bPartition) + lsb;
}

/*
 * encode24b: encode a partition number as an 8-bit digit over a 16-bit field.
 * n is guaranteed to be < MaxNum24bPartition
 * Returns (high part << 16) | low part; the high part is remapped by
 * encode8b and the low part is encoded by encode16b.
 */
int
encode24b(n)
    int n;
{
    unsigned short msb, lsb;

    /* Split n into quotient and remainder by MaxNum16bPartition. */
    msb = (n / MaxNum16bPartition);
    lsb = (n % MaxNum16bPartition);
    msb = encode8b(msb);
    lsb = encode16b(lsb);
    return (msb<<16)|lsb;
}

/*
 * decode24b: inverse of encode24b.
 * Takes the high part from bits 16..23 and the low part from bits 0..15,
 * decodes each and returns high * MaxNum16bPartition + low.
 */
int
decode24b(n)
    int n;
{
    unsigned short msb, lsb;

    msb = ((n&0x00ff0000) >> 16);
    lsb = (n&0x0000ffff);
    msb = decode8b(msb);
    lsb = decode16b(lsb);
    return (msb * MaxNum16bPartition) + lsb;
}

/*
 * encode32b: encode a partition number as two 16-bit fields.
 * n is guaranteed to be < MaxNum32bPartition
 * Returns (high part << 16) | low part, each part encoded by encode16b.
 */
int
encode32b(n)
    int n;
{
    unsigned short msb, lsb;

    /* Split n into quotient and remainder by MaxNum16bPartition. */
    msb = (n / MaxNum16bPartition);
    lsb = (n % MaxNum16bPartition);
    msb = encode16b(msb);
    lsb = encode16b(lsb);
    /*
     * NOTE(review): msb is promoted to int for the shift.  Where int is
     * 32 bits, an encoded msb with bit 15 set is shifted into the sign bit,
     * which ISO C leaves undefined.  TODO confirm whether such values occur
     * and consider shifting an unsigned value instead.
     */
    return (msb<<16)|lsb;
}

/*
 * decode32b: inverse of encode32b.
 * Takes the high part from bits 16..31 and the low part from bits 0..15,
 * decodes each with decode16b and returns high * MaxNum16bPartition + low.
 */
int
decode32b(n)
    int n;
{
    unsigned short msb, lsb;

    msb = ((n&0xffff0000) >> 16);
    lsb = (n&0x0000ffff);
    msb = decode16b(msb);
    lsb = decode16b(lsb);
    return (msb * MaxNum16bPartition) + lsb;
}

/*
 * convert2agrepregexp: rewrite a file-name wildcard pattern, in place in
 * buf, as an agrep regular expression.
 *
 * Contract as stated by the original author: only ? and * are interpreted
 * as wildcards and ALL OTHER agrep-special characters are masked off.  If
 * the file name is NOT a pattern involving ? or *, the name is left
 * untouched and its string length is returned (so that memagrep calls can
 * be avoided); otherwise the -ve string length of the converted name is
 * returned.  Hence agrep never needs to be called when the result is +ve.
 *
 * What the surviving code shows:
 *   - len < 1 returns 0.
 *   - a trailing star is cut off by writing '\0' over it.
 *   - ? is emitted as a dot and a star as dot-star; the special characters
 *     listed in the long condition are emitted with a backslash in front.
 *   - when the converted text reaches MAX_PAT it is truncated and a
 *     message is printed on stderr.
 *   - the converted text is copied back over buf with strcpy and -j, the
 *     negated converted length, is returned.
 *
 * buf: pattern text, modified in place.
 * len: length of the pattern held in buf.
 */
int
convert2agrepregexp(buf, len)
    char *buf;
    int len;
{
    char tbuf[MAX_PAT];
    int i=0, j=0;

    /* Ignore '*' at the beginning and '*' at the end */
    if (len < 1) return 0;
    /*
     * NOTE(review): the last clause compares buf[len-1] with a backslash
     * right after requiring it to be a star, so it is always true.
     * Presumably buf[len-2] was intended, so that an escaped trailing star
     * is kept -- confirm before changing.
     */
    if ( ((len == 1) && (buf[len-1] == '*')) || ((len >= 2) && (buf[len-1] == '*') && (buf[len-1] != '\\')) ) {
        buf[len-1] = '\0';
        len--;
    }
    if (buf[0] == '*') {
        /*
         * NOTE(review): from this point the text of the function is damaged
         * in this copy of the file: the loop headers that follow are cut
         * short and at least one statement group is missing.  The remainder
         * is kept exactly as found; restore it from a pristine copy of the
         * source before relying on it.
         */
        for (i=0; i= len) return len; i = j = 0; while ((i') || (buf[i] == '<')|| /* (buf[i] == '^') || (buf[i] == '$') || */ (buf[i] == '+')|| (buf[i] == '{') || (buf[i] == '}') || (buf[i] == '~')){ tbuf[j++] = '\\'; tbuf[j++] = buf[i]; i++; } /* Interpret ONLY ? and * in file-names */ else if (buf[i] == '?') { tbuf[j++] = '.'; i++; } else if (buf[i] == '*') { tbuf[j++] = '.'; tbuf[j++] = '*'; i++; } else tbuf[j++] = buf[i++]; } if (j >= MAX_PAT) { tbuf[j-1] = '\0'; fprintf(stderr, "glimpseindex: pattern '%s' too long\n", buf); j--; } else { tbuf[j] = '\0'; } strcpy(buf, tbuf); #if 0 printf("%s=%d\n", buf, j); #endif /*0*/ return -j; /* strlen-compatible, -ve to indicate memagrep must be called */ } /* ----------------------------------------------------------------- input: a word (a string of ascii character terminated by NULL) output: a hash_value of the input word. 
hash function: if the word has length <= 4 the hash value is just a concatenation of the last four bits of the characters. if the word has length > 4, then after the above operation, the hash value is updated by adding each remaining character. (and AND with the 16-bits mask). bug-fixes in all hashing functions: Chris Dalton ---------------------------------------------------------------- */ int hash64k(word, len) char *word; int len; { unsigned int hash_value=0; unsigned int mask_4=017; unsigned int mask_16=0177777; int i; if(len<=4) { for(i=0; i 5 bits is waste since there are only 26 lower case letters */ int hash32k(word, len) char *word; int len; { unsigned int hash_value=0; unsigned int mask_5=037; unsigned int mask_15=077777; int i; if(len<=3) { for(i=0; i