/* Copyright (c) 1994 Burra Gopal, Udi Manber.  All Rights Reserved. */

#include "glimpse.h"
#include "defs.h"
#include <errno.h>	/* errno must come from the header: it may be a macro / thread-local, so it is not declared by hand */

#if	BG_DEBUG
extern FILE *debug;
#endif	/*BG_DEBUG*/
extern char INDEX_DIR[MAX_LINE_LEN];
extern int Only_first;
extern int PRINTAPPXFILEMATCH;
extern int OneFilePerBlock;
extern int StructuredIndex;
extern int WHOLEFILESCOPE;
extern unsigned int *dest_index_set;
extern unsigned char *dest_index_buf;
extern int mask_int[32];
extern int ByteLevelIndex;
extern int NOBYTELEVEL;
extern int OPTIMIZEBYTELEVEL;
extern int RegionLimit;
extern int PRINTINDEXLINE;
extern struct offsets **src_offset_table;
extern unsigned int *multi_dest_index_set[MAXNUM_PAT];
extern struct offsets **multi_dest_offset_table[MAXNUM_PAT];
extern char *index_argv[MAX_ARGS];
extern int index_argc;
extern CHAR GProgname[MAXNAME];
extern FILE *indexfp, *minifp;
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;
extern int p_table[MAX_PARTITION];
extern int GNumpartitions;
extern int INVERSE;	/* agrep's global: need here to implement ~ in index-search */

/*
 * Frees every node of the offsets list headed by *p1 (each node is released
 * with my_free) and leaves *p1 == NULL.
 */
free_list(p1)
	struct offsets **p1;
{
	struct offsets *tp1;

	while (*p1 != NULL) {
		tp1 = *p1;
		*p1 = (*p1)->next;
		my_free(tp1, sizeof(struct offsets));
	}
}

/*
 * Unions offset list list2 into list1, keeping list1 sorted in increasing
 * order of offset (deletes elements from list2) => changes both list1 and
 * list2: *f += #elems added.
 *
 *	list1, list2	heads of the two lists; list2 is consumed node by node
 *			(nodes are either linked into list1 or freed when their
 *			offset is already present in list1).
 *	f		in/out element count of list1; it is written back only
 *			when the union completes without setting NOBYTELEVEL.
 *	pf		count that *f is compared against (through MAX_UNION and
 *			MIN_OCCURRENCES) to detect too large a disparity;
 *			presumably the previous pattern's frequency -- confirm
 *			against the callers.
 *	cf		if > 0, the count is set to *f + cf when the remainder of
 *			list2 is appended wholesale to the end of list1;
 *			presumably the length of list2 -- confirm against the
 *			callers.
 *
 * Whenever one of the size limits is exceeded, the global NOBYTELEVEL is set
 * to 1 and the function returns at once, leaving both lists in whatever state
 * they have reached and *f untouched.
 */
sorted_union(list1, list2, f, pf, cf)
	struct offsets **list1, **list2;
	int	*f, pf, cf;
{
	register struct offsets **p1 = list1, *p2;
	register int	count = *f;	/* don't update *f if setting NOBYTELEVEL */

	if (NOBYTELEVEL) {	/* cannot come here! */
		free_list(list1);
		free_list(list2);
		return;
	}

	if ( ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) || (count > MAX_ABSOLUTE) ||
	     ((count > MIN_OCCURRENCES) && (pf > MAX_UNION * count)) || (pf > MAX_ABSOLUTE) ) {
		/* enough if we check the second condition at the beginning since it won't surely be satisfied after this when count ++ */
		NOBYTELEVEL = 1;
		return;
	}

	while (*list2 != NULL) {
		/* extract 1st element, update list2 */
		p2 = *list2;
		*list2 = (*list2)->next;
		p2->next = NULL;

		/* find position to insert p2, and do so */
		p1 = list1;
		while (((*p1) != NULL) && ((*p1)->offset < p2->offset))
			p1 = &(*p1)->next;

		if (*p1 == NULL) {	/* end of list1: append list2 to it and return */
			*p1 = p2;
			p2->next = *list2;
			*list2 = NULL;
			if (cf > 0) count = *f + cf;
			if ( ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) || (count > MAX_ABSOLUTE)) {
				NOBYTELEVEL = 1;
				return;
			}
			*f = count;
			return;
		}
		else if (p2->offset == (*p1)->offset) {
			/* offset already present in list1: drop the duplicate */
			my_free(p2, sizeof(struct offsets));
		}
		else {
			/* link p2 in front of the first larger element */
			p2->next = *p1;
			*p1 = p2;
			count ++;
			if ( ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) || (count > MAX_ABSOLUTE) ) {
				NOBYTELEVEL = 1;
				return;
			}
			/* update list1: the next search starts just after the node inserted now (list2 is sorted too) */
			list1 = &(*p1)->next;
		}
	}
	*f = count;
}

/*
 * Intersects offset list list2 with list1, both sorted in increasing order
 * (deletes elements from list2) => changes both list1 and list2.
 *
 * An element of list1 survives if some element of list2 lies within
 * RegionLimit of it (|list2 offset - list1 offset| <= RegionLimit).
 * Elements of list1 that do not survive are freed and *f is decremented once
 * for each of them.  All of list2 is freed.  The `done' flag of every
 * surviving node is reset to 0 before returning.
 *
 *	filenum		not used by this function.
 *	list1, list2	heads of the two lists.
 *	f		in/out element count of list1.
 */
sorted_intersection(filenum, list1, list2, f)
	int	filenum;
	struct offsets **list1, **list2;
	int	*f;
{
	register struct offsets **p1 = list1, *p2, *tp1;
	register int	diff;

	if (NOBYTELEVEL) {	/* cannot come here! */
		free_list(list1);
		free_list(list2);
		return;
	}

	/* find position to intersect list2, and do so: REMEMBER: list1 is in increasing order, and so is list2 !!! */
	p1 = list1;
	while ( ((*p1) != NULL) && (*list2 != NULL) ) {
		diff = (*list2)->offset - (*p1)->offset;
		if ( (diff >= -RegionLimit) && (diff <= RegionLimit) ) {
			(*p1)->done = 1;	/* p1 is in */
			p1 = &(*p1)->next;
			/* Can't increment p2 here since it might keep others after p1 also in */
		}
		else {
			if (diff < 0) {
				/* head of list2 is too far behind *p1: discard it */
				p2 = *list2;
				*list2 = (*list2)->next;
				my_free(p2, sizeof(struct offsets));
				/* p1 can intersect with list2's next */
			}
			else {
				if((*p1)->done) p1 = &(*p1)->next;	/* imposs */
				else {
					/* *p1 is too far behind the head of list2: discard it */
					tp1 = *p1;
					*p1 = (*p1)->next;
					my_free(tp1, sizeof(struct offsets));
					(*f) --;
				}
				/* list2 can intersect with p1's next */
			}
		}
	}

	/* whatever remains of list2 is not needed any more */
	free_list(list2);

	/* sweep list1: drop the elements never marked, unmark the others */
	p1 = list1;
	while (*p1 != NULL) {
		if ((*p1)->done == 0) {
			tp1 = *p1;
			*p1 = (*p1)->next;
			my_free(tp1, sizeof(struct offsets));
			(*f) --;
		}
		else {
			(*p1)->done = 0;	/* for the next round! */
			p1 = &(*p1)->next;
		}
	}
}

/*
 * Removes (and frees with my_free) every node of the list headed by *p1 whose
 * `sign' field is 0; the remaining nodes keep their relative order.
 */
purge_offsets(p1)
	struct offsets **p1;
{
	struct offsets *tp1;

	while (*p1 != NULL) {
		if ((*p1)->sign == 0) {
			tp1 = *p1;
			(*p1) = (*p1)->next;
			my_free(tp1, sizeof(struct offsets));
		}
		else p1 = &(*p1)->next;
	}
}

/* Returns 1 if it is a Universal set, 0 otherwise.
Constraint: WORD_END_MARK/ALL_INDEX_MARK must occur at or after buffer[0] */ get_set(buffer, set, offset_table, patlen, pattern, patattr, outfile, partfp, frequency, prevfreq) unsigned char *buffer; unsigned int *set; struct offsets **offset_table; int patlen; char *pattern; int patattr; FILE *outfile; FILE *partfp; int *frequency, prevfreq; { int bdx2, j; int ret; int x=0, y=0, diff, even_words=1, prevy; int indexattr = 0; struct offsets *o, *tailo, *heado; int delim = encode8b(0); int curfreq = 0; unsigned char c; /* buffer[0] is '\n', search must start from buffer[1] */ bdx2 = 1; if (OneFilePerBlock) while((bdx2= REAL_INDEX_BUF+1) return 0; if (StructuredIndex) { if (StructuredIndex < MaxNum8bPartition - 1) { indexattr = decode8b(buffer[bdx2+1]); } else { indexattr = decode16b((buffer[bdx2+1] << 8) | (buffer[bdx2 + 2])); } /* printf("i=%d p=%d\n", indexattr, patattr); */ if ((patattr > 0) && (indexattr != patattr)) { #if BG_DEBUG fprintf(debug, "indexattr=%d DOES NOT MATCH patattr=%d\n", indexattr, patattr); #endif /*BG_DEBUG*/ return 0; } } if (PRINTINDEXLINE) { c = buffer[bdx2]; buffer[bdx2] = '\0'; printf("%s %d", &buffer[1], indexattr); buffer[bdx2] = c; if (c == ALL_INDEX_MARK) printf(" ! 
"); else printf(" : "); } if (OneFilePerBlock && (buffer[bdx2] == ALL_INDEX_MARK)) { /* A intersection Univ-set = A: so src_index_set won't change; A union Univ-set = Univ-set: so src_index_set = all 1s */ #if BG_DEBUG buffer[bdx2] = '\0'; fprintf(debug, "All indices search for %s\n", buffer + 1); buffer[bdx2] = ALL_INDEX_MARK; #endif /*BG_DEBUG*/ set[REAL_PARTITION - 1] = 1; for(bdx2=0; bdx2= OneFilePerBlock) break; set[bdx2] |= mask_int[j]; } if (ByteLevelIndex) NOBYTELEVEL = 1; return 1; } else if (!OneFilePerBlock) { /* check only if index+partitions are NOT split */ #if BG_DEBUG buffer[bdx2] = '\0'; fprintf(debug, "memagrep-line: %s\t\tpattern: %s\n", buffer, pattern); #endif /*BG_DEBUG*/ /* ignore if pattern with all its options matches block number sequence: bg+udi: Feb/16/93 */ buffer[bdx2] = '\n'; /* memagrep needs buffer to end with '\n' */ if ((ret = memagrep_search(patlen, pattern, bdx2+1, buffer, 0, outfile)) <= 0) return 0; else buffer[bdx2] = WORD_END_MARK; } if ((StructuredIndex > 0) && (StructuredIndex < MaxNum8bPartition - 1)) bdx2 ++; else if (StructuredIndex > 0) bdx2 += 2; bdx2++; /* bdx2 now points to the first byte of the offset */ even_words = 1; /* Code identical to that in merge_in() in glimpseindex */ if (OneFilePerBlock) { get_block_numbers(&buffer[bdx2], &buffer[bdx2], partfp); while((bdx2MIN_OCCURRENCES)&&(curfreq+*frequency > MAX_UNION*prevfreq)) || (curfreq+*frequency > MAX_ABSOLUTE))) { /* These o's will be in sorted order. Just collect all of them and merge with &offset_table[x]. 
*/ o = (struct offsets *)my_malloc(sizeof(struct offsets)); o->offset = y; o->next = NULL; o->sign = o->done = 0; if (heado == NULL) { heado = o; tailo = o; } else { tailo->next = o; tailo = o; } } else { if (heado != NULL) free_list(&heado); /* printf("1 "); */ NOBYTELEVEL = 1; /* can't return since have to or the bitmasks */ } if ((bdx2= OneFilePerBlock) break; if (dest_index_set[i] & mask_int[j]) dest_index_set[i] &= ~mask_int[j]; else dest_index_set[i] |= mask_int[j]; } } else { for(i=0; i=GNumpartitions-1) break; /* STUPID: get_table returns 1 + part_num, where part_num was no. of partitions glimpseindex found */ if ((i == 0) || (i == '\n')) continue; if (dest_index_set[i]) dest_index_set[i] = 0; else dest_index_set[i] = 1; } } } /* Take intersection if parse=ANDPAT or 0 (one terminal pattern), union if OR_EXP; Take care of universal sets in index_tab[REAL_PARTITION - 1] */ if (OneFilePerBlock) { if (parse & OR_EXP) { if (ret) { ret_is_1: index_tab[REAL_PARTITION - 1] = 1; for(i=0; i= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; i= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } } first_time = 0; if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; i 0) ? round(OneFilePerBlock, 8*sizeof(int)) : MAX_PARTITION); i++) { if(index_tab[i]) fprintf(debug, "%d,%x\n", i, index_tab[i]); } #endif /*BG_DEBUG*/ fclose(f_in); return 0; } /* * Same as above, but uses mgrep to search the index for many patterns at one go, * and interprets the output obtained from the -M and -P options (set in main.c). 
*/ mgrep_get_index(infile, index_tab, offset_tab, pat_list, pat_lens, pat_attr, mgrep_pat_index, num_mgrep_pat, patbufpos, index_argv, index_argc, outfile, partfp, parse, first_time) char *infile; int *index_tab; struct offsets **offset_tab; char *pat_list[]; int pat_lens[]; int pat_attr[]; int mgrep_pat_index[]; int num_mgrep_pat; int patbufpos; char *index_argv[]; int index_argc; FILE *outfile; FILE *partfp; int parse; int first_time; { int i=0, j, temp, iii, jjj; FILE *f_in; int ret; int x=0, y=0, even_words=1; int patnum; unsigned int *setptr; struct offsets **offsetptr; CHAR dummypat[MAX_PAT]; int dummylen=0; char allindexmark[MAXNUM_PAT]; int k; int sorted[MAXNUM_PAT], min, max; if (OneFilePerBlock && (parse & OR_EXP) && (index_tab[REAL_PARTITION - 1] == 1)) return 0; /* Do the mgrep() */ if ((f_in = fopen(infile, "w")) == NULL) { fprintf(stderr, "%s: run out of file descriptors!\n", GProgname); return -1; } errno = 0; if ((ret = fileagrep(index_argc, index_argv, 0, f_in)) < 0) { fprintf(stderr, "%s: error in searching index\n", HARVEST_PREFIX); fclose(f_in); return -1; } fflush(f_in); fclose(f_in); f_in = NULL; index_argv[patbufpos] = NULL; /* For index-search with memgrep and get-filenames */ dummypat[0] = '\0'; if ((dummylen = memagrep_init(index_argc, index_argv, MAX_PAT, dummypat)) <= 0) { fclose(f_in); return -1; } /* Interpret the result */ if((f_in = fopen(infile, "r")) == NULL) { fprintf(stderr, "%s: can't open for reading: %s/%s\n", GProgname, INDEX_DIR, infile); return -1; } if (OneFilePerBlock) { for (patnum=0; patnum num_mgrep_pat)) continue; /* error! 
*/ setptr = multi_dest_index_set[patnum - 1]; offsetptr = multi_dest_offset_table[patnum - 1]; for(k=0; dest_index_buf[k] != ' '; k++); dest_index_buf[k] = '\n'; if (!allindexmark[patnum - 1]) allindexmark[patnum - 1] = (char)get_set(&dest_index_buf[k], setptr, offsetptr, pat_lens[mgrep_pat_index[patnum-1]], pat_list[mgrep_pat_index[patnum-1]], pat_attr[mgrep_pat_index[patnum-1]], outfile, partfp, &setptr[REAL_PARTITION - 2], min); /* To test the maximum disparity to stop unions within above */ if (!allindexmark[patnum-1]) min = setptr[REAL_PARTITION - 2]; for (patnum=0; patnum multi_dest_index_set[max][REAL_PARTITION - 2]) max = patnum; } /* Sort them according to the lengths of the lists in increasing order: min first */ for (patnum=0; patnum MAX_DISPARITY * multi_dest_index_set[sorted[0]][REAL_PARTITION - 2]) { NOBYTELEVEL = 1; /* printf("4 "); */ for (iii=0; iii= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; i= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } } first_time = 0; if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; i 0) ? 
round(OneFilePerBlock, 8*sizeof(int)) : MAX_PARTITION); i++) { if(index_tab[i]) fprintf(debug, "%d,%x\n", i, index_tab[i]); } #endif /*BG_DEBUG*/ fclose(f_in); return 0; } /* All borrowed from main.c and are needed for searching the index */ extern CHAR *pat_list[MAXNUM_PAT]; /* complete words within global pattern */ extern int pat_lens[MAXNUM_PAT]; /* their lengths */ extern int pat_attr[MAXNUM_PAT]; /* set of attributes */ extern int num_pat; extern CHAR pat_buf[(MAXNUM_PAT + 2)*MAXPAT]; extern int pat_ptr; extern int is_mgrep_pat[MAXNUM_PAT]; extern int mgrep_pat_index[MAXNUM_PAT]; extern int num_mgrep_pat; extern unsigned int *src_index_set; extern struct offsets **src_offset_table; extern char tempfile[]; extern int patindex; extern int patbufpos; extern ParseTree terminals[MAXNUM_PAT]; extern int GBESTMATCH; /* Should I change -B to -# where # = no. of errors? */ extern int bestmatcherrors; /* set during index search, used later on */ extern FILE *partfp; /* glimpse partitions */ extern FILE *nullfp; /* to discard output: agrep -s doesn't work properly */ extern int ComplexBoolean; extern int num_terminals; #if 0 extern struct token *hash_table[MAX_64K_HASH]; #else /*0*/ extern int mini_array_len; #endif /*0*/ extern int WORDBOUND, NOUPPER, D, LINENUM; int veryfastsearch(argc, argv, num_pat, pat_list, pat_lens, minifp) int argc; char *argv[]; int num_pat; CHAR *pat_list[MAXNUM_PAT]; int pat_lens[MAXNUM_PAT]; FILE *minifp; { /* * Figure out from options if very fast search is possible. */ if (minifp == NULL) return 0; if (!OneFilePerBlock) return 0; /* you did not build index for speed anyway */ if (!(WORDBOUND && NOUPPER /*&& (D<=0)*/)) return 0; if (LINENUM) return 0; return 1; /* if ((num_mgrep_pat == num_pat) || ((1 == num_pat) && (1 == checksg(pat_list[0], D, 0)))) return 1; */ /* either all >= 2 patterns are mgrep-able (simple) or there is just one simple pattern: i.e., "cast" can be used! 
*/ /* return 0; */ } int mini_agrep(inword, inlen, outfp) CHAR *inword; int inlen; FILE *outfp; { static struct stat st; static int statted = 0; unsigned char s[MAX_LINE_LEN], word[MAX_NAME_LEN]; long beginoffset, endoffset, curroffset; unsigned char c; int j, num = 0, cmp, len; if (!statted) { sprintf((char*)s, "%s/%s", INDEX_DIR, INDEX_FILE); if (stat(s, &st) == -1) { fprintf(stderr, "Can't stat file: %s\n", s); exit(2); } statted = 1; } j = 0; while (*inword) { if (*inword == '\\') { inword++; continue; } if (isupper(*inword)) word[j] = tolower(*inword); else word[j] = *inword; j++; inword ++; } word[j] = '\0'; len = j; if (!get_mini(word, len, &beginoffset, &endoffset, 0, mini_array_len, minifp)) return 0; if (endoffset == -1) endoffset = st.st_size; if (endoffset <= beginoffset) return 0; /* We must find all occurrences of the word (in all attributes) so can't quit when we find the first match */ fseek(indexfp, beginoffset, 0); curroffset = ftell(indexfp); /* = beginoffset */ while ((curroffset < endoffset) && (fgets(s, MAX_LINE_LEN, indexfp) != NULL)) { j = 0; while ((j < MAX_LINE_LEN) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\0') && (s[j] != '\n')) j++; if ((j >= MAX_LINE_LEN) || (s[j] == '\0') || (s[j] == '\n')) { curroffset = ftell(indexfp); continue; } /* else it is WORD_END_MARK or ALL_INDEX_MARK */ c = s[j]; s[j] = '\0'; cmp = strcmp(word, s); #if WORD_SORTED if (cmp < 0) break; /* since index is sorted by word */ else #endif /* WORD_SORTED */ if (cmp != 0) { /* not IDENTICALLY EQUAL */ s[j] = c; curroffset = ftell(indexfp); continue; } s[j] = c; fputs(s, outfp); num++; curroffset = ftell(indexfp); } return num; } /* Returns the number of times a successful search was conducted: unused info at present. 
*/ fillup_target(result_index_set, result_offset_table, parse) unsigned int *result_index_set; struct offsets **result_offset_table; long parse; { int i=0; FILE *tmpfp; int dummylen = 0; char dummypat[MAX_PAT]; int successes = 0, ret; int first_time = 1; int veryfast = veryfastsearch(index_argc, index_argv, num_pat, pat_list, pat_lens, minifp); int prev_INVERSE = INVERSE; while (i < num_pat) { if (!veryfast) { if (is_mgrep_pat[i] && (num_mgrep_pat > 1)) { /* do later */ i++; continue; } strcpy(index_argv[patindex], pat_list[i]); /* i-th pattern in its right position */ } /* printf("pat_list[%d] = %s\n", i, pat_list[i]); */ if ((tmpfp = fopen(tempfile, "w")) == NULL) { fprintf(stderr, "%s: cannot open for writing: %s, errno=%d\n", GProgname, tempfile, errno); return(-1); } errno = 0; if (veryfast && is_mgrep_pat[i]) { ret = mini_agrep(pat_list[i], pat_lens[i], tmpfp); } /* If this is the glimpse server, since the process doesn't die, most of its data pages might still remain in memory */ else if ((ret = fileagrep(index_argc, index_argv, 0, tmpfp)) < 0) { /* reinitialization here takes care of agrep_argv changes AFTER split_pattern */ fprintf(stderr, "%s: error in searching index\n", HARVEST_PREFIX); fclose(tmpfp); return(-1); } /* Now, the output of index search is in tempfile: need to use files here since index is too large */ fflush(tmpfp); fclose(tmpfp); tmpfp = NULL; /* Keep track of the maximum number of errors: will never enter veryfast */ if (GBESTMATCH) { if (errno > bestmatcherrors) bestmatcherrors = errno; } /* At this point, all index-search options are properly set due to the above fileagrep */ INVERSE = prev_INVERSE; if (-1 == get_index(tempfile, result_index_set, result_offset_table, pat_list[i], pat_lens[i], pat_attr[i], index_argv, index_argc, nullfp, partfp, parse, first_time)) return(-1); successes ++; first_time = 0; i++; } fflush(stderr); if (veryfast) return successes; /* For index-search with memgrep in mgrep_get_index, and get-filenames */ 
dummypat[0] = '\0'; if ((dummylen = memagrep_init(index_argc, index_argv, MAX_PAT, dummypat)) <= 0) return(-1); if (num_mgrep_pat > 1) { CHAR *old_buf = (CHAR *)index_argv[patbufpos]; /* avoid my_free and re-my_malloc */ index_argv[patbufpos] = (char*)pat_buf; /* this contains all the patterns with the right -m and -M options */ #if BG_DEBUG fprintf(debug, "pat_buf = %s\n", pat_buf); #endif /*BG_DEBUG*/ strcpy(index_argv[patindex], "-z"); /* no-op: patterns are in patbufpos; also avoid shift-left of index_argv */ if (-1 == mgrep_get_index(tempfile, result_index_set, result_offset_table, pat_list, pat_lens, pat_attr, mgrep_pat_index, num_mgrep_pat, patbufpos, index_argv, index_argc, nullfp, partfp, parse, first_time)) { index_argv[patbufpos] = (char *)old_buf; /* else will my_free array! */ fprintf(stderr, "%s: error in searching index\n", HARVEST_PREFIX); return(-1); } successes ++; first_time = 0; index_argv[patbufpos] = (char *)old_buf; } return successes; } /* * Now, I search the index by doing an in-order traversal of the boolean parse tree starting at GParse. * The results at each node are stored in src_offset_table and src_index_set. Before the right child is * evaluated, results of the left child are stored in curr_offset_table and curr_index_set (accumulators) * and are unioned/intersected/noted with the right child's results (which get stored in src_...) and * passed on above. The accumulators are allocated at each internal node and freed after evaluation. * Left to right evaluation is good since number of curr_offset_tables that exist simultaneously depends * entirely on the maximum depth of a right branch (REAL_PARTITION is small so it won't make a difference). 
*/ int search_index(tree) ParseTree *tree; { int prev_INVERSE; int i, j, iii; int first_time = 0; /* since it is used AFTER left child has been computed */ unsigned int *curr_index_set = NULL; struct offsets **curr_offset_table = NULL; if (ComplexBoolean) { /* recursive */ if (tree == NULL) return -1; if (tree->type == LEAF) { /* always AND pat of individual words at each term: initialize accordingly */ if (OneFilePerBlock) { for(i=0; iterminalindex, tree->terminalindex+1) <= 0) return -1; prev_INVERSE = INVERSE; /* agrep's global to implement NOT */ if (tree->op & NOTPAT) INVERSE = 1; if (fillup_target(src_index_set, src_offset_table, AND_EXP) <= 0) return -1; INVERSE = prev_INVERSE; return 1; } else if (tree->type == INTERNAL) { /* Search the left node and see if the right node can be searched */ if (search_index(tree->data.internal.left) <= 0) return -1; if (OneFilePerBlock && ((tree->op & OPMASK) == ORPAT) && (src_index_set[REAL_PARTITION - 1] == 1)) goto quit; /* nothing to do */ if ((tree->data.internal.right == NULL) || (tree->data.internal.right->type == 0)) return -1; /* uninitialized: see main.c */ curr_index_set = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION); memset(curr_index_set, '\0', sizeof(int)*REAL_PARTITION); /* Save previous src_index_set and src_offset_table in fresh accumulators */ if (OneFilePerBlock) { memcpy(curr_index_set, src_index_set, round(OneFilePerBlock,8)); curr_index_set[REAL_PARTITION - 1] = src_index_set[REAL_PARTITION - 1]; src_index_set[REAL_PARTITION - 1] = 0; curr_index_set[REAL_PARTITION - 2] = src_index_set[REAL_PARTITION - 2]; src_index_set[REAL_PARTITION - 2] = 0; } else memcpy(curr_index_set, src_index_set, MAX_PARTITION * sizeof(int)); if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) { if ((curr_offset_table = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL) { fprintf(stderr, "%s: malloc failure at: %s:%d\n", GProgname, __FILE__, __LINE__); 
my_free(curr_index_set, REAL_PARTITION*sizeof(int)); return -1; } memcpy(curr_offset_table, src_offset_table, OneFilePerBlock * sizeof(struct offsets *)); memset(src_offset_table, '\0', sizeof(struct offsets *) * OneFilePerBlock); } /* Now evaluate the right node which automatically put the results in src_index_set/src_offset_table */ if (search_index(tree->data.internal.right) <= 0) { if (curr_offset_table != NULL) free(curr_offset_table); my_free(curr_index_set, REAL_PARTITION*sizeof(int)); return -1; } /* * Alpha substitution of the code in get_index(): * index_tab <- src_index_set * dest_index_table <- curr_index_set * offset_tab <- src_offset_table * dest_offset_table <- curr_offset_table * ret <- src_index_set[REAL_PARTITION - 1] for ORPAT, curr_index_set for ANDPAT * frequency = src_index_set[REAL_PARTITION - 2] in both ORPAT and ANDPAT * first_time <- 0 * return 0 <- goto quit * Slight difference since we want the results to go to src rather than curr. */ if (OneFilePerBlock) { if ((tree->op & OPMASK) == ORPAT) { if (src_index_set[REAL_PARTITION - 1] == 1) { /* curr..[..] can never be 1 since we would have quit above itself */ ret_is_1: src_index_set[REAL_PARTITION - 1] = 1; for(i=0; i= OneFilePerBlock) break; src_index_set[i] |= mask_int[j]; } if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; i= OneFilePerBlock) break; src_index_set[i] |= mask_int[j]; } } first_time = 0; if (ByteLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) for (i=0; iop & OPMASK) == ORPAT) for(i=0; iop & NOTPAT) { if (ByteLevelIndex) { /* Can't recover the discarded offsets */ fprintf(stderr, "%s: can't handle NOT of AND/OR terms with ByteLevelIndex: please simplify the query\n", HARVEST_PREFIX); my_free(curr_index_set, REAL_PARTITION*sizeof(int)); return -1; } if (OneFilePerBlock) for (i=0; i