/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include #include #include "glimpse.h" #include #define CHAR unsigned char /* ---------------------------------------------------------------------- get_filenames() input: an index table, (an index vector, i-th entry is ON if i-th partition is to be searched.), the partition table in src_index_set[] and the list of all files in "NAME_LIST". output: the list of filenames to be searched. ------------------------------------------------------------------------- */ #if BG_DEBUG extern FILE *debug; #endif /*BG_DEBUG*/ extern int p_table[MAX_PARTITION]; extern CHAR **GTextfiles; extern CHAR **GTextfilenames; extern int *GFileIndex; extern int GNumfiles; extern CHAR GProgname[]; extern CHAR FileNamePat[]; extern int MATCHFILE; extern int agrep_outpointer; extern int mask_int[32]; extern int OneFilePerBlock; extern char INDEX_DIR[MAX_LINE_LEN]; extern unsigned int *multi_dest_index_set[MAXNUM_PAT]; extern int file_num; /* in index/io.c */ int bigbuffer_size; char *bigbuffer = NULL; /* constant buffer to read all filenames in NAME_LIST */ char *outputbuffer = NULL; /* keeps changing: used for -F search via memagrep */ extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE; read_filenames() { struct stat st; unsigned char buffer[MAX_NAME_SIZE]; char *currptr; int i; /* one time processing: assumes during one run of glimpse, the index remains constant! */ if (bigbuffer == NULL) { FILE *fp = fopen(NAME_LIST, "r"); if (fp == NULL) { fprintf(stderr, "Can't open for reading: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } if (-1 == stat(NAME_LIST, &st)) { fclose(fp); fprintf(stderr, "Can't stat: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } fgets(buffer, MAX_NAME_SIZE, fp); bigbuffer_size = st.st_size - strlen(buffer); sscanf(buffer, "%d", &file_num); if ((file_num < 0) || (file_num > MaxNum24bPartition)) { fclose(fp); fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } initialize_data_structures(file_num); for (i=0; i 0) ? round(file_num, 8*sizeof(int)) : MAX_PARTITION); i++) if(index_vect[i]) fprintf(debug, "i=%d,%x\n", i, index_vect[i]); #endif /*BG_DEBUG*/ GNumfiles = 0; filesseen = 0; endptr = beginptr = bigbuffer; if(MATCHFILE == OFF) { /* just copy the filenames */ if (OneFilePerBlock) { for (i=0; i= file_num) goto end_files; end_of_loop1: beginptr = endptr = endptr + 1; /* skip over '\n' */ filesseen ++; } } } } /* one file per block */ else { /* Just the outer for-loop and initial begin/end values are different: rest is same */ for (i=0; i 0) { start = p_table[i]; end = p_table[i+1]; if (start >= end) continue; #if BG_DEBUG fprintf(debug, "start=%d, end=%d\n", start, end); #endif /*BG_DEBUG*/ /* * skip over so many filenames and get the filenames to copy. * NOTE: successive "start"s ALWAYS increase. */ while(filesseen < start) { while(*beginptr != '\n') beginptr ++; beginptr ++; /* skip over '\n' */ filesseen ++; } endptr = beginptr; while (filesseen < end) { while(*endptr != '\n') endptr ++; if (endptr == beginptr + 1) goto end_of_loop2; /* null name of non-existent file */ *endptr = '\0'; /* return with all the names you COULD get */ if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) { *endptr = '\n'; fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__); return; } GFileIndex[GNumfiles] = filesseen; *endptr = '\n'; if (++GNumfiles >= file_num) goto end_files; end_of_loop2: beginptr = endptr = endptr + 1; /* skip over '\n' */ filesseen ++; } } } } } else { /* search and copy matched filenames */ extern int REGEX, FASTREGEX; /* agrep global which tells us whether the pattern is a regular expression or not */ int myREGEX, myFASTREGEX; if ((dummylen = memagrep_init(argc, argv, MAX_PAT, dummypat)) <= 0) goto end_files; ret = memagrep_search(dummylen, dummypat, dummylen*2, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer); myREGEX = REGEX; myFASTREGEX = FASTREGEX; if (OneFilePerBlock) { for (i=0; i 0) { #if BG_DEBUG { char c = outputbuffer[agrep_outpointer + 1]; outputbuffer[agrep_outpointer + 1] = '\0'; fprintf(debug, "OUTPUTBUFFER=%s\n", outputbuffer); outputbuffer[agrep_outpointer + 1] = c; } #endif /*BG_DEBUG*/ k = prevk = 0; while(k+1= file_num) goto end_files; k = prevk = k+1; } } } else { index_vect[i] &= ~mask_int[j]; /* remove it from the list: used if ByteLevelIndex */ } end_of_loop3: beginptr = endptr = endptr + 1; } } } /* one file per block */ else { /* Just the outer for-loop and initial begin/end values are different: rest is same */ for (i=0; i 0) { start = p_table[i]; end = p_table[i+1]; if (start >= end) continue; #if BG_DEBUG fprintf(debug, "start=%d, end=%d\n", start, end); #endif /*BG_DEBUG*/ /* * skip over so many filenames and get the region to search = * beginptr to endptr: NOTE: successive "start"s ALWAYS increase. */ while(filesseen < start) { while(*beginptr != '\n') beginptr ++; beginptr ++; /* skip over '\n' */ filesseen ++; } beginptr --; /* I need '\n' for memory search */ endptr = beginptr+1; while (filesseen < end) { while(*endptr != '\n') endptr ++; endptr ++; /* skip over '\n' */ filesseen ++; } endptr --; /* I need '\n' for memory search */ if (endptr == beginptr + 1) goto end_of_loop4; /* null name of non-existent file */ #if BG_DEBUG *endptr = '\0'; fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1); *endptr = '\n'; #endif /*BG_DEBUG*/ /* if file in the partition matches then copy it */ if (myREGEX || myFASTREGEX) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer); else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer); if (ret > 0) { k = prevk = 0; while(k+1= file_num) goto end_files; k = prevk = k+1; } } } else { index_vect[i] = 0; /* mask it off */ } end_of_loop4: beginptr = endptr = endptr + 1; } } } } end_files: #if BG_DEBUG fprintf(debug, "The following %d filenames are ON\n", GNumfiles); for (i=0; i