/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/*
 * If the pattern is not a simple fixed pattern, then after preprocessing
 * and generating the masks, the program goes here.  Four cases:
 *   1. the pattern is a simple regular expression and no errors are
 *      allowed: do the matching here.
 *   2. the pattern is a simple regular expression and unit-cost errors
 *      are allowed: go to asearch().
 *   3. the pattern is a simple regular expression and the edit cost is
 *      not uniform: go to asearch1().
 *   4. the pattern is a regular expression: go to re() if M < 14,
 *      else go to re1().  (The code below tests M <= SHORTREG.)
 * Input parameters: old_D_pat: delimiter pattern;
 * fd: input file descriptor; M: size of pattern; D: # of errors.
 */

#include "agrep.h"
#include "memory.h"

extern int CurrentByteOffset;
extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
extern int LIMITOUTPUT, LIMITPERFILE;
extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE;
extern char D_pattern[];
extern int TRUNCATE, DD, S;
extern char Progname[], CurrentFileName[];
extern int num_of_matched, prev_num_of_matched;
extern int agrep_initialfd;
extern int EXITONERROR;
extern int agrep_inlen;
extern CHAR *agrep_inbuffer;
extern int agrep_inpointer;
extern CHAR *agrep_outbuffer;
extern int agrep_outlen;
extern int agrep_outpointer;
extern FILE *agrep_finalfp;
extern int errno;
extern int NEW_FILE, POST_FILTER;

/*
 * bitap dispatches job.
 *
 * Hands the search to re()/re1() (chosen by M <= SHORTREG), to asearch1()
 * (D > 0 and JUMP == ON) or to asearch() (D > 0); otherwise it runs the
 * error-free matching loop in this function.
 *
 *   old_D_pat  delimiter pattern; its length is stored in the global D_length.
 *   Pattern    not referenced in the visible text of this function.
 *   fd         input file descriptor; -1 selects the in-memory branch that is
 *              compiled when AGREP_POINTER is set.
 *   M          size of the pattern.
 *   D          number of errors allowed.
 *
 * Returns the callee's result when the work is dispatched.  Otherwise 0,
 * or -1 when output() fails, when the output buffer overflows, or when more
 * than 4 errors are requested and EXITONERROR is clear (errno is then set to
 * AGREP_ERROR; with EXITONERROR set the process exits with status 2).
 *
 * NOTE(review): this copy of the function is damaged.  At every spot marked
 * "text lost" below, the source jumps from an identifier straight to the
 * text that follows a '>' character, as if everything from a '<' up to the
 * next '>' had been stripped.  The damaged tokens are kept exactly as found;
 * restore them from a pristine copy of the file -- do not guess.
 */
int
bitap(old_D_pat, Pattern, fd, M, D)
char old_D_pat[], *Pattern;
int fd, M, D;
{
	char c;
	register unsigned r1, r2, r3, CMask, i;
	register unsigned end, endpos, r_Init1;
	register unsigned D_Mask;
	int ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
	int k;
	CHAR *buffer;

	D_length = strlen(old_D_pat);
	/* NOTE(review): text lost between "i" and " 4) {" on the next line. */
	for(i=0; i 4) {
			fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname);
			if (!EXITONERROR) {
				errno = AGREP_ERROR;
				return -1;
			}
			else exit(2);
		}
		if (M <= SHORTREG) {
			return re(fd, M, D);	/* SUN: need to find a even point */
		}
		else {
			return re1(fd, M, D);
		}
	}
	if (D > 0 && JUMP == ON) {
		return asearch1(old_D_pat, fd, D);
	}
	if (D > 0) {
		return asearch(old_D_pat, fd, D);
	}
	if(I == 0) Init1 = (unsigned)037777777777;

	j=0;
	r_Init1 = Init1;
	r1 = r2 = r3 = Init[0];
	endpos = D_endpos;

	D_Mask = D_endpos;
	/*
	 * NOTE(review): text lost between "i" and " 0) {" on the next line.  The
	 * lost span presumably finished building D_Mask, set FIRSTROUND, obtained
	 * `buffer` and opened the block-reading loop that assigns num_read --
	 * all of these are used below but never set in the visible text.
	 */
	for(i=1 ; i 0) {
			i=Max_record;
			end = Max_record + num_read;
			if(FIRSTROUND) {
				i = Max_record - 1 ;

				if(DELIMITER) {
					/* NOTE(review): text lost between "k" and "=D_length)". */
					for(k=0; k=D_length) j--;
				}
				FIRSTROUND = OFF;
			}
			if(num_read < BlockSize) {
				/* Short read: append the delimiter after the data and terminate. */
				strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
				end = end + D_length;
				buffer[end] = '\0';
			}

			/* BITAP_PROCESS: the while-loop below */
			/*
			 * Two input bytes are consumed per iteration: the first step
			 * computes r2 from r3, the second computes r3 from r2.  `i < end`
			 * is tested only once per pair, so the second step can read
			 * buffer[end].
			 * NOTE(review): `c` is a plain char used to index Mask[]; where
			 * char is signed, bytes >= 0x80 give a negative index -- confirm.
			 */
			while (i < end)
			{
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r3;
				r2 = (( r3 >> 1 ) & CMask) | r1;
				if ( r2 & endpos ) {
					/* A bit of endpos (= D_endpos) is set in the state. */
					j++;
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					/*
					 * NOTE(review): '^' binds tighter than '||', so INVERSE is
					 * applied only to the (AND == 0) operand -- confirm that
					 * this is intended.
					 */
					if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) {
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							/* Only the file name is wanted: emit it and stop. */
							num_of_matched++;
							if (agrep_finalfp != NULL)
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								int outindex;
								/* NOTE(review): text lost between "agrep_outpointer" and "=agrep_outlen))". */
								for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}
							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0;
						}
						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					/* Start a new record and reset the automaton state. */
					lasti = i - D_length;
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0];
					r1 = r_Init1 & r3;
					r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}
				/* Second step of the pair: same logic with r2 and r3 swapped. */
				c = buffer[i++];
				CurrentByteOffset ++;
				CMask = Mask[c];
				r1 = r_Init1 & r2;
				r3 = (( r2 >> 1 ) & CMask) | r1;
				if ( r3 & endpos ) {
					j++;
					if (DELIMITER) CurrentByteOffset -= D_length;
					else CurrentByteOffset -= 1;
					if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) {
						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
							num_of_matched++;
							if (agrep_finalfp != NULL)
								fprintf(agrep_finalfp, "%s\n", CurrentFileName);
							else {
								int outindex;
								/* NOTE(review): text lost between "agrep_outpointer" and "=agrep_outlen))". */
								for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) {
									OUTPUT_OVERFLOW;
									free_buf(fd, buffer);
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
								agrep_outpointer += outindex;
							}
							free_buf(fd, buffer);
							NEW_FILE = OFF;
							return 0;
						}
						print_end = i - D_length - 1;
						if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
							if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
							free_buf(fd, buffer);
							return 0;	/* done */
						}
					}
					lasti = i - D_length ;
					TRUNCATE = OFF;
					r2 = r3 = r1 = Init[0];
					r1 = r_Init1 & r2;
					r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
					if (DELIMITER) CurrentByteOffset += 1*D_length;
					else CurrentByteOffset += 1*1;
				}
			}
			/*
			 * Carry the unfinished record (from lasti to the end of the data)
			 * to just below buffer+Max_record so the next block continues it;
			 * at most Max_record bytes are kept, and TRUNCATE is raised when
			 * the record had to be cut.
			 * NOTE(review): strncpy stops at a NUL byte and the two regions
			 * can overlap -- confirm this is acceptable for the data handled.
			 */
			ResidueSize = num_read + Max_record - lasti;
			if(ResidueSize > Max_record) {
				ResidueSize = Max_record;
				TRUNCATE = ON;
			}
			strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
			lasti = Max_record - ResidueSize;
			if(lasti < 0) {
				lasti = 1;
			}
			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
				free_buf(fd, buffer);
				return 0;	/* done */
			}
		}
		free_buf(fd, buffer);
		return 0;
#if AGREP_POINTER
	}
	else {
		/* Search the caller-supplied memory region directly. */
		buffer = agrep_inbuffer;
		num_read = agrep_inlen;
		end = num_read;
		/* buffer[end-1] = '\n';*/	/* at end of the text. */
		/* buffer[0] = '\n';*/	/* in front of the text. */
		i = 0;
		lasti = 1;

		if(DELIMITER) {
			/* NOTE(review): text lost between "k" and "=D_length)". */
			for(k=0; k=D_length) j--;
		}

		/* An exact copy of the above: BITAP_PROCESS: the while-loop below */
		while (i < end)
		{
			c = buffer[i++];
			CurrentByteOffset ++;
			CMask = Mask[c];
			r1 = r_Init1 & r3;
			r2 = (( r3 >> 1 ) & CMask) | r1;
			if ( r2 & endpos ) {
				j++;
				if (DELIMITER) CurrentByteOffset -= D_length;
				else CurrentByteOffset -= 1;
				if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) {
					if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
						num_of_matched++;
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%s\n", CurrentFileName);
						else {
							int outindex;
							/* NOTE(review): text lost between "agrep_outpointer" and "=agrep_outlen))". */
							for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) {
								OUTPUT_OVERFLOW;
								free_buf(fd, buffer);
								return -1;
							}
							else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
							agrep_outpointer += outindex;
						}
						free_buf(fd, buffer);
						NEW_FILE = OFF;
						return 0;
					}
					print_end = i - D_length - 1;
					if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
						if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
					if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
					    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
						free_buf(fd, buffer);
						return 0;	/* done */
					}
				}
				lasti = i - D_length;
				TRUNCATE = OFF;
				r2 = r3 = r1 = Init[0];
				r1 = r_Init1 & r3;
				r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
				if (DELIMITER) CurrentByteOffset += 1*D_length;
				else CurrentByteOffset += 1*1;
			}
			c = buffer[i++];
			CurrentByteOffset ++;
			CMask = Mask[c];
			r1 = r_Init1 & r2;
			r3 = (( r2 >> 1 ) & CMask) | r1;
			if ( r3 & endpos ) {
				j++;
				if (DELIMITER) CurrentByteOffset -= D_length;
				else CurrentByteOffset -= 1;
				if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) {
					if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
						num_of_matched++;
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%s\n", CurrentFileName);
						else {
							int outindex;
							/* NOTE(review): text lost between "agrep_outpointer" and "=agrep_outlen))". */
							for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) {
								OUTPUT_OVERFLOW;
								free_buf(fd, buffer);
								return -1;
							}
							else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
							agrep_outpointer += outindex;
						}
						free_buf(fd, buffer);
						NEW_FILE = OFF;
						return 0;
					}
					print_end = i - D_length - 1;
					if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
						if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
					if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
					    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
						free_buf(fd, buffer);
						return 0;	/* done */
					}
				}
				lasti = i - D_length ;
				TRUNCATE = OFF;
				r2 = r3 = r1 = Init[0];
				r1 = r_Init1 & r2;
				r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
				if (DELIMITER) CurrentByteOffset += 1*D_length;
				else CurrentByteOffset += 1*1;
			}
		}
		return 0;
	}
#endif /*AGREP_POINTER*/
}

/*
 * fill_buf: place up to record_size bytes of input into buf.
 *
 *   fd >= 0  keeps calling read() until record_size bytes have arrived or
 *            read() reports end-of-file or an error.
 *   fd <  0  with AGREP_POINTER set, returns 0 (the caller searches the
 *            user-supplied region directly and should not call this);
 *            otherwise simulates a file by copying from agrep_inbuffer at
 *            offset agrep_inpointer and advancing that offset.
 *
 * Returns the number of bytes stored in buf, or 0 when nothing was read or
 * when glimpse_clientdied is set.
 */
int
fill_buf(fd, buf, record_size)
int fd, record_size;
unsigned char *buf;
{
	int num_read;
	int total_read=0;
	extern int glimpse_clientdied;

	if (fd >= 0) {
		while(total_read < record_size) {
			if (glimpse_clientdied) return 0;
			num_read = read(fd, buf+total_read, record_size - total_read);
			/* Stop on end-of-file (0) or error (-1); a -1 must never be
			 * added to the running byte count. */
			if (num_read <= 0) break;
			total_read = total_read + num_read;
		}
	}
#if AGREP_POINTER
	else return 0;	/* should not call this function if buffer is a pointer to a user-specified region! */
#else /*AGREP_POINTER*/
	else {	/* simulate a file */
		int remaining = agrep_inlen - agrep_inpointer;

		total_read = (record_size > remaining) ? remaining : record_size;
		/* Never hand memcpy a negative length. */
		if (total_read < 0) total_read = 0;
		memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read);
		agrep_inpointer += total_read;
	}
#endif /*AGREP_POINTER*/
	if (glimpse_clientdied) return 0;
	return(total_read);
}

/*
 * In these functions no allocs/copying is done when
 * fd == -1, i.e., agrep is called to search within memory.
/* Buffer helpers for the search routines: alloc_buf() and free_buf(). */

/*
 * alloc_buf: obtain a `size`-byte buffer from malloc() and store its address
 * in *buf.  When AGREP_POINTER is set and fd is -1, nothing is allocated and
 * *buf is left untouched.  The result of malloc() is stored unchecked, so
 * *buf may be NULL on return.
 */
void
alloc_buf(fd, buf, size)
int fd;
char **buf;
int size;
{
#if AGREP_POINTER
	if (fd == -1) {
		return;
	}
#endif /*AGREP_POINTER*/
	*buf = (char *)malloc(size);
}

/*
 * free_buf: release a buffer obtained through alloc_buf().  When
 * AGREP_POINTER is set and fd is -1, buf is not freed.
 */
void
free_buf(fd, buf)
int fd;
char *buf;
{
#if AGREP_POINTER
	if (fd == -1) {
		return;
	}
#endif /*AGREP_POINTER*/
	free(buf);
}