/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* ./glimpse/index/dir.c */

/*
 * Traverses a directory tree and records the name and size of each file
 * found in it.  The traversal is derived from the example in the
 * C-programming language book: a directory is opened with opendir() and
 * readdir() is used to read each of its entries.
 */

#include "autoconf.h"	/* ../libtemplate/include */
/*
 * NOTE(review): the names of the system headers in this file were lost from
 * the text under review.  The dirent family follows the standard Autoconf
 * AC_HEADER_DIRENT snippet; the remaining four were chosen from what the
 * code below uses (FILE/fprintf, struct stat/lstat, strlen/strstr/strcpy).
 * Confirm against the upstream copy of this file.
 */
#include <stdio.h>
#include <sys/types.h>
#if HAVE_DIRENT_H
# include <dirent.h>
# define NAMLEN(dirent) strlen((dirent)->d_name)
#else
# define dirent direct
# define NAMLEN(dirent) (dirent)->d_namlen
# if HAVE_SYS_NDIR_H
#  include <sys/ndir.h>
# endif
# if HAVE_SYS_DIR_H
#  include <sys/dir.h>
# endif
# if HAVE_NDIR_H
#  include <ndir.h>
# endif
#endif
#include <sys/stat.h>
#include <string.h>

#define BUFSIZE 256
#define DIRSIZE 14
#include "glimpse.h"
#undef MAX_LIST
#define MAX_LIST 100000

#if	SFS_COMPAT
#define FS_TYPEMASK 0x700000
#define FS_LINK 0x300000
#endif

#if	BG_DEBUG
extern FILE *LOGFILE;
#endif	/*BG_DEBUG*/
extern FILE *MESSAGEFILE;
int ndx = 0;	/* file index */
extern char **name_list[MAXNUM_INDIRECT];	/* store the file names */
extern int *size_list[MAXNUM_INDIRECT];		/* store the sizes of the files */
extern unsigned int *disable_list;		/* store whether to DISABLE indexing or not: only with FastIndex or AddToIndex */
extern int file_num;
extern int file_id;	/* borrowed from filetype.c */
extern char INDEX_DIR[MAX_LINE_LEN];
extern int AddToIndex;
extern int DeleteFromIndex;
extern int FastIndex;
extern int OneFilePerBlock;
extern int IncludeHigherPriority;
extern int BuildDictionaryExisting;
extern int IndexEverything;
extern int printed_warning;
extern int p_table[];
extern FILE *STATFILE;
extern int IndexableFile;
extern int files_per_partition;
extern int new_partition;
extern int files_in_partition;
extern struct stat istbuf;

/* imported from glimpse.c */
extern int memory_usage;
extern int mask_int[];
extern char exin_argv[8];
extern int exin_argc;
extern char current_dir_buf[2*MAX_LINE_LEN + 4];	/* must have space to store pattern after directory name */
extern unsigned char dummypat[MAX_PAT];
extern int dummylen;
extern FILE *dummyout;
extern struct stat excstbuf;
extern struct stat incstbuf;
extern int num_filter;
extern int filter_len[MAX_FILTER];
extern CHAR *filter[MAX_FILTER];
extern CHAR *filter_command[MAX_FILTER];

/*
 * Exclude/Include priorities with exclude > include (IncludeHigherPriority = OFF = default):
 *	1. Command line arguments (inclusion --> exclude list is never applied)
 *	2. Exclude list (exclusion)
 *	3. Include list (inclusion)
 *	4. Symbolic links (exclusion --> applying exclude list is unnecessary)
 *	5. filetype (exclusion)
 *
 * Exclude/Include priorities with include > exclude (IncludeHigherPriority = ON = -i):
 *	1. Command line arguments (inclusion --> exclude list is never applied)
 *	2. Include list (inclusion)
 *	3. Symbolic links (exclusion --> applying exclude list is unnecessary)
 *	4. Exclude list (exclusion)
 *	5. filetype (exclusion)
 */

char outname[MAX_LINE_LEN];
char inname[MAX_LINE_LEN];

/* fsize() and fsize_directory() call each other, so declare the latter up front. */
int fsize_directory();

/*
 * The three macros below are expanded only inside fsize() and use its
 * locals and parameters by name (name, name_len, i, fileindex,
 * force_include, stbuf, toplevel, len_current_dir_buf, pat, inc, ...).
 * PROCESS_EXCLUDE contains `return 0' statements that return from fsize().
 */

/*
 * Called whenever fsize() gives up on `name': when an existing index is being
 * updated (AddToIndex, FastIndex or DeleteFromIndex) and `name' is found in
 * name_list, remove_filename() is called for it.
 */
#define PROCESS_EXIT \
do {\
	if (AddToIndex || FastIndex || DeleteFromIndex) {\
		if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) \
			remove_filename(fileindex, new_partition);\
	}\
} while (0)

/*
 * Matches `name' against the exclude patterns unless it is a toplevel
 * argument.  A pattern with a positive length is a plain substring searched
 * for with strstr(); otherwise the negated length and the pattern are handed
 * to memagrep_search() together with current_dir_buf, after name[name_len]
 * has temporarily been turned into '\n'.  The two-character pattern ".*" is
 * treated as a match without calling memagrep_search().
 * On a match: PROCESS_EXIT, then fsize() returns 0.
 */
#define PROCESS_EXCLUDE \
do {\
	if (!toplevel) for (i=0; i<num_pat; i++) {\
		if (pat_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, pat[i]) != NULL) {\
				PROCESS_EXIT;\
				return 0;\
			}\
		}\
		else {	/* must call memagrep */\
			name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((pat_len[i] == -2) && (pat[i][0] == '.') && (pat[i][1] == '*')) ||\
			    (memagrep_search(-pat_len[i], pat[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
			{\
				name[name_len] = '\0';	/* restore */\
				PROCESS_EXIT;\
				return 0;\
			}\
		}\
	}\
	name[name_len] = '\0';\
} while (0)

/*
 * Sets force_include when `name' matches an include pattern or, failing
 * that, a filter pattern (same matching scheme as PROCESS_EXCLUDE), and
 * always for toplevel arguments.
 *
 * When include has higher priority, we want to include directories
 * by default and match the include patterns only against filenames.
 * Based on bug reports for glimpse-2.1. bg: 2/mar/95.
 *
 * NOTE(review): the ".*" shortcut in the filter loop tests for a length of
 * -1 whereas the exclude and include loops test for -2; it looks like it can
 * never fire for a one-character pattern.  Left as found -- confirm.
 */
#define PROCESS_INCLUDE \
do {\
	if (IncludeHigherPriority && ((stbuf.st_mode & S_IFMT) == S_IFDIR)) force_include = 1;\
	else for (i=0; i<num_inc; i++) {\
		if (inc_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, inc[i]) != NULL) {\
				force_include = 1;\
				break;\
			}\
		}\
		else {	/* must call memagrep */\
			name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((inc_len[i] == -2) && (inc[i][0] == '.') && (inc[i][1] == '*')) ||\
			    (memagrep_search(-inc_len[i], inc[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
			{\
				force_include = 1;\
				break;\
			}\
		}\
	}\
	if (!force_include) for (i=0; i<num_filter; i++) {\
		if (filter_len[i] > 0) {\
			name[name_len] = '\0';\
			if (strstr(name, filter[i]) != NULL) {\
				force_include = 1;\
				break;\
			}\
		}\
		else {	/* must call memagrep */\
			name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\
			if (((filter_len[i] == -1) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||\
			    (memagrep_search(-filter_len[i], filter[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\
			{\
				force_include = 1;\
				break;\
			}\
		}\
	}\
	name[name_len] = '\0';	/* restore */\
	if (toplevel) force_include = 1;\
} while (0)

/*
 * Decides whether the file or directory `name' is to be indexed and updates
 * the index tables (name_list, size_list, disable_list, p_table and the
 * associated counters) accordingly.  Directories are descended into through
 * fsize_directory().
 *
 *	name		path to examine; it is modified in place (its terminator
 *			is temporarily replaced by '\n' for pattern matching and
 *			fsize_directory() appends entry names to it), so it must
 *			be a writable buffer.  NOTE(review): the matching code
 *			searches current_dir_buf, so `name' presumably points
 *			at that buffer -- confirm against the callers.
 *	pat, pat_len, num_pat	exclude patterns, their lengths and their count
 *	inc, inc_len, num_inc	include patterns, their lengths and their count
 *	toplevel	non-zero for a toplevel (command line) name: the exclude
 *			list is skipped and inclusion is forced
 *
 * Returns 0 in the normal case (whether or not the file was indexed) and -1
 * when indexing must stop: MaxNum24bPartition files reached, the partition
 * table overflowed while not in AddToIndex mode, or fsize_directory()
 * returned -1.
 */
int
fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, toplevel)
	char	*name;
	char	**pat;
	int	*pat_len;
	int	num_pat;
	char	**inc;
	int	*inc_len;
	int	num_inc;
	int	toplevel;
{
	struct stat stbuf;
	int	i;
	int	fileindex = -1;
	int	force_include = 0;
	int	len_current_dir_buf = strlen(current_dir_buf) + 1;	/* includes the '\0' which is going to be replaced by '\n' below */
	int	name_len;
	char	*t1;

	if ((name == NULL) || (*name == '\0')) return 0;
	name_len = strlen(name);	/* name[name_len] is '\0' */

#ifdef	SW_DEBUG
	printf("num_pat= %d num_inc= %d\n", num_pat, num_inc);
	printf("name= %s\n", name);
#endif

	/*
	 * Find out what to exclude, what to include and skip
	 * over symbolic links that don't HAVE to be included.
	 * Some Extra get_filename_index calls are done but
	 * that won't cost you anything (just #ing twice).
	 */

	/* Check if cache set in glimpse.c is correct */
	if (!IndexableFile && !DeleteFromIndex && FastIndex &&
	    ((fileindex = get_filename_index(name, name_list, file_num)) != -1) &&
	    (disable_list[block2index(fileindex)] & mask_int[fileindex % (8*sizeof(int))])) {
		if (num_pat <= 0) {
			if (num_inc <= 0) return 0;
			else if (incstbuf.st_ctime <= istbuf.st_ctime) return 0;
		}
		else {
			if (num_inc <= 0) {
				if (excstbuf.st_ctime <= istbuf.st_ctime) return 0;
			}
			else if ((excstbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)) return 0;
		}
	}

	if (lstat(name, &stbuf) == -1) {
		if (IndexableFile) return 0;
		/* Can happen for command line arguments, not stuff obtained from fsize_directory() */
#if	BG_DEBUG
		fprintf(LOGFILE, "cannot find %s -- not indexing\n", name);
#endif	/*BG_DEBUG*/
		PROCESS_EXIT;
		return 0;
	}
	/* Else lstat has all the requisite information */

#if	SFS_COMPAT
	if ((stbuf.st_spare1 & FS_TYPEMASK) == FS_LINK) return 0;
#endif

	if ((stbuf.st_mode & S_IFMT) == S_IFLNK) {
		/* if (IndexableFile) return 0; ---> not correct! must process include/exclude with -I too */
		PROCESS_INCLUDE;
		if (!force_include) {
#if	BG_DEBUG
			fprintf(LOGFILE, "%s is a symbolic link -- not indexing\n", name);
#endif	/*BG_DEBUG*/
			PROCESS_EXIT;
			return 0;
		}
		/* The link was explicitly included: from here on work with its target. */
		if (-1 == stat(name, &stbuf)) {
#if	BG_DEBUG
			fprintf(LOGFILE, "cannot find target of symbolic link %s -- not indexing\n", name);
#endif	/*BG_DEBUG*/
			PROCESS_EXIT;
			return 0;
		}
	}
	else	/* if (!IndexableFile) ---> not correct! must process include/exclude with -I too */
	{
		/* Put exclude include processing here... stat all the time: that is faster than former! */
		if (FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1)) {
			/* Don't process exclude/include if the file `name' is older than the index AND the exclude/include file is older than the index */
			if (IncludeHigherPriority) {
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime))) {
					PROCESS_INCLUDE;
				}
				if (!force_include && !((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime))) {
					PROCESS_EXCLUDE;
				}
			}
			else {
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime))) {
					PROCESS_EXCLUDE;
				}
				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime))) {
					PROCESS_INCLUDE;
				}
			}
		}
		else {	/* Either AddToIndex or fresh indexing or previously excluded file: process exclude and include */
			if (IncludeHigherPriority) {
				PROCESS_INCLUDE;
				if (!force_include) {
					PROCESS_EXCLUDE;
				}
			}
			else {
				PROCESS_EXCLUDE;
				PROCESS_INCLUDE;
			}
		}
	}

	/* Here, the file exists and has not been excluded -- possibly has been included */

	if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {
		if (-1 == fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)) return -1;
	}
	else if ((stbuf.st_mode & S_IFMT) == S_IFREG) {	/* regular file */
		if (IndexableFile) {
			/* Only report the name when filetype() returns 0 for it; nothing is added to the tables. */
			if (!filetype(name, IndexEverything?2:1)) printf("%s\n", name);
			return 0;
		}

		if (DeleteFromIndex) {
			if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {
				remove_filename(fileindex, new_partition);
			}	/* else doesn't exist in index, so doesn't matter */
			return 0;
		}

		file_id ++;
		if (BuildDictionaryExisting) {	/* Don't even store the names of the files that are not uncompressible */
			if (file_num >= MaxNum24bPartition) {
				fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);
				return -1;
			}
			if (tuncompress_file(name, outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT) <= 0) return 0;
			file_num++;
			t1 = (char *) my_malloc(strlen(outname) + 2);
			strcpy(t1, outname);
			/* name_list[ndx] = t1; */
			LIST_ADD(name_list, ndx, t1, char*);
			/* size_list[ndx] = stbuf.st_size; */
			LIST_ADD(size_list, ndx, stbuf.st_size, int);
			ndx ++;
			return 0;
		}

#ifdef	SW_DEBUG
		printf("%s: ", name);
#endif
		if (AddToIndex || FastIndex) {
			if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {
				/* Already in the index: refresh its size and its disable bit. */
				LIST_ADD(size_list, fileindex, stbuf.st_size, int);
				if (FastIndex && (stbuf.st_ctime <= istbuf.st_ctime))
					disable_list[block2index(fileindex)] |= mask_int[fileindex % (8*sizeof(int))];
				else {	/* AddToIndex or file was modified (=> its type might have changed!) */
					if (filetype(name, IndexEverything?2:1)) {
						if (!force_include) {
							remove_filename(fileindex, new_partition);
							return 0;
						}
						else {
#if	BG_DEBUG
							fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif	/*BG_DEBUG*/
						}
					}
					disable_list[block2index(fileindex)] &= ~(mask_int[fileindex % (8*sizeof(int))]);
				}
			}
			else {	/* new file not in filenames so no point in checking */
				if (filetype(name, IndexEverything?2:1)) {
					if (!force_include) return 0;
					else {
#if	BG_DEBUG
						fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif	/*BG_DEBUG*/
					}
				}
				if (file_num >= MaxNum24bPartition) {
					fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);
					return -1;
				}
				t1 = (char *) my_malloc(strlen(name) + 2);
				strcpy(t1, name);
				/* name_list[file_num] = t1; */
				LIST_ADD(name_list, file_num, t1, char*);
				/* size_list[file_num] = stbuf.st_size; */
				LIST_ADD(size_list, file_num, stbuf.st_size, int);
				insert_filename(LIST_GET(name_list, file_num), file_num);
				file_num ++;

				if (!OneFilePerBlock) {
					if (files_in_partition + 1 > files_per_partition) {
						if (new_partition + 1 > MaxNumPartition) {
							if (!printed_warning) {
								printed_warning = 1;
								if (AddToIndex) {
									fprintf(MESSAGEFILE, "Warning: partition-table overflow! Fresh indexing recommended.\n");
								}
								else {
									fprintf(MESSAGEFILE, "Warning: partition-table overflow! Commencing fresh indexing...\n");
									return -1;
								}
							}
						}
						else new_partition++;
						files_in_partition = 0;	/* so that we don't get into this if-branch until another files_per_partition new files are seen */
					}
					p_table[new_partition] = file_num;
					files_in_partition ++;
				}
			}
		}
		else {	/* Fresh indexing: very simple -- add everything */
			if (filetype(name, IndexEverything?2:1)) {
				if (!force_include) return 0;
				else {
#if	BG_DEBUG
					fprintf(LOGFILE, "overriding and indexing: %s\n", name);
#endif	/*BG_DEBUG*/
				}
			}
			if (file_num >= MaxNum24bPartition) {
				fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);
				return -1;
			}
			file_num++;
			t1 = (char *) my_malloc(strlen(name) + 2);
			strcpy(t1, name);
			/* name_list[ndx] = t1; */
			LIST_ADD(name_list, ndx, t1, char*);
			/* size_list[ndx] = stbuf.st_size; */
			LIST_ADD(size_list, ndx, stbuf.st_size, int);
			ndx++;
		}
	}
	return 0;
}

/*
 * Calls fsize() on every entry of the directory `name' except "." and "..".
 * Uses the space in the same "name" to build the path of each entry
 * ("name/entry"), so `name' must be a writable buffer with room after the
 * directory name; the original contents are restored before a normal return.
 * pat, pat_len, num_pat, inc, inc_len, num_inc are just used for the
 * recursive calls to fsize.
 *
 * Returns 0 normally -- also when the directory name is too long or the
 * directory cannot be opened, both of which are reported on stderr -- and
 * -1 as soon as fsize() returns -1 for an entry (in that case `name' still
 * holds the path of that entry).
 */
int
fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)
	char	*name;
	char	**pat;
	int	*pat_len;
	int	num_pat;
	char	**inc;
	int	*inc_len;
	int	num_inc;
{
	struct dirent *dp;
	char	*nbp, *nep;
	int	i;
	DIR	*dirp;

	/* printf("in fsize_directory, name= %s\n",name); */
	if ((name == NULL) || (*name == '\0')) return 0;

	nbp = name + strlen(name);
	if (nbp+DIRSIZE+2 >= name+BUFSIZE)	/* name too long */
	{
		fprintf(stderr, "name too long: %s\n", name);
		return 0;
	}
	if ((dirp = opendir(name)) == NULL) {
		fprintf(stderr, "permission denied or non-existent directory: %s\n", name);
		return 0;
	}

	*nbp++ = '/';	/* entry names are copied in right after this separator */
	for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
		if (dp->d_name[0] == '\0' || strcmp(dp->d_name, ".") == 0
		    || strcmp(dp->d_name, "..") == 0) continue;

		/*
		 * NOTE(review): this bound uses stdio's BUFSIZ whereas the check
		 * on the directory name above uses the local BUFSIZE (256).  The
		 * size of the buffer behind `name' is not visible here, so the
		 * bound is left as found -- confirm which one is intended.
		 */
		for (i=0, nep=nbp; (dp->d_name[i] != '\0') && (nep < name+BUFSIZ-1); i++)
			*nep++ = dp->d_name[i];
		if (dp->d_name[i] != '\0') {	/* ran out of room before the end of the entry name */
			*nep = '\0';
			fprintf(stderr, "name too long: %s\n", name);
			continue;
		}
		*nep = '\0';
		/* printf("name= %s\n", name); */

		if (-1 == fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, 0)) {
			closedir(dirp);	/* don't leak the directory stream when indexing is aborted */
			return -1;
		}
	}
	closedir(dirp);
	*--nbp = '\0';	/* restore name */
	return 0;
}