Thank you everyone for your suggestions and comments regarding my generic
search script. Special thanks to Paul J. Schinder and Chris Nandor, whose
suggestions helped make this a much better script. Also, thanks to Raffael
Cavallaro, René Laterveer, Jay Bedsole, Richard Gordon, and Thomas R. Kimtpon
for their helpful comments.

I learned a bunch, and I hope it is of some use to a few of you. Don't
hesitate to contact me to offer more suggestions.

Ero Brown <ero@fiber.net>

#========================================================================
#========================================================================
#
# Search.pl
#
# Author: Ero Brown <ero@fiber.net>
#
# Version: 2.0
#
# Changed from version 1.6:
#   - fixed an error with opening files that had leading or trailing
#     spaces in their names.
#   - check for long line lengths -- don't try to print long ones
#   - fixed some bad output logic
#
# Changed from version 1.5:
#   - script continues instead of aborting when 'open' errors occur.
#   - made all file opens "read only".
#   - changed 'SearchFilter' function to be more selective.
#   - added a new "print summary info" option
#   - changed much of the output format.
#
# Changed from version 1.4:
#   - no references to the 'find.pl',
#     the 'File::Find' module is used directly instead.
#   - rewrote 'ReadArguments' using the Getopt::Long
#     module's 'GetOptions' function call.
#   - simplified 'GetHostOS'.
#   - changed the way some progress information is written.
#
# Changed from version 1.3:
#   - corrected tab, line-length, and wrapping issues
#   - replaced use of "local" with "my"
#
# Special thanks to Paul J. Schinder and Chris Nandor, whose
# suggestions helped make this a much better script.
# Also, thanks to Raffael Cavallaro, René Laterveer, Jay Bedsole,
# Richard Gordon, and Thomas R. Kimtpon for their helpful comments.
#
#========================================================================
#========================================================================

#!/usr/local/bin/perl

### required modules needed for this script
use File::Find;
use Getopt::Long;

#========================================================================
### define true and false.
$TRUE  = 1;
$FALSE = 0;

#========================================================================
### Forward declare all sub-routines used, so that they can be called
### as bare words (without parentheses) before their definitions.
sub Usage;
sub GetHostOS;
sub GetDirSeperatorChar;
sub ReadArguments;
sub DoAFile;
sub SearchFilter;

#========================================================================
# Function: Usage
# Purpose:  this details the usage of this script -- we can call this
#           function when an invalid usage error occurs.
# Params:   none
# Returns:  never returns; it always dies with the usage text.  The text
#           ends in a newline, so Perl appends no "at ... line N" suffix.
# Note:     every line but the first starts with '#'; those characters
#           are part of the emitted message, not Perl comments.
#========================================================================
sub Usage {
    die <<END_USAGE;
usage: $0 [-r] [-c] [-i] [-s] [-m] [-p]
#          [-f <expression>] -e <expression> <folder(s) &| file(s) . . .>
#
#   -r  recursively process encountered sub-directories
#   -c  only list a total count of matching occurrances
#   -i  make pattern matching case insensitive
#   -s  print summary information only
#   -m  mpw style output
#   -p  print progress and summary information
#   -f  file expression representing a file filter
#   -e  the expression representing what you are searching for
#
#   *Parameters can be given in any order.
#
#   *Fully resolved pathnames are required, partial pathnames will
#    cause this script to barf.
#
#   Version 2.0
END_USAGE
}

#========================================================================
# Function: GetHostOS
# Purpose:  Determine what OS we are running on -- $^O is a Perl variable
#           that contains the name of the OS that Perl was compiled on.
#========================================================================
# Params:   none
# Returns:  the value of $^O (for example 'MacOS', 'linux', 'MSWin32').
sub GetHostOS {
    return($^O);
}

#========================================================================
# Function: GetDirSeperatorChar
# Purpose:  Each platform (OS) uses its own character to separate
#           directory names within a full path specification string.
#           Return the separator that is correct for the given OS.
# Params:   $tOSString - OS name, as returned by GetHostOS
# Returns:  ':' for 'MacOS' (exact, case-sensitive match), '/' for
#           everything else.
# Note:     earlier versions tested the name against /nix/i to detect
#           Unix, but that test misses most Unix names ('linux',
#           'darwin', 'solaris', ...) and the fall-through branch
#           returned '/' as well, so the distinction had no effect.
#           Wintel also gets '/' (not '\') on purpose, as before.
#========================================================================
sub GetDirSeperatorChar {
    my ($tOSString) = @_;

    #it's a Macintosh!
    if (defined($tOSString) && $tOSString eq 'MacOS') {
        return (':');
    }

    #some flavor of Unix, a Wintel PC, or anything else
    return ('/');
}

#========================================================================
# Function: ReadArguments
# Purpose:  Parse the arguments array and set the appropriate flags.
#========================================================================
# Params:   none -- the options are read from (and removed from) @ARGV,
#           leaving only the files/directories to search.
# Sets:     $recurseOpt, $countOnlyOpt, $caseInSensitive,
#           $summaryInfoOnly, $mpwOutputOpt, $progressOpt  (flags)
#           $fileFilter, $expression                (option values)
#           $fileFilterOpt, $expressionOpt          (value was given?)
#           $pad  - indent prefix: a tab when -p is on, else empty
#           $lst  - line prefix: "# " when -m is on, else empty
# Errors:   calls Usage (which dies) when there are no arguments, when
#           option parsing fails, when nothing is left to search, or
#           when -e is missing.
sub ReadArguments {
    #if no arguments are passed in, assume the user needs some help
    if (@ARGV < 1) { Usage; }

    #each option is bound straight to the variable it controls, rather
    #than going through Getopt::Long's implicit $opt_x package globals
    my $res = Getopt::Long::GetOptions(
        "r"   => \$recurseOpt,
        "c"   => \$countOnlyOpt,
        "i"   => \$caseInSensitive,
        "s"   => \$summaryInfoOnly,
        "m"   => \$mpwOutputOpt,
        "p"   => \$progressOpt,
        "f=s" => \$fileFilter,
        "e=s" => \$expression,
    );
    if (!$res) { Usage; }

    #an option counts as "given" only if it has a non-empty value
    $fileFilterOpt = (defined($fileFilter) && $fileFilter ne '');
    $expressionOpt = (defined($expression) && $expression ne '');

    $pad = $progressOpt  ? "\t" : "";
    $lst = $mpwOutputOpt ? "# " : "";

    #if no directories/files are passed in, assume the user needs some help
    if (@ARGV < 1) {
        print "# At least one directory or file to search must be specified.\n";
        Usage;
    }

    if (!$expressionOpt) {
        print "# The required -e expression option was NOT specified.\n";
        Usage;
    }
}

#========================================================================
# Function: DoAFile
# Purpose:  process the file/directory
#           this is where the action is -- look at every file and
#           directory and perform the desired operations.
#========================================================================
sub DoAFile {
    # Params:  $theFileName - full path of the entry to process.
    # Reads:   $dirSepChar, $expression, $fileFilter, $pad, $lst and the
    #          option flags set by ReadArguments.
    # Updates: $fileCount, $dirCount, $errorCount, $foundCount.
    # Output:  match, progress and summary lines on STDOUT, depending on
    #          the option flags.
    my ($theFileName) = @_;

    # anything that is not a plain file is counted as a directory
    if (!-f $theFileName) {
        $dirCount++;
        if ($progressOpt) { print $lst, "DIRECTORY: $theFileName\n"; }
        return;
    }

    $fileCount++;
    if ($progressOpt) { print $pad, $lst, "FILE: $theFileName\n"; }

    #break full path apart; the last component is the filename by itself
    my @dirNameParts = split(/\Q$dirSepChar\E/, $theFileName);
    my $shortName    = pop(@dirNameParts);

    #summary output names files by full path, everything else by filename
    my $tempFName = $summaryInfoOnly ? $theFileName : $shortName;

    #the -f filter is applied to the bare filename, not to the full path
    if ($fileFilterOpt) {
        my $filterRe = $caseInSensitive ? qr/$fileFilter/i : qr/$fileFilter/;
        return unless $shortName =~ $filterRe;
    }

    if ($progressOpt) { print $pad, $pad, $lst, "Searching $shortName...\n"; }

    #three-argument open: the name is taken literally, so leading or
    #trailing spaces and characters such as '>' or '|' are harmless
    my $theFile;
    if (!open($theFile, '<', $theFileName)) {
        $errorCount++;
        if ($progressOpt || $summaryInfoOnly) {
            print $pad, $pad, "### ERROR! COULDN'T OPEN FILE: $tempFName\n";
        }
        return;
    }

    #compile the search expression once per file, not once per line
    my $searchRe    = $caseInSensitive ? qr/$expression/i : qr/$expression/;
    my $tFoundCount = 0;
    my $lineCount   = 0;

    while (defined(my $lineStr = <$theFile>)) {
        $lineCount++;                       #line numbers are 1-based
        next unless $lineStr =~ $searchRe;

        $tFoundCount++;
        next if $countOnlyOpt || $summaryInfoOnly;

        #don't try to print very long lines
        if (length($lineStr) > 255) {
            $lineStr = "### LINE TOO LONG TO PRINT ###\n";
        }

        if ($mpwOutputOpt) {
            # NOTE(review): the character after "Line N:" is reproduced
            # exactly as found; it was presumably the MPW selection
            # marker in the original Mac OS Roman text -- confirm.
            print $pad, $pad, "File \"", $theFileName, "\"; ";
            print "Line ", $lineCount, ":¤ \t# ", $lineStr;
        }
        else {
            print $pad, $pad, $theFileName, " -- line #";
            print $lineCount, "\n", $pad, $pad, $pad, $lineStr;
        }
    }
    close($theFile);

    if (($progressOpt || $summaryInfoOnly) && ($tFoundCount > 0)) {
        if ($mpwOutputOpt && $summaryInfoOnly) {
            print $pad, $pad, "OPEN \"", $theFileName, "\" \t# ", " -- ";
            print $tFoundCount, " occurrances of the search ";
            print "expression.\n";
        }
        else {
            print $pad, $pad, $lst, $tempFName, " -- had ", $tFoundCount;
            print " occurrances of the search expression.\n";
        }
    }

    $foundCount += $tFoundCount;
}

#========================================================================
# Function: SearchFilter
# Purpose:  We need to provide this function for the File::Find::find()
#           function in the File::Find module -- this is so we can control
#           (via filter) what kind of files and/or directories get handled.
# Accepts:  - plain files that are non-empty, readable and look like text
#           - directories that are searchable (-x)
#           Everything else is silently skipped.
# Note:     without -r, sub-directories are pruned so find() does not
#           descend into them.  The directory find() was started on is
#           never pruned (for it $File::Find::name equals
#           $File::Find::dir); pruning it would skip its files as well.
#========================================================================
sub SearchFilter {
    if (-f $_) {
        return unless (-s $_ && -r $_ && -T $_);
    }
    elsif (-d $_ && -x $_) {
        if (!$recurseOpt && $File::Find::name ne $File::Find::dir) {
            $File::Find::prune = 1;
        }
    }
    else {
        return;
    }

    DoAFile($File::Find::name);     #call our function that does the checking
}

#========================================================================
#============================ MAIN ==================================
#========================================================================
### main part of the program --
### we go through every file and directory passed in on
### the command line and check it for validity.

#what OS are we running on?
#(bare-word call: relies on the forward declarations at the top)
$osString = GetHostOS;

#what's the right dir seperator char to use?
$dirSepChar = GetDirSeperatorChar($osString);

#parse the command line for commands and arguments
#(dies with the usage text if the arguments are not acceptable)
ReadArguments;

if ($progressOpt) {
    print $lst, "Executing this script on a computer running $osString...\n";
}

#initialize these counter variables
$fileCount    = 0;      #plain files examined
$dirCount     = 0;      #directories examined
$unknownCount = 0;      #command-line entries that are neither
$errorCount   = 0;      #files that could not be opened
$foundCount   = 0;      #matching lines over all files

#process every directory (and/or file) provided on the command line
while (@ARGV) {
    my $arg = shift(@ARGV);         #get the next directory/file to examine

    if ((-f $arg) || (-d $arg)) {   #check for a valid disk entity
        #'find' calls "SearchFilter", which we use as a filter and to
        #call our "DoAFile".  no_chdir keeps the working directory fixed
        #so that $File::Find::name stays valid for relative paths too.
        File::Find::find({ wanted => \&SearchFilter, no_chdir => 1 }, $arg);
    }
    else {
        $unknownCount++;
        #$0 is the name this script was invoked under
        print STDERR "$0: could not read $arg: $!\n\n";
    }
}

if ($progressOpt || $summaryInfoOnly || $countOnlyOpt) {
    print $lst, "FOUND $foundCount occurrances of \"$expression\" ";
    print "in the specified files.\n";
}

if ($progressOpt || $summaryInfoOnly) {
    print $lst, "Examined $fileCount files, and $dirCount directories";
    if ($errorCount > 0) {
        print " -- encountered $errorCount errors";
    }
    print ".\n";

    if ($unknownCount > 0) {
        print $lst, "THERE WERE $unknownCount UNKNOWN ENTITIES ENCOUNTERED.\n";
    }
}

exit;

#========================================================================
#========================================================================

# Everything after the __END__ marker is ignored by perl; it keeps the
# mailing-list footer from being parsed as code.
__END__
***** Want to unsubscribe from this list?
***** Send mail with body "unsubscribe" to mac-perl-request@iis.ee.ethz.ch