[Date Prev][Date Next][Thread Prev][Thread Next] [Search] [Date Index] [Thread Index]

[MacPerl] Generic Search Script 2




Thank you everyone for your suggestions and comments regarding
my generic search script.

Special thanks to Paul J. Schinder and Chris Nandor whose
suggestions helped make this a much better script.

Also, thanks to Raffael Cavallaro, René Laterveer, Jay Bedsole,
Richard Gordon, and Thomas R. Kimtpon for their helpful comments.

I learned a bunch, and I hope it is of some use to a few of you.
Don't hesitate to contact me to offer more suggestions.

Ero Brown <ero@fiber.net>



#========================================================================
#========================================================================
#
# Search.pl
#
# Author: Ero Brown <ero@fiber.net>
#
# Version: 2.0
#
# Changed from version 1.6:
#      - fixed an error with opening files that had leading or trailing
#        spaces in their names.
#      - check for long line lengths -- don't try to print long ones
#      - fixed some bad output logic
#
# Changed from version 1.5:
#      - script continues instead of aborting when 'open' errors occur.
#      - made all file opens "read only".
#      - changed 'SearchFilter' function to be more selective.
#      - added a new "print summary info" option
#      - changed much of the output format.
#
# Changed from version 1.4:
#      - no references to the 'find.pl',
#        the 'File::Find' module is used directly instead.
#      - rewrote 'ReadArguments' using the Getopt::Long
#        module's 'GetOptions' function call.
#      - simplified 'GetHostOS'.
#      - changed the way some progress information is written.
#
# Changed from version 1.3:
#      - corrected tab, line-length, and wrapping issues
#      - replaced use of "local" with "my"
#
# Special thanks to Paul J. Schinder and Chris Nandor whose
# suggestions helped make this a much better script.
# Also, thanks to Raffael Cavallaro, René Laterveer, Jay Bedsole,
# Richard Gordon, and Thomas R. Kimtpon for their helpful comments.
#
#========================================================================
#========================================================================

#!/usr/local/bin/perl

### required modules needed for this script
use File::Find;
use Getopt::Long;


#========================================================================

### Boolean constants shared by the rest of the script.
($TRUE, $FALSE) = (1, 0);


#========================================================================

### Predeclare every sub-routine of this script, so that each one can be
### called as a bare word (no parentheses) ahead of its definition.
use subs qw(
   Usage
   GetHostOS
   GetDirSeperatorChar
   ReadArguments
   DoAFile
   SearchFilter
);


#========================================================================
# Function: Usage
# Purpose:  Explain how this script is meant to be invoked and then
#           terminate -- called whenever the command line cannot be
#           understood.  The text is handed to 'die', so it is written
#           to STDERR and the script ends with a failure status.
#========================================================================
sub Usage {
   # The text ends with a newline, so 'die' does not append "at ... line".
   my $usageText = <<END_USAGE;
usage: $0 [-r] [-c] [-i] [-s] [-m] [-p]
#               [-f <expression>] -e <expression> <folder(s) &| file(s) . . .>
#
#   -r   recursively process encountered sub-directories
#   -c   only list a total count of matching occurrances
#   -i   make pattern matching case insensitive
#   -s   print summary information only
#   -m   mpw style output
#   -p   print progress and summary information
#   -f   file expression representing a file filter
#   -e   the expression representing what you are searching for
#
#   *Parameters can be given in any order.
#
#   *Fully resolved pathnames are required, partial pathnames will
#    cause this script to barf.
#
#   Version 2.0
END_USAGE

   die $usageText;
}


#========================================================================
# Function: GetHostOS
# Purpose:  Report which OS we are running on.  The answer comes from
#           $^O, the Perl variable holding the name of the operating
#           system this copy of Perl was built for.
# Returns:  the OS name string (for example 'MacOS').
#========================================================================
sub GetHostOS
{
   my $osName = $^O;
   return $osName;
}


#========================================================================
# Function: GetDirSeperatorChar
# Purpose:  Each platform (OS) uses its own character to separate the
#           directory names inside a full path specification string.
#           Given the OS name, hand back the separator that is correct
#           for the OS this script is currently running on.
# Argument: the OS name string, as returned by GetHostOS.
# Returns:  ':' for 'MacOS', '/' for everything else.
#========================================================================
sub GetDirSeperatorChar
{
   my $osName = shift;

   # Classic Macintosh paths are colon separated.
   return ':' if $osName eq 'MacOS';

   # Unix flavours use '/'.  Anything else is assumed to be a Wintel PC,
   # where '/' (and not '\' as one might expect) is used as well.
   return '/';
}


#========================================================================
# Function: ReadArguments
# Purpose:  Parse @ARGV and set the global option flags from it.
#           Switches are removed from @ARGV, leaving only the files and
#           directories to search.  Calls Usage (which terminates the
#           script) when the command line is empty or malformed, when
#           no file/directory is left over, or when -e is missing.
# Sets:     $recurseOpt, $countOnlyOpt, $caseInSensitive,
#           $summaryInfoOnly, $mpwOutputOpt, $progressOpt,
#           $fileFilter/$fileFilterOpt, $expression/$expressionOpt,
#           and the output prefixes $pad and $lst.
#========================================================================
sub ReadArguments
{
   # an empty command line means the user needs some help
   Usage() unless @ARGV;

   # The bare switch names have no destination, so GetOptions stores
   # them in the package variables $opt_r, $opt_c, ... ; -f and -e take
   # a string value and are stored through the references given.
   $res = Getopt::Long::GetOptions(
      "r", "c", "i", "s", "m", "p",
      "f=s" => \$fileFilter,
      "e=s" => \$expression,
   );
   Usage() unless $res;

   ($recurseOpt, $countOnlyOpt, $caseInSensitive) = ($opt_r, $opt_c, $opt_i);
   ($summaryInfoOnly, $mpwOutputOpt, $progressOpt) = ($opt_s, $opt_m, $opt_p);
   $fileFilterOpt = ($fileFilter ne '');
   $expressionOpt = ($expression ne '');

   # progress output indents the per-file lines by one tab per level
   $pad = $progressOpt ? "\t" : "";
   # mpw style output turns informational lines into '#' comments
   $lst = $mpwOutputOpt ? "# " : "";

   # whatever is left in @ARGV are the directories/files to search
   unless (@ARGV) {
      print "# At least one directory or file to search must be specified.\n";
      Usage();
   }
   if (!$expressionOpt) {
      print "# The required -e expression option was NOT specified.\n";
      Usage();
   }
}


#========================================================================
# Function: DoAFile
# Purpose:  Process one entry found by the directory walk -- this is
#           where the action is.
#           A plain file is counted, optionally checked against the -f
#           file-name filter, and then searched line by line for the -e
#           expression; matches are reported in the selected output
#           style.  Anything that is not a plain file is assumed to be
#           a directory and is only counted (and announced when
#           progress output is on).
# Argument: the full path name of the file or directory.
# Reads:    $dirSepChar, $fileFilter, $expression, $pad, $lst and the
#           option flags set by ReadArguments.
# Updates:  $fileCount, $dirCount, $foundCount, $errorCount.
# Note:     An invalid -f or -e expression makes this function die with
#           Perl's regular-expression error, as it always did.
#========================================================================
sub DoAFile {
   my ($theFileName) = @_;

   # it must be a directory (-d $theFileName)
   unless (-f $theFileName) {
      $dirCount++;
      if ($progressOpt) {
         print $lst, "DIRECTORY: $theFileName\n";
      }
      return;
   }

   $fileCount++;
   if ($progressOpt) {
      print $pad, $lst, "FILE: $theFileName\n";
   }

   # Break the full path apart; the last piece is the filename by itself.
   # The separator is escaped so that it is always taken literally.
   my @dirNameParts = split(/\Q$dirSepChar\E/, $theFileName);
   my $shortName = pop(@dirNameParts);

   # Summary lines name the file by its full path, everything else uses
   # the short name.
   my $tempFName = $summaryInfoOnly ? $theFileName : $shortName;

   # Skip files whose name does not pass the optional -f filter.
   if ($fileFilterOpt) {
      my $filterRe = $caseInSensitive ? qr/$fileFilter/i : qr/$fileFilter/;
      return unless $shortName =~ $filterRe;
   }

   if ($progressOpt) {
      print $pad, $pad, $lst, "Searching $shortName...\n";
   }

   # Compile the search expression once per file, before the file is
   # opened, so that a bad expression cannot leave a handle open.
   my $searchRe = $caseInSensitive ? qr/$expression/i : qr/$expression/;

   # Three-argument open on a lexical handle: the name is used verbatim,
   # so leading/trailing spaces or mode characters such as '>' and '|'
   # in a file name can neither be stripped nor interpreted.
   my $theFile;
   unless (open($theFile, '<', $theFileName)) {
      $errorCount++;
      if ($progressOpt || $summaryInfoOnly) {
         print $pad, $pad, "### ERROR! COULDN'T OPEN FILE: $tempFName\n";
      }
      return;
   }

   my $tFoundCount = 0;
   my $lineCount = 0;
   while (defined(my $lineStr = <$theFile>)) {
      $lineCount++;
      next unless $lineStr =~ $searchRe;

      $tFoundCount++;
      next if $countOnlyOpt || $summaryInfoOnly;

      # don't try to print very long lines
      if (length($lineStr) > 255) {
         $lineStr = "### LINE TOO LONG TO PRINT ###\n";
      }
      if ($mpwOutputOpt) {
         print $pad, $pad, "File \"", $theFileName, "\"; ";
         print "Line ", $lineCount, ":¤ \t# ", $lineStr;
      }
      else {
         print $pad, $pad, $theFileName, " -- line #";
         print $lineCount, "\n", $pad, $pad, $pad, $lineStr;
      }
   }
   close($theFile);

   # per-file summary, only for files that actually had a match
   if (($progressOpt || $summaryInfoOnly) && ($tFoundCount > 0)) {
      if ($mpwOutputOpt && $summaryInfoOnly) {
         print $pad, $pad, "OPEN \"", $theFileName, "\" \t# ", " -- ";
         print $tFoundCount, " occurrances of the search ";
         print "expression.\n";
      }
      else {
         print $pad, $pad, $lst, $tempFName, " -- had ", $tFoundCount;
         print " occurrances of the search expression.\n";
      }
   }
   $foundCount += $tFoundCount;
   return;
}


#========================================================================
# Function: SearchFilter
# Purpose:  The 'wanted' callback for File::Find::find() -- it decides
#           which of the files and directories met during the walk are
#           passed on to DoAFile.
#           Files must exist, be non-empty, plain, readable and look
#           like text (-T).  Directories must be searchable (-x).
#           Without the -r option, sub-directories are pruned so that
#           'find' does not descend into them.  The starting directory
#           itself is never pruned; otherwise a non-recursive search of
#           a folder would not look at any of its files.
# Reads:    $_, $File::Find::name and $File::Find::dir (set by 'find'),
#           and $recurseOpt.
#========================================================================
sub SearchFilter
{
   if (-d $_) {
      return unless -x $_;

      # For the starting directory 'find' sets name and dir to the same
      # path; for every directory below it, dir is the parent.
      my $isStartDir = ($File::Find::name eq $File::Find::dir);
      if (!$recurseOpt && !$isStartDir) {
         $File::Find::prune = 1;
      }
   }
   else {
      return unless -e $_ && -s $_;
      return unless -f $_ && -r $_ && -T $_;
   }

   DoAFile($File::Find::name);  #call our function that does the checking
}


#========================================================================
#============================   MAIN   ==================================
#========================================================================

### main part of the program --
### we go through every file and directory passed in on
### the command line and check it for validity.
### (Entries are tested by their full name while 'find' has changed
### into their directory, which is why fully resolved pathnames are
### required on the command line.)

#what OS are we running on?
$osString = GetHostOS();

#what's the right dir seperator char to use?
$dirSepChar = GetDirSeperatorChar($osString);

#parse the command line for commands and arguments
ReadArguments();

if ($progressOpt) {
   print $lst, "Executing this script on a computer running $osString...\n";
}

#initialize these counter variables
$fileCount = 0;
$dirCount = 0;
$unknownCount = 0;
$errorCount = 0;
$foundCount = 0;

#process every directory (and/or file) provided on the command line
while (@ARGV) {
   $arg = shift(@ARGV);  #get the next directory/file to examine
   if ((-f $arg) || (-d $arg)) {  #check for a valid disk entity
      File::Find::find(\&SearchFilter, $arg);  #'find' calls "SearchFilter"
   }   #which we use as a filter and to call our "DoAFile".
   else {
      $unknownCount++;
      #$0 is the name of this script, the same name the usage text shows
      print STDERR "$0: could not read $arg: $!\n\n";
   }
}

#grand total of matches -- also printed when only a count was asked for
if ($progressOpt || $summaryInfoOnly || $countOnlyOpt) {
   print $lst, "FOUND $foundCount occurrances of \"$expression\" ";
   print "in the specified files.\n";
}
#statistics about what was examined
if ($progressOpt || $summaryInfoOnly) {
   print $lst, "Examined $fileCount files, and $dirCount directories";
   if ($errorCount > 0) {
      print " -- encountered $errorCount errors";
   }
   print ".\n";
   if ($unknownCount > 0) {
      print $lst, "THERE WERE $unknownCount UNKNOWN ENTITIES ENCOUNTERED.\n";
   }
}
exit;

#========================================================================
#========================================================================



***** Want to unsubscribe from this list?
***** Send mail with body "unsubscribe" to mac-perl-request@iis.ee.ethz.ch