[Date Prev][Date Next][Thread Prev][Thread Next] [Search] [Date Index] [Thread Index]

[MacPerl] File conversion script problem




I wrote the following script to convert files from one line
ending format to another (i.e., Mac2Unix, Dos2Mac, etc.).

1) Please let me know if you know of a better script to do
   this sort of thing.

2) This script has a bug, when performing several of the
   conversions (Mac2Dos for one), where any run of
   multiple carriage returns get converted to a single
   cr-lf pair.

The skinny of what happens in the script is:

$/ = "";   # enable paragraph mode
$* = 1;    # enable multi-line pattern matching
# ...
# ...
$fromExpression = "(\x0D{1}\x0A?|\x0A{1})";
$toExpression = "\x0D\x0A";
# ...
# ...
eval($filestr =~ s/$fromExpression/$toExpression/g);

I realize the 'g' option will cause a "global" replace,
but I was thinking the the explicit "\x0D{1}" would
cause only one <cr> to be replaced at a time, not a
whole bunch of them globbed together.

I'm sure I'm just overlooking something stupid.

TIA for your help!




#========================================================================
#========================================================================
#
# Convert2.pl
#
# Author: Ero Brown <ero@fiber.net>
#
# Version: 1.0
#
#========================================================================
#========================================================================

#!/usr/local/bin/perl

### required modules needed for this script
use File::Find;
use Getopt::Long;


#========================================================================

### Set some special Perl system variables that effect pattern matching
$/ = "";   # enable paragraph mode
$* = 1;    # enable multi-line pattern matching

### define true and false.
$TRUE = 1;
$FALSE = 0;


#========================================================================

### Forward declare all sub-routines used
sub Usage;
sub GetHostOS;
sub GetDirSeperatorChar;
sub ReadArguments;
sub DoAFile;
sub SearchFilter;


#========================================================================
# Function: Usage
# Purpose:  this details the usage of this script -- we can call this
#           function when a invalid usage error occurs.
#========================================================================
sub Usage {
   die <<END_USAGE;
# usage: $0 [-p] [-r] [-x] [-f <expression>] -t <translation>
#                                            <folder(s) &| file(s) . . .>
#
#   -p   print progress information
#   -r   recursively process encountered sub-directories
#   -x   fix the file type if possible
#   -f   file expression representing a file filter
#   -t   translation to be performed, where <translation> is:
#           Cnv2Dos | Mac2Dos | Unx2Dos = convert to a DOS format
#           Cnv2Mac | Dos2Mac | Unx2Mac = convert to a Mac format
#           Cnv2Unx | Dos2Unx | Mac2Unx = convert to a Unix format
#
#   *Parameters can be given in any order.
#
#   *Fully resolved pathnames are required, partial pathnames will
#    cause this script to barf.
#
#   Version 1.0
END_USAGE
}


#========================================================================
# Function: GetHostOS
# Purpose:  Determine what OS we are running on -- $^O is a Perl variable
#           that contains to name of the OS that Perl was compiled on.
#========================================================================
sub GetHostOS
{
   return($^O);
}


#========================================================================
# Function: GetDirSeperatorChar
# Purpose:  Each platform (OS) uses a unique character to seperate
#           directory names within a full path specification string.
#           Here we set a variable to contain the correct directory
#           seperator character to be correct for the OS that we are
#           currently running this script on.
#========================================================================
sub GetDirSeperatorChar
{
   my ($tOSString) = @_;
   my ($tDirSepChar);
   if ($tOSString eq 'MacOS') {  #it's a Macintosh!
      $tDirSepChar = ':';
   }
   elsif ($tOSString =~ /nix/i) {  #it's probably some flavor of Unix
      $tDirSepChar = '/';
   }
   else {  #it could be other things, but assume a Wintel PC
      $tDirSepChar = '/';  #!?! -- same as unix, not '\' as expected!
   }
   return ($tDirSepChar);
}


#========================================================================
# Function: ReadArguments
# Purpose:  Parse the arguments array and set the appropriate flags.
#========================================================================
sub ReadArguments
{
   #if no arguments are passed in, assume the user needs some help
   if (@ARGV < 1) {
      Usage;
   }

   $res = Getopt::Long::GetOptions("r", "p", "x",
                           "f=s" => \$fileFilter,
                           "t=s" => \$translation);
   if (!$res) {
      Usage;
   }

   #if no directories/files are passed in, assume the user needs some help
   if (@ARGV < 1) {
      print "# At least one directory or file to search must be specified.\n";
      Usage;
   }

   $recurseOpt = $opt_r;
   $progressOpt = $opt_p;
   $fixTypeOpt = $opt_x;
   $fileFilterOpt = $fileFilter ne '';

   if ($translation eq '') {
      print "# The required -t (translation) option was NOT specified.\n";
      Usage;
   }

   $translation =~ s/Conv/Cnv/gi;
   $translation =~ s/Pc/Dos/gi;
   $translation =~ s/Unix/Unx/gi;
   if ($translation =~ /(Cnv2Dos|Mac2Dos|Unx2Dos)/i) {
      $fromExpression = "(\x0D{1}\x0A?|\x0A{1})";
      $toExpression = "\x0D\x0A";
   }
   elsif ($translation =~ /(Cnv2Mac|Dos2Mac|Unx2Mac)/i) {
      $fromExpression = "\x0D?\x0A{1}";
      $toExpression = "\x0D";
   }
   elsif ($translation =~ /(Cnv2Unx|Dos2Unx|Mac2Unx)/i) {
      $fromExpression = "\x0D{1}\x0A?";
      $toExpression = "\x0A";
   }
   else {
      print "# Unkown -t (translation) option was specified.\n";
      Usage;
   }

}


#========================================================================
# Function: DoAFile
# Purpose:  process the file/directory
#           this is where the action is -- look at every file and
#           directory and perform the desired operations.
#========================================================================
sub DoAFile {
   my ($theFileName) = @_;
   my ($result,$filestr);
   my ($shortName, $doSearch);
   my (@dirNameParts);

   if (-f $theFileName) {
      $fileCount++;
      $shortName = $theFileName;
      @dirNameParts = split($dirSepChar,$shortName);  #break full path apart
      $shortName = $tempName = pop(@dirNameParts);  #filename by itself
      if ($progressOpt) {
         print "# FILE: $shortName -- \"$theFileName\"\n";
      }

      $doSearch = $TRUE;
      if ($fileFilterOpt) {
         $doSearch = $FALSE;
         if (eval($shortName =~ /$fileFilter/oi)) {
            $doSearch = $TRUE;
         }
      }

      $result = "";
      if ($doSearch) {
         $fileCount++;
         if (open(THEFILE, "< $theFileName\0")) {
            $result = "";
            while ($filestr= <THEFILE>) {
               eval($filestr =~ s/$fromExpression/$toExpression/g);
               $result = join('',$result,$filestr);
            }
            close(THEFILE);

            if ($fixTypeOpt) {
               if ($osString eq 'MacOS') {
                  &MacPerl'SetFileInfo("MPS ","TEXT", $theFileName);
               }
            }

            if (open(THEFILE, "> $theFileName\0")) {
               print THEFILE $result;
               close(THEFILE);
               $processedCount++;
               if ($progressOpt) {
                  print "\t# CHANGED FILE: $shortName\n";
               }
            }
            else {
               $errorCount++;
               print "# ERROR: Couldn't write to the file: \"$theFileName\"\n";
            }
         }
         else {
            $errorCount++;
            print "# ERROR: Couldn't open the file: \"$theFileName\"\n";
         }
      }
   }
   # it must be a directory (-d $theFileName)
   else {
      $dirCount++;
   }
}


#========================================================================
# Function: SearchFilter
# Purpose:  We need to provide this function for the File::Find::find()
#           function in the File::Find module -- this is so we can control
#           (via filter) what kind of files and/or directories get handled.
#========================================================================
sub SearchFilter
{
   #filter so we only process the files/directories we want
   (
      ( -e $_ && -s $_ ) &&
      (
         ( -f $_ && -r $_ && -T $_ ) ||
         ( -d $_ && -x $_ &&
         ( $recurseOpt || ( !$recurseOpt && ($File::Find::prune = 1) ) ) )
      )
   ) &&
   DoAFile($File::Find::name);  #call our function that does the checking
}


#========================================================================
#============================   MAIN   ==================================
#========================================================================

### main part of the program --
### we go through every file and directory passed in on
### the command line and check it for validity.

#what OS are we running on?
$osString = GetHostOS;

#what's the right dir seperator char to use?
$dirSepChar = GetDirSeperatorChar($osString);

#parse the command line for commands and arguments
ReadArguments;

if ($progressOpt) {
   print $lst, "Executing this script on a computer running $osString...\n";
}

#initialize these counter variables
$fileCount = 0;
$dirCount = 0;
$processedCount = 0;
$errorCount = 0;

#process every directory (and/or file) provided on the command line
while (@ARGV) {
   $arg = shift(@ARGV);  #get the next directory/file to examine
   if ((-f $arg) || (-d $arg)) {
      File::Find::find(\&SearchFilter, $arg);  #'find' calls "SearchFilter"
   }   #which we use as a filter and to call our "DoAFile".
   else {
      print STDERR "# COULD NOT READ \"$arg\"\n\n";
   }
}

if ($progressOpt) {
   print $lst, "Changed $processedCount files from Pc to Mac text format.\n";

   print $lst, "Examined $fileCount files, in $dirCount directories";
   if ($errorCount > 0) {
      print " -- encountered $errorCount errors";
   }
   print ".\n";

   if ($unknownCount > 0) {
      print $lst, "THERE WERE $unknownCount UNKNOWN ENTITIES ENCOUNTERED.\n";
   }
}
exit;

#========================================================================
#========================================================================



***** Want to unsubscribe from this list?
***** Send mail with body "unsubscribe" to mac-perl-request@iis.ee.ethz.ch