[Date Prev][Date Next][Thread Prev][Thread Next] [Search] [Date Index] [Thread Index]

New File Sort Script



Disregard the last script ... it had a flaw in it that would chop out the
leftover lines at the end of the file (the line count modulus $chunk).
Argh.  Anyway, this new version is set up like a droplet, and can take
"any" number of files to sort.

#!perl -w
#sortfile.dp
# sort a file without slurping the whole thing into memory
# set $chunk to be equal to the number of lines you want to take at once

use strict;

# Settings.  These are file-scoped lexicals because writeTemp() (defined
# further down) also reads $verbose.
#   $chunk   - how many lines are held in memory and sorted at one time
#   $verbose - true to report progress on STDOUT
my $chunk   = 5000;
my $verbose = 1;

# Sort each file named in @ARGV into "<file>_new" with an external merge sort:
#   1. read the input $chunk lines at a time; writeTemp() sorts each chunk
#      and saves it as "<file>_sorttemp<N>"
#   2. merge the sorted temp files line by line into "<file>_new"
#   3. delete the temp files
# Lines are compared as plain strings (cmp), the same order Perl's default
# sort uses.  Note that every temp file is held open at once during the merge.
foreach my $file (@ARGV) {
    my($count1, $count2, @lines) = (0, 0);

    print "Sorting file $file ...\n" if $verbose;
    # Three-argument open: special characters at either end of the file name
    # are taken literally rather than as an open mode or as whitespace.
    open(my $in, '<', $file) or die("$!: $file");
    print "Creating temp files ...\n" if $verbose;
    while (defined(my $line = <$in>)) {
        # A final line with no newline would otherwise be glued onto
        # whichever line ends up after it once the lines are reordered.
        $line .= "\n" unless $line =~ /\n\z/;
        push @lines, $line;
        $count1++;
        if ($count1 >= $chunk) {
            # writeTemp() hands back its arguments reset for the next chunk;
            # the file name it returns is unchanged, so it is discarded.
            (undef, $count1, $count2, @lines) =
                writeTemp($file, $count1, $count2, @lines);
        }
    }
    # Flush the final, partially filled chunk.
    if (@lines) {
        (undef, $count1, $count2, @lines) =
            writeTemp($file, $count1, $count2, @lines);
    }
    close($in);

    # Open every temp file and prime the merge with its first line.
    #   %fh   : temp file name => open read handle
    #   %head : temp file name => next line of that file not yet written
    my(@temps, %fh, %head);
    foreach my $n (0 .. $count2 - 1) {
        my $temp = "${file}_sorttemp$n";
        push @temps, $temp;
        open(my $th, '<', $temp) or die("$!: $temp");
        my $first = <$th>;
        if (defined $first) {
            $fh{$temp}   = $th;
            $head{$temp} = $first;
        } else {
            close($th);
        }
    }

    print "\nCreating sorted ${file}_new ...\n" if $verbose;
    open(my $out, '>', "${file}_new") or die("$!: ${file}_new");
    while (keys %head) {
        # Find the temp file whose pending line sorts first.  A linear scan
        # is enough; there is no need to sort all pending lines each time.
        my $least;
        foreach my $temp (keys %head) {
            $least = $temp
                if !defined($least) || ($head{$temp} cmp $head{$least}) < 0;
        }
        print {$out} $head{$least} or die("$!: ${file}_new");

        # Refill from the same temp file, or retire it once it runs dry.
        # (The handle is copied to a plain scalar because <$fh{$least}>
        # would be parsed as a glob, not a read.)
        my $th   = $fh{$least};
        my $next = <$th>;
        if (defined $next) {
            $head{$least} = $next;
        } else {
            close($th);
            delete $fh{$least};
            delete $head{$least};
        }
    }
    # Buffered write errors (a full disk, say) may only surface at close.
    close($out) or die("$!: ${file}_new");

    print "\nDeleting temp files ...\n" if $verbose;
    foreach my $temp (@temps) {
        if (unlink($temp)) {
            print "  $temp\n" if $verbose;
        } else {
            warn("Can't delete $temp: $!\n");
        }
    }

    print "\nDone!\n\n" if $verbose;
}

# writeTemp($file, $count1, $count2, @lines)
#
# Sorts @lines with Perl's default string sort and writes them to the temp
# file "${file}_sorttemp$count2".  When the file-scoped $verbose is true the
# temp file's name is printed to STDOUT first.
#
# Returns the argument list reset for the next chunk: $file unchanged,
# $count1 set to 0, $count2 incremented by one, and @lines emptied (so the
# returned list has three elements).
#
# Dies if the temp file cannot be created, written, or closed.
sub writeTemp {
    my($file, $count1, $count2, @lines) = @_;
    my $temp = "${file}_sorttemp$count2";
    print "  $temp\n" if $verbose;

    # Make sure every line is terminated; a line without a newline would be
    # written fused to the line that follows it in sorted order.  @lines is
    # this sub's own copy, so the caller's data is not touched.
    foreach my $line (@lines) {
        $line .= "\n" unless $line =~ /\n\z/;
    }

    # Three-argument open with a lexical handle: special characters in the
    # file name are taken literally, and no global handle is shared.
    open(my $fh, '>', $temp) or die("$!: $temp");
    print {$fh} sort(@lines) or die("$!: $temp");
    # Buffered write errors (a full disk, say) may only surface at close.
    close($fh) or die("$!: $temp");

    return($file, 0, $count2 + 1);
}

__END__

--
Chris Nandor               pudge@pobox.com           http://pudge.net/
%PGPKey=('B76E72AD',[1024,'0824 090B CE73 CA10  1FF7 7F13 8180 B6B6'])
#==                    MacPerl: Power and Ease                     ==#
#==    Publishing Date: Early 1998. http://www.ptf.com/macperl/    ==#