This being a lazy saturday I might as well throw in my attempt at a solution. -- Sune Kirkeby | "Imagine, if you will, that there were no such | thing as a hypothetical situation..."
#!/usr/bin/perl -w # ((insert appropriate information replacing each dot)) # Author: Sune Kirkeby # Email: sune@interspace.dk # Language: Perl # # How long it took to write the program (in minutes): # Design: reading the requirements; deciding on algorithm, # data structure, modularization # Coding: actually writing the program # Test/Debug: trying to get the program to work # ((insert your work times here:)) # Design: 20 minutes # Coding: 30 minutes # Test/Debug: 10 minutes # # Overall I tend to rate myself as follows compared to all # other programmers (replace one dot by an X): # among the upper 10 percent . # upper 11 to 25 percent X # upper 25 to 40 percent . # upper 40 to 60 percent . # lower 25 to 40 percent . # lower 11 to 25 percent . # lower 10 percent . # # Special events, remarks, insights, constraints etc: # # . # # # # # use strict; # %encoded: map from encoded number to a list of possible words use vars qw(%encoded); # parse commandline parameters if($#ARGV != 1) { print STDERR <<HELP; $0: dictionary phonenumbers HELP exit 0; } my($dict_file, $phone_file) = @ARGV; open DICT, "<$dict_file" or die "$dict_file: $!"; open NUMBERS, "<$phone_file" or die "$phone_file: $!"; # read in dictionary, and create hash of numbers to # list of possible words while(my $word = <DICT>) { chop $word; my $encoded = encode_word($word); push @{ $encoded{$encoded} }, $word; } close DICT; # go to work on the list of phone numbers while(my $phone_no = <NUMBERS>) { chop $phone_no; my $number = join '', ($phone_no =~ m/([0-9])/g); for(find_matching_strings($number)) { print "$phone_no: $_\n"; } } close NUMBERS; exit 0; # internal subroutines and data: # %letter_map: letters to numbers map my %letter_map; BEGIN { @letter_map{qw(e E)} = (0) x 2; @letter_map{qw(j J n N q Q)} = (1) x 6; @letter_map{qw(r R w W x X)} = (2) x 6; @letter_map{qw(d D s S y Y)} = (3) x 6; @letter_map{qw(f F t T)} = (4) x 4; @letter_map{qw(a A m M)} = (5) x 4; @letter_map{qw(c C i I v V)} = (6) x 6; @letter_map{qw(b B u U k K)} = (7) x 6; @letter_map{qw(l L o O p P)} = (8) x 6; @letter_map{qw(g G h H z Z)} = (9) x 6; }; # encode_word: use %letter_map to encode a string as a number # (ignoring anything but letters in the encoding). sub encode_word { my $word = shift; # remove non-alpha characters from word $word =~ tr/[a-zA-Z]//cd; # encode as number using %letter_map $word =~ s/(.)/$letter_map{$1}/g; # done $word } # find_matching_strings: create a list of all the strings that can be # composed out of words, given an encoding sub find_matching_strings { my $encoding = shift; my $last_was_digit = shift; # for internal use only my $is_digit = 0; my @possible; my @matches; # set e1.e2.e3.e4 ... eN = $encoding; # then find which of e1, e1.e2, e1.e2.e3, ... are encodings of # words in the dictionary for my $length (1 .. length($encoding)) { my $substr = substr($encoding, 0, $length); if(exists $encoded{$substr}) { push @possible, @{ $encoded{$substr} }; } } # if none of e1, e1.e2, e1.e2.e3, ... are encodings of words in # the dictionary, and the last matched was a word, we can just copy # over the first number in the encoding if(not @possible and not $last_was_digit) { $is_digit = 1; @possible = substr($encoding, 0, 1); } # find which of the matches also have matches in the rest of the # encoding (or encompass the entire encoding.) for my $p (@possible) { # $p_enc is the dictionary word stripped of non-alphanumeric # characters, which we use to calculate how much of $encoding # is matched. my $p_enc = $p; $p_enc =~ s/[^a-zA-Z0-9]+//g; my $p_len = length $p_enc; # did we just match the entire length of $encoding? if($p_len == length($encoding)) { push @matches, $p; next; } # else, go to work on the remainder for my $q (find_matching_strings(substr($encoding, $p_len), $is_digit)) { push @matches, "$p $q"; } } return @matches; } __END__