[Date Prev][Date Next][Thread Prev][Thread Next] [Search] [Date Index] [Thread Index]

Re: [FWP] Call for Programs: the "phonecode" benchmark



This being a lazy saturday I might as well throw in my attempt
at a solution.

-- 
Sune Kirkeby | "Imagine, if you will, that there were no such
             |  thing as a hypothetical situation..."
#!/usr/bin/perl -w
# ((insert appropriate information replacing each dot))
# Author:   Sune Kirkeby
# Email:    sune@interspace.dk
# Language: Perl
#
# How long it took to write the program (in minutes):
# Design:     reading the requirements; deciding on algorithm, 
#             data structure, modularization
# Coding:     actually writing the program
# Test/Debug: trying to get the program to work
# ((insert your work times here:))
#   Design:      20 minutes
#   Coding:      30 minutes
#   Test/Debug:  10 minutes
#
# Overall I tend to rate myself as follows compared to all 
# other programmers (replace one dot by an X):
#   among the upper 10 percent     .
#   upper 11 to 25 percent         X
#   upper 25 to 40 percent         .
#   upper 40 to 60 percent         .
#   lower 25 to 40 percent         .
#   lower 11 to 25 percent         .
#   lower 10 percent               .
#
# Special events, remarks, insights, constraints etc:
#
#   .
#
#
#
#
#

use strict;
# %encoded: map from encoded number to a list of possible words
use vars qw(%encoded);

# parse commandline parameters
if($#ARGV != 1) {
	print STDERR <<HELP;
$0: dictionary phonenumbers
HELP
	exit 0;
}
my($dict_file, $phone_file) = @ARGV;
open DICT, "<$dict_file"
	or die "$dict_file: $!";
open NUMBERS, "<$phone_file"
	or die "$phone_file: $!";

# read in dictionary, and create hash of numbers to
# list of possible words
while(my $word = <DICT>) {
	chop $word;
	my $encoded = encode_word($word);
	push @{ $encoded{$encoded} }, $word;
}
close DICT;

# go to work on the list of phone numbers
while(my $phone_no = <NUMBERS>) {
	chop $phone_no;
	my $number = join '', ($phone_no =~ m/([0-9])/g);
	for(find_matching_strings($number)) {
		print "$phone_no: $_\n";
	}
}
close NUMBERS;

exit 0;

# internal subroutines and data:
# %letter_map: letters to numbers map
my %letter_map;
BEGIN {
@letter_map{qw(e E)}		= (0) x 2;
@letter_map{qw(j J n N q Q)}	= (1) x 6;
@letter_map{qw(r R w W x X)}	= (2) x 6;
@letter_map{qw(d D s S y Y)}	= (3) x 6;
@letter_map{qw(f F t T)}	= (4) x 4;
@letter_map{qw(a A m M)}	= (5) x 4;
@letter_map{qw(c C i I v V)}	= (6) x 6;
@letter_map{qw(b B u U k K)}	= (7) x 6;
@letter_map{qw(l L o O p P)}	= (8) x 6;
@letter_map{qw(g G h H z Z)}	= (9) x 6;
};

# encode_word: use %letter_map to encode a string as a number
# (ignoring anything but letters in the encoding).
sub encode_word
{
	my $word = shift;
	# remove non-alpha characters from word
	$word =~ tr/[a-zA-Z]//cd;
	# encode as number using %letter_map
	$word =~ s/(.)/$letter_map{$1}/g;
	# done
	$word
}

# find_matching_strings: create a list of all the strings that can be
# composed out of words, given an encoding
sub find_matching_strings
{
	my $encoding = shift;
	my $last_was_digit = shift; # for internal use only
	my $is_digit = 0;
	my @possible;
	my @matches;

	# set e1.e2.e3.e4 ... eN = $encoding;
	# then find which of e1, e1.e2, e1.e2.e3, ... are encodings of
	# words in the dictionary
	for my $length (1 .. length($encoding)) {
		my $substr = substr($encoding, 0, $length);
		if(exists $encoded{$substr}) {
			push @possible, @{ $encoded{$substr} };
		}
	}
	# if none of e1, e1.e2, e1.e2.e3, ... are encodings of words in
	# the dictionary, and the last matched was a word, we can just copy
	# over the first number in the encoding
	if(not @possible and not $last_was_digit) {
		$is_digit = 1;
		@possible = substr($encoding, 0, 1);
	}
	# find which of the matches also have matches in the rest of the
	# encoding (or encompass the entire encoding.)
	for my $p (@possible) {
		# $p_enc is the dictionary word stripped of non-alphanumeric
		# characters, which we use to calculate how much of $encoding
		# is matched.
		my $p_enc = $p;
		$p_enc =~ s/[^a-zA-Z0-9]+//g;
		my $p_len = length $p_enc;
		# did we just match the entire length of $encoding?
		if($p_len == length($encoding)) {
			push @matches, $p;
			next;
		}
		# else, go to work on the remainder
		for my $q (find_matching_strings(substr($encoding, $p_len), $is_digit)) {
			push @matches, "$p $q";
		}
	}
	return @matches;
}

__END__