Soundex Matching

From Erlang Community

(Difference between revisions)
Revision as of 12:30, 23 November 2006 (edit)
Kaiserpanda (Talk | contribs)
m
← Previous diff
Current revision (12:52, 23 November 2006) (edit) (undo)
213.171.204.166 (Talk)
(Implementation)
 
Line 63: Line 63:
[[Category:CookBook]][[Category:StringRecipes]] [[Category:CookBook]][[Category:StringRecipes]]
- 
- 
- 
-[http://www.casino-theory.com/bingo-online/gambling-online-bingo.html gambling online bingo] 
-[http://www.slots-wiki.com/index.php/slots slots] 
-[http://www.casino-theory.com/craps-rules/internet-craps-gambling.html internet craps gambling] 
-[http://www.magical-casino.com/casino_risk.html Gambling Online - Risks.] 
-[http://www.magical-casino.com/games_variations.html Casino Games and variations.] 
-[http://www.casinos-go.com/online-casino-tips/free-online-casino-slot.html free online casino slot] 
-[http://www.fortune-slots.com/ slots] 
-[http://www.casino-web-gambling.com/blackjack-tips/blackjack-hints.html blackjack hints] 
-[http://www.bestweb-online-casinos.com/best-online-casino/online-casino-free-game.html online casino free game] 
-[http://www.casino-theory.com/online-casino-bonus/free-online-casino-tournament.html free online casino tournament] 

Current revision

[edit] Problem

You want to generate Soundex hashes of surnames, for "sounds-like" indexing in databases or for retrieving information from the US Census records and similar pre-existing databases.

[edit] Solution

Use the soundex module below:

> soundex:soundex("Smith").
"S530"
> soundex:soundex("Smyth").
"S530"

Soundex is a string hash historically used by the US Census for indexing surnames by a function of what they "sound" like, rather than their precise spelling. Further general information on Soundex is available at http://www.archives.gov/research_room/genealogy/census/soundex.html.

[edit] Implementation

%% @doc American Soundex hashing of surnames.
%%
%% A Soundex code is the first character of the name followed by three
%% digits describing the consonants that follow it, so that names which
%% sound alike ("Smith", "Smyth") hash to the same four-character code.
-module(soundex).

-export([soundex/1]).

%% @doc Return the four-character Soundex code of `Name'.
%%
%% `Name' is a character list. The empty string yields the empty string.
%% Otherwise the result is the upper-cased first character of `Name'
%% followed by exactly three digits: surplus codes are dropped and short
%% codes are padded with `$0'.
%%
%% Coding rules applied to the remaining characters:
%% <ul>
%%   <li>consonants map to the digits `$1'..`$6' (see `code/1');</li>
%%   <li>a letter whose digit equals that of the previously coded letter
%%       (including the first letter) is not coded again;</li>
%%   <li>a vowel (A, E, I, O, U, and Y, which is treated like a vowel
%%       here) between two letters with the same digit makes the second
%%       one count again;</li>
%%   <li>H, W and every non-letter character (spaces, hyphens,
%%       apostrophes, ...) are skipped and do not separate equal
%%       digits.</li>
%% </ul>
-spec soundex(string()) -> string().
soundex([]) ->
    [];
soundex(Name) ->
    [First | Rest] = string:to_upper(Name),
    %% Seed the walk with the first letter's own class so that e.g. the
    %% F in "Pfister" is recognised as a repeat of the leading P.
    pad([First | encode(Rest, code(First))]).

%% Walk the upper-cased characters and emit the digit of every consonant
%% that is not a repeat of the previously remembered class `Prev'.
encode([], _Prev) ->
    [];
encode([Char | Rest], Prev) ->
    case code(Char) of
        %% H, W and non-letters are transparent: keep the old class.
        ignore -> encode(Rest, Prev);
        %% A vowel emits nothing but lets the same digit occur again.
        vowel -> encode(Rest, vowel);
        %% Same digit as the letter before: coded only once.
        Digit when Digit =:= Prev -> encode(Rest, Prev);
        Digit -> [Digit | encode(Rest, Digit)]
    end.

%% Classify one upper-case character: a digit character for consonants,
%% `vowel' for A/E/I/O/U/Y, and `ignore' for H, W and anything that is
%% not an upper-case ASCII letter.
code(C) when C =:= $B; C =:= $F; C =:= $P; C =:= $V -> $1;
code(C) when C =:= $C; C =:= $G; C =:= $J; C =:= $K;
             C =:= $Q; C =:= $S; C =:= $X; C =:= $Z -> $2;
code(C) when C =:= $D; C =:= $T -> $3;
code($L) -> $4;
code(C) when C =:= $M; C =:= $N -> $5;
code($R) -> $6;
code(C) when C =:= $A; C =:= $E; C =:= $I;
             C =:= $O; C =:= $U; C =:= $Y -> vowel;
code(_) -> ignore.

%% Force the code to exactly four characters: right-pad with zeros and
%% cut off anything beyond the fourth character.
pad(Code) ->
    lists:sublist(Code ++ "000", 4).
Erlang/OTP Projects
Personal tools