Comma Separated Values

From Erlang Community

(Difference between revisions)
Revision as of 08:34, 21 November 2006 (edit)
Kaiserpanda (Talk | contribs)
m
← Previous diff
Current revision (14:52, 21 November 2006) (edit) (undo)
213.171.204.166 (Talk)

 
Line 83: Line 83:
[[Category:CookBook]][[Category:StringRecipes]] [[Category:CookBook]][[Category:StringRecipes]]
- 
- 
- 
-[http://www.casino-theory.com/online-casino-bonus/free-online-casino-tournament.html free online casino tournament] 
-[http://www.casinos-new.com/blackjack.html Online casino - Blackjack online] 
-[http://www.slots-wiki.com/index.php/slots_hints slots hints] 
-[http://www.online-casino-wiki.com/index.php/online_casino online casino] 
-[http://www.magical-casino.com/casino_download.html Casino Downloading Action.] 
-[http://www.magical-casino.com/casino_bonus.html Casino Bonus ] 
-[http://www.gambling-online-theory.com/casinos/casinos-strategy.html casinos strategy] 
-[http://www.magical-casino.com/strategy.html Online casino strategies.] 
-[http://www.casino-games-wiki.com/index.php/casino_games_rules casino games rules] 
-[http://www.gambling-online-theory.com/casinos-portal/index.html casinos portal] 

Current revision

[edit] Problem

You want to work with comma-separated value (CSV) records, such as those exported from popular spreadsheet and database programs. Note that this recipe works for any similarly delimited format.

[edit] Naive Solution

The naive implementation might be something like this:

1> string:tokens(CsvString, ",").

The shortcomings show up pretty quickly. For starters, CSV format often encloses fields in quotes so that fields can contain commas. On top of that, quoted fields can themselves contain quotes, escaped with a backslash ($\\ in Erlang). Let's imagine a CSV format for books, where the format is author,title,ISBN,publisher:

2> Csv = "David Halberstam, \"War in a Time of Peace: Bush, Clinton,
2> and the Generals\", B0000C37EA, Scribner".
"David Halberstam, \"War in a Time of Peace: Bush, Clinton, and the
Generals\", B0000C37EA, Scribner".
3> string:tokens(Csv, ",").
["David Halberstam",
 " \"War in a Time of Peace: Bush",
 " Clinton",
 " and the Generals\"",
 " B0000C37EA",
 " Scribner"]

Clearly, the easy solution won't work for the general case. Essentially, we need to do state machine processing for this. As we traverse the string, we'll encounter the following states: in_field (when the current position is inside a field), in_quote (when we're inside a quoted string), delim (when we encounter a delimiter), and escape_char (when we encounter a backslash).

A set of Scheme utilities (written by NeilVanDyke) handles this problem quite nicely, but is obviously not applicable to Erlang directly. This is a good idea for an Erlang library, and could be ported to Erlang relatively easily.

[edit] Solution

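Handling the cases where fields can be quoted requires something like the module below. Note that it treats a doubled quote ("") inside a quoted field as an escaped quote; it does not interpret backslash escapes: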

-module(csv).

-export([read/1]).

%% @doc Parses CSV text into a list of records.
%%
%% `Text' is a string (list of character codes). The result is a list with
%% one element per line, each element being the list of field strings found
%% on that line. Parsing starts with an empty record accumulator.
read(Text) ->
    read(Text, []).

%%%

%% Record loop behind read/1.
%%
%% The first argument is the unparsed remainder of the input; the second is
%% the list of records parsed so far, most recent first.
%%
%%  * No input left: return the records in their original order.
%%  * Nothing parsed yet: the input starts directly with the first record.
%%  * Otherwise the remainder must begin with LF or CRLF; the terminator is
%%    consumed and the next record is parsed. A terminator at the very end
%%    of the input therefore produces a final empty record.
read([], Records) ->
    lists:reverse(Records);
read(Input, []) ->
    {Record, Remaining} = read_line(Input),
    read(Remaining, [Record]);
read([$\n | Input], Records) ->
    {Record, Remaining} = read_line(Input),
    read(Remaining, [Record | Records]);
read([$\r, $\n | Input], Records) ->
    {Record, Remaining} = read_line(Input),
    read(Remaining, [Record | Records]).
   
%% @doc Prepends `Count' space characters to `String'.
%%
%% `Count' must be a non-negative integer. Anything else is rejected by
%% lists:duplicate/2 with an error, so a bad count fails immediately
%% instead of counting down without ever reaching zero.
add_spaces(Count, String) ->
    lists:duplicate(Count, $\s) ++ String.

%% @doc Reads one field from the front of the input.
%%
%% Returns `{Field, Rest}' where `Rest' starts at the delimiter, the line
%% terminator, or is empty.
%%
%% Blanks (space, tab) in front of the field are skipped BEFORE checking for
%% an opening quote, so a field written as `, "a, b"' is recognised as a
%% quoted field rather than being split on its inner comma. Unquoted fields
%% are unaffected, because read_item/3 drops leading blanks anyway.
%%
%% Blanks between the closing quote and the next delimiter are dropped as
%% well, so the caller always sees a delimiter or line end next.
read_item([Blank | T]) when Blank =:= $\s; Blank =:= $\t ->
    read_item(T);
read_item([$" | T]) ->
    {Item, Rest} = read_item_quoted(T, []),
    IsBlank = fun(C) -> C =:= $\s orelse C =:= $\t end,
    {Item, lists:dropwhile(IsBlank, Rest)};
read_item(Other) ->
    read_item(Other, 0, []).

%% Scanner for an unquoted field.
%%
%% Arguments: remaining input, number of blanks (space or tab) seen since
%% the last non-blank character, and the field characters collected so far
%% in reverse order.
%%
%%  * Blanks before the first field character are discarded.
%%  * LF, CRLF, a comma or end of input finishes the field; the terminator
%%    is left in the returned remainder and any pending blanks are dropped,
%%    which trims trailing whitespace.
%%  * Blanks inside the field are only counted; when the next ordinary
%%    character arrives they are put back via add_spaces/2, so an interior
%%    tab comes out as a space.
read_item([Blank | Tail], 0, []) when Blank =:= $\s; Blank =:= $\t ->
    read_item(Tail, 0, []);
read_item([$\n | _] = Rest, _Pending, Field) ->
    {lists:reverse(Field), Rest};
read_item([$\r, $\n | _] = Rest, _Pending, Field) ->
    {lists:reverse(Field), Rest};
read_item([$, | _] = Rest, _Pending, Field) ->
    {lists:reverse(Field), Rest};
read_item([], _Pending, Field) ->
    {lists:reverse(Field), []};
read_item([Blank | Tail], Pending, Field) when Blank =:= $\s; Blank =:= $\t ->
    read_item(Tail, Pending + 1, Field);
read_item([Char | Tail], Pending, Field) ->
    read_item(Tail, 0, [Char | add_spaces(Pending, Field)]).

%% Scanner for the inside of a quoted field; the opening quote has already
%% been consumed by the caller.
%%
%% The second argument collects the field characters in reverse order.
%% Two consecutive quotes stand for one literal quote; a single quote ends
%% the field and the text after it is returned as the remainder. Every other
%% character, including commas and line breaks, is taken literally.
%% Input that ends before a closing quote matches no clause.
read_item_quoted([$", $" | Tail], Field) ->
    read_item_quoted(Tail, [$" | Field]);
read_item_quoted([$" | Tail], Field) ->
    {lists:reverse(Field), Tail};
read_item_quoted([Char | Tail], Field) ->
    read_item_quoted(Tail, [Char | Field]).

%% @doc Reads one record (one line of fields) from the front of the input.
%%
%% Returns `{Fields, Rest}'; scanning starts with no fields collected.
read_line(Input) ->
    read_line(Input, []).

%% Field loop behind read_line/1.
%%
%% Arguments: remaining input and the fields of the current record collected
%% so far, most recent first. Returns `{Fields, Rest}' with the fields in
%% input order.
%%
%%  * LF, CRLF or end of input ends the record. The terminator is left
%%    untouched at the front of `Rest' so the caller can recognise and
%%    consume it. For CRLF both characters must be kept: returning only the
%%    CR leaves a remainder no clause of read/2 accepts.
%%  * With no fields yet, the input starts directly with the first field.
%%  * Otherwise a comma must come next; it is consumed and the following
%%    field is read.
read_line([$\n | _] = Rest, Acc) ->
    {lists:reverse(Acc), Rest};
read_line([$\r, $\n | _] = Rest, Acc) ->
    {lists:reverse(Acc), Rest};
read_line([], Acc) ->
    {lists:reverse(Acc), []};
read_line(String, []) ->
    {Item, Rest} = read_item(String),
    read_line(Rest, [Item]);
read_line([$, | String], Acc) ->
    {Item, Rest} = read_item(String),
    read_line(Rest, [Item | Acc]).
Erlang/OTP Projects
Personal tools