Parsing CSV files
Shawn P. Garbett
listman@garbett.org
Tue, 29 Jul 2003 10:57:18 -0500
Nevermind the previous version, I've solved a few bugs in it (like unquoted
numbers and correctly handling blank fields).
Here's my current version:
------------------------------------------------------------------------
import Parsec
import System.IO
-- Sometimes unquoted numbers appear
number :: Parser String
number = do
f <- (char '-' <|> digit)
fs <- many digit
return (f:fs)
-- A cell can be a quoted value, a number or empty
cell :: Parser String
cell = do { char '\"';
w <- (many1 (noneOf "\""));
char '\"' <?> "end of cell";
return w;
}
<|> number
<|> return "" -- Empty cell
-- A comma of course
separator :: Parser Char
separator = char ','
-- Group of cells with a separator
cells :: Parser [String]
cells = sepBy1 cell separator
-- For extracting comma delimited values of a cell
contents :: Parser [String]
contents = sepBy1 (many (noneOf ",")) separator
-- Rows are a set of cells followed by a newline,
-- This is followed by more rows or nothing
rows :: Parser [[String]]
rows = do c <- cells
newline
do { cs <- rows; return (c:cs); } <|> return [c]
-- Comman Separated Values, set of rows followed by eof
csv :: Parser [[String]]
csv = do r <- rows
eof
return r
-- Main routine, for testing
main :: IO ()
main = do result <- parseFromFile csv "sb.txt"
case (result) of
Left err -> print err
Right sb -> print sb