[Haskell-cafe] Why so slow?

Lyle Kopnicky lists at qseep.net
Mon Dec 11 02:40:32 EST 2006


The code below is using way more RAM than it should. It seems to only 
take so long when I build the 'programs' list - the actual 
reading/parsing is fast. For a 5MB input file, it's using 50MB of RAM! 
Any idea how to combat this?

Thanks,
Lyle

{-# OPTIONS_GHC -fglasgow-exts #-}

-- linear_importer.hs

import Control.Monad (unless)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import Text.Regex as RE
import Data.Time.Calendar
import Data.Time.LocalTime
import System.Environment

-- Consider adding strictness as necessary for performance.

--                                 STB time      channel
data ChannelChange = ChannelChange Int LocalTime Int
    deriving Show

--                                     channel program_no start     
end       network_no
data ScheduleProgram = ScheduleProgram Int     Int        LocalTime 
LocalTime (Maybe Int)
    deriving Show

main = do
    fileNames <- getArgs
    ensure (length fileNames == 2) usageMessage
    let [eventFileName,programFileName] = fileNames
    putStrLn ("Reading program schedule file from '" ++ programFileName 
++ "'...")
    text <- BS.readFile programFileName
    programs <- sequence $ map parseScheduleProgram (BS.lines text)
    print (take 20 programs)
    return ()

usageMessage = "Usage: linear_importer <channel change file> <schedule 
file>"

parseScheduleProgram :: ByteString -> IO ScheduleProgram
parseScheduleProgram s = do
    let fields = BS.split '|' s
    ensure (length fields == 7) ("Wrong number of fields in schedule 
program: " ++ BS.unpack s)
    let 
[_,channelNoText,programNoText,_,startTimeText,endTimeText,networkNoText] 
= fields
    let channelNo = read $ BS.unpack channelNoText
        programNo = read $ BS.unpack programNoText
    startTime <- parseProgramTime startTimeText
    endTime <- parseProgramTime endTimeText
    let networkNo = if BS.null networkNoText then Nothing else Just 
(read (BS.unpack networkNoText))
    return $ ScheduleProgram channelNo programNo startTime endTime networkNo

parseProgramTime :: ByteString -> IO LocalTime
parseProgramTime s = do
    let parts = BS.split 'T' s
    ensure (length parts == 2)
        ("Expected exactly one T in eventChannelChange time: " ++ 
BS.unpack s)
    let [datePart,timePart] = parts
    ensure (BS.length datePart == 8)
        ("Expected 8 digits in date part of eventChannelChange time: " 
++ BS.unpack s)
    let (yearPart, monthDayPart) = BS.splitAt 4 datePart
        (monthPart, dayPart) = BS.splitAt 2 monthDayPart
        year = read $ BS.unpack yearPart
    month = read $ BS.unpack monthPart
    day = read $ BS.unpack dayPart
    let date = fromGregorian year month day
    ensure (BS.length timePart == 6)
        ("Expected 6 digits in time part of eventChannelChange time: " 
++ BS.unpack s)
    let (hoursPart,minutesSecondsPart) = BS.splitAt 2 timePart
    (minutesPart,secondsPart) = BS.splitAt 2 minutesSecondsPart
    hours = read $ BS.unpack hoursPart
    minutes = read $ BS.unpack minutesPart
    seconds = read $ BS.unpack secondsPart
    let time = TimeOfDay hours minutes (fromInteger seconds)
    return (LocalTime date time)

ensure :: Bool -> String -> IO ()
ensure x s = unless x $ ioError (userError s)


More information about the Haskell-Cafe mailing list