Text I/O library proposal, first draft

Ben Rudiak-Gould libraries@haskell.org
Thu, 31 Jul 2003 15:35:19 -0700 (PDT)


[Crossposted to Haskell and Libraries. Replies to Libraries.]

{-
Good things about this text library design:

  * Efficient implementation should be straightforward

  * Character coder interface is public, so users can supply their own
    encodings, or write coder transformers (there are some in the
    proposal)

Bad things:

  * There's no way to implement fgetpos/fsetpos type functionality,
    because coders don't expose their internal state. (In fact, there
    would need to be a way to explicitly copy the state, since it may
    well include IORefs, Ptrs, etc.) Is this a serious problem?
-}


module System.TextIOFirstDraft (...) where

-- A BlockRecoder takes source and destination buffers and does some sort
-- of translation between them. It returns the number of values (not
-- bytes!) consumed and the number of values produced. It does not have to
-- empty the input buffer or fill the output buffer on each call, but it
-- must do something (i.e. it's not acceptable to return (0,0)). Coders
-- will in general have internal state which is updated on each call.

type BlockRecoder from to =
  Ptr from -> BlockLength -> Ptr to -> BlockLength
   -> IO (BlockLength,BlockLength)

type TextEncoder = BlockRecoder Word32 Octet
type TextDecoder = BlockRecoder Octet Word32

-- IO TextEncoder and IO TextDecoder below denote "coder factories" which
-- produce a new coder in its initial state each time they're called.

compatibilityEncoder :: IO TextEncoder	-- "mod 256"
currentLocaleEncoder :: IO TextEncoder
iso88591Encoder :: IO TextEncoder
latin1Encoder = iso88591Encoder

utf8Encoder, utf16BEEncoder, utf16LEEncoder,
  utf32BEEncoder, utf32LEEncoder :: IO TextEncoder

compatibilityDecoder :: IO TextDecoder
currentLocaleDecoder :: IO TextDecoder
iso88591Decoder :: IO TextDecoder
latin1Decoder = iso88591Decoder

utf8Decoder, utf16BEDecoder, utf16LEDecoder,
  utf32BEDecoder, utf32LEDecoder :: IO TextDecoder

-- An attempt at supporting setlocale-type locale strings.

lookupEncoder :: String -> Maybe (IO TextEncoder)
lookupDecoder :: String -> Maybe (IO TextDecoder)

-- prependBOM takes an existing UTF encoder and causes it to prepend
-- a BOM (byte-order mark) to its output. autodetectUTF takes an existing
-- decoder and modifies it to check for a BOM, switching to the
-- appropriate type of UTF decoding if one is found.

prependBOM :: IO TextEncoder -> IO TextEncoder
autodetectUTF :: IO TextDecoder -> IO TextDecoder

-- Attaches a TextInputChannel to the supplied InputChannel. After
-- this operation the InputChannel should be considered owned by the
-- TextInputChannel; any attempt to use it directly will cause
-- unpredictable results. This takes a decoder factory rather than a
-- decoder to prevent the error of attaching the same decoder to more than
-- one channel.

icAttachTextDecoder :: InputChannel -> IO TextDecoder -> IO TextInputChannel

ticGetChar :: TextInputChannel -> IO Char
ticGetLine :: TextInputChannel -> IO String
ticLazyGetContents :: TextInputChannel -> IO String

ocAttachTextEncoder :: OutputChannel -> IO TextEncoder -> IO TextOutputChannel

tocPutChar  :: TextOutputChannel -> Char -> IO ()
tocPutStr   :: TextOutputChannel -> String -> IO ()
tocPutStrLn :: TextOutputChannel -> String -> IO ()

-- ... etc ...


-- Ben