[Haskell-beginners] filesystem verification utility

Anand Mitra mitra at kqinfotech.com
Tue Jan 11 05:19:47 CET 2011


Based  on feedback I inferred that the huge memory usage was mostly
because of the String handling in the patern generation. To make it
more efficient I have used Data.ByteString.Lazy.Char8

but now I get the following error when I execute

stress: tmp/asdf-1: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-3: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-5: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-2: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-7: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-10: hPutBuf: illegal operation (handle is closed)

I have explicitly remove all calls to close despite this I get this
error. I tried both version of seek the fdSeek as well as hSeek. The
documentation on hSeek had a confusing comment that "The offset i is
given in terms of 8-bit bytes"

At this point I am unable to understand how the handle is getting
explicitly closed.

{-# OPTIONS -fglasgow-exts #-}
import Data.Int
import qualified Data.ByteString.Lazy.Char8 as L
import System.Random
import Data.List
import Monad
import System.Posix.IO
import System.Posix.Types
import Data.Time.Clock
import System.Posix.Files
import Data.Maybe
import GHC.IO.Device (SeekMode(..))
import Control.Exception
import Data.Typeable
import System
import Control.Concurrent
import System.IO

--myrandomRlist :: (Num t, Random t) => t -> IO [t]
--myrandomRlist x = liftM (randomRs (0,x)) newStdGen

myrandomRlist :: (Num t, Random t) => t ->t -> StdGen  ->  [t]
myrandomRlist min max seed = randomRs (min,max)  seed

data IoLoc = IoLoc {offset ::FileOffset, size::Int, num::Int  }
             deriving (Show, Typeable)
instance Exception IoLoc

data Corrupt = Corrupt String IoLoc
             deriving (Show, Typeable)
instance Exception Corrupt

data FileHdr = FileHdr {fileName::FilePath, seed::StdGen,
                        minIoSize::Int, maxIoSize::Int, ioCount::Int}
               deriving (Show)

data FileIO = FileIO {fd::Handle, params::FileHdr, fileData::[IoLoc]}


check hdl = do
  closed <- hIsClosed hdl
  if closed
   then do
    putStrLn $ "file handle was closed" ++ (show hdl)
   else do
    return ()

genPattern :: FilePath -> IoLoc -> L.ByteString
genPattern f l =
     L.take (fromIntegral (size l)) (L.cycle  $ L.pack ("(" ++ f ++
")"  ++ "[" ++ (show (offset l)) ++ ":" ++ (show (size l)) ++ (show
(num l)) ++ "]"))

doHdrRead :: Handle -> IO [Char]
doHdrRead x = do
  fd <- handleToFd x
  (str, count) <- fdRead fd 100
  return (takeWhile (\x-> not (x == '\n')) str)

doGetHdr :: Handle -> IO FileHdr
doGetHdr fd = do
  file <- doHdrRead fd
  seed <- doHdrRead fd
  minIoSize <- doHdrRead fd
  maxIoSize <- doHdrRead fd
  ioCount <- doHdrRead fd
  return (FileHdr file (read seed) (read minIoSize) (read maxIoSize)
(read ioCount))

doHdrFromFile :: Handle -> IO FileIO
doHdrFromFile fd = do
--  fd <- openBinaryFile name  ReadWriteMode -- (Just ownerModes)
(OpenFileFlags {append=False, exclusive=False, noctty=True,
nonBlock=False, trunc=False})
  hSeek fd AbsoluteSeek 0
  hdr <- doGetHdr fd
  return (FileIO fd hdr [])

doHdrWrite :: Handle -> [Char] -> IO ()
doHdrWrite fd str = do
  hPutStr fd (take 100 (str ++ "\n" ++ (cycle ['\0'])))

doWriteHdr :: Handle -> FileHdr -> IO ()
doWriteHdr fd hdr = do
  doHdrWrite fd (show (fileName hdr))
  doHdrWrite fd (show (seed hdr))
  doHdrWrite fd (show (minIoSize hdr))
  doHdrWrite fd (show (maxIoSize hdr))
  doHdrWrite fd (show (ioCount hdr))

doWriteFile :: FilePath -> IO FileIO
doWriteFile name = do
  fd <- openBinaryFile name  ReadWriteMode -- (Just ownerModes)
(OpenFileFlags {append=False, exclusive=False, noctty=True,
nonBlock=False, trunc=True})
  seed <- newStdGen
  hdr <- return (FileHdr name seed 4096 (2*8096) 200)
  doWriteHdr fd hdr
  return (FileIO fd hdr [])

overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) =
    ((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2)
&&(off1+sz1 < off2 +sz2))
    where
      off1 = read (show off1')
      sz1 = read (show sz1')
      off2 = read (show off2')
      sz2 = read (show sz2')


genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc]

genIoList1 list offset size _ 0 =
    list
genIoList1 list offset size 0 _ =
    list

genIoList1 list (offset:os) (size:ss) count bound =
    if isNothing $ find (overLap x) list
             then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1)
             else genIoList1 list os ss count (bound -1)
    where
      x = IoLoc (read (show offset)) (read(show size)) count
--      offset = (read (show offset1))
--      size = (read(show size1))

genIoList :: FileHdr -> [IoLoc]
genIoList (FileHdr name seed min max count) =
    genIoList1 []  (myrandomRlist 4000 1099511627776 seed)
(myrandomRlist min max seed) count (count*2)

doActualIo :: Handle -> IoLoc -> L.ByteString -> IO ()
doActualIo fd (IoLoc off sz num) str = do
--  hSeek fd AbsoluteSeek off
  rfd <- (handleToFd fd)
  fdSeek  rfd AbsoluteSeek off
  L.hPut fd str
  return ()

doVerifyIo :: Handle -> IoLoc -> L.ByteString -> IO ()
doVerifyIo fd (IoLoc off sz num) str = do
--  hSeek fd AbsoluteSeek off
  rfd <- (handleToFd fd)
  fdSeek rfd AbsoluteSeek off
  filedata  <- L.hGet fd sz
  if str == filedata
     then return ()
     else throwIO (Corrupt ("Data corruption in #" ++  (take 200
(L.unpack str)) ++ "#"  ++ (take 200 (L.unpack filedata)))(IoLoc off
sz num))

mainWrite file  = do
  (FileIO hd _ _) <- doWriteFile file
  hdrIO <- doHdrFromFile hd
  check hd
  (FileIO _ hdr _) <- return hdrIO
  check hd
  iolst <- return $ genIoList hdr
  app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst)
  putStrLn (fileName hdr)
  mapM (\(x,y)-> doActualIo (fd hdrIO)  x y) app
  mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
--  hClose hd
  return hdr

mainVerify file = do
  hd <- openBinaryFile file  ReadWriteMode
  hdrIO <- doHdrFromFile hd
  hdr <- return (params hdrIO)
  iolst <- return $ genIoList hdr
  file <- return $ filter (\x->not ( x=='"')) (fileName hdr)
  app <- return $ zip iolst (map (genPattern file) iolst)
  putStrLn $ filter (\x->not ( x=='"')) (fileName hdr)
  mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
--  hClose hd
  return hdr


main =  do
  x <- getArgs
  if (length x) == 3
    then do
      main1
    else do
      putStrLn "USAGE:\nfile-io write/verify <full-path file name>
<number of threads>  \
\\n\n\
\Simple IO load generator with write verification. This utility is designed\n\
\to generate multi-threaded IO load which will write a pattern to the file. \n\
\When this is invoked with the same parameters with the verify option \n\
\the data written will be verified.\n\
\ "

main1 = do
  [op, name, numProcs] <- getArgs
  m <- newEmptyMVar
  n <- return $read numProcs
  case op of
    "write" -> do mapM forkIO  [(fillfile (name ++ (show i)) ) m|i<-[1..n]]
    "verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]]
  x <- mapM takeMVar $ take (read numProcs) $ repeat m
  putStrLn $ show $ and x

fillfile filename m = do
  mainWrite filename
  putMVar m True

verifyfile filename m = do
  mainVerify filename
  putMVar m True



On Tue, Jan 11, 2011 at 1:02 AM, Anand Mitra <mitra at kqinfotech.com> wrote:
> Hi,
>
> I had a requirement to generate various kinds of I/O patterns on a
> filesystem and subsequently verify this. The initial version of the
> program below implements a random I/O pattern with multiple threads.
> Even when the number of I/O is as small as 200 and 10 concurrent
> theads, the amount of memory used is huge. When I run the program it
> consumes close to 1to2GB memory. Moreover the rate at which it
> generates the I/O is very low which is not good for testing a
> filesystem. I have used System.POSIX.IO but I tried System.IO and did
> not see much difference either. I would appreciate help in identifying
> ways to improve this.
>
>



More information about the Beginners mailing list