[Haskell] filesystem verification utility

Anand Mitra mitra at kqinfotech.com
Tue Jan 11 03:25:03 CET 2011


Hi,

I had a requirement to generate various kinds of I/O patterns on a
filesystem and subsequently verify this. The initial version of the
program below implements a random I/O pattern with multiple threads.
Even when the number of I/O is as small as 200 and 10 concurrent
theads, the amount of memory used is huge. When I run the program it
consumes close to 1to2GB memory. Moreover the rate at which it
generates the I/O is very low which is not good for testing a
filesystem. I have used System.POSIX.IO but I tried System.IO and did
not see much difference either. I would appreciate help in identifying
ways to improve this.


{-# OPTIONS -fglasgow-exts #-}
import System.Random
import Data.List
import Monad
import System.Posix.IO
import System.Posix.Types
import Data.Time.Clock
import System.Posix.Files
import Data.Maybe
import GHC.IO.Device (SeekMode(..))
import Control.Exception
import Data.Typeable
import System
import Control.Concurrent

--myrandomRlist :: (Num t, Random t) => t -> IO [t]
--myrandomRlist x = liftM (randomRs (0,x)) newStdGen

myrandomRlist :: (Num t, Random t) => t ->t -> StdGen  ->  [t]
myrandomRlist min max seed = randomRs (min,max)  seed

data IoLoc = IoLoc {offset :: FileOffset, size::ByteCount, num::Int  }
            deriving (Show, Typeable)
instance Exception IoLoc

data Corrupt = Corrupt String IoLoc
            deriving (Show, Typeable)
instance Exception Corrupt

data FileHdr = FileHdr {fileName::FilePath, seed::StdGen,
                       minIoSize::Int, maxIoSize::Int, ioCount::Int}
              deriving (Show)

data FileIO = FileIO {fd::Fd, params::FileHdr, fileData::[IoLoc]}

genPattern :: FilePath -> IoLoc -> String
genPattern f l =
    take (read $ show $size l) (cycle  $ "(" ++ f ++ ")"  ++ "[" ++
(show (offset l)) ++ ":" ++ (show (size l)) ++ (show (num l)) ++ "]")

doHdrRead :: Fd -> IO [Char]
doHdrRead fd = do
 (str, count) <- fdRead fd 100
 return (takeWhile (\x-> not (x == '\n')) str)

doGetHdr :: Fd -> IO FileHdr
doGetHdr fd = do
 file <- doHdrRead fd
 seed <- doHdrRead fd
 minIoSize <- doHdrRead fd
 maxIoSize <- doHdrRead fd
 ioCount <- doHdrRead fd
 return (FileHdr file (read seed) (read minIoSize) (read maxIoSize)
(read ioCount))

doHdrFromFile :: FilePath -> IO FileIO
doHdrFromFile name = do
 fd <- openFd name  ReadWrite (Just ownerModes)  (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=False})
 hdr <- doGetHdr fd
 return (FileIO fd hdr [])

doHdrWrite :: Fd -> [Char] -> IO ByteCount
doHdrWrite fd str = do
 fdWrite fd (take 100 (str ++ "\n" ++ (cycle ['\0'])))

doWriteHdr :: Fd -> FileHdr -> IO ByteCount
doWriteHdr fd hdr = do
 doHdrWrite fd (show (fileName hdr))
 doHdrWrite fd (show (seed hdr))
 doHdrWrite fd (show (minIoSize hdr))
 doHdrWrite fd (show (maxIoSize hdr))
 doHdrWrite fd (show (ioCount hdr))

doWriteFile :: FilePath -> IO FileHdr
doWriteFile name = do
 fd <- openFd name  ReadWrite (Just ownerModes)  (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=True})
 seed <- newStdGen
 hdr <- return (FileHdr name seed 4096 (2*8096) 200)
 doWriteHdr fd hdr
 return hdr

overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) =
   ((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2)
&&(off1+sz1 < off2 +sz2))
   where
     off1 = read (show off1')
     sz1 = read (show sz1')
     off2 = read (show off2')
     sz2 = read (show sz2')


genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc]

genIoList1 list offset size _ 0 =
   list
genIoList1 list offset size 0 _ =
   list

genIoList1 list (offset:os) (size:ss) count bound =
   if isNothing $ find (overLap x) list
            then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1)
            else genIoList1 list os ss count (bound -1)
   where
     x = IoLoc (read (show offset)) (read(show size)) count
--      offset = (read (show offset1))
--      size = (read(show size1))

genIoList :: FileHdr -> [IoLoc]
genIoList (FileHdr name seed min max count) =
   genIoList1 []  (myrandomRlist 4000 1099511627776 seed)
(myrandomRlist min max seed) count (count*2)

doActualIo :: Fd -> IoLoc -> String -> IO ()
doActualIo fd (IoLoc off sz num) str = do
 off <- fdSeek fd AbsoluteSeek off
 fdWrite fd str
 return ()

doVerifyIo :: Fd -> IoLoc -> String -> IO ()
doVerifyIo fd (IoLoc off sz num) str = do
 off <- fdSeek fd AbsoluteSeek off
 (filedata, count) <- fdRead fd sz
 if str == filedata
    then return ()
    else throwIO (Corrupt ("Data corruption in #" ++  (take 200 str)
++ "#"  ++ (take 200 filedata))(IoLoc off sz num))

mainWrite file  = do
 hdr <- doWriteFile file
 hdrIO <- doHdrFromFile file
 iolst <- return $ genIoList hdr
 app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst)
 putStrLn (fileName hdr)
 mapM (\(x,y)-> doActualIo (fd hdrIO)  x y) app
 mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
 return hdr

mainVerify file = do
 hdrIO <- doHdrFromFile file
 hdr <- return (params hdrIO)
 iolst <- return $ genIoList hdr
 file <- return $ filter (\x->not ( x=='"')) (fileName hdr)
 app <- return $ zip iolst (map (genPattern file) iolst)
 putStrLn $ filter (\x->not ( x=='"')) (fileName hdr)
 mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
 return hdr


main =  do
 x <- getArgs
 if (length x) == 3
   then do
     main1
   else do
     putStrLn "USAGE:\nfile-io write/verify <full-path file name>
<number of threads>  \
\\n\n\
\Simple IO load generator with write verification. This utility is designed\n\
\to generate multi-threaded IO load which will write a pattern to the file. \n\
\When this is invoked with the same parameters with the verify option \n\
\the data written will be verified.\n\
\ "

main1 = do
 [op, name, numProcs] <- getArgs
 m <- newEmptyMVar
 n <- return $read numProcs
 case op of
   "write" -> do mapM forkIO  [(fillfile (name ++ (show i)) ) m|i<-[1..n]]
   "verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]]
 x <- mapM takeMVar $ take (read numProcs) $ repeat m
 putStrLn $ show $ and x

fillfile filename m = do
 mainWrite filename
 putMVar m True

verifyfile filename m = do
 mainVerify filename
 putMVar m True



More information about the Haskell mailing list