[Haskell-beginners] filesystem verification utility

Anand Mitra mitra at kqinfotech.com
Mon Jan 10 20:32:34 CET 2011


Hi,

I had a requirement to generate various kinds of I/O patterns on a
filesystem and subsequently verify this. The initial version of the
program below implements a random I/O pattern with multiple threads.
Even when the number of I/O is as small as 200 and 10 concurrent
theads, the amount of memory used is huge. When I run the program it
consumes close to 1to2GB memory. Moreover the rate at which it
generates the I/O is very low which is not good for testing a
filesystem. I have used System.POSIX.IO but I tried System.IO and did
not see much difference either. I would appreciate help in identifying
ways to improve this.


{-# OPTIONS -fglasgow-exts #-}
import System.Random
import Data.List
import Monad
import System.Posix.IO
import System.Posix.Types
import Data.Time.Clock
import System.Posix.Files
import Data.Maybe
import GHC.IO.Device (SeekMode(..))
import Control.Exception
import Data.Typeable
import System
import Control.Concurrent

--myrandomRlist :: (Num t, Random t) => t -> IO [t]
--myrandomRlist x = liftM (randomRs (0,x)) newStdGen

myrandomRlist :: (Num t, Random t) => t ->t -> StdGen  ->  [t]
myrandomRlist min max seed = randomRs (min,max)  seed

data IoLoc = IoLoc {offset :: FileOffset, size::ByteCount, num::Int  }
             deriving (Show, Typeable)
instance Exception IoLoc

data Corrupt = Corrupt String IoLoc
             deriving (Show, Typeable)
instance Exception Corrupt

data FileHdr = FileHdr {fileName::FilePath, seed::StdGen,
                        minIoSize::Int, maxIoSize::Int, ioCount::Int}
               deriving (Show)

data FileIO = FileIO {fd::Fd, params::FileHdr, fileData::[IoLoc]}

genPattern :: FilePath -> IoLoc -> String
genPattern f l =
     take (read $ show $size l) (cycle  $ "(" ++ f ++ ")"  ++ "[" ++
(show (offset l)) ++ ":" ++ (show (size l)) ++ (show (num l)) ++ "]")

doHdrRead :: Fd -> IO [Char]
doHdrRead fd = do
  (str, count) <- fdRead fd 100
  return (takeWhile (\x-> not (x == '\n')) str)

doGetHdr :: Fd -> IO FileHdr
doGetHdr fd = do
  file <- doHdrRead fd
  seed <- doHdrRead fd
  minIoSize <- doHdrRead fd
  maxIoSize <- doHdrRead fd
  ioCount <- doHdrRead fd
  return (FileHdr file (read seed) (read minIoSize) (read maxIoSize)
(read ioCount))

doHdrFromFile :: FilePath -> IO FileIO
doHdrFromFile name = do
  fd <- openFd name  ReadWrite (Just ownerModes)  (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=False})
  hdr <- doGetHdr fd
  return (FileIO fd hdr [])

doHdrWrite :: Fd -> [Char] -> IO ByteCount
doHdrWrite fd str = do
  fdWrite fd (take 100 (str ++ "\n" ++ (cycle ['\0'])))

doWriteHdr :: Fd -> FileHdr -> IO ByteCount
doWriteHdr fd hdr = do
  doHdrWrite fd (show (fileName hdr))
  doHdrWrite fd (show (seed hdr))
  doHdrWrite fd (show (minIoSize hdr))
  doHdrWrite fd (show (maxIoSize hdr))
  doHdrWrite fd (show (ioCount hdr))

doWriteFile :: FilePath -> IO FileHdr
doWriteFile name = do
  fd <- openFd name  ReadWrite (Just ownerModes)  (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=True})
  seed <- newStdGen
  hdr <- return (FileHdr name seed 4096 (2*8096) 200)
  doWriteHdr fd hdr
  return hdr

overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) =
    ((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2)
&&(off1+sz1 < off2 +sz2))
    where
      off1 = read (show off1')
      sz1 = read (show sz1')
      off2 = read (show off2')
      sz2 = read (show sz2')


genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc]

genIoList1 list offset size _ 0 =
    list
genIoList1 list offset size 0 _ =
    list

genIoList1 list (offset:os) (size:ss) count bound =
    if isNothing $ find (overLap x) list
             then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1)
             else genIoList1 list os ss count (bound -1)
    where
      x = IoLoc (read (show offset)) (read(show size)) count
--      offset = (read (show offset1))
--      size = (read(show size1))

genIoList :: FileHdr -> [IoLoc]
genIoList (FileHdr name seed min max count) =
    genIoList1 []  (myrandomRlist 4000 1099511627776 seed)
(myrandomRlist min max seed) count (count*2)

doActualIo :: Fd -> IoLoc -> String -> IO ()
doActualIo fd (IoLoc off sz num) str = do
  off <- fdSeek fd AbsoluteSeek off
  fdWrite fd str
  return ()

doVerifyIo :: Fd -> IoLoc -> String -> IO ()
doVerifyIo fd (IoLoc off sz num) str = do
  off <- fdSeek fd AbsoluteSeek off
  (filedata, count) <- fdRead fd sz
  if str == filedata
     then return ()
     else throwIO (Corrupt ("Data corruption in #" ++  (take 200 str)
++ "#"  ++ (take 200 filedata))(IoLoc off sz num))

mainWrite file  = do
  hdr <- doWriteFile file
  hdrIO <- doHdrFromFile file
  iolst <- return $ genIoList hdr
  app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst)
  putStrLn (fileName hdr)
  mapM (\(x,y)-> doActualIo (fd hdrIO)  x y) app
  mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
  return hdr

mainVerify file = do
  hdrIO <- doHdrFromFile file
  hdr <- return (params hdrIO)
  iolst <- return $ genIoList hdr
  file <- return $ filter (\x->not ( x=='"')) (fileName hdr)
  app <- return $ zip iolst (map (genPattern file) iolst)
  putStrLn $ filter (\x->not ( x=='"')) (fileName hdr)
  mapM (\(x,y)-> doVerifyIo (fd hdrIO)  x y) app
  return hdr


main =  do
  x <- getArgs
  if (length x) == 3
    then do
      main1
    else do
      putStrLn "USAGE:\nfile-io write/verify <full-path file name>
<number of threads>  \
\\n\n\
\Simple IO load generator with write verification. This utility is designed\n\
\to generate multi-threaded IO load which will write a pattern to the file. \n\
\When this is invoked with the same parameters with the verify option \n\
\the data written will be verified.\n\
\ "

main1 = do
  [op, name, numProcs] <- getArgs
  m <- newEmptyMVar
  n <- return $read numProcs
  case op of
    "write" -> do mapM forkIO  [(fillfile (name ++ (show i)) ) m|i<-[1..n]]
    "verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]]
  x <- mapM takeMVar $ take (read numProcs) $ repeat m
  putStrLn $ show $ and x

fillfile filename m = do
  mainWrite filename
  putMVar m True

verifyfile filename m = do
  mainVerify filename
  putMVar m True



More information about the Beginners mailing list