[Haskell] filesystem verification utility
Anand Mitra
mitra at kqinfotech.com
Tue Jan 11 03:25:03 CET 2011
Hi,
I had a requirement to generate various kinds of I/O patterns on a
filesystem and subsequently verify this. The initial version of the
program below implements a random I/O pattern with multiple threads.
Even when the number of I/O is as small as 200 and 10 concurrent
theads, the amount of memory used is huge. When I run the program it
consumes close to 1to2GB memory. Moreover the rate at which it
generates the I/O is very low which is not good for testing a
filesystem. I have used System.POSIX.IO but I tried System.IO and did
not see much difference either. I would appreciate help in identifying
ways to improve this.
{-# OPTIONS -fglasgow-exts #-}
import System.Random
import Data.List
import Monad
import System.Posix.IO
import System.Posix.Types
import Data.Time.Clock
import System.Posix.Files
import Data.Maybe
import GHC.IO.Device (SeekMode(..))
import Control.Exception
import Data.Typeable
import System
import Control.Concurrent
--myrandomRlist :: (Num t, Random t) => t -> IO [t]
--myrandomRlist x = liftM (randomRs (0,x)) newStdGen
myrandomRlist :: (Num t, Random t) => t ->t -> StdGen -> [t]
myrandomRlist min max seed = randomRs (min,max) seed
data IoLoc = IoLoc {offset :: FileOffset, size::ByteCount, num::Int }
deriving (Show, Typeable)
instance Exception IoLoc
data Corrupt = Corrupt String IoLoc
deriving (Show, Typeable)
instance Exception Corrupt
data FileHdr = FileHdr {fileName::FilePath, seed::StdGen,
minIoSize::Int, maxIoSize::Int, ioCount::Int}
deriving (Show)
data FileIO = FileIO {fd::Fd, params::FileHdr, fileData::[IoLoc]}
genPattern :: FilePath -> IoLoc -> String
genPattern f l =
take (read $ show $size l) (cycle $ "(" ++ f ++ ")" ++ "[" ++
(show (offset l)) ++ ":" ++ (show (size l)) ++ (show (num l)) ++ "]")
doHdrRead :: Fd -> IO [Char]
doHdrRead fd = do
(str, count) <- fdRead fd 100
return (takeWhile (\x-> not (x == '\n')) str)
doGetHdr :: Fd -> IO FileHdr
doGetHdr fd = do
file <- doHdrRead fd
seed <- doHdrRead fd
minIoSize <- doHdrRead fd
maxIoSize <- doHdrRead fd
ioCount <- doHdrRead fd
return (FileHdr file (read seed) (read minIoSize) (read maxIoSize)
(read ioCount))
doHdrFromFile :: FilePath -> IO FileIO
doHdrFromFile name = do
fd <- openFd name ReadWrite (Just ownerModes) (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=False})
hdr <- doGetHdr fd
return (FileIO fd hdr [])
doHdrWrite :: Fd -> [Char] -> IO ByteCount
doHdrWrite fd str = do
fdWrite fd (take 100 (str ++ "\n" ++ (cycle ['\0'])))
doWriteHdr :: Fd -> FileHdr -> IO ByteCount
doWriteHdr fd hdr = do
doHdrWrite fd (show (fileName hdr))
doHdrWrite fd (show (seed hdr))
doHdrWrite fd (show (minIoSize hdr))
doHdrWrite fd (show (maxIoSize hdr))
doHdrWrite fd (show (ioCount hdr))
doWriteFile :: FilePath -> IO FileHdr
doWriteFile name = do
fd <- openFd name ReadWrite (Just ownerModes) (OpenFileFlags
{append=False, exclusive=False, noctty=True, nonBlock=False,
trunc=True})
seed <- newStdGen
hdr <- return (FileHdr name seed 4096 (2*8096) 200)
doWriteHdr fd hdr
return hdr
overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) =
((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2)
&&(off1+sz1 < off2 +sz2))
where
off1 = read (show off1')
sz1 = read (show sz1')
off2 = read (show off2')
sz2 = read (show sz2')
genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc]
genIoList1 list offset size _ 0 =
list
genIoList1 list offset size 0 _ =
list
genIoList1 list (offset:os) (size:ss) count bound =
if isNothing $ find (overLap x) list
then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1)
else genIoList1 list os ss count (bound -1)
where
x = IoLoc (read (show offset)) (read(show size)) count
-- offset = (read (show offset1))
-- size = (read(show size1))
genIoList :: FileHdr -> [IoLoc]
genIoList (FileHdr name seed min max count) =
genIoList1 [] (myrandomRlist 4000 1099511627776 seed)
(myrandomRlist min max seed) count (count*2)
doActualIo :: Fd -> IoLoc -> String -> IO ()
doActualIo fd (IoLoc off sz num) str = do
off <- fdSeek fd AbsoluteSeek off
fdWrite fd str
return ()
doVerifyIo :: Fd -> IoLoc -> String -> IO ()
doVerifyIo fd (IoLoc off sz num) str = do
off <- fdSeek fd AbsoluteSeek off
(filedata, count) <- fdRead fd sz
if str == filedata
then return ()
else throwIO (Corrupt ("Data corruption in #" ++ (take 200 str)
++ "#" ++ (take 200 filedata))(IoLoc off sz num))
mainWrite file = do
hdr <- doWriteFile file
hdrIO <- doHdrFromFile file
iolst <- return $ genIoList hdr
app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst)
putStrLn (fileName hdr)
mapM (\(x,y)-> doActualIo (fd hdrIO) x y) app
mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app
return hdr
mainVerify file = do
hdrIO <- doHdrFromFile file
hdr <- return (params hdrIO)
iolst <- return $ genIoList hdr
file <- return $ filter (\x->not ( x=='"')) (fileName hdr)
app <- return $ zip iolst (map (genPattern file) iolst)
putStrLn $ filter (\x->not ( x=='"')) (fileName hdr)
mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app
return hdr
main = do
x <- getArgs
if (length x) == 3
then do
main1
else do
putStrLn "USAGE:\nfile-io write/verify <full-path file name>
<number of threads> \
\\n\n\
\Simple IO load generator with write verification. This utility is designed\n\
\to generate multi-threaded IO load which will write a pattern to the file. \n\
\When this is invoked with the same parameters with the verify option \n\
\the data written will be verified.\n\
\ "
main1 = do
[op, name, numProcs] <- getArgs
m <- newEmptyMVar
n <- return $read numProcs
case op of
"write" -> do mapM forkIO [(fillfile (name ++ (show i)) ) m|i<-[1..n]]
"verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]]
x <- mapM takeMVar $ take (read numProcs) $ repeat m
putStrLn $ show $ and x
fillfile filename m = do
mainWrite filename
putMVar m True
verifyfile filename m = do
mainVerify filename
putMVar m True
More information about the Haskell
mailing list