[Haskell] filesystem verification utility
Anand Mitra
mitra at kqinfotech.com
Tue Jan 11 05:19:47 CET 2011
Based on feedback I inferred that the huge memory usage was mostly
because of the String handling in the patern generation. To make it
more efficient I have used Data.ByteString.Lazy.Char8
but now I get the following error when I execute
stress: tmp/asdf-1: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-3: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-5: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-2: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-7: hPutBuf: illegal operation (handle is closed)
stress: tmp/asdf-10: hPutBuf: illegal operation (handle is closed)
I have explicitly remove all calls to close despite this I get this
error. I tried both version of seek the fdSeek as well as hSeek. The
documentation on hSeek had a confusing comment that "The offset i is
given in terms of 8-bit bytes"
At this point I am unable to understand how the handle is getting
explicitly closed.
{-# OPTIONS -fglasgow-exts #-}
import Data.Int
import qualified Data.ByteString.Lazy.Char8 as L
import System.Random
import Data.List
import Monad
import System.Posix.IO
import System.Posix.Types
import Data.Time.Clock
import System.Posix.Files
import Data.Maybe
import GHC.IO.Device (SeekMode(..))
import Control.Exception
import Data.Typeable
import System
import Control.Concurrent
import System.IO
--myrandomRlist :: (Num t, Random t) => t -> IO [t]
--myrandomRlist x = liftM (randomRs (0,x)) newStdGen
myrandomRlist :: (Num t, Random t) => t ->t -> StdGen -> [t]
myrandomRlist min max seed = randomRs (min,max) seed
data IoLoc = IoLoc {offset ::FileOffset, size::Int, num::Int }
deriving (Show, Typeable)
instance Exception IoLoc
data Corrupt = Corrupt String IoLoc
deriving (Show, Typeable)
instance Exception Corrupt
data FileHdr = FileHdr {fileName::FilePath, seed::StdGen,
minIoSize::Int, maxIoSize::Int, ioCount::Int}
deriving (Show)
data FileIO = FileIO {fd::Handle, params::FileHdr, fileData::[IoLoc]}
check hdl = do
closed <- hIsClosed hdl
if closed
then do
putStrLn $ "file handle was closed" ++ (show hdl)
else do
return ()
genPattern :: FilePath -> IoLoc -> L.ByteString
genPattern f l =
L.take (fromIntegral (size l)) (L.cycle $ L.pack ("(" ++ f ++
")" ++ "[" ++ (show (offset l)) ++ ":" ++ (show (size l)) ++ (show
(num l)) ++ "]"))
doHdrRead :: Handle -> IO [Char]
doHdrRead x = do
fd <- handleToFd x
(str, count) <- fdRead fd 100
return (takeWhile (\x-> not (x == '\n')) str)
doGetHdr :: Handle -> IO FileHdr
doGetHdr fd = do
file <- doHdrRead fd
seed <- doHdrRead fd
minIoSize <- doHdrRead fd
maxIoSize <- doHdrRead fd
ioCount <- doHdrRead fd
return (FileHdr file (read seed) (read minIoSize) (read maxIoSize)
(read ioCount))
doHdrFromFile :: Handle -> IO FileIO
doHdrFromFile fd = do
-- fd <- openBinaryFile name ReadWriteMode -- (Just ownerModes)
(OpenFileFlags {append=False, exclusive=False, noctty=True,
nonBlock=False, trunc=False})
hSeek fd AbsoluteSeek 0
hdr <- doGetHdr fd
return (FileIO fd hdr [])
doHdrWrite :: Handle -> [Char] -> IO ()
doHdrWrite fd str = do
hPutStr fd (take 100 (str ++ "\n" ++ (cycle ['\0'])))
doWriteHdr :: Handle -> FileHdr -> IO ()
doWriteHdr fd hdr = do
doHdrWrite fd (show (fileName hdr))
doHdrWrite fd (show (seed hdr))
doHdrWrite fd (show (minIoSize hdr))
doHdrWrite fd (show (maxIoSize hdr))
doHdrWrite fd (show (ioCount hdr))
doWriteFile :: FilePath -> IO FileIO
doWriteFile name = do
fd <- openBinaryFile name ReadWriteMode -- (Just ownerModes)
(OpenFileFlags {append=False, exclusive=False, noctty=True,
nonBlock=False, trunc=True})
seed <- newStdGen
hdr <- return (FileHdr name seed 4096 (2*8096) 200)
doWriteHdr fd hdr
return (FileIO fd hdr [])
overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) =
((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2)
&&(off1+sz1 < off2 +sz2))
where
off1 = read (show off1')
sz1 = read (show sz1')
off2 = read (show off2')
sz2 = read (show sz2')
genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc]
genIoList1 list offset size _ 0 =
list
genIoList1 list offset size 0 _ =
list
genIoList1 list (offset:os) (size:ss) count bound =
if isNothing $ find (overLap x) list
then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1)
else genIoList1 list os ss count (bound -1)
where
x = IoLoc (read (show offset)) (read(show size)) count
-- offset = (read (show offset1))
-- size = (read(show size1))
genIoList :: FileHdr -> [IoLoc]
genIoList (FileHdr name seed min max count) =
genIoList1 [] (myrandomRlist 4000 1099511627776 seed)
(myrandomRlist min max seed) count (count*2)
doActualIo :: Handle -> IoLoc -> L.ByteString -> IO ()
doActualIo fd (IoLoc off sz num) str = do
-- hSeek fd AbsoluteSeek off
rfd <- (handleToFd fd)
fdSeek rfd AbsoluteSeek off
L.hPut fd str
return ()
doVerifyIo :: Handle -> IoLoc -> L.ByteString -> IO ()
doVerifyIo fd (IoLoc off sz num) str = do
-- hSeek fd AbsoluteSeek off
rfd <- (handleToFd fd)
fdSeek rfd AbsoluteSeek off
filedata <- L.hGet fd sz
if str == filedata
then return ()
else throwIO (Corrupt ("Data corruption in #" ++ (take 200
(L.unpack str)) ++ "#" ++ (take 200 (L.unpack filedata)))(IoLoc off
sz num))
mainWrite file = do
(FileIO hd _ _) <- doWriteFile file
hdrIO <- doHdrFromFile hd
check hd
(FileIO _ hdr _) <- return hdrIO
check hd
iolst <- return $ genIoList hdr
app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst)
putStrLn (fileName hdr)
mapM (\(x,y)-> doActualIo (fd hdrIO) x y) app
mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app
-- hClose hd
return hdr
mainVerify file = do
hd <- openBinaryFile file ReadWriteMode
hdrIO <- doHdrFromFile hd
hdr <- return (params hdrIO)
iolst <- return $ genIoList hdr
file <- return $ filter (\x->not ( x=='"')) (fileName hdr)
app <- return $ zip iolst (map (genPattern file) iolst)
putStrLn $ filter (\x->not ( x=='"')) (fileName hdr)
mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app
-- hClose hd
return hdr
main = do
x <- getArgs
if (length x) == 3
then do
main1
else do
putStrLn "USAGE:\nfile-io write/verify <full-path file name>
<number of threads> \
\\n\n\
\Simple IO load generator with write verification. This utility is designed\n\
\to generate multi-threaded IO load which will write a pattern to the file. \n\
\When this is invoked with the same parameters with the verify option \n\
\the data written will be verified.\n\
\ "
main1 = do
[op, name, numProcs] <- getArgs
m <- newEmptyMVar
n <- return $read numProcs
case op of
"write" -> do mapM forkIO [(fillfile (name ++ (show i)) ) m|i<-[1..n]]
"verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]]
x <- mapM takeMVar $ take (read numProcs) $ repeat m
putStrLn $ show $ and x
fillfile filename m = do
mainWrite filename
putMVar m True
verifyfile filename m = do
mainVerify filename
putMVar m True
On Tue, Jan 11, 2011 at 1:02 AM, Anand Mitra <mitra at kqinfotech.com> wrote:
> Hi,
>
> I had a requirement to generate various kinds of I/O patterns on a
> filesystem and subsequently verify this. The initial version of the
> program below implements a random I/O pattern with multiple threads.
> Even when the number of I/O is as small as 200 and 10 concurrent
> theads, the amount of memory used is huge. When I run the program it
> consumes close to 1to2GB memory. Moreover the rate at which it
> generates the I/O is very low which is not good for testing a
> filesystem. I have used System.POSIX.IO but I tried System.IO and did
> not see much difference either. I would appreciate help in identifying
> ways to improve this.
>
>
More information about the Haskell
mailing list