[Haskell-cafe] Comments and suggestions on code
Andre Nathan
andre at digirati.com.br
Wed Jan 9 22:57:09 EST 2008
Hello
I've just found time to finish writing my first "real world" program, so
I thought I'd post it here and ask for insight on general issues such as
if there's anything that isn't done "the Haskell way", or if there's
something that could be done more efficiently.
The code is at the bottom of this message and also at
http://hpaste.org/4893. I realize it's a bit long, so if anyone could
just skim through it and see if there's anything really ugly or stupid
and point it out, it would be of great help :)
Just to make it easier to follow the code, its idea is simple:
- Build a process tree by reading entries from /proc (represented as a
map);
- Compare each child of the init process against a whitelist (which
comes from a configuration file);
- For each child not in the whitelist, send it a KILL signal.
The idea here is to run this on webservers and try to catch bad
customers who try to run daemons from their accounts, the typical script
kiddie stuff.
Anyway, there's one specific question I'd like to ask. I'm using "StateT
PsTree IO" to keep program state (the process tree). IO is necessary
because all information is read from /proc files. Now consider the
following function:
appendChild :: Pid -> Pid -> StateT PsTree IO Bool
appendChild ppid pid = do
tree <- get
let PsInfo psData children = mapLookup ppid tree
put $ Map.insert ppid (PsInfo psData (pid:children)) tree
return True
It changes the program state by modifying a process tree entry, but it
does no I/O at all. The return type is there basically to match the
return type of the function which calls it (insertParent), which calls
functions that do I/O. Is there anyway to avoid the "IO" in
appendChild's signature (other than making it a pure function by passing
the process tree as a parameter and returning a modified map)?
I would also like to try ways to improve efficiency, maybe trying a hash
table instead of a map for the state, and also using bytestrings. I
guess I could try making it parallel, since each child of init can be
checked independently.
Anyway, this is already longer than I thought it would be (I hope I'm
not abusing too much :) The code follows. Thanks in advance for any
comments or suggestions.
Andre
module Main where
import qualified Data.Map as Map
import Directory
import Control.Monad.State
import Maybe
import System.Environment
import System.IO
import System.Posix.Files
import System.Posix.Signals
import System.Posix.Unistd
import System.Posix.User
import Text.Printf
import Text.Regex
import Text.Regex.Posix
type Pid = FilePath
type Uid = String
type PsData = Map.Map String String
type PsChildren = [Pid]
type KillFunction = PsTree -> Pid -> IO ()
data PsInfo = PsInfo PsData PsChildren
type PsTree = Map.Map Pid PsInfo
type Whitelist = Map.Map FilePath String
mapLookup :: (Ord a) => a -> Map.Map a b -> b
mapLookup k = fromJust . Map.lookup k
-- Process Tree construction
parentPid :: PsInfo -> Pid
parentPid (PsInfo psData _) = mapLookup "PPid" psData
getProcInfo :: String -> PsData -> PsData
getProcInfo line psData = do
case line =~~ "^([A-Za-z]+):[[:space:]]+(.*)$" of
Nothing -> psData
Just ([_, key, value]:_) -> Map.insert key value psData
getIds :: String -> PsData -> (String, String)
getIds id psData = (rId, eId)
where (rId:eId:_) = words (mapLookup id psData)
processData :: String -> PsData
processData procData = addIds psData
where psData = foldr getProcInfo Map.empty (lines procData)
addIds psData = Map.union psData (idMap psData)
idMap psData = Map.fromList [("RUid", rUid), ("EUid", eUid),
("RGid", rGid), ("EGid", eGid)]
(rUid, eUid) = getIds "Uid" psData
(rGid, eGid) = getIds "Gid" psData
readLink :: String -> IO String
readLink link = catch (readSymbolicLink link) (\e -> return "?")
procInfo :: Pid -> IO PsInfo
procInfo pid = do
let dir = "/proc/" ++ pid ++ "/"
procData <- readFile $ dir ++ "status"
exe <- readLink $ dir ++ "exe"
cwd <- readLink $ dir ++ "cwd"
cmd <- readFile $ dir ++ "cmdline"
let cmd' = subRegex (mkRegex "[^a-zA-z[:space:]\\/\\.-]") cmd " "
info = processData procData
adminInfo = Map.fromList [("Exe", exe), ("Cwd", cwd),
("Cmd", cmd')]
return $ PsInfo (Map.union info adminInfo) []
addProc :: Pid -> StateT PsTree IO PsInfo
addProc pid = do
info <- lift $ procInfo pid
modify (Map.insert pid info)
return info
appendChild :: Pid -> Pid -> StateT PsTree IO Bool
appendChild ppid pid = do
tree <- get
let PsInfo psData children = mapLookup ppid tree
put $ Map.insert ppid (PsInfo psData (pid:children)) tree
return True
insertParent :: Pid -> Pid -> StateT PsTree IO Bool
insertParent ppid pid = do
tree <- get
if Map.member ppid tree
then appendChild ppid pid
else do
built <- insertInTree ppid
if built
then appendChild ppid pid
else return False
insertPid :: Pid -> StateT PsTree IO Bool
insertPid "1" = do
info <- addProc "1"
return True
insertPid pid = do
info <- addProc pid
let ppid = parentPid info
if ppid == "0" then return False else insertParent ppid pid
insertInTree :: Pid -> StateT PsTree IO Bool
insertInTree pid = do
tree <- get
if Map.member pid tree then return True else insertPid pid
buildTree :: FilePath -> StateT PsTree IO Bool
buildTree entry | entry =~ "^[0-9]+$" = insertInTree entry
| otherwise = return False
createTree :: IO PsTree
createTree = do
entries <- getDirectoryContents "/proc"
execStateT (mapM_ buildTree entries) Map.empty
-- Process Tree pretty-printing
treeStr :: PsTree -> PsChildren -> Int -> String -> String
treeStr tree children level str = foldr append str children
where append pid s = treeStr tree children' (level + 1) newstr
where PsInfo _ children' = mapLookup pid tree
pad = take (4 * level) [' ', ' ' ..]
newstr = s ++ "\n" ++ pad ++ pid
printTree :: PsTree -> Pid -> IO ()
printTree tree pid = putStrLn (treeStr tree children 1 pid)
where PsInfo _ children = mapLookup pid tree
-- Process killing
tryToKill :: PsTree -> KillFunction -> Pid -> Uid -> Uid -> Bool ->
IO Bool
tryToKill tree killFun pid "0" allowedUid killed = do
-- The process may be starting, give it a second chance.
sleep 10 -- conservative value.
let PsInfo psData _ = mapLookup pid tree
if allowedUid /= mapLookup "EUid" psData
then killFun tree pid >> return True
else return (killed || False)
tryToKill tree killFun pid _ _ _ = killFun tree pid >> return True
buildWhitelist :: String -> Whitelist -> Whitelist
buildWhitelist line whitelist = do
case line =~~ "^[ \t]*([^: \t]+)[ \t]*:[ \t]*([^ \t]+)[ \t]*$" of
Nothing -> error "Invalid configuration file"
Just ([_, exe, user]:_) -> Map.insert exe user whitelist
readWhiteList :: FilePath -> IO Whitelist
readWhiteList file = do
contents <- readFile file
return $ foldr buildWhitelist Map.empty (lines contents)
allowedUidForExecutable :: Whitelist -> FilePath -> IO Uid
allowedUidForExecutable whitelist exe = do
case Map.lookup exe whitelist of
Nothing -> return "0"
Just user -> do
entry <- getUserEntryForName user
return $ show (userID entry)
processBastard :: PsTree -> Whitelist -> KillFunction -> Bool ->
Pid -> IO Bool
processBastard tree whitelist killFun killed pid = do
let PsInfo psData _ = mapLookup pid tree
euid = mapLookup "EUid" psData
exe = mapLookup "Exe" psData
allowedUid <- allowedUidForExecutable whitelist exe
if euid /= allowedUid
then tryToKill tree killFun pid euid allowedUid killed
else return killed
withEachBastard :: PsTree -> Whitelist -> KillFunction -> IO Bool
withEachBastard tree whitelist killFun =
foldM (processBastard tree whitelist killFun) False children
where (PsInfo _ children) = mapLookup "1" tree
printWarnings :: Pid -> PsData -> IO ()
printWarnings pid psData = do
let exe = mapLookup "Exe" psData
let cmd = mapLookup "Cmd" psData
let cwd = mapLookup "Cwd" psData
let ppid = mapLookup "PPid" psData
let euid = mapLookup "EUid" psData
let ruid = mapLookup "RUid" psData
let egid = mapLookup "EGid" psData
if ruid /= euid
then hPrintf stderr "PID %s: RUID=%s, EUID=%s\n" pid ruid euid
else return ()
hPrintf stderr "Killing proc %s (%s, UID=%s, GID=%s), child of %s\n"
pid exe euid egid ppid
hPrintf stderr " Process command line: %s\n" cmd
hPrintf stderr " Process working directory: %s\n" cwd
killTree :: KillFunction
killTree tree pid = do
let PsInfo psData children = mapLookup pid tree
printWarnings pid psData
signalProcess sigKILL (read pid)
mapM_ (killTree tree) children
killBastards :: PsTree -> Whitelist -> Int -> IO ()
killBastards tree whitelist n = do
runAgain <- withEachBastard tree whitelist killTree
if runAgain && n > 1
then do
sleep 2
killBastards tree whitelist (n - 1)
else
return ()
-- Configuration
printConfig :: KillFunction
printConfig tree pid = do
let PsInfo psData _ = mapLookup pid tree
exe = mapLookup "Exe" psData
euid = mapLookup "EUid" psData
entry <- getUserEntryForID (read euid)
putStrLn $ exe ++ ": " ++ (userName entry)
-- main helpers
config :: PsTree -> IO ()
config tree = do
withEachBastard tree Map.empty printConfig
return ()
pstree :: PsTree -> IO ()
pstree tree = do
printTree tree "1"
-- In newer kernels, process 2 is kthreadd, which is not a child
-- of init.
let info = mapLookup "2" tree
if parentPid info /= "1" then printTree tree "2" else return ()
kill :: PsTree -> IO ()
kill tree = do
whitelist <- readWhiteList "killbastards.conf"
killBastards tree whitelist 5
main :: IO ()
main = do
args <- getArgs
tree <- createTree
case args of
["config"] -> config tree
["pstree"] -> pstree tree
[] -> kill tree
More information about the Haskell-Cafe
mailing list