[Haskell-cafe] Comments and suggestions on code

Andre Nathan andre at digirati.com.br
Wed Jan 9 22:57:09 EST 2008


I've just found time to finish writing my first "real world" program, so
I thought I'd post it here and ask for insight on general issues such as
if there's anything that isn't done "the Haskell way", or if there's
something that could be done more efficiently.

The code is at the bottom of this message and also at
http://hpaste.org/4893. I realize it's a bit long, so if anyone could
just skim through it and see if there's anything really ugly or stupid
and point it out, it would be of great help :)

Just to make it easier to follow the code, its idea is simple:

- Build a process tree by reading entries from /proc (represented as a 
- Compare each child of the init process against a whitelist (which 
  comes from a configuration file);
- For each child not in the whitelist, send it a KILL signal.

The idea here is to run this on webservers and try to catch bad
customers who try to run daemons from their accounts, the typical script
kiddie stuff.

Anyway, there's one specific question I'd like to ask. I'm using "StateT
PsTree IO" to keep program state (the process tree). IO is necessary
because all information is read from /proc files. Now consider the
following function:

appendChild :: Pid -> Pid -> StateT PsTree IO Bool
appendChild ppid pid = do
  tree <- get
  let PsInfo psData children = mapLookup ppid tree
  put $ Map.insert ppid (PsInfo psData (pid:children)) tree
  return True

It changes the program state by modifying a process tree entry, but it
does no I/O at all. The return type is there basically to match the
return type of the function which calls it (insertParent), which calls
functions that do I/O. Is there anyway to avoid the "IO" in
appendChild's signature (other than making it a pure function by passing
the process tree as a parameter and returning a modified map)?

I would also like to try ways to improve efficiency, maybe trying a hash
table instead of a map for the state, and also using bytestrings. I
guess I could try making it parallel, since each child of init can be
checked independently.

Anyway, this is already longer than I thought it would be (I hope I'm
not abusing too much :) The code follows. Thanks in advance for any
comments or suggestions.


module Main where

import qualified Data.Map as Map
import Directory
import Control.Monad.State
import Maybe
import System.Environment
import System.IO
import System.Posix.Files
import System.Posix.Signals
import System.Posix.Unistd
import System.Posix.User
import Text.Printf
import Text.Regex
import Text.Regex.Posix

type Pid = FilePath
type Uid = String

type PsData       = Map.Map String String
type PsChildren   = [Pid]
type KillFunction = PsTree -> Pid -> IO ()

data PsInfo = PsInfo PsData PsChildren
type PsTree = Map.Map Pid PsInfo

type Whitelist = Map.Map FilePath String

mapLookup :: (Ord a) => a -> Map.Map a b -> b
mapLookup k = fromJust . Map.lookup k

-- Process Tree construction

parentPid :: PsInfo -> Pid
parentPid (PsInfo psData _) = mapLookup "PPid" psData
getProcInfo :: String -> PsData -> PsData
getProcInfo line psData = do
  case line =~~ "^([A-Za-z]+):[[:space:]]+(.*)$" of
    Nothing                  -> psData
    Just ([_, key, value]:_) -> Map.insert key value psData 
getIds :: String -> PsData -> (String, String)
getIds id psData = (rId, eId)
  where (rId:eId:_) = words (mapLookup id psData)
processData :: String -> PsData
processData procData = addIds psData
  where psData = foldr getProcInfo Map.empty (lines procData)
        addIds psData = Map.union psData (idMap psData)
        idMap psData = Map.fromList [("RUid", rUid), ("EUid", eUid),
                                     ("RGid", rGid), ("EGid", eGid)]
        (rUid, eUid) = getIds "Uid" psData
        (rGid, eGid) = getIds "Gid" psData

readLink :: String -> IO String
readLink link = catch (readSymbolicLink link) (\e -> return "?")

procInfo :: Pid -> IO PsInfo
procInfo pid = do
  let dir = "/proc/" ++ pid ++ "/"
  procData <- readFile $ dir ++ "status"
  exe <- readLink $ dir ++ "exe"
  cwd <- readLink $ dir ++ "cwd"
  cmd <- readFile $ dir ++ "cmdline"
  let cmd' = subRegex (mkRegex "[^a-zA-z[:space:]\\/\\.-]") cmd " "
      info = processData procData
      adminInfo = Map.fromList [("Exe", exe), ("Cwd", cwd),
                                ("Cmd", cmd')]
  return $ PsInfo (Map.union info adminInfo) []

addProc :: Pid -> StateT PsTree IO PsInfo
addProc pid = do
  info <- lift $ procInfo pid
  modify (Map.insert pid info)
  return info

appendChild :: Pid -> Pid -> StateT PsTree IO Bool
appendChild ppid pid = do
  tree <- get
  let PsInfo psData children = mapLookup ppid tree
  put $ Map.insert ppid (PsInfo psData (pid:children)) tree
  return True

insertParent :: Pid -> Pid -> StateT PsTree IO Bool
insertParent ppid pid = do
  tree <- get
  if Map.member ppid tree
    then appendChild ppid pid
    else do
      built <- insertInTree ppid
      if built
        then appendChild ppid pid
        else return False

insertPid :: Pid -> StateT PsTree IO Bool
insertPid "1" = do
  info <- addProc "1"
  return True
insertPid pid = do
  info <- addProc pid
  let ppid = parentPid info
  if ppid == "0" then return False else insertParent ppid pid

insertInTree :: Pid -> StateT PsTree IO Bool
insertInTree pid = do
  tree <- get
  if Map.member pid tree then return True else insertPid pid

buildTree :: FilePath -> StateT PsTree IO Bool
buildTree entry | entry =~ "^[0-9]+$" = insertInTree entry
                | otherwise           = return False

createTree :: IO PsTree
createTree = do
  entries <- getDirectoryContents "/proc"
  execStateT (mapM_ buildTree entries) Map.empty

-- Process Tree pretty-printing

treeStr :: PsTree -> PsChildren -> Int -> String -> String
treeStr tree children level str = foldr append str children
  where append pid s = treeStr tree children' (level + 1) newstr
          where PsInfo _ children' = mapLookup pid tree
                pad = take (4 * level) [' ', ' ' ..]
                newstr = s ++ "\n" ++ pad ++ pid

printTree :: PsTree -> Pid -> IO ()
printTree tree pid = putStrLn (treeStr tree children 1 pid)
  where PsInfo _ children = mapLookup pid tree

-- Process killing

tryToKill :: PsTree -> KillFunction -> Pid -> Uid -> Uid -> Bool ->
             IO Bool
tryToKill tree killFun pid "0" allowedUid killed = do
  -- The process may be starting, give it a second chance.
  sleep 10 -- conservative value.
  let PsInfo psData _ = mapLookup pid tree
  if allowedUid /= mapLookup "EUid" psData
    then killFun tree pid >> return True
    else return (killed || False)
tryToKill tree killFun pid _ _ _ = killFun tree pid >> return True

buildWhitelist :: String -> Whitelist -> Whitelist
buildWhitelist line whitelist = do
  case line =~~ "^[ \t]*([^: \t]+)[ \t]*:[ \t]*([^ \t]+)[ \t]*$" of
    Nothing                 -> error "Invalid configuration file"
    Just ([_, exe, user]:_) -> Map.insert exe user whitelist

readWhiteList :: FilePath -> IO Whitelist
readWhiteList file = do
  contents <- readFile file
  return $ foldr buildWhitelist Map.empty (lines contents)

allowedUidForExecutable :: Whitelist -> FilePath -> IO Uid
allowedUidForExecutable whitelist exe = do
  case Map.lookup exe whitelist of
    Nothing   -> return "0"
    Just user -> do
      entry <- getUserEntryForName user
      return $ show (userID entry)

processBastard :: PsTree -> Whitelist -> KillFunction -> Bool ->
                  Pid -> IO Bool
processBastard tree whitelist killFun killed pid = do
  let PsInfo psData _ = mapLookup pid tree
      euid = mapLookup "EUid" psData
      exe = mapLookup "Exe" psData
  allowedUid <- allowedUidForExecutable whitelist exe
  if euid /= allowedUid
    then tryToKill tree killFun pid euid allowedUid killed
    else return killed

withEachBastard :: PsTree -> Whitelist -> KillFunction -> IO Bool
withEachBastard tree whitelist killFun =
  foldM (processBastard tree whitelist killFun) False children
  where (PsInfo _ children) = mapLookup "1" tree

printWarnings :: Pid -> PsData -> IO ()
printWarnings pid psData = do
  let exe = mapLookup "Exe" psData
  let cmd = mapLookup "Cmd" psData
  let cwd = mapLookup "Cwd" psData
  let ppid = mapLookup "PPid" psData
  let euid = mapLookup "EUid" psData
  let ruid = mapLookup "RUid" psData
  let egid = mapLookup "EGid" psData
  if ruid /= euid
    then hPrintf stderr "PID %s: RUID=%s, EUID=%s\n" pid ruid euid
    else return ()
  hPrintf stderr "Killing proc %s (%s, UID=%s, GID=%s), child of %s\n"
          pid exe euid egid ppid
  hPrintf stderr "  Process command line: %s\n" cmd
  hPrintf stderr "  Process working directory: %s\n" cwd

killTree :: KillFunction
killTree tree pid = do
  let PsInfo psData children = mapLookup pid tree
  printWarnings pid psData
  signalProcess sigKILL (read pid)
  mapM_ (killTree tree) children

killBastards :: PsTree -> Whitelist -> Int -> IO ()
killBastards tree whitelist n = do
  runAgain <- withEachBastard tree whitelist killTree
  if runAgain && n > 1
    then do
      sleep 2
      killBastards tree whitelist (n - 1)
      return ()

-- Configuration

printConfig :: KillFunction
printConfig tree pid = do
  let PsInfo psData _ = mapLookup pid tree
      exe = mapLookup "Exe" psData
      euid = mapLookup "EUid" psData
  entry <- getUserEntryForID (read euid)
  putStrLn $ exe ++ ": " ++ (userName entry)

-- main helpers

config :: PsTree -> IO ()
config tree = do
  withEachBastard tree Map.empty printConfig
  return ()

pstree :: PsTree -> IO ()
pstree tree = do
  printTree tree "1"
  -- In newer kernels, process 2 is kthreadd, which is not a child
  -- of init.
  let info = mapLookup "2" tree
  if parentPid info /= "1" then printTree tree "2" else return ()

kill :: PsTree -> IO ()
kill tree = do
  whitelist <- readWhiteList "killbastards.conf"
  killBastards tree whitelist 5

main :: IO ()
main = do
  args <- getArgs
  tree <- createTree
  case args of
    ["config"] -> config tree
    ["pstree"] -> pstree tree
    []         -> kill tree

More information about the Haskell-Cafe mailing list