I squeezed a bit more out (see attached).  I think the main bottleneck 
is now the random number generator: in particular, it supplies boxed 
Doubles, which have to be unboxed again before being stored in the array.

import Data.Array.IO
import Data.Array.Base
import System.Environment (getArgs)
import System.Random

-- | Mutable unboxed array of 'Double's, indexed by 'Int' and used in 'IO'.
type Vector = IOUArray Int Double

-- | Benchmark driver: fills a vector @v@ with random values from the range
-- (-1,1) and then adds it element-wise into a zero-initialised vector @x@,
-- repeating the addition a given number of times.
--
-- Command line (extra trailing arguments are ignored):
--
-- > <n> elements   <m>     -- n elements,   m iterations
-- > <n> iterations <m>     -- n iterations, m elements
--
-- Any other argument shape, or a count that does not parse as an 'Int',
-- aborts with a usage error rather than a pattern-match or 'read' failure.
main :: IO ()
main = do
    args <- getArgs
    (nelems, niterations) <- case args of
        (n : "elements"   : m : _) -> sizes n m
        (n : "iterations" : m : _) -> sizes m n
        _                          -> usage

    x <- newArray (0, nelems - 1) 0 :: IO Vector
    -- 'newArray_' makes no promise about the initial contents; every slot
    -- of v is overwritten by the fill loop below before it is ever read.
    v <- newArray_ (0, nelems - 1) :: IO Vector

    -- Kept from the original: force both array references before the loops.
    x `seq` v `seq` return ()

    -- unsafeRead/unsafeWrite perform no bounds checks; 'for 0 nelems' only
    -- yields indices 0 .. nelems-1, which matches the array bounds above.
    for 0 nelems $ \i -> do
        r <- randomRIO (-1, 1)
        unsafeWrite v i r

    for 0 niterations $ \_ ->
        for 0 nelems $ \i -> do
            xi <- unsafeRead x i
            vi <- unsafeRead v i
            unsafeWrite x i (xi + vi)

    -- Uncomment to print the resulting vector:
    --for 0 nelems $ \i ->
    --  do  xi <- unsafeRead x i
    --      putStr (show xi)
    --      putChar ' '
    --putChar '\n'
  where
    -- Abort with a description of the accepted command line.
    usage :: IO (Int, Int)
    usage = ioError $ userError
        "usage: <n> elements <m> | <n> iterations <m>  (n, m integers)"

    -- Parse the element count and the iteration count, in that order.
    sizes :: String -> String -> IO (Int, Int)
    sizes elemsArg itersArg =
        case (parseCount elemsArg, parseCount itersArg) of
            (Just ne, Just ni) -> return (ne, ni)
            _                  -> usage

    -- Total replacement for 'read': accepts exactly one parse, allowing
    -- only trailing whitespace after the number (as 'read' does).
    parseCount :: String -> Maybe Int
    parseCount s = case reads s of
        [(k, rest)] | null (words rest) -> Just k
        _                               -> Nothing

-- | @for from to action@ runs @action i@ for every @i@ with
-- @from <= i < to@, in ascending order, discarding the results.
-- Does nothing when @from >= to@.
--
-- Written as an explicit counting loop; intended as a faster equivalent of
-- @mapM_ action [from .. to-1]@.
for :: Int -> Int -> (Int -> IO a) -> IO ()
for from to action = from `seq` to `seq` go from
  where
    -- Both bounds are forced above before the loop starts; the counter is
    -- forced on every step via '$!' so no chain of (+1) thunks builds up.
    go i
      | i >= to   = return ()
      | otherwise = do
          _ <- action i
          go $! (i + 1)

