Weak pointers and STM

John Meacham john at repetae.net
Tue Dec 2 09:24:26 EST 2008


Well, the actual problem I am trying to solve involves properly
reclaiming elements in a  circularly linked list (next and prev pointers
are TVars). I have a linked list and I need to be able to remove values
from the list when all references to the node no longer exist, not
counting the linked list references themselves.

Using Weak pointers inside the list itself doesn't work, since if an
element is collected, you also lose the information needed to stitch up
the list.

Originally, I had a wacky plan involving weak pointers in the nodes
themselves pointing to sentinal nodes, when the sentinal was collected,
I then know I can delete the node. The idea was that I can lazily delete
entire chains of nodes rather than one by one. I gave up on that idea.
(deRefWeak not working in STM was sort of a show stopper, and it was
overly complicated)

So now I have a scheme whereby I attach a finalizer to a proxy thunk.


> data TimeStamp = TimeStamp TS
>
> data TS = TS {
>     tsWord :: TVar Word64,
>     tsPrev :: TVar TS,
>     tsNext :: TVar TS
>     }

so, the finalizer attached to 'TimeStamp' values simply deletes the
value it points to from the linked list. The module interface ensures
that only 'TimeStamp' values can escape and each has a finalizer
attached. the interface is quite simple:

> newTimeStamp :: IO TimeStamp
> insertAfter :: TimeStamp -> IO TimeStamp

now, the problem is that I want to export insertAfter in the 'STM'
monad, not the IO one. however, I am not sure how to pull it off. I
cannot ensure the finalizer is only attached once to the node, if I use
'unsafeIOToSTM' then STM retrying could end up created multiple finalized
nodes, possibly prematurely deleting the element from the linked list.

basically, what would be really nice is if there were something like

> registerCommitIO :: IO () -> STM ()

where all IO actions registered with this function (within an atomically
block) are executed exactly once if and only if the atomically block
commits. The IO action is not run within the STM block, notably

        atomically $ do foo; registerCommitIO bar; baz

is equivalent to 

        atomically (do foo; baz) >> bar


I found I needed the equivalent of 'touchForeignPtr' for arbitrary
objects (which I was able to do with the touch# primitive, but

> touch :: a -> IO ()
> touchSTM :: a -> STM ()

would both be at home in System.Mem.Weak.


While I am wishing for things,

> unsafePerformSTM :: STM a -> a

would be really handy too :)

        John

attached is my module in question, so, my challenge is basically to
make insertAfter/insertBefore have STM types and to make the 'Ord'
instance not fail with "nested atomically" error when evaluated within an
STM block (so I can get rid of compareTimeStamp).



{-# OPTIONS_GHC -fglasgow-exts #-}
module TimeStamp(
    TimeStamp(),
    newTimeStamp,
    compareTimeStamp,
    insertAfter,
    insertBefore
    ) where

import Data.Word
import System.IO.Unsafe
import Control.Concurrent.STM
import System.Mem.Weak
import GHC.Prim
import GHC.IO
import GHC.Conc
import Foreign.ForeignPtr

data TimeStamp = TimeStamp TS

data TS = TS {
    tsWord :: TVar Word64,
    tsPrev :: TVar TS,
    tsNext :: TVar TS
    }


instance Eq TimeStamp where
    TimeStamp a == TimeStamp b = tsWord a == tsWord b
instance Ord TimeStamp where
    compare x y = unsafePerformIO . atomically $ compareTimeStamp x y

{-# NOINLINE theBase #-}
theBase :: TS
theBase = unsafePerformIO $ newTimeStampSpace

newTimeStampSpace :: IO TS
newTimeStampSpace = mdo
    bot <- newTVarIO 0
    botNext <- newTVarIO beginningOfTime
    botPrev <- newTVarIO beginningOfTime
    let beginningOfTime = TS {
            tsWord = bot,
            tsNext = botNext,
            tsPrev = botPrev
            }
    return beginningOfTime

newTimeStamp :: IO TimeStamp
newTimeStamp = insertAfter (TimeStamp theBase)

deleteTimeStamp :: TS -> IO ()
deleteTimeStamp ts = do
    atomically $ do
        tsn <- readTVar (tsNext ts)
        tsp <- readTVar (tsPrev ts)
        writeTVar (tsPrev tsn) tsp
        writeTVar (tsNext tsp) tsn
        writeTVar (tsWord ts) (error "time stamp was deleted")

compareTimeStamp :: TimeStamp -> TimeStamp -> STM Ordering
compareTimeStamp t1@(TimeStamp ts1) t2@(TimeStamp ts2) = do
    bw <- readTVar $ tsWord theBase
    x <- readTVar (tsWord ts1)
    y <- readTVar (tsWord ts2)
    unsafeIOToSTM $ touchTS t1
    unsafeIOToSTM $ touchTS t2
    return $ compare (x + bw) (y + bw)

insertAfter' :: TS -> STM (TS,TimeStamp)
insertAfter' ts = do
    nts <- newTS
    doInsertAfter nts ts
    return (nts,TimeStamp nts)


insertAfter :: TimeStamp -> IO TimeStamp
insertAfter t@(TimeStamp ts) = do
    (tts,ts) <- atomically $ insertAfter' ts
    touchTS t
    addFinalizer ts (deleteTimeStamp tts)
    return ts

insertBefore :: TimeStamp -> IO TimeStamp
insertBefore t@(TimeStamp ts) = do
    (tts,ts) <- atomically $ readTVar (tsPrev ts) >>= insertAfter'
    touchTS t
    addFinalizer ts (deleteTimeStamp tts)
    return ts


touchTS :: TimeStamp -> IO ()
touchTS ts =  touchForeignPtr (unsafeCoerce# ts)

doInsertAfter :: TS -> TS -> STM ()
doInsertAfter ts1 ts2 = do
    v0 <- readTVar (tsWord ts2)
    let makeRoom j ts = do
            ts <- readTVar (tsNext ts)
            wj' <- if tsWord ts /= tsWord ts2 then readTVar (tsWord ts) else return (maxBound + v0)
            if fromIntegral (wj' - v0) <= j*j then makeRoom (j + 1) ts else relabel (wj' - v0) j
        relabel _ 1 = return ()
        relabel wj j = do
            rl ts2 [  ((toInteger wj * k) `div` j) + fromIntegral v0 | k <- [ 1 .. j - 1] ]
        rl ts [] = return ()
        rl ts (n:ns) = do
            ts <- readTVar (tsNext ts)
            writeTVar (tsWord ts) (fromIntegral n)
            rl ts ns
    makeRoom 1 ts2

    tn <- readTVar (tsNext ts2)
    wn <- readTVar (tsWord tn)
    writeTVar (tsNext ts1) tn
    writeTVar (tsPrev ts1) ts2
    writeTVar (tsNext ts2) ts1
    writeTVar (tsPrev tn) ts1
    bw <- readTVar $ tsWord theBase
    let avg = a1 `div` 2 + (a2 `div` 2) + (a1 `mod` 2 + a2 `mod` 2) `div` 2
        a1 = v0 - bw
        a2 = if tsWord tn == tsWord theBase then maxBound else wn - bw
    writeTVar (tsWord ts1) avg


showTimeStamp :: TimeStamp -> IO ()
showTimeStamp (TimeStamp ts) = atomically (f ts) >>= print where
    f t = do
        x <- readTVar (tsWord t)
        tsn <- readTVar (tsNext t)
        if tsWord tsn == tsWord ts then return [x] else fmap (x:) (f tsn)

showTimeStamps :: IO ()
showTimeStamps = showTimeStamp (TimeStamp theBase)

newTS = do
    w <- newTVar undefined
    next <- newTVar undefined
    prev <- newTVar undefined
    return $ TS w next prev




More information about the Glasgow-haskell-users mailing list