[Git][ghc/ghc][wip/T24150] 2 commits: wip

Teo Camarasu (@teo) gitlab at gitlab.haskell.org
Wed Feb 28 16:12:38 UTC 2024



Teo Camarasu pushed to branch wip/T24150 at Glasgow Haskell Compiler / GHC


Commits:
66dd3ab8 by Teo Camarasu at 2024-02-27T18:37:14+00:00
wip

- - - - -
6ce7951a by Teo Camarasu at 2024-02-28T16:12:19+00:00
wip

- - - - -


6 changed files:

- rts/include/rts/storage/Block.h
- rts/sm/BlockAlloc.c
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingAllocate.c
- testsuite/tests/rts/GcStaticPointers.hs


Changes:

=====================================
rts/include/rts/storage/Block.h
=====================================
@@ -318,6 +318,10 @@ bdescr *allocGroupOnNode(uint32_t node, W_ n);
 //
 bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
 
+// Allocate an MBlock's worth of `n`-block-sized chunks aligned at `n`-block boundaries.
+// This returns a linked list of `bdescr`s of length `BLOCKS_PER_MBLOCK / n`.
+bdescr *allocMBlockAlignedGroupOnNode(uint32_t node, W_ n);
+
 EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
 EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
 {

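For orientation (not part of the patch): a minimal sketch of how a caller might consume the chain returned by this new entry point, assuming RTS-internal headers and that the relevant storage-manager lock is already held, as the comment above requires. `for_each_aligned_group` and `consume` are hypothetical names used only for illustration.

    #include "Rts.h"  /* RTS-internal; provides bdescr, W_, uint32_t */

    /* Illustrative only: hand each n-block group in the returned chain to a
     * hypothetical consumer. Groups are linked through bd->link and the
     * list is NULL-terminated. */
    static void for_each_aligned_group(uint32_t node, W_ n,
                                       void (*consume)(bdescr *group))
    {
        bdescr *bd = allocMBlockAlignedGroupOnNode(node, n);
        while (bd != NULL) {
            bdescr *next = bd->link;  /* save before the consumer reuses bd */
            consume(bd);              /* each group is exactly n blocks */
            bd = next;
        }
    }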

=====================================
rts/sm/BlockAlloc.c
=====================================
@@ -393,6 +393,51 @@ split_block_low (bdescr *bd, W_ n)
     return bd;
 }
 
+static bdescr *
+split_group_low (bdescr *bd, W_ n)
+{
+    ASSERT(bd->blocks > n);
+
+    bdescr* bd_ = bd + n;
+    bd_->blocks = bd->blocks - n;
+    bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W;
+
+    bd->blocks = n;
+
+    setup_tail(bd);
+    setup_tail(bd_);
+
+    return bd_;
+}
+
+// Allocate an MBlock's worth of `n`-block-sized chunks aligned at `n`-block boundaries.
+// This returns a linked list of `bdescr`s of length `BLOCKS_PER_MBLOCK / n`.
+// We assume the relevant locks are held.
+bdescr *
+allocMBlockAlignedGroupOnNode(uint32_t node, W_ n)
+{
+    bdescr *bd = allocGroupOnNode(node, BLOCKS_PER_MBLOCK);
+
+    // Free unaligned blocks, as we can't use these.
+    bdescr *unaligned_blocks = bd;
+    ASSERT(bd->blocks == BLOCKS_PER_MBLOCK);
+    bd = split_group_low(unaligned_blocks, bd->blocks % n);
+    freeGroup(unaligned_blocks);
+    ASSERT(bd->blocks % n == 0);
+
+    bdescr *start = bd;
+    // Chain the aligned groups together into a linked list
+    while (bd->blocks > n) {
+      bdescr *chunk = bd;
+      bd = split_group_low(chunk, n);
+      chunk->link = bd;
+    }
+    bd->link = NULL;
+    ASSERT(bd->link == NULL);
+
+    return start;
+}
+
 
 /* Find a fitting block for the allocation request in the given free list.
    Returns:

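For a concrete sense of the split above, assuming the common 64-bit configuration (1 MiB megablocks and 4 KiB blocks, hence BLOCKS_PER_MBLOCK = 252) and n = NONMOVING_SEGMENT_BLOCKS = 8 (both values are configuration-dependent): the first split_group_low call carves off 252 mod 8 = 4 unaligned leading blocks, which are freed back to the block allocator, and the remaining 248 blocks are then chained into 248 / 8 = 31 aligned groups of 8 blocks each.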

=====================================
rts/sm/NonMoving.c
=====================================
@@ -579,17 +579,17 @@ static void nonmovingExitConcurrentWorker(void);
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
 {
     // See Note [Live data accounting in nonmoving collector].
-    if (RELAXED_LOAD(&nonmovingHeap.n_free) > NONMOVING_MAX_FREE) {
-        bdescr *bd = Bdescr((StgPtr) seg);
-        ACQUIRE_SM_LOCK;
-        ASSERT(oldest_gen->n_blocks >= bd->blocks);
-        ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks);
-        oldest_gen->n_blocks -= bd->blocks;
-        oldest_gen->n_words  -= BLOCK_SIZE_W * bd->blocks;
-        freeGroup(bd);
-        RELEASE_SM_LOCK;
-        return;
-    }
+    /* if (RELAXED_LOAD(&nonmovingHeap.n_free) > NONMOVING_MAX_FREE) { */
+    /*     bdescr *bd = Bdescr((StgPtr) seg); */
+    /*     ACQUIRE_SM_LOCK; */
+    /*     ASSERT(oldest_gen->n_blocks >= bd->blocks); */
+    /*     ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks); */
+    /*     oldest_gen->n_blocks -= bd->blocks; */
+    /*     oldest_gen->n_words  -= BLOCK_SIZE_W * bd->blocks; */
+    /*     freeGroup(bd); */
+    /*     RELEASE_SM_LOCK; */
+    /*     return; */
+    /* } */
 
     SET_SEGMENT_STATE(seg, FREE);
     while (true) {
@@ -601,6 +601,94 @@ void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
     __sync_add_and_fetch(&nonmovingHeap.n_free, 1);
 }
 
+static int
+cmp_segment_ptr (const void *x, const void *y)
+{
+    const struct NonmovingSegment *p1 = *(const struct NonmovingSegment**)x;
+    const struct NonmovingSegment *p2 = *(const struct NonmovingSegment**)y;
+    if (p1 > p2) return +1;
+    else if (p1 < p2) return -1;
+    else return 0;
+}
+
+void nonmovingPruneFreeSegmentList(void)
+{
+  // grab the entire free list
+  struct NonmovingSegment *free;
+  size_t length;
+  while (true) {
+    free = ACQUIRE_LOAD(&nonmovingHeap.free);
+    length = ACQUIRE_LOAD(&nonmovingHeap.n_free);
+    if (cas((StgVolatilePtr) &nonmovingHeap.free,
+            (StgWord) free,
+            (StgWord) NULL) == (StgWord) free) {
+        __sync_sub_and_fetch(&nonmovingHeap.n_free, length);
+        break;
+    }
+  }
+  // sort the free list by address
+  struct NonmovingSegment **sorted = stgMallocBytes(sizeof(struct NonmovingSegment*) * length, "sorted free segment list");
+  for(size_t i = 0; i<length; i++) {
+    sorted[i] = free;
+    free = free->link;
+  }
+  // we should have reached the end of the free list
+  ASSERT(free == NULL);
+
+  qsort(sorted, length, sizeof(struct NonmovingSegment*), cmp_segment_ptr);
+
+  // Walk the sorted list and either:
+  // - free the segments, if their entire megablock is free, or
+  // - put them back on the free list
+  size_t new_length = 0;
+  size_t free_in_megablock = 0;
+  for(size_t i = 0; i<length; i+=free_in_megablock) {
+    free_in_megablock = 1;
+    for(;i + free_in_megablock < length; free_in_megablock++) {
+      if (((W_)sorted[i] & ~MBLOCK_MASK) != ((W_)sorted[i + free_in_megablock] & ~MBLOCK_MASK))
+        break;
+    }
+    if (free_in_megablock < BLOCKS_PER_MBLOCK / NONMOVING_SEGMENT_BLOCKS) {
+      // the entire megablock isn't free, so put these segments back on the list
+      for(size_t j = 0; j < free_in_megablock;j++){
+        struct NonmovingSegment *last = free;
+        free = sorted[i+j];
+        free->link = last;
+        new_length++;
+      }
+    } else {
+      ACQUIRE_SM_LOCK;
+      for(size_t j = 0; j < free_in_megablock;j++){
+        bdescr *bd = Bdescr((StgPtr)sorted[i+j]);
+        freeGroup(bd);
+        // See Note [Live data accounting in nonmoving collector].
+        oldest_gen->n_blocks -= bd->blocks;
+        oldest_gen->n_words  -= BLOCK_SIZE_W * bd->blocks;
+      }
+      RELEASE_SM_LOCK;
+    }
+  }
+  stgFree(sorted);
+  // Push any segments we retained (because their megablock wasn't entirely free) back onto the global free list.
+  //printf("%d %d\n", length, new_length);
+  if(free) {
+    struct NonmovingSegment* tail = free;
+    while(tail->link) {
+      tail = tail->link;
+    }
+    while (true) {
+      struct NonmovingSegment* rest = ACQUIRE_LOAD(&nonmovingHeap.free);
+      tail->link = rest;
+      if (cas((StgVolatilePtr) &nonmovingHeap.free,
+              (StgWord) rest,
+              (StgWord) free) == (StgWord) rest) {
+          __sync_add_and_fetch(&nonmovingHeap.n_free, new_length);
+          break;
+      }
+    }
+  }
+}
+
 void nonmovingInitAllocator(struct NonmovingAllocator* alloc, uint16_t block_size)
 {
   *alloc = (struct NonmovingAllocator)
@@ -1216,6 +1304,7 @@ concurrent_marking:
     nonmovingSweepStableNameTable();
 
     nonmovingSweep();
+    nonmovingPruneFreeSegmentList();
     ASSERT(nonmovingHeap.sweep_list == NULL);
     debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
     traceConcSweepEnd();

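The grouping test in nonmovingPruneFreeSegmentList relies on megablock alignment: two segment pointers lie in the same megablock exactly when their addresses agree once the low MBLOCK_MASK bits (MBLOCK_MASK is MBLOCK_SIZE - 1) are cleared. A minimal illustrative helper, not part of the patch, assuming RTS-internal headers; `same_megablock` is a hypothetical name:

    /* Illustrative only: clearing the low MBLOCK_MASK bits of an address
     * yields the base of the megablock containing it, so two pointers share
     * a megablock iff those bases coincide. */
    static bool same_megablock(const void *a, const void *b)
    {
        return ((W_)a & ~MBLOCK_MASK) == ((W_)b & ~MBLOCK_MASK);
    }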

=====================================
rts/sm/NonMoving.h
=====================================
@@ -128,6 +128,7 @@ struct NonmovingHeap {
     // NONMOVING_MAX_FREE segments to avoid thrashing the block allocator.
     // Note that segments in this list are still counted towards
     // oldest_gen->n_blocks.
+    // TODO: Update?
     struct NonmovingSegment *free;
     // how many segments in free segment list? accessed atomically.
     unsigned int n_free;
@@ -172,6 +173,7 @@ void nonmovingCollect(StgWeak **dead_weaks,
                       bool concurrent);
 
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
+void nonmovingPruneFreeSegmentList(void);
 
 INLINE_HEADER unsigned long log2_ceil(unsigned long x)
 {


=====================================
rts/sm/NonMovingAllocate.c
=====================================
@@ -65,19 +65,36 @@ static struct NonmovingSegment *nonmovingAllocSegment(enum AllocLockMode mode, u
     struct NonmovingSegment *ret;
     ret = nonmovingPopFreeSegment();
 
-    // Nothing in the free list, allocate a new segment...
+    // Nothing in the free list, so allocate a new segment.
+    // We allocate a full megablock and add the spare segments to our free list.
+    // TODO: link note
     if (ret == NULL) {
+        // TODO: should we try again in case another thread
+        // allocated some while we were blocked?
         acquire_alloc_lock(mode);
-        bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
-        // See Note [Live data accounting in nonmoving collector].
-        oldest_gen->n_blocks += bd->blocks;
-        oldest_gen->n_words  += BLOCK_SIZE_W * bd->blocks;
+        bdescr *bd = allocMBlockAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
         release_alloc_lock(mode);
 
-        for (StgWord32 i = 0; i < bd->blocks; ++i) {
+        W_ alloc_blocks = NONMOVING_SEGMENT_BLOCKS * (BLOCKS_PER_MBLOCK / NONMOVING_SEGMENT_BLOCKS);
+
+        // See Note [Live data accounting in nonmoving collector].
+        oldest_gen->n_blocks += alloc_blocks;
+        oldest_gen->n_words  += BLOCK_SIZE_W * alloc_blocks;
+
+        for (StgWord32 i = 0; i < alloc_blocks; ++i) {
             initBdescr(&bd[i], oldest_gen, oldest_gen);
             bd[i].flags = BF_NONMOVING;
         }
+
+        // Push all but the last segment to the free segment list.
+        while(bd->link) {
+          bdescr *next_bd = bd->link;
+          bd->link = NULL;
+          nonmovingPushFreeSegment((struct NonmovingSegment *)bd->start);
+          bd = next_bd;
+        }
+
+        // Use the last segment to service the allocation.
         ret = (struct NonmovingSegment *)bd->start;
     }
 

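Continuing the worked numbers from the BlockAlloc.c change (again assuming BLOCKS_PER_MBLOCK = 252 and NONMOVING_SEGMENT_BLOCKS = 8), alloc_blocks here is 8 * (252 / 8) = 248 rather than 252: the 4 unaligned blocks that allocMBlockAlignedGroupOnNode already freed never reach oldest_gen, so they are excluded from the n_blocks and n_words accounting.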

=====================================
testsuite/tests/rts/GcStaticPointers.hs
=====================================
@@ -14,6 +14,8 @@ import Unsafe.Coerce (unsafeCoerce)
 nats :: [Integer]
 nats = [0 .. ]
 
+-- foreign import ccall "performBlockingMajorGC" performBlockingMajorGC :: IO ()
+
 -- The key of a 'StaticPtr' to some CAF.
 nats_key :: StaticKey
 nats_key = staticKey (static nats :: StaticPtr [Integer])
@@ -21,13 +23,15 @@ nats_key = staticKey (static nats :: StaticPtr [Integer])
 main = do
   let z = nats !! 400
   print z
-  performGC
+  performBlockingMajorGC
+  --print "done gc"
   addFinalizer z (putStrLn "finalizer z")
   print z
-  performGC
+  performBlockingMajorGC
+  --print "done gc"
   threadDelay 1000000
   Just p <- unsafeLookupStaticPtr nats_key
   print (deRefStaticPtr (unsafeCoerce p) !! 800 :: Integer)
   -- Uncommenting the next line keeps 'nats' alive and would prevent a segfault
   -- if 'nats' were garbage collected.
-  -- print (nats !! 900)
+  print (nats !! 900)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/9a73a6ab47b533e3ed9249c3f925df76b74b47da...6ce7951a4d5b6cdc377b0f4b4e4b9cade93bc6be
