[Git][ghc/ghc][wip/T24150] rts: Allocate non-moving segments with megablocks

Thu Feb 29 11:10:58 UTC 2024

Teo Camarasu pushed to branch wip/T24150 at Glasgow Haskell Compiler / GHC


Commits:
8680656b by Teo Camarasu at 2024-02-29T11:10:40+00:00
rts: Allocate non-moving segments with megablocks

Non-moving segments are 8 blocks long and need to be aligned.
Previously we serviced allocations by grabbing 15 blocks, finding
an aligned 8 block group in it and returning the rest.
This proved to lead to high levels of fragmentation, as de-allocating a segment
caused an 8 block gap to form, and this could not be reused for allocation.

This patch introduces a segment allocator based around using entire
megablocks to service segment allocations in bulk.

When there are no free segments, we grab an entire megablock and fill it
with aligned segments. As the megablock is free, we can easily guarantee
alignment. Any unused segments are placed on a free list.

It only makes sense to free segments in bulk when all of the segments in
a megablock are freeable. After sweeping, we grab the free list, sort it,
and find all groups of segments that together cover an entire megablock,
and free them.
This introduces a period of time when free segments are not available to
the mutator, but the risk that this would lead to excessive allocation
is low. Right after sweep, we should have an abundance of partially full
segments, and this pruning step is relatively quick.

In implementing this we drop the logic that kept NONMOVING_MAX_FREE
segments on the free list.

See Note [Segment allocation strategy]

Resolves #24150

- - - - -


5 changed files:

- rts/include/rts/storage/Block.h
- rts/sm/BlockAlloc.c
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingAllocate.c


Changes:

=====================================
rts/include/rts/storage/Block.h
=====================================
@@ -318,6 +318,10 @@ bdescr *allocGroupOnNode(uint32_t node, W_ n);
 //
 bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
 
+// Allocate an MBlock's worth of `n`-block-sized chunks aligned at an `n`-block boundary.
+// This returns a linked list of `bdescr` of length `BLOCKS_PER_MBLOCK / n`.
+bdescr *allocMBlockAlignedGroupOnNode(uint32_t node, W_ n);
+
 EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
 EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
 {


=====================================
rts/sm/BlockAlloc.c
=====================================
@@ -394,6 +394,52 @@ split_block_low (bdescr *bd, W_ n)
 }
 
 
+// A variant of `split_block_high` where we keep both blocks.
+// The returned block has size `n`, which is split off `bd`.
+static bdescr *
+split_block_high_no_free (bdescr *bd, W_ n)
+{
+    ASSERT(bd->blocks > n);
+
+    bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
+    ret->blocks = n;
+    ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W;
+    ret->link = NULL;
+
+    bd->blocks -= n;
+
+    setup_tail(ret);
+    setup_tail(bd);
+
+    return ret;
+}
+
+// Allocate an MBlock's worth of `n`-block-sized chunks aligned at an `n`-block boundary.
+// This returns a linked list of `bdescr` of length `BLOCKS_PER_MBLOCK / n`.
+// We assume relevant locks are held.
+bdescr *
+allocMBlockAlignedGroupOnNode(uint32_t node, W_ n)
+{
+    bdescr *bd = allocGroupOnNode(node, BLOCKS_PER_MBLOCK);
+
+    // Free unaligned blocks, as we can't use these.
+    ASSERT(bd->blocks == BLOCKS_PER_MBLOCK);
+    bd = split_block_high(bd, bd->blocks % n);
+    ASSERT(bd->blocks % n == 0);
+
+    bdescr *start = bd;
+    // Chain the aligned groups together into a linked-list
+    while (bd->blocks > n) {
+      bdescr *chunk;
+      chunk = split_block_high_no_free(bd, n);
+      chunk->link = bd;
+    }
+    bd->link = NULL;
+
+    return start;
+}
+
+
 /* Find a fitting block for the allocation request in the given free list.
    Returns:
      - not NULL: when an exact match was found in the free list.


=====================================
rts/sm/NonMoving.c
=====================================
@@ -248,6 +248,8 @@ static void nonmovingBumpEpoch(void) {
  *
  *  - Note [Allocator sizes] goes into detail about our choice of allocator sizes.
  *
+ *  - Note [Segment allocation strategy] explains our segment allocation strategy.
+ *
  * [ueno 2016]:
  *   Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
  *   collector for functional programs on multicore processors. SIGPLAN Not. 51,
@@ -560,6 +562,21 @@ static void nonmovingBumpEpoch(void) {
  *
  * See #23340
  *
+ * Note [Segment allocation strategy]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Non-moving segments must be aligned. In order to efficiently service these
+ * allocations, we allocate segments in bulk:
+ * we allocate an entire megablock's worth of segments at once.
+ * All unused segments are placed on the `nonmovingHeap.free` list.
+ *
+ * Symmetrically, we only de-allocate segments if all the segments in a megablock are free-able, i.e.,
+ * are on `nonmovingHeap.free`. We prune the free list in `nonmovingPruneFreeSegmentList`.
+ * Note that during pruning of the free list, free segments are not available for use by the
+ * mutator. This might lead to extra allocation of segments. But the risk is low, as after sweep
+ * there is usually a large number of partially full segments, and pruning the free list is quite
+ * quick.
+ *
+ * See #24150
  */
 
 memcount nonmoving_segment_live_words = 0;
@@ -578,19 +595,6 @@ static void nonmovingExitConcurrentWorker(void);
 // Add a segment to the free list.
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
 {
-    // See Note [Live data accounting in nonmoving collector].
-    if (RELAXED_LOAD(&nonmovingHeap.n_free) > NONMOVING_MAX_FREE) {
-        bdescr *bd = Bdescr((StgPtr) seg);
-        ACQUIRE_SM_LOCK;
-        ASSERT(oldest_gen->n_blocks >= bd->blocks);
-        ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks);
-        oldest_gen->n_blocks -= bd->blocks;
-        oldest_gen->n_words  -= BLOCK_SIZE_W * bd->blocks;
-        freeGroup(bd);
-        RELEASE_SM_LOCK;
-        return;
-    }
-
     SET_SEGMENT_STATE(seg, FREE);
     while (true) {
         struct NonmovingSegment *old = nonmovingHeap.free;
@@ -601,6 +605,98 @@ void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
     __sync_add_and_fetch(&nonmovingHeap.n_free, 1);
 }
 
+static int
+cmp_segment_ptr (const void *x, const void *y)
+{
+    const struct NonMovingSegment *p1 = *(const struct NonMovingSegment**)x;
+    const struct NonMovingSegment *p2 = *(const struct NonMovingSegment**)y;
+    if (p1 > p2) return +1;
+    else if (p1 < p2) return -1;
+    else return 0;
+}
+
+// Prune the free list of segments that can be freed.
+// Segments can be freed if all segments from a mblock are on the free list.
+void nonmovingPruneFreeSegmentList(void)
+{
+  // Atomically grab the entire free list.
+  struct NonmovingSegment *free;
+  size_t length;
+  while (true) {
+    free = ACQUIRE_LOAD(&nonmovingHeap.free);
+    length = ACQUIRE_LOAD(&nonmovingHeap.n_free);
+    if (cas((StgVolatilePtr) &nonmovingHeap.free,
+            (StgWord) free,
+            (StgWord) NULL) == (StgWord) free) {
+        __sync_sub_and_fetch(&nonmovingHeap.n_free, length);
+        break;
+    }
+  }
+  // Sort the free list by address.
+  struct NonmovingSegment **sorted = stgMallocBytes(sizeof(struct NonmovingSegment*) * length, "sorted free segment list");
+  for(size_t i = 0; i<length; i++) {
+    sorted[i] = free;
+    free = free->link;
+  }
+  // we should have reached the end of the free list
+  ASSERT(free == NULL);
+
+  qsort(sorted, length, sizeof(struct NonmovingSegment*), cmp_segment_ptr);
+
+  // Walk the sorted list and either:
+  // - free segments if the entire megablock is free
+  // - put it back on the free list
+  size_t new_length = 0;
+  size_t free_in_megablock = 0;
+  // iterate through segments by megablock
+  for(size_t i = 0; i<length; i+=free_in_megablock) {
+    // count of free segments in the current megablock
+    free_in_megablock = 1;
+    for(;i + free_in_megablock < length; free_in_megablock++) {
+      if (((W_)sorted[i] & ~MBLOCK_MASK) != ((W_)sorted[i + free_in_megablock] & ~MBLOCK_MASK))
+        break;
+    }
+    if (free_in_megablock < BLOCKS_PER_MBLOCK / NONMOVING_SEGMENT_BLOCKS) {
+      // the entire block isn't free so put it back on the list
+      for(size_t j = 0; j < free_in_megablock;j++){
+        struct NonmovingSegment *last = free;
+        free = sorted[i+j];
+        free->link = last;
+        new_length++;
+      }
+    } else {
+      // the megablock is free, so let's free all the segments.
+      ACQUIRE_SM_LOCK;
+      for(size_t j = 0; j < free_in_megablock;j++){
+        bdescr *bd = Bdescr((StgPtr)sorted[i+j]);
+        freeGroup(bd);
+        // See Note [Live data accounting in nonmoving collector].
+        oldest_gen->n_blocks -= bd->blocks;
+        oldest_gen->n_words  -= BLOCK_SIZE_W * bd->blocks;
+      }
+      RELEASE_SM_LOCK;
+    }
+  }
+  stgFree(sorted);
+  // If we have any segments left over, then put them back on the free list.
+  if(free) {
+    struct NonmovingSegment* tail = free;
+    while(tail->link) {
+      tail = tail->link;
+    }
+    while (true) {
+      struct NonmovingSegment* rest = ACQUIRE_LOAD(&nonmovingHeap.free);
+      tail->link = rest;
+      if (cas((StgVolatilePtr) &nonmovingHeap.free,
+              (StgWord) rest,
+              (StgWord) free) == (StgWord) rest) {
+          __sync_add_and_fetch(&nonmovingHeap.n_free, new_length);
+          break;
+      }
+    }
+  }
+}
+
 void nonmovingInitAllocator(struct NonmovingAllocator* alloc, uint16_t block_size)
 {
   *alloc = (struct NonmovingAllocator)
@@ -1216,6 +1312,7 @@ concurrent_marking:
     nonmovingSweepStableNameTable();
 
     nonmovingSweep();
+    nonmovingPruneFreeSegmentList();
     ASSERT(nonmovingHeap.sweep_list == NULL);
     debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
     traceConcSweepEnd();


=====================================
rts/sm/NonMoving.h
=====================================
@@ -119,13 +119,10 @@ extern uint8_t nonmoving_alloca_dense_cnt;
 // NONMOVING_SEGMENT_SIZE (in bytes)
 extern uint8_t nonmoving_alloca_cnt;
 
-// maximum number of free segments to hold on to
-#define NONMOVING_MAX_FREE 16
-
 struct NonmovingHeap {
     struct NonmovingAllocator *allocators;
-    // free segment list. This is a cache where we keep up to
-    // NONMOVING_MAX_FREE segments to avoid thrashing the block allocator.
+    // free segment list. This is a cache where we keep segments
+    // belonging to megablocks that are only partially free.
     // Note that segments in this list are still counted towards
     // oldest_gen->n_blocks.
     struct NonmovingSegment *free;
@@ -172,6 +169,7 @@ void nonmovingCollect(StgWeak **dead_weaks,
                       bool concurrent);
 
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
+void nonmovingPruneFreeSegmentList(void);
 
 INLINE_HEADER unsigned long log2_ceil(unsigned long x)
 {


=====================================
rts/sm/NonMovingAllocate.c
=====================================
@@ -65,19 +65,42 @@ static struct NonmovingSegment *nonmovingAllocSegment(enum AllocLockMode mode, u
     struct NonmovingSegment *ret;
     ret = nonmovingPopFreeSegment();
 
-    // Nothing in the free list, allocate a new segment...
+    // Nothing in the free list, allocate a new segment.
+    // We allocate a full megablock, and add spare segments to our free list.
     if (ret == NULL) {
         acquire_alloc_lock(mode);
-        bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
-        // See Note [Live data accounting in nonmoving collector].
-        oldest_gen->n_blocks += bd->blocks;
-        oldest_gen->n_words  += BLOCK_SIZE_W * bd->blocks;
+        // Another thread might have allocated while we were waiting for the lock.
+        ret = nonmovingPopFreeSegment();
+        if (ret != NULL) {
+          release_alloc_lock(mode);
+          // Check alignment
+          ASSERT(((uintptr_t)ret % NONMOVING_SEGMENT_SIZE) == 0);
+          return ret;
+        }
+
+        bdescr *bd = allocMBlockAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
         release_alloc_lock(mode);
 
-        for (StgWord32 i = 0; i < bd->blocks; ++i) {
+        W_ alloc_blocks = NONMOVING_SEGMENT_BLOCKS * (BLOCKS_PER_MBLOCK / NONMOVING_SEGMENT_BLOCKS);
+
+        // See Note [Live data accounting in nonmoving collector].
+        oldest_gen->n_blocks += alloc_blocks;
+        oldest_gen->n_words  += BLOCK_SIZE_W * alloc_blocks;
+
+        for (StgWord32 i = 0; i < alloc_blocks; ++i) {
             initBdescr(&bd[i], oldest_gen, oldest_gen);
             bd[i].flags = BF_NONMOVING;
         }
+
+        // Push all but the last segment to the free segment list.
+        while(bd->link) {
+          bdescr *next_bd = bd->link;
+          bd->link = NULL;
+          nonmovingPushFreeSegment((struct NonmovingSegment *)bd->start);
+          bd = next_bd;
+        }
+
+        // Use the last segment to service the allocation.
         ret = (struct NonmovingSegment *)bd->start;
     }
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8680656bf7e5e6ed39becdc7702fdc2f1a7a6a63

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8680656bf7e5e6ed39becdc7702fdc2f1a7a6a63
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240229/194e44fe/attachment-0001.html>