[commit: ghc] master: Allocate blocks in the GC in batches (f4446c5)
git at git.haskell.org
git at git.haskell.org
Tue Apr 12 10:11:33 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/f4446c5b963af8f3cc1693e2feab91dbe43d5237/ghc
>---------------------------------------------------------------
commit f4446c5b963af8f3cc1693e2feab91dbe43d5237
Author: Simon Marlow <marlowsd at gmail.com>
Date: Sat Apr 9 20:49:52 2016 +0100
Allocate blocks in the GC in batches
Avoids contention for the block allocator lock in the GC; this can be
seen in the gc_alloc_block_sync counter emitted by +RTS -s.
I experimented with this a while ago, and there was already
commented-out code for it in GCUtils.c, but I've now improved it so that
it doesn't result in significantly worse memory usage.
* The old method of putting spare blocks on ws->part_list was wasteful;
the spare blocks are now shared between all generations and retained
between GCs.
* repeated allocGroup() results in fragmentation, so I switched to using
allocLargeChunk() instead, which is fragmentation-friendly; we already
use it for the same reason in nursery allocation.
>---------------------------------------------------------------
f4446c5b963af8f3cc1693e2feab91dbe43d5237
includes/rts/storage/Block.h | 3 ++-
rts/sm/GC.c | 2 +-
rts/sm/GCUtils.c | 49 ++++++++++++++++++++------------------------
rts/sm/Sanity.c | 8 ++++++--
4 files changed, 31 insertions(+), 31 deletions(-)
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index 755c817..024f78c 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -89,7 +89,8 @@ typedef struct bdescr_ {
StgPtr start; // [READ ONLY] start addr of memory
- StgPtr free; // first free byte of memory.
+ StgPtr free; // First free byte of memory.
+ // allocGroup() sets this to the value of start.
// NB. during use this value should lie
// between start and start + blocks *
// BLOCK_SIZE. Values outside this
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 02bb3bb..df73ab8 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -922,7 +922,7 @@ any_work (void)
return rtsTrue;
}
- // Check for global work in any step. We don't need to check for
+ // Check for global work in any gen. We don't need to check for
// local work, because we have already exited scavenge_loop(),
// which means there is no local work for this thread.
for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 364a10a..9ecb674 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -51,29 +51,28 @@ allocGroup_sync(nat n)
}
-#if 0
-static void
-allocBlocks_sync(nat n, bdescr **hd, bdescr **tl,
- nat gen_no, step *stp,
- StgWord32 flags)
+static nat
+allocBlocks_sync(nat n, bdescr **hd)
{
bdescr *bd;
nat i;
ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroup(n);
+ bd = allocLargeChunk(1,n);
+ // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
+ // fragmentation-friendly way.
+ n = bd->blocks;
for (i = 0; i < n; i++) {
bd[i].blocks = 1;
- bd[i].gen_no = gen_no;
- bd[i].step = stp;
- bd[i].flags = flags;
bd[i].link = &bd[i+1];
- bd[i].u.scan = bd[i].free = bd[i].start;
+ bd[i].free = bd[i].start;
}
- *hd = bd;
- *tl = &bd[n-1];
+ bd[n-1].link = NULL;
+ // We have to hold the lock until we've finished fiddling with the metadata,
+ // otherwise the block allocator can get confused.
RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ *hd = bd;
+ return n;
}
-#endif
void
freeChain_sync(bdescr *bd)
@@ -312,26 +311,22 @@ alloc_todo_block (gen_workspace *ws, nat size)
}
else
{
- // blocks in to-space get the BF_EVACUATED flag.
-
-// allocBlocks_sync(16, &hd, &tl,
-// ws->step->gen_no, ws->step, BF_EVACUATED);
-//
-// tl->link = ws->part_list;
-// ws->part_list = hd->link;
-// ws->n_part_blocks += 15;
-//
-// bd = hd;
-
if (size > BLOCK_SIZE_W) {
bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
/ BLOCK_SIZE);
} else {
- bd = allocBlock_sync();
+ if (gct->free_blocks) {
+ bd = gct->free_blocks;
+ gct->free_blocks = bd->link;
+ } else {
+ allocBlocks_sync(16, &bd);
+ gct->free_blocks = bd->link;
+ }
}
- initBdescr(bd, ws->gen, ws->gen->to);
+ // blocks in to-space get the BF_EVACUATED flag.
bd->flags = BF_EVACUATED;
- bd->u.scan = bd->free = bd->start;
+ bd->u.scan = bd->start;
+ initBdescr(bd, ws->gen, ws->gen->to);
}
bd->link = NULL;
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 7ce1183..1f4c492 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -770,6 +770,7 @@ findMemoryLeak (void)
}
for (i = 0; i < n_capabilities; i++) {
+ markBlocks(gc_threads[i]->free_blocks);
markBlocks(capabilities[i]->pinned_object_block);
}
@@ -841,7 +842,7 @@ memInventory (rtsBool show)
nat g, i;
W_ gen_blocks[RtsFlags.GcFlags.generations];
W_ nursery_blocks, retainer_blocks,
- arena_blocks, exec_blocks;
+ arena_blocks, exec_blocks, gc_free_blocks = 0;
W_ live_blocks = 0, free_blocks = 0;
rtsBool leak;
@@ -864,6 +865,7 @@ memInventory (rtsBool show)
nursery_blocks += nurseries[i].n_blocks;
}
for (i = 0; i < n_capabilities; i++) {
+ gc_free_blocks += countBlocks(gc_threads[i]->free_blocks);
if (capabilities[i]->pinned_object_block != NULL) {
nursery_blocks += capabilities[i]->pinned_object_block->blocks;
}
@@ -891,7 +893,7 @@ memInventory (rtsBool show)
live_blocks += gen_blocks[g];
}
live_blocks += nursery_blocks +
- + retainer_blocks + arena_blocks + exec_blocks;
+ + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks;
#define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
@@ -916,6 +918,8 @@ memInventory (rtsBool show)
arena_blocks, MB(arena_blocks));
debugBelch(" exec : %5" FMT_Word " blocks (%6.1lf MB)\n",
exec_blocks, MB(exec_blocks));
+ debugBelch(" GC free pool : %5" FMT_Word " blocks (%6.1lf MB)\n",
+ gc_free_blocks, MB(gc_free_blocks));
debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n",
free_blocks, MB(free_blocks));
debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n",
More information about the ghc-commits
mailing list