[Git][ghc/ghc][master] 3 commits: docs: move -xn flag beside --nonmoving-gc
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Tue Sep 12 08:31:32 UTC 2023
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
98166389 by Teo Camarasu at 2023-09-12T04:30:54-04:00
docs: move -xn flag beside --nonmoving-gc
It makes sense to have these beside each other as they are aliases.
- - - - -
f367835c by Teo Camarasu at 2023-09-12T04:30:55-04:00
nonmoving: introduce a family of dense allocators
Supplement the existing power-of-2-sized nonmoving allocators with a family
of dense allocators up to a configurable threshold.
This should reduce waste from rounding up block sizes while keeping the
number of allocator sizes manageable.
This patch:
- Adds a new configuration option `--nonmoving-dense-allocator-count`
to control the number of these new dense allocators.
- Adds some constants to `NonmovingAllocator` in order to keep
marking fast with the new allocators.
Resolves #23340
- - - - -
2b07bf2e by Teo Camarasu at 2023-09-12T04:30:55-04:00
Add changelog entry for #23340
- - - - -
17 changed files:
- docs/users_guide/9.10.1-notes.rst
- docs/users_guide/eventlog-formats.rst
- docs/users_guide/runtime_control.rst
- rts/RtsFlags.c
- rts/Trace.c
- rts/Trace.h
- rts/eventlog/EventLog.c
- rts/eventlog/EventLog.h
- rts/gen_event_types.py
- rts/include/rts/Flags.h
- rts/include/rts/storage/Block.h
- rts/sm/NonMoving.c
- rts/sm/NonMoving.h
- rts/sm/NonMovingAllocate.c
- rts/sm/NonMovingCensus.c
- rts/sm/Sanity.c
- rts/sm/Storage.c
Changes:
=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -60,6 +60,11 @@ GHCi
Runtime system
~~~~~~~~~~~~~~
+- Internal fragmentation incurred by the non-moving GC's allocator has been reduced for small objects.
+ In one real-world application, this has reduced resident set size by about 20% and modestly improved run-time.
+ See :ghc-ticket:`23340`.
+ :rts-flag:`--nonmoving-dense-allocator-count=⟨count⟩` has been added to fine-tune this behaviour.
+
``base`` library
~~~~~~~~~~~~~~~~
=====================================
docs/users_guide/eventlog-formats.rst
=====================================
@@ -922,7 +922,7 @@ heap.
:tag: 207
:length: fixed
- :field Word8: base-2 logarithm of *blk_sz*.
+ :field Word16: *blk_sz* in bytes.
:field Word32: number of active segments.
:field Word32: number of filled segments.
:field Word32: number of live blocks.
=====================================
docs/users_guide/runtime_control.rst
=====================================
@@ -411,6 +411,29 @@ performance.
Note that :rts-flag:`--nonmoving-gc` cannot be used with ``-G1``,
:rts-flag:`profiling <-hc>` nor :rts-flag:`-c`.
+.. rts-flag:: -xn
+
+ :default: off
+ :since: 8.10.1
+
+ An alias for :rts-flag:`--nonmoving-gc`
+
+.. rts-flag:: --nonmoving-dense-allocator-count=⟨count⟩
+
+ :default: 16
+ :since: 9.10.1
+ :reverse: none
+
+ Specify the number of dense allocators used by the non-moving garbage collector.
+
+ Increasing this value is likely to decrease the amount of memory lost to
+ internal fragmentation while marginally increasing the baseline memory requirements
+ and potentially regressing other metrics.
+
+ Large values are likely to lead to diminishing returns as, in practice,
+ the Haskell heap tends to be dominated by small objects.
+
+
.. rts-flag:: -w
:default: off
@@ -422,13 +445,6 @@ performance.
(:rts-flag:`-hT`) unless linked against the profiling runtime system with
:ghc-flag:`-prof`.
-.. rts-flag:: -xn
-
- :default: off
- :since: 8.10.1
-
- An alias for :rts-flag:`--nonmoving-gc`
-
.. rts-flag:: -A ⟨size⟩
:default: 4MB
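
A quick usage sketch, not taken from the patch: the new count flag only matters when the non-moving collector is in use, so for a hypothetical binary ./app linked with -rtsopts one might run

    ./app +RTS --nonmoving-gc --nonmoving-dense-allocator-count=32 -RTS

which doubles the default of 16 dense allocators and, on a 64-bit machine, extends the exact-fit block sizes from 128 bytes up to 8 + 31*8 = 256 bytes.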
=====================================
rts/RtsFlags.c
=====================================
@@ -166,6 +166,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.oldGenFactor = 2;
RtsFlags.GcFlags.returnDecayFactor = 4;
RtsFlags.GcFlags.useNonmoving = false;
+ RtsFlags.GcFlags.nonmovingDenseAllocatorCount = 16;
RtsFlags.GcFlags.generations = 2;
RtsFlags.GcFlags.squeezeUpdFrames = true;
RtsFlags.GcFlags.compact = false;
@@ -1028,6 +1029,17 @@ error = true;
OPTION_SAFE;
RtsFlags.GcFlags.useNonmoving = true;
}
+ else if (!strncmp("nonmoving-dense-allocator-count=",
+ &rts_argv[arg][2], 32)) {
+ OPTION_SAFE;
+ int32_t threshold = strtol(rts_argv[arg]+34, (char **) NULL, 10);
+ if (threshold < 1 || threshold > (uint16_t)-1) {
+ errorBelch("bad value for --nonmoving-dense-allocator-count");
+ error = true;
+ } else {
+ RtsFlags.GcFlags.nonmovingDenseAllocatorCount = threshold;
+ }
+ }
#if defined(THREADED_RTS)
#if defined(mingw32_HOST_OS)
else if (!strncmp("io-manager-threads",
=====================================
rts/Trace.c
=====================================
@@ -918,11 +918,11 @@ void traceConcUpdRemSetFlush(Capability *cap)
postConcUpdRemSetFlush(cap);
}
-void traceNonmovingHeapCensus(uint32_t log_blk_size,
+void traceNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census)
{
if (eventlog_enabled && TRACE_nonmoving_gc)
- postNonmovingHeapCensus(log_blk_size, census);
+ postNonmovingHeapCensus(blk_size, census);
}
void traceThreadStatus_ (StgTSO *tso USED_IF_DEBUG)
=====================================
rts/Trace.h
=====================================
@@ -329,7 +329,7 @@ void traceConcSyncEnd(void);
void traceConcSweepBegin(void);
void traceConcSweepEnd(void);
void traceConcUpdRemSetFlush(Capability *cap);
-void traceNonmovingHeapCensus(uint32_t log_blk_size,
+void traceNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census);
void traceIPE(const InfoProvEnt *ipe);
=====================================
rts/eventlog/EventLog.c
=====================================
@@ -1132,12 +1132,12 @@ void postConcMarkEnd(StgWord32 marked_obj_count)
RELEASE_LOCK(&eventBufMutex);
}
-void postNonmovingHeapCensus(int log_blk_size,
+void postNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census)
{
ACQUIRE_LOCK(&eventBufMutex);
postEventHeader(&eventBuf, EVENT_NONMOVING_HEAP_CENSUS);
- postWord8(&eventBuf, log_blk_size);
+ postWord16(&eventBuf, blk_size);
postWord32(&eventBuf, census->n_active_segs);
postWord32(&eventBuf, census->n_filled_segs);
postWord32(&eventBuf, census->n_live_blocks);
=====================================
rts/eventlog/EventLog.h
=====================================
@@ -194,7 +194,7 @@ void postIPE(const InfoProvEnt *ipe);
void postConcUpdRemSetFlush(Capability *cap);
void postConcMarkEnd(StgWord32 marked_obj_count);
-void postNonmovingHeapCensus(int log_blk_size,
+void postNonmovingHeapCensus(uint16_t blk_size,
const struct NonmovingAllocCensus *census);
#if defined(TICKY_TICKY)
=====================================
rts/gen_event_types.py
=====================================
@@ -131,7 +131,7 @@ event_types = [
EventType(204, 'CONC_SWEEP_BEGIN', [], 'Begin concurrent sweep phase'),
EventType(205, 'CONC_SWEEP_END', [], 'End concurrent sweep phase'),
EventType(206, 'CONC_UPD_REM_SET_FLUSH', [CapNo], 'Update remembered set flushed'),
- EventType(207, 'NONMOVING_HEAP_CENSUS', [Word8, Word32, Word32, Word32], 'Nonmoving heap census'),
+ EventType(207, 'NONMOVING_HEAP_CENSUS', [Word16, Word32, Word32, Word32], 'Nonmoving heap census'),
# Ticky-ticky profiling
EventType(210, 'TICKY_COUNTER_DEF', VariableLength, 'Ticky-ticky entry counter definition'),
=====================================
rts/include/rts/Flags.h
=====================================
@@ -54,6 +54,7 @@ typedef struct _GC_FLAGS {
double pcFreeHeap;
bool useNonmoving; // default = false
+ uint16_t nonmovingDenseAllocatorCount; // Number of dense nonmoving allocators. See Note [Allocator sizes]
uint32_t generations;
bool squeezeUpdFrames;
=====================================
rts/include/rts/storage/Block.h
=====================================
@@ -87,7 +87,8 @@
struct NonmovingSegmentInfo {
- StgWord8 log_block_size;
+ StgWord16 allocator_idx; // nonmovingHeap.allocators[allocator_idx] is
+ // this segment's allocator.
StgWord16 next_free_snap;
};
=====================================
rts/sm/NonMoving.c
=====================================
@@ -33,6 +33,8 @@
struct NonmovingHeap nonmovingHeap;
uint8_t nonmovingMarkEpoch = 1;
+uint8_t nonmoving_alloca_dense_cnt;
+uint8_t nonmoving_alloca_cnt;
static void nonmovingBumpEpoch(void) {
nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
@@ -244,6 +246,8 @@ static void nonmovingBumpEpoch(void) {
* - Note [Sync phase marking budget] describes how we avoid long mutator
* pauses during the sync phase
*
+ * - Note [Allocator sizes] goes into detail about our choice of allocator sizes.
+ *
* [ueno 2016]:
* Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
* collector for functional programs on multicore processors. SIGPLAN Not. 51,
@@ -527,6 +531,35 @@ static void nonmovingBumpEpoch(void) {
* TODO: Perhaps sync_phase_marking_budget should be controllable via a
* command-line argument?
*
+ *
+ * Note [Allocator sizes]
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ * Our choice of allocator sizes has to balance several considerations:
+ * - Allocator sizes should be available for the most commonly requested block sizes,
+ * in order to avoid excessive waste from rounding up to the next size (internal fragmentation).
+ * - It should be possible to efficiently determine which allocator services
+ * a certain block size.
+ * - The number of allocators should be kept down to avoid overheads
+ * (e.g., each capability must have an allocator of each size)
+ * and the risk of fragmentation.
+ * - It should be possible to efficiently divide by the allocator size.
+ * This is necessary to implement marking efficiently. It's trivial
+ * to efficiently divide by powers of 2. But to do so efficiently with
+ * arbitrary allocator sizes, we need to do some precomputation and make
+ * use of the integer division by constants optimisation.
+ *
+ * We currently try to balance these considerations by adopting the following scheme.
+ * We have nonmoving_alloca_dense_cnt "dense" allocators starting with size
+ * NONMOVING_ALLOCA0, and incrementing by NONMOVING_ALLOCA_DENSE_INCREMENT.
+ * These service the vast majority of allocations.
+ * In practice, Haskell programs tend to allocate a lot of small objects.
+ *
+ * Other allocations are handled by a family of "sparse" allocators, each providing
+ * blocks up to a power of 2. This places an upper bound on the waste at half the
+ * required block size.
+ *
+ * See #23340
+ *
*/
memcount nonmoving_segment_live_words = 0;
@@ -535,6 +568,8 @@ memcount nonmoving_segment_live_words = 0;
MarkBudget sync_phase_marking_budget = 200000;
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads, bool concurrent);
+static void nonmovingInitAllocator(struct NonmovingAllocator* alloc, uint16_t block_size);
+static void nonmovingInitAllocators(void);
static void nonmovingInitConcurrentWorker(void);
static void nonmovingStartConcurrentMark(MarkQueue *roots);
@@ -566,23 +601,42 @@ void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
__sync_add_and_fetch(&nonmovingHeap.n_free, 1);
}
-unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
+void nonmovingInitAllocator(struct NonmovingAllocator* alloc, uint16_t block_size)
{
- // We compute the overwhelmingly common size cases directly to avoid a very
- // expensive integer division.
- switch (log_block_size) {
- case 3: return nonmovingBlockCount(3);
- case 4: return nonmovingBlockCount(4);
- case 5: return nonmovingBlockCount(5);
- case 6: return nonmovingBlockCount(6);
- case 7: return nonmovingBlockCount(7);
- default: return nonmovingBlockCount(log_block_size);
- }
+ *alloc = (struct NonmovingAllocator)
+ { .filled = NULL,
+ .saved_filled = NULL,
+ .active = NULL,
+ .block_size = block_size,
+ .block_count = nonmovingBlockCount(block_size),
+ .block_division_constant = ((uint32_t) -1) / block_size + 1
+ };
}
+void nonmovingInitAllocators(void)
+{
+ nonmoving_alloca_dense_cnt = RtsFlags.GcFlags.nonmovingDenseAllocatorCount;
+ uint16_t first_sparse_allocator = nonmoving_first_sparse_allocator_size();
+ uint16_t nonmoving_alloca_sparse_cnt = log2_ceil(NONMOVING_SEGMENT_SIZE) - first_sparse_allocator;
+ nonmoving_alloca_cnt = nonmoving_alloca_dense_cnt + nonmoving_alloca_sparse_cnt;
+
+ nonmovingHeap.allocators = stgMallocBytes(sizeof(struct NonmovingAllocator) * nonmoving_alloca_cnt, "allocators array");
+
+ // Initialise allocator sizes
+ for (unsigned int i = 0; i < nonmoving_alloca_dense_cnt; i++) {
+ nonmovingInitAllocator(&nonmovingHeap.allocators[i], NONMOVING_ALLOCA0 + i * sizeof(StgWord));
+ }
+ for (unsigned int i = nonmoving_alloca_dense_cnt; i < nonmoving_alloca_cnt; i++) {
+ uint16_t block_size = 1 << (i + first_sparse_allocator - nonmoving_alloca_dense_cnt);
+ nonmovingInitAllocator(&nonmovingHeap.allocators[i], block_size);
+ }
+}
+
+
void nonmovingInit(void)
{
if (! RtsFlags.GcFlags.useNonmoving) return;
+ nonmovingInitAllocators();
nonmovingInitConcurrentWorker();
nonmovingMarkInit();
}
@@ -606,7 +660,7 @@ static void nonmovingPrepareMark(void)
nonmovingHeap.n_caps = n_capabilities;
nonmovingBumpEpoch();
- for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ for (int alloca_idx = 0; alloca_idx < nonmoving_alloca_cnt; ++alloca_idx) {
struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
// Update current segments' snapshot pointers
@@ -990,7 +1044,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
// Walk the list of filled segments that we collected during preparation,
// updated their snapshot pointers and move them to the sweep list.
- for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ for (int alloca_idx = 0; alloca_idx < nonmoving_alloca_cnt; ++alloca_idx) {
struct NonmovingSegment *filled = nonmovingHeap.allocators[alloca_idx].saved_filled;
if (filled) {
struct NonmovingSegment *seg = filled;
@@ -1220,7 +1274,7 @@ void assert_in_nonmoving_heap(StgPtr p)
}
}
- for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ for (int alloca_idx = 0; alloca_idx < nonmoving_alloca_cnt; ++alloca_idx) {
struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
// Search current segments
@@ -1259,13 +1313,12 @@ void assert_in_nonmoving_heap(StgPtr p)
void nonmovingPrintSegment(struct NonmovingSegment *seg)
{
int num_blocks = nonmovingSegmentBlockCount(seg);
- uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);
+ uint16_t block_size = nonmovingSegmentBlockSize(seg);
- debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
+ debugBelch("Segment with %d blocks of size: %d bytes, %u words, scan: %p\n",
num_blocks,
- log_block_size,
- 1 << log_block_size,
- (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
+ block_size,
+ (unsigned int) ROUNDUP_BYTES_TO_WDS(block_size),
(void*)Bdescr((P_)seg)->u.scan);
for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
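
The block_division_constant initialised above implements the usual multiply-by-reciprocal trick, so that marking can divide a within-segment offset by an arbitrary (not necessarily power-of-two) block size without an integer division. A minimal standalone sketch of the identity it relies on, not RTS code, assuming 16-bit numerators as is the case for offsets within a nonmoving segment:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* (x * c) >> 32 == x / d for c = UINT32_MAX / d + 1, checked for every
     * 16-bit numerator x and every block size d that is a multiple of 8 up
     * to 2048; c mirrors block_division_constant above. */
    int main(void) {
        for (uint32_t d = 8; d <= 2048; d += 8) {
            uint32_t c = UINT32_MAX / d + 1;
            for (uint32_t x = 0; x <= 0xFFFF; x++) {
                assert((uint32_t)(((uint64_t)x * c) >> 32) == x / d);
            }
        }
        printf("division-by-constant identity holds\n");
        return 0;
    }

The shift of 32 is NONMOVING_ALLOCA_DIVIDE_SHIFT; the identity holds because the numerator (an offset within a segment) stays well below 2^32 divided by the block size.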
=====================================
rts/sm/NonMoving.h
=====================================
@@ -70,10 +70,11 @@ struct NonmovingSegment {
// N.B. There are also bits of information which are stored in the
// NonmovingBlockInfo stored in the segment's block descriptor. Namely:
//
- // * the block size can be found in nonmovingBlockInfo(seg)->log_block_size.
// * the next_free snapshot can be found in
// nonmovingBlockInfo(seg)->next_free_snap.
//
+ // Some other information about the block size is stored on NonmovingAllocator.
+ //
// This allows us to mark a nonmoving closure without bringing the
// NonmovingSegment header into cache.
};
@@ -88,20 +89,41 @@ struct NonmovingAllocator {
struct NonmovingSegment *saved_filled;
struct NonmovingSegment *active;
// N.B. Per-capabilty "current" segment lives in Capability
+
+ // The size of each block for this allocator.
+ StgWord16 block_size;
+ // The number of blocks in a segment of this allocator.
+ // See nonmovingBlockCount for how this is calculated.
+ StgWord16 block_count;
+ // A constant for implementing the "division by a constant" optimisation.
+ // Invariant:
+ // (x * block_division_constant >> NONMOVING_ALLOCA_DIVIDE_SHIFT)
+ // = x / block_size
+ StgWord32 block_division_constant;
};
-// first allocator is of size 2^NONMOVING_ALLOCA0 (in bytes)
-#define NONMOVING_ALLOCA0 3
+// first allocator is of size NONMOVING_ALLOCA0 (in bytes)
+#define NONMOVING_ALLOCA0 8
+
+// used in conjunction with NonmovingAllocator.block_division_constant
+// to implement the "division by a constant" optimisation
+#define NONMOVING_ALLOCA_DIVIDE_SHIFT 32
+
+// Number of dense allocators.
+// These cover block sizes starting with NONMOVING_ALLOCA0
+// and increase in increments of NONMOVING_ALLOCA_INCREMENT
+extern uint8_t nonmoving_alloca_dense_cnt;
-// allocators cover block sizes of 2^NONMOVING_ALLOCA0 to
-// 2^(NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT) (in bytes)
-#define NONMOVING_ALLOCA_CNT 12
+// Total number of allocators (dense and sparse).
+// allocators cover block sizes of NONMOVING_ALLOCA0 to
+// NONMOVING_SEGMENT_SIZE (in bytes)
+extern uint8_t nonmoving_alloca_cnt;
// maximum number of free segments to hold on to
#define NONMOVING_MAX_FREE 16
struct NonmovingHeap {
- struct NonmovingAllocator allocators[NONMOVING_ALLOCA_CNT];
+ struct NonmovingAllocator *allocators;
// free segment list. This is a cache where we keep up to
// NONMOVING_MAX_FREE segments to avoid thrashing the block allocator.
// Note that segments in this list are still counted towards
@@ -151,19 +173,44 @@ void nonmovingCollect(StgWeak **dead_weaks,
void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
+INLINE_HEADER unsigned long log2_ceil(unsigned long x)
+{
+ return (sizeof(unsigned long)*8) - __builtin_clzl(x-1);
+}
+
INLINE_HEADER struct NonmovingSegmentInfo *nonmovingSegmentInfo(struct NonmovingSegment *seg) {
return &Bdescr((StgPtr) seg)->nonmoving_segment;
}
-INLINE_HEADER uint8_t nonmovingSegmentLogBlockSize(struct NonmovingSegment *seg) {
- return nonmovingSegmentInfo(seg)->log_block_size;
+// Find the allocator a segment belongs to
+INLINE_HEADER struct NonmovingAllocator nonmovingSegmentAllocator(struct NonmovingSegment *seg) {
+ return nonmovingHeap.allocators[nonmovingSegmentInfo(seg)->allocator_idx];
+}
+
+// Determine the index of the allocator for blocks of a certain size
+INLINE_HEADER uint8_t nonmovingAllocatorForSize(uint16_t block_size){
+ if (block_size - NONMOVING_ALLOCA0 < nonmoving_alloca_dense_cnt * (uint16_t) sizeof(StgWord)) {
+ // dense case
+ return (block_size - NONMOVING_ALLOCA0) / sizeof(StgWord);
+ }
+ else {
+ // sparse case
+ return log2_ceil(block_size)
+ - log2_ceil(NONMOVING_ALLOCA0 + sizeof(StgWord) * nonmoving_alloca_dense_cnt)
+ + nonmoving_alloca_dense_cnt;
+ }
+}
+
+// The block size of a given segment in bytes.
+INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg)
+{
+ return nonmovingSegmentAllocator(seg).block_size;
}
// Add a segment to the appropriate active list.
INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
{
- struct NonmovingAllocator *alloc =
- &nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+ struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[nonmovingAllocatorForSize(nonmovingSegmentBlockSize(seg))];
SET_SEGMENT_STATE(seg, ACTIVE);
while (true) {
struct NonmovingSegment *current_active = RELAXED_LOAD(&alloc->active);
@@ -177,8 +224,7 @@ INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
// Add a segment to the appropriate filled list.
INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
{
- struct NonmovingAllocator *alloc =
- &nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
+ struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[nonmovingAllocatorForSize(nonmovingSegmentBlockSize(seg))];
SET_SEGMENT_STATE(seg, FILLED);
while (true) {
struct NonmovingSegment *current_filled = (struct NonmovingSegment*) RELAXED_LOAD(&alloc->filled);
@@ -197,52 +243,43 @@ INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
//
void assert_in_nonmoving_heap(StgPtr p);
-// The block size of a given segment in bytes.
-INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg)
-{
- return 1 << nonmovingSegmentLogBlockSize(seg);
-}
-
// How many blocks does a segment with the given block size have?
-INLINE_HEADER unsigned int nonmovingBlockCount(uint8_t log_block_size)
+INLINE_HEADER unsigned int nonmovingBlockCount(uint16_t block_size)
{
unsigned int segment_data_size = NONMOVING_SEGMENT_SIZE - sizeof(struct NonmovingSegment);
segment_data_size -= segment_data_size % SIZEOF_VOID_P;
- unsigned int blk_size = 1 << log_block_size;
// N.B. +1 accounts for the byte in the mark bitmap.
- return segment_data_size / (blk_size + 1);
+ unsigned int block_count = segment_data_size / (block_size + 1);
+ ASSERT(block_count < 0xfff); // must fit into StgWord16
+ return block_count;
}
-unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
-
// How many blocks does the given segment contain? Also the size of the bitmap.
INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
{
- return nonmovingBlockCountFromSize(nonmovingSegmentLogBlockSize(seg));
+ return nonmovingSegmentAllocator(seg).block_count;
}
// Get a pointer to the given block index assuming that the block size is as
// given (avoiding a potential cache miss when this information is already
// available). The log_block_size argument must be equal to seg->block_size.
-INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
+INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint16_t block_size, uint16_t block_count, nonmoving_block_idx i)
{
- ASSERT(log_block_size == nonmovingSegmentLogBlockSize(seg));
- // Block size in bytes
- unsigned int blk_size = 1 << log_block_size;
+ ASSERT(block_size == nonmovingSegmentBlockSize(seg));
// Bitmap size in bytes
- W_ bitmap_size = nonmovingBlockCountFromSize(log_block_size) * sizeof(uint8_t);
+ W_ bitmap_size = block_count * sizeof(uint8_t);
// Where the actual data starts (address of the first block).
// Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that
// ROUNDUP_BYTES_TO_WDS returns in _words_, not in _bytes_, so convert it back
// back to bytes by multiplying with word size.
W_ data = ROUNDUP_BYTES_TO_WDS(((W_)seg) + sizeof(struct NonmovingSegment) + bitmap_size) * sizeof(W_);
- return (void*)(data + i*blk_size);
+ return (void*)(data + i*block_size);
}
// Get a pointer to the given block index.
INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
{
- return nonmovingSegmentGetBlock_(seg, nonmovingSegmentLogBlockSize(seg), i);
+ return nonmovingSegmentGetBlock_(seg, nonmovingSegmentBlockSize(seg), nonmovingSegmentBlockCount(seg), i);
}
// Get the segment which a closure resides in. Assumes that pointer points into
@@ -267,12 +304,23 @@ INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
return nonmovingGetSegment_unchecked(p);
}
+// Divide x by the block size of the segment.
+INLINE_HEADER uint16_t nonmovingSegmentDivideBySize(struct NonmovingSegment *seg, uint16_t x)
+{
+ return ((StgWord64) x * nonmovingSegmentAllocator(seg).block_division_constant) >> NONMOVING_ALLOCA_DIVIDE_SHIFT;
+}
+
INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
{
struct NonmovingSegment *seg = nonmovingGetSegment(p);
ptrdiff_t blk0 = (ptrdiff_t)nonmovingSegmentGetBlock(seg, 0);
ptrdiff_t offset = (ptrdiff_t)p - blk0;
- return (nonmoving_block_idx) (offset >> nonmovingSegmentLogBlockSize(seg));
+ return (nonmoving_block_idx) nonmovingSegmentDivideBySize(seg, offset);
+}
+
+INLINE_HEADER uint16_t nonmoving_first_sparse_allocator_size (void)
+{
+ return log2_ceil(NONMOVING_ALLOCA0 + (nonmoving_alloca_dense_cnt - 1) * sizeof(StgWord) + 1);
}
// TODO: Eliminate this
@@ -311,7 +359,7 @@ INLINE_HEADER bool nonmovingClosureMarkedThisCycle(StgPtr p)
INLINE_HEADER bool nonmovingSegmentBeingSwept(struct NonmovingSegment *seg)
{
struct NonmovingSegmentInfo *seginfo = nonmovingSegmentInfo(seg);
- unsigned int n = nonmovingBlockCountFromSize(seginfo->log_block_size);
+ unsigned int n = nonmovingSegmentBlockCount(seg);
return seginfo->next_free_snap >= n;
}
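
A standalone sketch, not RTS code, of how nonmovingAllocatorForSize above lays out the allocator array, assuming the defaults introduced by this patch: NONMOVING_ALLOCA0 = 8 bytes, an 8-byte StgWord, and 16 dense allocators:

    #include <stdint.h>
    #include <stdio.h>

    #define ALLOCA0   8   /* smallest block size in bytes (NONMOVING_ALLOCA0) */
    #define WORD      8   /* sizeof(StgWord) on a 64-bit machine */
    #define DENSE_CNT 16  /* default --nonmoving-dense-allocator-count */

    static unsigned log2_ceil(unsigned long x) {
        return (sizeof(unsigned long) * 8) - __builtin_clzl(x - 1);
    }

    /* Mirrors nonmovingAllocatorForSize: exact-fit index in the dense range,
     * then one allocator per power of two in the sparse tail. */
    static unsigned allocator_for_size(uint16_t block_size) {
        if (block_size - ALLOCA0 < DENSE_CNT * WORD)
            return (block_size - ALLOCA0) / WORD;
        return log2_ceil(block_size)
             - log2_ceil(ALLOCA0 + WORD * DENSE_CNT)
             + DENSE_CNT;
    }

    int main(void) {
        const uint16_t sizes[] = { 8, 16, 64, 128, 256, 512, 1024 };
        for (unsigned i = 0; i < sizeof sizes / sizeof sizes[0]; i++)
            printf("%4u-byte blocks -> allocator index %2u\n",
                   (unsigned) sizes[i], allocator_for_size(sizes[i]));
        return 0;
    }

With these defaults the dense allocators cover 8- to 128-byte blocks exactly (indices 0-15), and the first sparse allocator starts at 256 bytes, which is what nonmoving_first_sparse_allocator_size() computes.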
=====================================
rts/sm/NonMovingAllocate.c
=====================================
@@ -17,19 +17,14 @@
enum AllocLockMode { NO_LOCK, ALLOC_SPIN_LOCK, SM_LOCK };
-static inline unsigned long log2_ceil(unsigned long x);
static struct NonmovingSegment *nonmovingAllocSegment(enum AllocLockMode mode, uint32_t node);
static void nonmovingClearBitmap(struct NonmovingSegment *seg);
-static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size);
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint16_t block_size);
static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count);
static struct NonmovingSegment *nonmovingPopFreeSegment(void);
static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca);
static void *nonmovingAllocate_(enum AllocLockMode mode, Capability *cap, StgWord sz);
-static inline unsigned long log2_ceil(unsigned long x)
-{
- return (sizeof(unsigned long)*8) - __builtin_clzl(x-1);
-}
static inline void acquire_alloc_lock(enum AllocLockMode mode) {
switch (mode) {
@@ -97,14 +92,14 @@ static void nonmovingClearBitmap(struct NonmovingSegment *seg)
memset(seg->bitmap, 0, n);
}
-static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint16_t allocator_idx)
{
bdescr *bd = Bdescr((P_) seg);
seg->link = NULL;
seg->todo_link = NULL;
seg->next_free = 0;
SET_SEGMENT_STATE(seg, FREE);
- bd->nonmoving_segment.log_block_size = log_block_size;
+ bd->nonmoving_segment.allocator_idx = allocator_idx;
bd->nonmoving_segment.next_free_snap = 0;
bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
nonmovingClearBitmap(seg);
@@ -115,10 +110,10 @@ void nonmovingInitCapability(Capability *cap)
{
// Initialize current segment array
struct NonmovingSegment **segs =
- stgMallocBytes(sizeof(struct NonmovingSegment*) * NONMOVING_ALLOCA_CNT, "current segment array");
- for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ stgMallocBytes(sizeof(struct NonmovingSegment*) * nonmoving_alloca_cnt, "current segment array");
+ for (unsigned int i = 0; i < nonmoving_alloca_cnt; i++) {
segs[i] = nonmovingAllocSegment(NO_LOCK, cap->node);
- nonmovingInitSegment(segs[i], NONMOVING_ALLOCA0 + i);
+ nonmovingInitSegment(segs[i], i);
SET_SEGMENT_STATE(segs[i], CURRENT);
}
cap->current_segments = segs;
@@ -190,20 +185,27 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
static void *nonmovingAllocate_(enum AllocLockMode mode, Capability *cap, StgWord sz)
{
- unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
- unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
+ unsigned int block_size;
+ if (sz * sizeof(StgWord) <= NONMOVING_ALLOCA0 + (nonmoving_alloca_dense_cnt-1)*sizeof(StgWord)) {
+ block_size = sizeof(StgWord) * sz;
+ } else {
+ unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
+ block_size = 1 << log_block_size;
+ }
- // The max we ever allocate is 3276 bytes (anything larger is a large
+ // The max we ever allocate is NONMOVING_SEGMENT_SIZE bytes (anything larger is a large
// object and not moved) which is covered by allocator 9.
- ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
+ ASSERT(block_size < NONMOVING_SEGMENT_SIZE);
- unsigned int alloca_idx = log_block_size - NONMOVING_ALLOCA0;
+ unsigned int alloca_idx = nonmovingAllocatorForSize(block_size);
struct NonmovingAllocator *alloca = &nonmovingHeap.allocators[alloca_idx];
// Allocate into current segment
struct NonmovingSegment *current = cap->current_segments[alloca_idx];
ASSERT(current); // current is never NULL
- void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
+ ASSERT(block_size == nonmovingSegmentBlockSize(current));
+ unsigned int block_count = nonmovingSegmentBlockCount(current);
+ void *ret = nonmovingSegmentGetBlock_(current, block_size, block_count, current->next_free);
ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
// Advance the current segment's next_free or allocate a new segment if full
@@ -216,7 +218,6 @@ static void *nonmovingAllocate_(enum AllocLockMode mode, Capability *cap, StgWor
// Update live data estimate.
// See Note [Live data accounting in nonmoving collector].
unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
- unsigned int block_size = 1 << log_block_size;
atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
// push the current segment to the filled list
@@ -228,7 +229,7 @@ static void *nonmovingAllocate_(enum AllocLockMode mode, Capability *cap, StgWor
// there are no active segments, allocate new segment
if (new_current == NULL) {
new_current = nonmovingAllocSegment(mode, cap->node);
- nonmovingInitSegment(new_current, log_block_size);
+ nonmovingInitSegment(new_current, alloca_idx);
}
// make it current
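
To make the payoff of the new rounding in nonmovingAllocate_ concrete, here is a small comparison sketch, not RTS code, again assuming 8-byte words and the default 16 dense allocators: requests up to 128 bytes now keep their exact word-rounded size, whereas previously every request was rounded up to a power of two (so, for example, a 9-word / 72-byte closure used to occupy a 128-byte block and now occupies a 72-byte one):

    #include <stdio.h>

    static unsigned log2_ceil(unsigned long x) {
        return (sizeof(unsigned long) * 8) - __builtin_clzl(x - 1);
    }

    /* old scheme: always round up to the next power of two */
    static unsigned old_block_size(unsigned bytes) { return 1u << log2_ceil(bytes); }

    /* new scheme (sketch of the branch in nonmovingAllocate_ above):
     * dense exact fit up to 8 + 15*8 = 128 bytes, power of two beyond */
    static unsigned new_block_size(unsigned bytes) {
        return bytes <= 8 + 15 * 8 ? bytes : 1u << log2_ceil(bytes);
    }

    int main(void) {
        for (unsigned words = 2; words <= 20; words += 3) {
            unsigned bytes = words * 8;   /* allocation sizes are whole words */
            printf("%3u bytes: old block %4u, new block %4u\n",
                   bytes, old_block_size(bytes), new_block_size(bytes));
        }
        return 0;
    }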
=====================================
rts/sm/NonMovingCensus.c
=====================================
@@ -133,7 +133,7 @@ void nonmovingPrintAllocatorCensus(bool collect_live_words)
if (!RtsFlags.GcFlags.useNonmoving)
return;
- for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ for (int i=0; i < nonmoving_alloca_cnt; i++) {
struct NonmovingAllocCensus census =
nonmovingAllocatorCensus_(i, collect_live_words);
@@ -147,10 +147,10 @@ void nonmovingTraceAllocatorCensus(void)
if (!RtsFlags.GcFlags.useNonmoving && !TRACE_nonmoving_gc)
return;
- for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ for (int i=0; i < nonmoving_alloca_cnt; i++) {
const struct NonmovingAllocCensus census = nonmovingAllocatorCensus(i);
- const uint32_t log_blk_size = i + NONMOVING_ALLOCA0;
- traceNonmovingHeapCensus(log_blk_size, &census);
+ const uint32_t blk_size = nonmovingHeap.allocators[i].block_size;
+ traceNonmovingHeapCensus(blk_size, &census);
}
#endif
}
=====================================
rts/sm/Sanity.c
=====================================
@@ -638,7 +638,7 @@ void checkNonmovingHeap (const struct NonmovingHeap *heap)
checkLargeObjects(nonmoving_large_objects);
checkLargeObjects(nonmoving_marked_large_objects);
checkCompactObjects(nonmoving_compact_objects);
- for (unsigned int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ for (unsigned int i=0; i < nonmoving_alloca_cnt; i++) {
const struct NonmovingAllocator *alloc = &heap->allocators[i];
checkNonmovingSegments(alloc->filled);
checkNonmovingSegments(alloc->saved_filled);
@@ -1110,7 +1110,7 @@ findMemoryLeak (void)
markBlocks(nonmoving_marked_large_objects);
markBlocks(nonmoving_compact_objects);
markBlocks(nonmoving_marked_compact_objects);
- for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ for (i = 0; i < nonmoving_alloca_cnt; i++) {
struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[i];
markNonMovingSegments(alloc->filled);
markNonMovingSegments(alloc->saved_filled);
@@ -1226,7 +1226,7 @@ static W_
countNonMovingHeap(struct NonmovingHeap *heap)
{
W_ ret = 0;
- for (int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++) {
+ for (int alloc_idx = 0; alloc_idx < nonmoving_alloca_cnt; alloc_idx++) {
struct NonmovingAllocator *alloc = &heap->allocators[alloc_idx];
ret += countNonMovingSegments(alloc->filled);
ret += countNonMovingSegments(alloc->saved_filled);
=====================================
rts/sm/Storage.c
=====================================
@@ -401,7 +401,7 @@ void listAllBlocks (ListBlocksCb cb, void *user)
// list capabilities' current segments
if(RtsFlags.GcFlags.useNonmoving) {
- for (s = 0; s < NONMOVING_ALLOCA_CNT; s++) {
+ for (s = 0; s < nonmoving_alloca_cnt; s++) {
listSegmentBlocks(cb, user, getCapability(i)->current_segments[s]);
}
}
@@ -409,7 +409,7 @@ void listAllBlocks (ListBlocksCb cb, void *user)
// list blocks on the nonmoving heap
if(RtsFlags.GcFlags.useNonmoving) {
- for(s = 0; s < NONMOVING_ALLOCA_CNT; s++) {
+ for(s = 0; s < nonmoving_alloca_cnt; s++) {
listSegmentBlocks(cb, user, nonmovingHeap.allocators[s].filled);
listSegmentBlocks(cb, user, nonmovingHeap.allocators[s].saved_filled);
listSegmentBlocks(cb, user, nonmovingHeap.allocators[s].active);
@@ -2007,7 +2007,7 @@ void rts_clearMemory(void) {
nonmovingClearSegment(seg);
}
- for (int i = 0; i < NONMOVING_ALLOCA_CNT; ++i) {
+ for (int i = 0; i < nonmoving_alloca_cnt; ++i) {
struct NonmovingAllocator *alloc = &nonmovingHeap.allocators[i];
for (struct NonmovingSegment *seg = alloc->active; seg; seg = seg->link) {
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a1f0d55c91c7b180304cc5bc28671eef30f78d76...2b07bf2e8bcb24520fe78b469c3550b9f4099526