[Git][ghc/ghc][wip/T25459] rts: Increase gen_workspace alignment to 128 bytes on AArch64

Ben Gamari (@bgamari) gitlab at gitlab.haskell.org
Fri Nov 15 15:51:58 UTC 2024



Ben Gamari pushed to branch wip/T25459 at Glasgow Haskell Compiler / GHC


Commits:
6e07238e by Ben Gamari at 2024-11-15T10:51:15-05:00
rts: Increase gen_workspace alignment to 128 bytes on AArch64

Increase to match the 128-byte cache-line size of Apple's ARMv8
implementation.

Closes #25459.

- - - - -


6 changed files:

- rts/Capability.h
- rts/Task.h
- rts/include/rts/Config.h
- rts/include/rts/storage/HeapAlloc.h
- rts/sm/GC.c
- rts/sm/GCThread.h


Changes:

=====================================
rts/Capability.h
=====================================
@@ -29,11 +29,7 @@
 
 // We never want a Capability to overlap a cache line with
 // anything else, so round it up to a cache line size:
-#if defined(s390x_HOST_ARCH)
-#define CAPABILITY_ALIGNMENT 256
-#else
-#define CAPABILITY_ALIGNMENT 64
-#endif
+#define CAPABILITY_ALIGNMENT CACHELINE_SIZE
 
 /* A forward declaration of the per-capability data structures belonging to
  * the I/O manager. It is opaque and only passed by pointer, so the full


=====================================
rts/Task.h
=====================================
@@ -314,18 +314,12 @@ typedef StgWord64 TaskId;
 //
 #if defined(THREADED_RTS)
 INLINE_HEADER TaskId serialiseTaskId (OSThreadId taskID) {
-#if defined(freebsd_HOST_OS) || defined(darwin_HOST_OS)
-    // Here OSThreadId is a pthread_t and pthread_t is a pointer, but within
+    // OSThreadId may be a pthread_t, which may be a pointer type, but within
     // the process we can still use that pointer value as a unique id.
-    return (TaskId) (size_t) taskID;
-#else
-    // On Windows, Linux and others it's an integral type to start with.
-    return (TaskId) taskID;
-#endif
+    return (TaskId) (uintptr_t) taskID;
 }
 #endif
 
-//
 // Get a serialisable Id for the Task's OS thread
 // Needed mainly for logging since the OSThreadId is an opaque type
 INLINE_HEADER TaskId
@@ -334,7 +328,7 @@ serialisableTaskId (Task *task)
 #if defined(THREADED_RTS)
     return serialiseTaskId(task->id);
 #else
-    return (TaskId) (size_t) task;
+    return (TaskId) (uintptr_t) task;
 #endif
 }
 


=====================================
rts/include/rts/Config.h
=====================================
@@ -82,3 +82,19 @@ code.
 #else
 #define MAX_N_CAPABILITIES 1
 #endif
+
+// The host's cache-line size.
+// We use 128 bytes on AArch64 as this is the cache-line size of new Apple
+// ARMv8 platforms.
+//
+// At some point we may want to determine this via `configure`.
+#if defined(s390x_HOST_ARCH)
+#define CACHELINE_SIZE 256
+#elif defined(aarch64_HOST_ARCH)
+#define CACHELINE_SIZE 128
+#elif defined(x86_64_HOST_ARCH)
+#define CACHELINE_SIZE 64
+#else
+#define CACHELINE_SIZE 64
+#endif
+


=====================================
rts/include/rts/storage/HeapAlloc.h
=====================================
@@ -59,7 +59,7 @@ extern SpinLock gc_alloc_block_sync;
 struct mblock_address_range {
     W_ begin, end;
     W_ padding[6];  // ensure nothing else inhabits this cache line
-} ATTRIBUTE_ALIGNED(64);
+} ATTRIBUTE_ALIGNED(CACHELINE_SIZE);
 extern struct mblock_address_range mblock_address_space;
 
 # define HEAP_ALLOCED(p)        ((W_)(p) >= mblock_address_space.begin && \


=====================================
rts/sm/GC.c
=====================================
@@ -153,9 +153,9 @@ static Condition gc_exit_arrived_cv;
 static Condition gc_exit_leave_now_cv;
 
 #else // THREADED_RTS
-// Must be aligned to 64-bytes to meet stated 64-byte alignment of gen_workspace
+// Must match the alignment of gen_workspace.
 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
-    ATTRIBUTE_ALIGNED(64);
+    ATTRIBUTE_ALIGNED(GEN_WORKSPACE_ALIGNMENT);
 #endif // THREADED_RTS
 
 /* Note [n_gc_threads]


=====================================
rts/sm/GCThread.h
=====================================
@@ -75,6 +75,14 @@
 
    ------------------------------------------------------------------------- */
 
+// Align gen_workspace so that:
+//  * no two threads' workspaces fall in the same cache-line
+//  * computing gct->gens[n] is a shift, not a multiply; this
+//    fails if the size is <64, which is why gen_workspace is padded
+// The alignment is CACHELINE_SIZE, which is 128 bytes on AArch64 as this is
+// the cache-line size of new Apple ARMv8 platforms.
+#define GEN_WORKSPACE_ALIGNMENT CACHELINE_SIZE
+
 typedef struct gen_workspace_ {
     generation * gen;           // the gen for this workspace
     struct gc_thread_ * my_gct; // the gc_thread that contains this workspace
@@ -101,9 +109,7 @@ typedef struct gen_workspace_ {
     bdescr *     part_list;
     StgWord      n_part_blocks;      // count of above
     StgWord      n_part_words;
-} gen_workspace ATTRIBUTE_ALIGNED(64);
-// align so that computing gct->gens[n] is a shift, not a multiply
-// fails if the size is <64, which is why we need the pad above
+} gen_workspace ATTRIBUTE_ALIGNED(GEN_WORKSPACE_ALIGNMENT);
 
 /* ----------------------------------------------------------------------------
    GC thread object



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6e07238e75fd702c68ea7139cc5389ffe27c72ae

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6e07238e75fd702c68ea7139cc5389ffe27c72ae
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241115/319d8d23/attachment-0001.html>


More information about the ghc-commits mailing list