[Git][ghc/ghc][wip/andreask/stm] 3 commits: testsuite: fix req_target_smp predicate

Cheng Shao (@TerrorJack) gitlab at gitlab.haskell.org
Tue Apr 30 16:33:53 UTC 2024



Cheng Shao pushed to branch wip/andreask/stm at Glasgow Haskell Compiler / GHC


Commits:
39e33a81 by Cheng Shao at 2024-04-30T16:33:08+00:00
testsuite: fix req_target_smp predicate

- - - - -
0512df04 by Andreas Klebinger at 2024-04-30T16:33:28+00:00
STM: Remove (unused) coarse-grained locking.

The STM code had a coarse-grained locking mode, guarded by #defines, that was unused.
This commit removes the code.

- - - - -
9b954aa3 by Andreas Klebinger at 2024-04-30T16:33:28+00:00
STM: Be more optimistic when validating in-flight transactions.

* Don't lock tvars when performing non-committal validation.
* If we encounter a locked tvar, don't consider it a failure.

This means in-flight validation will only fail if committing at the
moment of validation is *guaranteed* to fail.

This prevents in-flight validation from failing spuriously when it runs in
parallel on multiple threads or in parallel with a thread that is committing.

- - - - -


13 changed files:

- hadrian/src/Settings/Builders/RunTest.hs
- rts/Exception.cmm
- rts/STM.c
- rts/STM.h
- rts/Schedule.c
- rts/include/stg/SMP.h
- testsuite/config/ghc
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/mk/test.mk
- + testsuite/tests/rts/T24142.hs
- + testsuite/tests/rts/T24142.stdout
- testsuite/tests/rts/all.T


Changes:

=====================================
hadrian/src/Settings/Builders/RunTest.hs
=====================================
@@ -282,7 +282,7 @@ runTestBuilderArgs = builder Testsuite ? do
             , arg "-e", arg $ asBool "ghc_with_dynamic_rts="  (hasDynamicRts)
             , arg "-e", arg $ asBool "config.ghc_with_threaded_rts=" (hasThreadedRts)
             , arg "-e", arg $ asBool "config.have_fast_bignum=" (bignumBackend /= "native" && not bignumCheck)
-            , arg "-e", arg $ asBool "target_with_smp=" targetWithSMP
+            , arg "-e", arg $ asBool "config.target_has_smp=" targetWithSMP
             , arg "-e", arg $ "config.ghc_dynamic=" ++ show hasDynamic
             , arg "-e", arg $ "config.leading_underscore=" ++ show leadingUnderscore
 


=====================================
rts/Exception.cmm
=====================================
@@ -495,7 +495,7 @@ retry_pop_stack:
       W_ trec, outer;
       W_ r;
       trec = StgTSO_trec(CurrentTSO);
-      (r) = ccall stmValidateNestOfTransactions(MyCapability() "ptr", trec "ptr");
+      (r) = ccall stmValidateNestOfTransactions(MyCapability() "ptr", trec "ptr", 0);
       outer  = StgTRecHeader_enclosing_trec(trec);
       ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
       ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");


=====================================
rts/STM.c
=====================================
@@ -31,10 +31,8 @@
  * interface.  In the Haskell RTS this means it is suitable only for
  * non-THREADED_RTS builds.
  *
- * STM_CG_LOCK uses coarse-grained locking -- a single 'stm lock' is acquired
- * during an invocation on the STM interface.  Note that this does not mean that
- * transactions are simply serialized -- the lock is only held *within* the
- * implementation of stmCommitTransaction, stmWait etc.
+ * STM_CG_LOCK was a historic mode that used coarse-grained locking.
+ * It has been removed; consult the git history if you are interested in it.
  *
  * STM_FG_LOCKS uses fine-grained locking -- locking is done on a per-TVar basis
  * and, when committing a transaction, no locks are acquired for TVars that have
@@ -42,19 +40,14 @@
  *
  * Concurrency control is implemented in the functions:
  *
- *    lock_stm
- *    unlock_stm
  *    lock_tvar / cond_lock_tvar
  *    unlock_tvar
  *
- * The choice between STM_UNIPROC / STM_CG_LOCK / STM_FG_LOCKS affects the
+ * The choice between STM_UNIPROC / STM_FG_LOCKS affects the
  * implementation of these functions.
  *
- * lock_stm & unlock_stm are straightforward : they acquire a simple spin-lock
- * using STM_CG_LOCK, and otherwise they are no-ops.
- *
  * lock_tvar / cond_lock_tvar and unlock_tvar are more complex because they have
- * other effects (present in STM_UNIPROC and STM_CG_LOCK builds) as well as the
+ * other effects (present in STM_UNIPROC builds) as well as the
  * actual business of manipulating a lock (present only in STM_FG_LOCKS builds).
  * This is because locking a TVar is implemented by writing the lock holder's
  * TRec into the TVar's current_value field:
@@ -167,7 +160,6 @@ static int shake(void) {
 /*......................................................................*/
 
 #define IF_STM_UNIPROC(__X)  do { } while (0)
-#define IF_STM_CG_LOCK(__X)  do { } while (0)
 #define IF_STM_FG_LOCKS(__X) do { } while (0)
 
 #if defined(STM_UNIPROC)
@@ -175,14 +167,6 @@ static int shake(void) {
 #define IF_STM_UNIPROC(__X)  do { __X } while (0)
 static const StgBool config_use_read_phase = false;
 
-static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-}
-
 static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
                              StgTRecHeader *trec STG_UNUSED,
                              StgTVar *s STG_UNUSED) {
@@ -210,64 +194,9 @@ static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
                               StgTVar *s STG_UNUSED,
                               StgClosure *expected) {
   StgClosure *result;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
-  result = ACQUIRE_LOAD(&s->current_value);
-  TRACE("%p : %s", trec, (result == expected) ? "success" : "failure");
-  return (result == expected);
-}
-#endif
-
-#if defined(STM_CG_LOCK) /*........................................*/
-
-#undef IF_STM_CG_LOCK
-#define IF_STM_CG_LOCK(__X)  do { __X } while (0)
-static const StgBool config_use_read_phase = false;
-static volatile StgTRecHeader *smp_locked = NULL;
-
-static void lock_stm(StgTRecHeader *trec) {
-  while (cas(&smp_locked, NULL, trec) != NULL) { }
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-  ASSERT(smp_locked == trec);
-  RELEASE_STORE(&smp_locked, 0);
-}
-
-static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
-                             StgTRecHeader *trec STG_UNUSED,
-                             StgTVar *s STG_UNUSED) {
-  StgClosure *result;
-  TRACE("%p : lock_tvar(%p)", trec, s);
-  ASSERT(smp_locked == trec);
+  // TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
   result = ACQUIRE_LOAD(&s->current_value);
-  return result;
-}
-
-static void *unlock_tvar(Capability *cap,
-                         StgTRecHeader *trec STG_UNUSED,
-                         StgTVar *s,
-                         StgClosure *c,
-                         StgBool force_update) {
-  TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
-  ASSERT(smp_locked == trec);
-  if (force_update) {
-    StgClosure *old_value = ACQUIRE_LOAD(&s->current_value);
-    RELEASE_STORE(&s->current_value, c);
-    dirty_TVAR(cap, s, old_value);
-  }
-}
-
-static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
-                              StgTRecHeader *trec STG_UNUSED,
-                               StgTVar *s STG_UNUSED,
-                               StgClosure *expected) {
-  StgClosure *result;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
-  ASSERT(smp_locked == trec);
-  result = ACQUIRE_LOAD(&s->current_value);
-  TRACE("%p : %d", result ? "success" : "failure");
+  // TRACE("%p : %s", trec, (result == expected) ? "success" : "failure");
   return (result == expected);
 }
 #endif
@@ -278,19 +207,11 @@ static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
 #define IF_STM_FG_LOCKS(__X) do { __X } while (0)
 static const StgBool config_use_read_phase = true;
 
-static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-}
-
 static StgClosure *lock_tvar(Capability *cap,
                              StgTRecHeader *trec,
                              StgTVar *s STG_UNUSED) {
   StgClosure *result;
-  TRACE("%p : lock_tvar(%p)", trec, s);
+  // TRACE("%p : lock_tvar(%p)", trec, s);
   do {
     const StgInfoTable *info;
     do {
@@ -313,7 +234,7 @@ static void unlock_tvar(Capability *cap,
                         StgTVar *s,
                         StgClosure *c,
                         StgBool force_update STG_UNUSED) {
-  TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
+  // TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
   ASSERT(ACQUIRE_LOAD(&s->current_value) == (StgClosure *)trec);
   RELEASE_STORE(&s->current_value, c);
   dirty_TVAR(cap, s, (StgClosure *) trec);
@@ -325,14 +246,14 @@ static StgBool cond_lock_tvar(Capability *cap,
                               StgClosure *expected) {
   StgClosure *result;
   StgWord w;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
+  // TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
   w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec);
   result = (StgClosure *)w;
   IF_NONMOVING_WRITE_BARRIER_ENABLED {
       if (result)
           updateRemembSetPushClosure(cap, expected);
   }
-  TRACE("%p : %s", trec, result ? "success" : "failure");
+  // TRACE("%p : %s", trec, result ? "success" : "failure");
   return (result == expected);
 }
 #endif
@@ -438,6 +359,8 @@ static StgTRecHeader *new_stg_trec_header(Capability *cap,
 // Allocation / deallocation functions that retain per-capability lists
 // of closures that can be re-used
 
+// TODO: I think some of these lack write barriers required by the non-moving GC.
+
 static StgTVarWatchQueue *alloc_stg_tvar_watch_queue(Capability *cap,
                                                      StgClosure *closure) {
   StgTVarWatchQueue *result = NULL;
@@ -760,6 +683,44 @@ static void revert_ownership(Capability *cap STG_UNUSED,
 
 /*......................................................................*/
 
+// validate_trec_optimistic()
+StgBool validate_trec_optimistic (Capability *cap, StgTRecHeader *trec);
+
+StgBool validate_trec_optimistic (Capability *cap, StgTRecHeader *trec) {
+  StgBool result;
+  TRACE("cap %d, trec %p : validate_trec_optimistic",
+         cap->no, trec);
+
+  if (shake()) {
+    TRACE("%p : shake, pretending trec is invalid when it may not be", trec);
+    return false;
+  }
+
+  ASSERT((trec -> state == TREC_ACTIVE) ||
+         (trec -> state == TREC_WAITING) ||
+         (trec -> state == TREC_CONDEMNED));
+  result = !((trec -> state) == TREC_CONDEMNED);
+  if (result) {
+    FOR_EACH_ENTRY(trec, e, {
+      StgTVar *s;
+      s = e -> tvar;
+      StgClosure *current = RELAXED_LOAD(&s->current_value);
+      if(current != e->expected_value &&
+          // If the tvar is locked we optimistically assume our trec will still be valid after it is unlocked.
+         (GET_INFO(UNTAG_CLOSURE(current)) != &stg_TREC_HEADER_info))
+      {   TRACE("%p : failed optimistic validate %p", trec, s);
+          result = false;
+          BREAK_FOR_EACH;
+      }
+    });
+  }
+
+
+  TRACE("%p : validate_trec_optimistic, result: %d", trec, result);
+  return result;
+}
+
+
 // validate_and_acquire_ownership : this performs the twin functions
 // of checking that the TVars referred to by entries in trec hold the
 // expected values and:
@@ -778,6 +739,8 @@ static StgBool validate_and_acquire_ownership (Capability *cap,
                                                int acquire_all,
                                                int retain_ownership) {
   StgBool result;
+  TRACE("cap %d, trec %p : validate_and_acquire_ownership, all: %d, retrain: %d",
+         cap->no, trec, acquire_all, retain_ownership);
 
   if (shake()) {
     TRACE("%p : shake, pretending trec is invalid when it may not be", trec);
@@ -828,6 +791,7 @@ static StgBool validate_and_acquire_ownership (Capability *cap,
       revert_ownership(cap, trec, acquire_all);
   }
 
+  TRACE("%p : validate_and_acquire_ownership, result: %d", trec, result);
   return result;
 }
 
@@ -878,12 +842,10 @@ static StgBool check_read_only(StgTRecHeader *trec STG_UNUSED) {
 /************************************************************************/
 
 void stmPreGCHook (Capability *cap) {
-  lock_stm(NO_TREC);
   TRACE("stmPreGCHook");
   cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE;
   cap->free_trec_chunks = END_STM_CHUNK_LIST;
   cap->free_trec_headers = NO_TREC;
-  unlock_stm(NO_TREC);
 }
 
 /************************************************************************/
@@ -959,8 +921,6 @@ void stmAbortTransaction(Capability *cap,
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   et = trec -> enclosing_trec;
   if (et == NO_TREC) {
     // We're a top-level transaction: remove any watch queue entries that
@@ -984,8 +944,6 @@ void stmAbortTransaction(Capability *cap,
   }
 
   trec -> state = TREC_ABORTED;
-  unlock_stm(trec);
-
   TRACE("%p : stmAbortTransaction done", trec);
 }
 
@@ -1013,35 +971,210 @@ void stmCondemnTransaction(Capability *cap,
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   if (trec -> state == TREC_WAITING) {
     ASSERT(trec -> enclosing_trec == NO_TREC);
     TRACE("%p : stmCondemnTransaction condemning waiting transaction", trec);
     remove_watch_queue_entries_for_trec(cap, trec);
   }
   trec -> state = TREC_CONDEMNED;
-  unlock_stm(trec);
 
   TRACE("%p : stmCondemnTransaction done", trec);
 }
 
-/*......................................................................*/
-
-StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec) {
+/*......................................................................
+
+Note [STM Validation]
+~~~~~~~~~~~~~~~~~~~~~
+To "validate" a transaction means to check that the transaction's log (Trec) is
+consistent with the current state of memory; specifically, that any variable
+observed (through reads AND writes) by the transaction has the same value in
+memory as it did when the transaction read it.
+
+In some situations we can give ourselves some leeway by allowing:
+* False positives - The validation check claims the memory state is consistent when it isn't.
+* False negatives - The validation check claims memory state is inconsistent when it
+  is in fact consistent.
+
+We validate an STM transaction for two purposes:
+
+(A) Post-run validation runs /after/ the transaction has completed, either during
+    commit or after an exception has occurred.
+
+    This is done by validate_and_acquire_ownership.  The commit process
+    /absolutely must/ be transactional: that is, it must read a consistent
+    snapshot of memory, compare with the log, and then atomically commit all the
+    writes in the log.  We do this by locking the TVars.
+
+    For post-run validation we must *never* allow false-positives for correctness
+    reasons. But we allow for false-negatives, trading occasional spurious retries
+    for performance in the average case.
+
+    The implementation of this atomic update is mostly based on the
+    2002 paper "A Practical Multi-Word Compare-and-Swap Operation".
+
+(B) In-flight validation runs /during/ the execution of the transaction. Suppose a transaction
+    is long-running, and memory has /already/ changed so that it is inconsistent with the
+    transaction's log. It is just conceivable that memory might change back again to be
+    consistent, but very unlikely. It is better to terminate and retry the transaction,
+    rather than let it run potentially forever as a zombie, and only retry when it attempts to commit.
+
+    This is done by validate_trec_optimistic. Since in-flight validation at most results in early
+    termination of a transaction we may accept both
+      * a "false negative" (causing the transaction to retry unnecessarily), and
+      * a "false positive" (allowing the transaction to continue as a zombie).
+
+    We want to run in-flight validation somewhat frequently to detect invalid
+    transactions early. We perform in-flight validation whenever a thread returns to
+    the scheduler, a convenient and regular opportunity.
+
+Note that in-flight validation is not merely an optimization. Consider transactions
+that are in an infinite loop as a result of seeing an inconsistent view of
+memory, e.g.
+
+      atomically $ do
+          [a,b] <- mapM readTVar [ta,tb]
+          -- a is never equal to b given a consistent view of memory.
+          when (a == b) loop
+
+As noted above, post-run validation and commit /must/ be transactional, involving expensive locking.
+But in-flight validation can accept false positives and false negatives. While we could lock TVars
+during in-flight validation to rule out false positives, we don't have to:
+it is much cheaper and very nearly as good simply to read them without locking,
+allowing for false-positive results.
+
+Moreover, locking during in-flight validation can cause lack of progress, or livelock (#24446)
+through false-negative results. Suppose we have two long-running transactions, each doing successive
+in-flight validation using locking. If the validation discovers a locked TVar it aborts and retries.
+Now they can each abort the other, forever.
+This *can* also happen with post-run validation. But since post-run validation occurs less
+frequently it's incredibly unlikely to happen repeatedly compared to in-flight validation.
+
+Hence: locking during in-flight validation is
+ * Expensive
+ * Can lead to livelock-like conditions.
+
+Conclusion:
+  * Don't use locking during in-flight validation.
+  * Use locking during post-run validation, where the risk of livelock is small
+    compared to the cost of ruling it out completely.
+
+See below for other design alternatives.
+
+Design considerations about locking during in-flight validation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All else being equal we would always want to get a precise result for validation.
+And indeed for the non-threaded runtime this is reasonably easy to achieve
+(see STM paper "Composable Memory Transactions").
+However for SMP things are more difficult, and ruling out false negatives/positives
+would come at significant cost in the average case.
+
+The easiest way to avoid false positives is to lock all relevant tvars during
+validation. And indeed that is what we use for post-run validation.
+The trade-off is that it can lead to false negatives when multiple
+threads perform validation in parallel. As long as the false-negative rate
+is reasonably low this is not problematic.
+
+However in-flight validation can happen multiple times per transaction.
+So even a fairly low rate of spurious validation failures will result in a large
+performance hit, in the worst case preventing progress altogether (see #24446).
+
+Since we still want to detect invalid transactions early, we don't want to
+reduce validation frequency too much. So we simply stick with the frequency
+"on return to the scheduler" described in the STM paper.
+
+However we can improve in-flight validation performance by allowing false positives.
+This removes the need for taking locks, which means:
+
+Benefits:
+* No lock contention between post-run and in-flight validations operating on the
+  same tvars. This reduces the false negative rate significantly for both.
+* Concurrent in-flight validations won't cause each other to fail spuriously
+  through lock contention.
+* No CAS operations are needed for in-flight validation, reducing its overhead significantly.
+
+Drawbacks:
+* We will sometimes fail to recognize invalid trecs as such by assuming locked
+  tvars contain valid values.
+
+Why can we get away with not locking tvars for in-flight validation? Because,
+unlike with post-run validation, if we miss part of an update that would
+invalidate the trec, it will be seen by a later validation (at the latest by the
+post-run validation, which still locks). However there is one exception: looping transactions.
+
+If a transaction loops it will *only* be validated optimistically.
+The only way for in-flight validation to constantly
+result in false positives is for the conflicting tvar(s) to be constantly locked
+for updates by post-run validations, which seems vanishingly unlikely over a long
+period of time. So we accept this behaviour.
+
+Design alternatives to improve the in-flight false-positive rate:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All of these primarily revolve around ways to ensure that we can recognize invalid
+looping transactions. However, without proof that this is a real problem,
+implementing them does not seem worthwhile.
+
+A1:
+Take locks for in-flight validation but don't fail if in-flight validation
+encounters already locked tvars.
+This would solve lock contention/false positives caused by concurrent in-flight validations.
+
+But it would still result in in-flight validation causing some false negatives
+during post-run validation, by holding locks that post-run validation is trying to take.
+
+It also doesn't *guarantee* that we recognize a looping transaction as invalid,
+since the relevant tvars might be locked by other validations when we try to
+lock them. So while this would improve over regular lock taking for in-flight
+validation, it seems strictly worse than not taking locks at all in most
+situations.
+
+A2:
+Perform occasional locking in-flight validation for long-running transactions.
+This would solve the theoretical looping-transaction recognition issue at the
+cost of some performance and complexity. This could be done by adding a counter to
+the trec, counting the number of validations it has endured.
+
+A2.1:
+Like A2, but instead of counting the number of validations, count the number of
+locked tvars we encounter, as these are the only source of false positives.
+This would give a hard upper bound on the number of false-positives while keeping
+the impact on post-run validations lower.
+
+If the looping transaction issue turns out to be a real problem this might be worth
+doing.
+
+A3:
+When locking a tvar for a potential update keep the old value accessible. Then
+in-flight validations should never return false-positives. However compared to A2
+this seems like it would come with a non-trivial overhead relative to the likelihood
+of these false-positives causing actual issues.
+
+
+*/
+
+// Check if a transaction is possibly invalid by this point.
+// Pessimistically - Currently used when an exception occurred inside a transaction,
+//    to decide whether or not to abort by checking if the transaction was valid.
+// Optimistically - Currently used to eagerly abort invalid transactions from the scheduler.
+// See Note [STM Validation]
+StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec, StgBool optimistically) {
   StgTRecHeader *t;
 
-  TRACE("%p : stmValidateNestOfTransactions", trec);
+  TRACE("%p : stmValidateNestOfTransactions, %b", trec, optimistically);
   ASSERT(trec != NO_TREC);
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   t = trec;
   StgBool result = true;
   while (t != NO_TREC) {
-    result &= validate_and_acquire_ownership(cap, t, true, false);
+    if(optimistically) {
+      result &= validate_trec_optimistic(cap, t);
+
+    } else {
+      // TODO: I don't think there is a need to lock all tvars here.
+      result &= validate_and_acquire_ownership(cap, t, true, false);
+    }
     t = t -> enclosing_trec;
   }
 
@@ -1049,12 +1182,9 @@ StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec) {
     trec -> state = TREC_CONDEMNED;
   }
 
-  unlock_stm(trec);
-
   TRACE("%p : stmValidateNestOfTransactions()=%d", trec, result);
   return result;
 }
-
 /*......................................................................*/
 
 static TRecEntry *get_entry_for(StgTRecHeader *trec, StgTVar *tvar, StgTRecHeader **in) {
@@ -1087,8 +1217,6 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
   TRACE("%p : stmCommitTransaction()", trec);
   ASSERT(trec != NO_TREC);
 
-  lock_stm(trec);
-
   ASSERT(trec -> enclosing_trec == NO_TREC);
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));
@@ -1112,6 +1240,7 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
       max_concurrent_commits = ((max_commits_at_end - max_commits_at_start) +
                                 (getNumCapabilities() * TOKEN_BATCH_SIZE));
       if (((max_concurrent_commits >> 32) > 0) || shake()) {
+        TRACE("STM - Max commit number exceeded");
         result = false;
       }
     }
@@ -1145,8 +1274,6 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
     }
   }
 
-  unlock_stm(trec);
-
   free_stg_trec_header(cap, trec);
 
   TRACE("%p : stmCommitTransaction()=%d", trec, result);
@@ -1162,8 +1289,6 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec) {
   TRACE("%p : stmCommitNestedTransaction() into %p", trec, trec -> enclosing_trec);
   ASSERT((trec -> state == TREC_ACTIVE) || (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   et = trec -> enclosing_trec;
   bool result = validate_and_acquire_ownership(cap, trec, (!config_use_read_phase), true);
   if (result) {
@@ -1196,8 +1321,6 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec) {
     }
   }
 
-  unlock_stm(trec);
-
   free_stg_trec_header(cap, trec);
 
   TRACE("%p : stmCommitNestedTransaction()=%d", trec, result);
@@ -1214,7 +1337,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   bool result = validate_and_acquire_ownership(cap, trec, true, true);
   if (result) {
     // The transaction is valid so far so we can actually start waiting.
@@ -1237,7 +1359,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
     // TRec.
 
   } else {
-    unlock_stm(trec);
     free_stg_trec_header(cap, trec);
   }
 
@@ -1249,7 +1370,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
 void
 stmWaitUnlock(Capability *cap, StgTRecHeader *trec) {
     revert_ownership(cap, trec, true);
-    unlock_stm(trec);
 }
 
 /*......................................................................*/
@@ -1263,7 +1383,6 @@ StgBool stmReWait(Capability *cap, StgTSO *tso) {
   ASSERT((trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   bool result = validate_and_acquire_ownership(cap, trec, true, true);
   TRACE("%p : validation %s", trec, result ? "succeeded" : "failed");
   if (result) {
@@ -1280,7 +1399,6 @@ StgBool stmReWait(Capability *cap, StgTSO *tso) {
     }
     free_stg_trec_header(cap, trec);
   }
-  unlock_stm(trec);
 
   TRACE("%p : stmReWait()=%d", trec, result);
   return result;
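
For readers skimming the Note [STM Validation] above, here is a simplified
Haskell model (an illustration for this write-up, not RTS code) of how the two
validation modes treat a locked TVar: optimistic validation presumes a locked
entry is still valid (allowing false positives), while pessimistic validation
treats it as a failure (allowing false negatives, never false positives). The
names EntryView, validateOptimistic and validatePessimistic are hypothetical.

    -- One entry of a transaction log, as seen by a validator.
    data EntryView = EntryView
      { currentValue  :: Int   -- stand-in for the TVar's current_value
      , expectedValue :: Int   -- value recorded in the transaction's log
      , isLocked      :: Bool  -- current_value currently holds a TRec header
      }

    -- In-flight (optimistic) validation: a locked TVar is assumed to still be
    -- consistent, so only an unlocked mismatch counts as a failure.
    validateOptimistic :: [EntryView] -> Bool
    validateOptimistic = all ok
      where ok e = isLocked e || currentValue e == expectedValue e

    -- Post-run (pessimistic) validation: a locked TVar counts as a failure,
    -- because a commit must never proceed on a possibly-inconsistent snapshot.
    validatePessimistic :: [EntryView] -> Bool
    validatePessimistic = all ok
      where ok e = not (isLocked e) && currentValue e == expectedValue e

    -- e.g. validateOptimistic  [EntryView 5 3 True] == True   (locked, presumed valid)
    --      validatePessimistic [EntryView 5 3 True] == False  (locked, must fail)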


=====================================
rts/STM.h
=====================================
@@ -6,24 +6,21 @@
  *
  *----------------------------------------------------------------------
 
-  STM.h defines the C-level interface to the STM.  
+  STM.h defines the C-level interface to the STM.
 
   The design follows that of the PPoPP 2005 paper "Composable memory
   transactions" extended to include fine-grained locking of TVars.
 
   Three different implementations can be built.  In overview:
-  
+
   STM_UNIPROC  -- no locking at all: not safe for concurrent invocations
- 
-  STM_CG_LOCK  -- coarse-grained locking : a single mutex protects all
-                  TVars
- 
+
   STM_FG_LOCKS -- per-TVar exclusion : each TVar can be owned by at
                   most one TRec at any time.  This allows dynamically
                   non-conflicting transactions to commit in parallel.
                   The implementation treats reads optimistically --
-                  extra versioning information is retained in the 
-                  saw_update_by field of the TVars so that they do not 
+                  extra versioning information is retained in the
+                  num_updates field of the TVars so that they do not
                   need to be locked for reading.
 
   STM.C contains more details about the locking schemes used.
@@ -72,7 +69,7 @@ void stmAbortTransaction(Capability *cap, StgTRecHeader *trec);
 void stmFreeAbortedTRec(Capability *cap, StgTRecHeader *trec);
 
 /*
- * Ensure that a subsequent commit / validation will fail.  We use this 
+ * Ensure that a subsequent commit / validation will fail.  We use this
  * in our current handling of transactions that may have become invalid
  * and started looping.  We strip their stack back to the ATOMICALLY_FRAME,
  * and, when the thread is next scheduled, discover it to be invalid and
@@ -87,16 +84,23 @@ void stmCondemnTransaction(Capability *cap, StgTRecHeader *trec);
    Validation
    ----------
 
-  Test whether the specified transaction record, and all those within which
-  it is nested, are still valid.
+   Test whether the specified transaction record, and all those within which
+   it is nested, are still valid.
+
+   stmValidateNestOfTransactions - optimistic (optimistically = true)
+      - Can return false positives when tvars are locked.
+      - Faster.
+      - Does not take any locks.
+
+   stmValidateNestOfTransactions - pessimistic (optimistically = false)
+      - Can return false negatives.
+      - Slower.
+      - Takes locks, negatively affecting the performance of other threads.
+      - Most importantly: no false positives!
 
-  Note: the caller can assume that once stmValidateTransaction has
-  returned false for a given trec then that transaction will never
-  again be valid -- we rely on this in Schedule.c when kicking invalid
-  threads at GC (in case they are stuck looping)
 */
 
-StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec);
+StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec, StgBool optimistically);
 
 /*----------------------------------------------------------------------
 
@@ -106,14 +110,14 @@ StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec);
    These four operations return boolean results which should be interpreted
    as follows:
 
-   true  => The transaction record was definitely valid 
+   true  => The transaction record was definitely valid
 
    false => The transaction record may not have been valid
 
    Note that, for nested operations, validity here is solely in terms
    of the specified trec: it does not say whether those that it may be
-   nested are themselves valid.  Callers can check this with 
-   stmValidateNestOfTransactions.
+   nested are themselves valid.  Callers can check this with
+   stmValidateNestOfTransactions (with optimistically = false).
 
    The user of the STM should ensure that it is always safe to assume that a
    transaction context is not valid when in fact it is (i.e. to return false in
@@ -151,7 +155,7 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec);
  * Test whether the current transaction context is valid and, if so,
  * start the thread waiting for updates to any of the tvars it has
  * ready from and mark it as blocked.  It is an error to call stmWait
- * if the thread is already waiting.  
+ * if the thread is already waiting.
  */
 
 StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec);
@@ -180,7 +184,7 @@ StgBool stmReWait(Capability *cap, StgTSO *tso);
  */
 
 StgClosure *stmReadTVar(Capability *cap,
-                        StgTRecHeader *trec, 
+                        StgTRecHeader *trec,
                         StgTVar *tvar);
 
 /* Update the logical contents of 'tvar' within the context of the
@@ -189,7 +193,7 @@ StgClosure *stmReadTVar(Capability *cap,
 
 void stmWriteTVar(Capability *cap,
                   StgTRecHeader *trec,
-                  StgTVar *tvar, 
+                  StgTVar *tvar,
                   StgClosure *new_value);
 
 /*----------------------------------------------------------------------*/


=====================================
rts/Schedule.c
=====================================
@@ -1106,7 +1106,7 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
     // and a is never equal to b given a consistent view of memory.
     //
     if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
-        if (!stmValidateNestOfTransactions(cap, t -> trec)) {
+        if (!stmValidateNestOfTransactions(cap, t -> trec, true)) {
             debugTrace(DEBUG_sched | DEBUG_stm,
                        "trec %p found wasting its time", t);
 


=====================================
rts/include/stg/SMP.h
=====================================
@@ -201,14 +201,15 @@ EXTERN_INLINE void busy_wait_nop(void);
  *   - StgWeak: finalizer
  *   - StgMVar: head, tail, value
  *   - StgMVarTSOQueue: link
- *   - StgTVar: current_value, first_watch_queue_entry
- *   - StgTVarWatchQueue: {next,prev}_queue_entry
- *   - StgTRecChunk: TODO
  *   - StgMutArrPtrs: payload
  *   - StgSmallMutArrPtrs: payload
  *   - StgThunk although this is a somewhat special case; see below
  *   - StgInd: indirectee
  *   - StgTSO: block_info
+
+ *   - StgTVar: current_value, first_watch_queue_entry
+ *   - StgTVarWatchQueue: {next,prev}_queue_entry
+ *   - StgTRecChunk: TODO
  *
  * Finally, non-pointer fields can be safely mutated without barriers as
  * they do not refer to other memory locations. Technically, concurrent


=====================================
testsuite/config/ghc
=====================================
@@ -48,8 +48,7 @@ if config.have_interp:
 # whether the target supports smp
 if config.ghc_with_threaded_rts:
     config.run_ways.append('threaded1')
-    if target_with_smp:
-        config.target_has_smp = True
+    if config.target_has_smp:
         config.run_ways.append('threaded2')
         if config.speed == 0:
             config.run_ways.append('nonmoving_thr')


=====================================
testsuite/driver/testglobals.py
=====================================
@@ -154,7 +154,7 @@ class TestConfig:
         self.ghc_has_smp = True
 
         # Does the target have SMP support?
-        self.target_has_smp = True
+        self.target_has_smp = False
 
         # Is gdb available?
         self.have_gdb = False


=====================================
testsuite/driver/testlib.py
=====================================
@@ -328,11 +328,11 @@ def req_ghc_smp( name, opts ):
 def req_target_smp( name, opts ):
     """
     Mark a test as requiring smp when run on the target. If the target does
-    not support smp, then mark the test as an expected fail. Use this when the
+    not support smp, then skip the test. Use this when the
     test needs to run with smp support.
     """
     if not config.target_has_smp:
-        opts.expect = 'fail'
+        opts.skip = True
 
 def req_ghc_with_threaded_rts( name, opts ):
     if not config.ghc_with_threaded_rts:


=====================================
testsuite/mk/test.mk
=====================================
@@ -167,9 +167,9 @@ CABAL_PLUGIN_BUILD = --enable-library-vanilla --disable-shared
 endif
 
 ifeq "$(GhcWithSMP)" "YES"
-RUNTEST_OPTS += -e target_with_smp=True
+RUNTEST_OPTS += -e config.target_has_smp=True
 else
-RUNTEST_OPTS += -e target_with_smp=False
+RUNTEST_OPTS += -e config.target_has_smp=False
 endif
 
 ifeq "$(GhcWithRtsLinker)" "YES"


=====================================
testsuite/tests/rts/T24142.hs
=====================================
@@ -0,0 +1,63 @@
+{- This test constructs a program that used to trigger an excessive amount of STM retries. -}
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import GHC.Conc
+import Control.Concurrent (newMVar, newEmptyMVar, takeMVar, putMVar)
+import Control.Monad
+import Control.Concurrent.STM.TArray
+import Data.Array.MArray
+import Data.IORef
+
+
+main :: IO ()
+main =
+    forM_ [2..40] $ \i -> do
+        -- Run the test with an increasing number of tvars
+        let tvar_count = i * 10
+        -- print $ "Tvars: " ++ show tvar_count
+        provokeLivelock tvar_count
+
+
+-- Forks two threads, each running an STM transaction; both access the same tvars but in opposite orders.
+provokeLivelock :: Int -> IO ()
+provokeLivelock n = do
+    -- Use a TVar array as a convenient way to bundle up n TVars.
+    tvarArray <- atomically $ do
+        newListArray (0,n) [0.. fromIntegral n :: Integer] :: STM (TArray Int Integer)
+    m1 <- newEmptyMVar
+    m2 <- newEmptyMVar
+    updateCount <- newIORef (0 :: Int)
+
+    let useTvars :: [Int] -> Bool -> IO ()
+        useTvars tvar_order use_writes = atomically $ do
+            -- Walk the array once in the given order to add all tvars to the transaction log.
+            unsafeIOToSTM $ atomicModifyIORef' updateCount (\i -> (i+1,()))
+            mapM_ (\i -> readArray tvarArray i >>= \(!_n) -> return ()) tvar_order
+
+
+            -- Then we just enter the scheduler a lot
+            forM_ tvar_order $ \i -> do
+                -- when use_writes $
+                --     readArray tvarArray i >>= \(!n) -> writeArray tvarArray i (n+1 :: Integer)
+                unsafeIOToSTM yield
+
+    _ <- forkIO $ do
+                    useTvars [0..n] False
+                    -- print "Thread1 done."
+                    putMVar m1 True
+    _ <- forkIO $ do
+                    useTvars (reverse [0..n]) False
+                    -- print "Thread1 done."
+                    putMVar m2 True
+    -- Wait for forked threads.
+    _ <- takeMVar m1
+    _ <- takeMVar m2
+    updates <- readIORef updateCount
+    if updates > n
+        then putStrLn $ "TVars: " ++ show n ++ ", ERROR: more than " ++ show n ++ " transaction attempts. (" ++ show updates ++")\n"
+        else putStrLn $ "TVars: " ++ show n ++ ", OK: no more than " ++ show n ++ " transaction attempts."
+
+    return ()
+


=====================================
testsuite/tests/rts/T24142.stdout
=====================================
@@ -0,0 +1,39 @@
+TVars: 20, OK: no more than 20 transaction attempts.
+TVars: 30, OK: no more than 30 transaction attempts.
+TVars: 40, OK: no more than 40 transaction attempts.
+TVars: 50, OK: no more than 50 transaction attempts.
+TVars: 60, OK: no more than 60 transaction attempts.
+TVars: 70, OK: no more than 70 transaction attempts.
+TVars: 80, OK: no more than 80 transaction attempts.
+TVars: 90, OK: no more than 90 transaction attempts.
+TVars: 100, OK: no more than 100 transaction attempts.
+TVars: 110, OK: no more than 110 transaction attempts.
+TVars: 120, OK: no more than 120 transaction attempts.
+TVars: 130, OK: no more than 130 transaction attempts.
+TVars: 140, OK: no more than 140 transaction attempts.
+TVars: 150, OK: no more than 150 transaction attempts.
+TVars: 160, OK: no more than 160 transaction attempts.
+TVars: 170, OK: no more than 170 transaction attempts.
+TVars: 180, OK: no more than 180 transaction attempts.
+TVars: 190, OK: no more than 190 transaction attempts.
+TVars: 200, OK: no more than 200 transaction attempts.
+TVars: 210, OK: no more than 210 transaction attempts.
+TVars: 220, OK: no more than 220 transaction attempts.
+TVars: 230, OK: no more than 230 transaction attempts.
+TVars: 240, OK: no more than 240 transaction attempts.
+TVars: 250, OK: no more than 250 transaction attempts.
+TVars: 260, OK: no more than 260 transaction attempts.
+TVars: 270, OK: no more than 270 transaction attempts.
+TVars: 280, OK: no more than 280 transaction attempts.
+TVars: 290, OK: no more than 290 transaction attempts.
+TVars: 300, OK: no more than 300 transaction attempts.
+TVars: 310, OK: no more than 310 transaction attempts.
+TVars: 320, OK: no more than 320 transaction attempts.
+TVars: 330, OK: no more than 330 transaction attempts.
+TVars: 340, OK: no more than 340 transaction attempts.
+TVars: 350, OK: no more than 350 transaction attempts.
+TVars: 360, OK: no more than 360 transaction attempts.
+TVars: 370, OK: no more than 370 transaction attempts.
+TVars: 380, OK: no more than 380 transaction attempts.
+TVars: 390, OK: no more than 390 transaction attempts.
+TVars: 400, OK: no more than 400 transaction attempts.


=====================================
testsuite/tests/rts/all.T
=====================================
@@ -609,3 +609,5 @@ test('T23400', [], compile_and_run, ['-with-rtsopts -A8k'])
 test('IOManager', [js_skip, when(arch('wasm32'), skip), when(opsys('mingw32'), skip),
                    pre_cmd('$MAKE -s --no-print-directory IOManager.hs')],
                   compile_and_run, [''])
+
+test('T24142', [req_target_smp], compile_and_run, ['-threaded -with-rtsopts "-N2"'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/cfaa788d5e80bce348e814a9c1b821372f7a908c...9b954aa3035627a68c9a6739dd1dca1454dd6d70


