[Git][ghc/ghc][wip/andreask/stm] 10 commits: Make read accepts binary integer formats

Tue Apr 30 11:01:32 UTC 2024

Cheng Shao pushed to branch wip/andreask/stm at Glasgow Haskell Compiler / GHC


Commits:
e2094df3 by damhiya at 2024-04-28T23:52:00+09:00
Make read accepts binary integer formats

CLC proposal : https://github.com/haskell/core-libraries-committee/issues/177

- - - - -
1c2fd963 by Alan Zimmerman at 2024-04-29T23:17:00-04:00
EPA: Preserve comments in Match Pats

Closes #24708
Closes #24715
Closes #24734

- - - - -
4189d17e by Sylvain Henry at 2024-04-29T23:17:42-04:00
LLVM: better unreachable default destination in Switch (#24717)

See added note.

Co-authored-by: Siddharth Bhat <siddu.druid at gmail.com>

- - - - -
a3725c88 by Cheng Shao at 2024-04-29T23:18:20-04:00
ci: enable wasm jobs for MRs with wasm label

This patch enables wasm jobs for MRs with wasm label. Previously the
wasm label didn't actually have any effect on the CI pipeline, and
full-ci needed to be applied to run wasm jobs which was a waste of
runners when working on the wasm backend, hence the fix here.

- - - - -
702f7964 by Matthew Pickering at 2024-04-29T23:18:56-04:00
Make interface files and object files depend on inplace .conf file

A potential fix for #24737

- - - - -
728af21e by Cheng Shao at 2024-04-30T05:30:23-04:00
utils: remove obsolete vagrant scripts

Vagrantfile has long been removed in !5288. This commit further
removes the obsolete vagrant scripts in the tree.

- - - - -
36f2c342 by Cheng Shao at 2024-04-30T05:31:00-04:00
Update autoconf scripts

Scripts taken from autoconf 948ae97ca5703224bd3eada06b7a69f40dd15a02

- - - - -
ecbf22a6 by Ben Gamari at 2024-04-30T05:31:36-04:00
ghcup-metadata: Drop output_name field

This is entirely redundant to the filename of the URL. There is no
compelling reason to name the downloaded file differently from its
source.

- - - - -
8e20ab33 by Andreas Klebinger at 2024-04-30T10:59:26+00:00
STM: Remove (unused)coarse grained locking.

The STM code had a coarse grained locking mode guarded by #defines that was unused.
This commit removes the code.

- - - - -
eec83538 by Andreas Klebinger at 2024-04-30T11:00:25+00:00
STM: Be more optimistic when validating in-flight transactions.

* Don't lock tvars when performing non-committal validation.
* If we encounter a locked tvar don't consider it a failure.

This means in-flight validation will only fail if committing at the
moment of validation is *guaranteed* to fail.

This prevents in-flight validation from failing spuriously if it happens in
parallel on multiple threads or parallel to thread comitting.

- - - - -


30 changed files:

- .gitlab/generate-ci/gen_ci.hs
- .gitlab/jobs.yaml
- .gitlab/rel_eng/mk-ghcup-metadata/mk_ghcup_metadata.py
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/Parser/Annotation.hs
- compiler/GHC/Parser/PostProcess.hs
- config.guess
- config.sub
- docs/users_guide/bugs.rst
- hadrian/src/Rules/Compile.hs
- libraries/base/changelog.md
- libraries/base/tests/char001.hs
- libraries/base/tests/char001.stdout
- libraries/base/tests/lex001.hs
- libraries/base/tests/lex001.stdout
- libraries/ghc-internal/src/GHC/Internal/Text/Read/Lex.hs
- rts/Exception.cmm
- rts/STM.c
- rts/STM.h
- rts/Schedule.c
- rts/include/stg/SMP.h
- testsuite/tests/printer/Makefile
- + testsuite/tests/printer/MatchPatComments.hs
- testsuite/tests/printer/all.T
- + testsuite/tests/rts/T24142.hs
- + testsuite/tests/rts/T24142.stdout
- testsuite/tests/rts/all.T
- utils/check-exact/Main.hs
- − utils/vagrant/bootstrap-deb.sh
- − utils/vagrant/bootstrap-rhel.sh


Changes:

=====================================
.gitlab/generate-ci/gen_ci.hs
=====================================
@@ -603,6 +603,7 @@ data ValidateRule =
             FullCI       -- ^ Run this job when the "full-ci" label is present.
           | LLVMBackend  -- ^ Run this job when the "LLVM backend" label is present
           | JSBackend    -- ^ Run this job when the "javascript" label is present
+          | WasmBackend  -- ^ Run this job when the "wasm" label is present
           | FreeBSDLabel -- ^ Run this job when the "FreeBSD" label is set.
           | NonmovingGc  -- ^ Run this job when the "non-moving GC" label is set.
           | IpeData      -- ^ Run this job when the "IPE" label is set
@@ -649,6 +650,7 @@ validateRuleString FullCI = or_all ([ labelString "full-ci"
 
 validateRuleString LLVMBackend  = labelString "LLVM backend"
 validateRuleString JSBackend    = labelString "javascript"
+validateRuleString WasmBackend  = labelString "wasm"
 validateRuleString FreeBSDLabel = labelString "FreeBSD"
 validateRuleString NonmovingGc  = labelString "non-moving GC"
 validateRuleString IpeData      = labelString "IPE"
@@ -1048,7 +1050,7 @@ job_groups =
             . setVariable "HADRIAN_ARGS" "--docs=none"
             . delVariable "INSTALL_CONFIGURE_ARGS"
         )
-        $ validateBuilds Amd64 (Linux AlpineWasm) cfg
+        $ addValidateRule WasmBackend $ validateBuilds Amd64 (Linux AlpineWasm) cfg
 
     wasm_build_config =
       (crossConfig "wasm32-wasi" NoEmulatorNeeded Nothing)


=====================================
.gitlab/jobs.yaml
=====================================
@@ -4502,7 +4502,7 @@
     ],
     "rules": [
       {
-        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/))) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
+        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/)) || ($CI_MERGE_REQUEST_LABELS =~ /.*wasm.*/)) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
         "when": "on_success"
       }
     ],
@@ -4566,7 +4566,7 @@
     "rules": [
       {
         "allow_failure": true,
-        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/))) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
+        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/)) || ($CI_MERGE_REQUEST_LABELS =~ /.*wasm.*/)) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
         "when": "manual"
       }
     ],
@@ -4630,7 +4630,7 @@
     "rules": [
       {
         "allow_failure": true,
-        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/))) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
+        "if": "((($CI_MERGE_REQUEST_LABELS =~ /.*full-ci.*/) || ($CI_MERGE_REQUEST_LABELS =~ /.*marge_bot_batch_merge_job.*/) || ($CI_COMMIT_BRANCH == \"master\") || ($CI_COMMIT_BRANCH =~ /ghc-[0-9]+\\.[0-9]+/)) || ($CI_MERGE_REQUEST_LABELS =~ /.*wasm.*/)) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null)",
         "when": "manual"
       }
     ],


=====================================
.gitlab/rel_eng/mk-ghcup-metadata/mk_ghcup_metadata.py
=====================================
@@ -65,7 +65,6 @@ eprint(f"Supported platforms: {job_mapping.keys()}")
 class Artifact(NamedTuple):
     job_name: str
     download_name: str
-    output_name: str
     subdir: str
 
 # Platform spec provides a specification which is agnostic to Job
@@ -75,11 +74,9 @@ class PlatformSpec(NamedTuple):
     subdir: str
 
 source_artifact = Artifact('source-tarball'
-                          , 'ghc-{version}-src.tar.xz'
                           , 'ghc-{version}-src.tar.xz'
                           , 'ghc-{version}' )
 test_artifact = Artifact('source-tarball'
-                        , 'ghc-{version}-testsuite.tar.xz'
                         , 'ghc-{version}-testsuite.tar.xz'
                         , 'ghc-{version}/testsuite' )
 
@@ -164,11 +161,6 @@ def mk_one_metadata(release_mode, version, job_map, artifact):
           , "dlSubdir": artifact.subdir.format(version=version)
           , "dlHash" : h }
 
-    # Only add dlOutput if it is inconsistent with the filename inferred from the URL
-    output = artifact.output_name.format(version=version)
-    if Path(urlparse(final_url).path).name != output:
-        res["dlOutput"] = output
-
     eprint(res)
     return res
 


=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -56,6 +56,7 @@ data Signage = Signed | Unsigned deriving (Eq, Show)
 genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
 genLlvmProc (CmmProc infos lbl live graph) = do
     let blocks = toBlockListEntryFirstFalseFallthrough graph
+
     (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
     let info = mapLookup (g_entry graph) infos
         proc = CmmProc info lbl live (ListGraph lmblocks)
@@ -67,6 +68,11 @@ genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
 -- * Block code generation
 --
 
+-- | Unreachable basic block
+--
+-- See Note [Unreachable block as default destination in Switch]
+newtype UnreachableBlockId = UnreachableBlockId BlockId
+
 -- | Generate code for a list of blocks that make up a complete
 -- procedure. The first block in the list is expected to be the entry
 -- point.
@@ -82,20 +88,27 @@ basicBlocksCodeGen live cmmBlocks
        (prologue, prologueTops) <- funPrologue live cmmBlocks
        let entryBlock = BasicBlock bid (fromOL prologue)
 
+       -- allocate one unreachable basic block that can be used as a default
+       -- destination in exhaustive switches.
+       --
+       -- See Note [Unreachable block as default destination in Switch]
+       ubid@(UnreachableBlockId ubid') <- (UnreachableBlockId . mkBlockId) <$> getUniqueM
+       let ubblock = BasicBlock ubid' [Unreachable]
+
        -- Generate code
-       (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks
+       (blocks, topss) <- fmap unzip $ mapM (basicBlockCodeGen ubid) cmmBlocks
 
        -- Compose
-       return (entryBlock : blocks, prologueTops ++ concat topss)
+       return (entryBlock : ubblock : blocks, prologueTops ++ concat topss)
 
 
 -- | Generate code for one block
-basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
-basicBlockCodeGen block
+basicBlockCodeGen :: UnreachableBlockId -> CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
+basicBlockCodeGen ubid block
   = do let (_, nodes, tail)  = blockSplit block
            id = entryLabel block
-       (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
-       (tail_instrs, top')  <- stmtToInstrs tail
+       (mid_instrs, top) <- stmtsToInstrs ubid $ blockToList nodes
+       (tail_instrs, top')  <- stmtToInstrs ubid tail
        let instrs = fromOL (mid_instrs `appOL` tail_instrs)
        return (BasicBlock id instrs, top' ++ top)
 
@@ -110,15 +123,15 @@ type StmtData = (LlvmStatements, [LlvmCmmDecl])
 
 
 -- | Convert a list of CmmNode's to LlvmStatement's
-stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
-stmtsToInstrs stmts
-   = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
+stmtsToInstrs :: UnreachableBlockId -> [CmmNode e x] -> LlvmM StmtData
+stmtsToInstrs ubid stmts
+   = do (instrss, topss) <- fmap unzip $ mapM (stmtToInstrs ubid) stmts
         return (concatOL instrss, concat topss)
 
 
 -- | Convert a CmmStmt to a list of LlvmStatement's
-stmtToInstrs :: CmmNode e x -> LlvmM StmtData
-stmtToInstrs stmt = case stmt of
+stmtToInstrs :: UnreachableBlockId -> CmmNode e x -> LlvmM StmtData
+stmtToInstrs ubid stmt = case stmt of
 
     CmmComment _         -> return (nilOL, []) -- nuke comments
     CmmTick    _         -> return (nilOL, [])
@@ -131,7 +144,7 @@ stmtToInstrs stmt = case stmt of
     CmmBranch id         -> genBranch id
     CmmCondBranch arg true false likely
                          -> genCondBranch arg true false likely
-    CmmSwitch arg ids    -> genSwitch arg ids
+    CmmSwitch arg ids    -> genSwitch ubid arg ids
 
     -- Foreign Call
     CmmUnsafeForeignCall target res args
@@ -1305,21 +1318,38 @@ For a real example of this, see ./rts/StgStdThunks.cmm
 
 
 -- | Switch branch
-genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
-genSwitch cond ids = do
+genSwitch :: UnreachableBlockId -> CmmExpr -> SwitchTargets -> LlvmM StmtData
+genSwitch (UnreachableBlockId ubid) cond ids = do
     (vc, stmts, top) <- exprToVar cond
     let ty = getVarType vc
 
     let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
                  | (ix, b) <- switchTargetsCases ids ]
-    -- out of range is undefined, so let's just branch to first label
     let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
-               | otherwise                          = snd (head labels)
+               | otherwise                          = blockIdToLlvm ubid
+                 -- switch to an unreachable basic block for exhaustive
+                 -- switches. See Note [Unreachable block as default destination
+                 -- in Switch]
 
     let s1 = Switch vc defLbl labels
     return $ (stmts `snocOL` s1, top)
 
 
+-- Note [Unreachable block as default destination in Switch]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- LLVM IR requires a default destination (a block label) for its Switch
+-- operation, even if the switch is exhaustive. An LLVM switch is considered
+-- exhausitve (e.g. to omit range checks for bit tests [1]) if the default
+-- destination is unreachable.
+--
+-- When we codegen a Cmm function, we always reserve an unreachable basic block
+-- that is used as a default destination for exhaustive Cmm switches in
+-- genSwitch. See #24717
+--
+-- [1] https://reviews.llvm.org/D68131
+
+
+
 -- -----------------------------------------------------------------------------
 -- * CmmExpr code generation
 --


=====================================
compiler/GHC/Parser/Annotation.hs
=====================================
@@ -1244,7 +1244,7 @@ transferAnnsOnlyA (EpAnn a an cs) (EpAnn a' an' cs')
 
 -- | Transfer comments from the annotations in the
 -- first 'SrcSpanAnnA' argument to those in the second.
-transferCommentsOnlyA :: SrcSpanAnnA -> SrcSpanAnnA -> (SrcSpanAnnA,  SrcSpanAnnA)
+transferCommentsOnlyA :: EpAnn a -> EpAnn b -> (EpAnn a,  EpAnn b)
 transferCommentsOnlyA (EpAnn a an cs) (EpAnn a' an' cs')
   = (EpAnn a an emptyComments, EpAnn a' an' (cs <> cs'))
 


=====================================
compiler/GHC/Parser/PostProcess.hs
=====================================
@@ -1220,19 +1220,23 @@ checkLPat e@(L l _) = checkPat l e [] []
 checkPat :: SrcSpanAnnA -> LocatedA (PatBuilder GhcPs) -> [HsConPatTyArg GhcPs] -> [LPat GhcPs]
          -> PV (LPat GhcPs)
 checkPat loc (L l e@(PatBuilderVar (L ln c))) tyargs args
-  | isRdrDataCon c = return . L loc $ ConPat
-      { pat_con_ext = noAnn -- AZ: where should this come from?
-      , pat_con = L ln c
-      , pat_args = PrefixCon tyargs args
-      }
+  | isRdrDataCon c = do
+      let (_l', loc') = transferCommentsOnlyA l loc
+      return . L loc' $ ConPat
+        { pat_con_ext = noAnn -- AZ: where should this come from?
+        , pat_con = L ln c
+        , pat_args = PrefixCon tyargs args
+        }
   | (not (null args) && patIsRec c) = do
       ctx <- askParseContext
       patFail (locA l) . PsErrInPat e $ PEIP_RecPattern args YesPatIsRecursive ctx
-checkPat loc (L _ (PatBuilderAppType f at t)) tyargs args =
-  checkPat loc f (HsConPatTyArg at t : tyargs) args
-checkPat loc (L _ (PatBuilderApp f e)) [] args = do
-  p <- checkLPat e
-  checkPat loc f [] (p : args)
+checkPat loc (L _ (PatBuilderAppType (L lf f) at t)) tyargs args = do
+  let (loc', lf') = transferCommentsOnlyA loc lf
+  checkPat loc' (L lf' f) (HsConPatTyArg at t : tyargs) args
+checkPat loc (L _ (PatBuilderApp f (L le e))) [] args = do
+  let (loc', le') = transferCommentsOnlyA loc le
+  p <- checkLPat (L le' e)
+  checkPat loc' f [] (p : args)
 checkPat loc (L l e) [] [] = do
   p <- checkAPat loc e
   return (L l p)
@@ -1432,20 +1436,27 @@ isFunLhs e = go e [] [] []
  where
    mk = fmap ArgPatBuilderVisPat
 
-   go (L _ (PatBuilderVar (L loc f))) es ops cps
-       | not (isRdrDataCon f)        = return (Just (L loc f, Prefix, es, (reverse ops) ++ cps))
-   go (L _ (PatBuilderApp f e))   es       ops cps = go f (mk e:es) ops cps
-   go (L l (PatBuilderPar _ e _)) es@(_:_) ops cps = go e es (o:ops) (c:cps)
+   go (L l (PatBuilderVar (L loc f))) es ops cps
+       | not (isRdrDataCon f)        = do
+           let (_l, loc') = transferCommentsOnlyA l loc
+           return (Just (L loc' f, Prefix, es, (reverse ops) ++ cps))
+   go (L l (PatBuilderApp (L lf f) e))   es       ops cps = do
+     let (_l, lf') = transferCommentsOnlyA l lf
+     go (L lf' f) (mk e:es) ops cps
+   go (L l (PatBuilderPar _ (L le e) _)) es@(_:_) ops cps = go (L le' e) es (o:ops) (c:cps)
       -- NB: es@(_:_) means that there must be an arg after the parens for the
       -- LHS to be a function LHS. This corresponds to the Haskell Report's definition
       -- of funlhs.
      where
+       (_l, le') = transferCommentsOnlyA l le
        (o,c) = mkParensEpAnn (realSrcSpan $ locA l)
-   go (L loc (PatBuilderOpApp l (L loc' op) r anns)) es ops cps
+   go (L loc (PatBuilderOpApp (L ll l) (L loc' op) r anns)) es ops cps
       | not (isRdrDataCon op)         -- We have found the function!
-      = return (Just (L loc' op, Infix, (mk l:mk r:es), (anns ++ reverse ops ++ cps)))
+      = do { let (_l, ll') = transferCommentsOnlyA loc ll
+           ; return (Just (L loc' op, Infix, (mk (L ll' l):mk r:es), (anns ++ reverse ops ++ cps))) }
       | otherwise                     -- Infix data con; keep going
-      = do { mb_l <- go l es ops cps
+      = do { let (_l, ll') = transferCommentsOnlyA loc ll
+           ; mb_l <- go (L ll' l) es ops cps
            ; return (reassociate =<< mb_l) }
         where
           reassociate (op', Infix, j : L k_loc (ArgPatBuilderVisPat k) : es', anns')
@@ -1454,12 +1465,13 @@ isFunLhs e = go e [] [] []
               op_app = mk $ L loc (PatBuilderOpApp (L k_loc k)
                                     (L loc' op) r (reverse ops ++ cps))
           reassociate _other = Nothing
-   go (L _ (PatBuilderAppType pat tok ty_pat@(HsTP _ (L (EpAnn anc ann cs) _)))) es ops cps
-             = go pat (L (EpAnn anc' ann cs) (ArgPatBuilderArgPat invis_pat) : es) ops cps
+   go (L l (PatBuilderAppType (L lp pat) tok ty_pat@(HsTP _ (L (EpAnn anc ann cs) _)))) es ops cps
+             = go (L lp' pat) (L (EpAnn anc' ann cs) (ArgPatBuilderArgPat invis_pat) : es) ops cps
              where invis_pat = InvisPat tok ty_pat
                    anc' = case tok of
                      NoEpTok -> anc
                      EpTok l -> widenAnchor anc [AddEpAnn AnnAnyclass l]
+                   (_l, lp') = transferCommentsOnlyA l lp
    go _ _ _ _ = return Nothing
 
 data ArgPatBuilder p


=====================================
config.guess
=====================================
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright 1992-2022 Free Software Foundation, Inc.
+#   Copyright 1992-2024 Free Software Foundation, Inc.
 
 # shellcheck disable=SC2006,SC2268 # see below for rationale
 
-timestamp='2022-05-25'
+timestamp='2024-01-01'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -47,7 +47,7 @@ me=`echo "$0" | sed -e 's,.*/,,'`
 usage="\
 Usage: $0 [OPTION]
 
-Output the configuration name of the system \`$me' is run on.
+Output the configuration name of the system '$me' is run on.
 
 Options:
   -h, --help         print this help, then exit
@@ -60,13 +60,13 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright 1992-2022 Free Software Foundation, Inc.
+Copyright 1992-2024 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
 
 help="
-Try \`$me --help' for more information."
+Try '$me --help' for more information."
 
 # Parse command line
 while test $# -gt 0 ; do
@@ -102,8 +102,8 @@ GUESS=
 # temporary files to be created and, as you can see below, it is a
 # headache to deal with in a portable fashion.
 
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
+# Historically, 'CC_FOR_BUILD' used to be named 'HOST_CC'. We still
+# use 'HOST_CC' if defined, but it is deprecated.
 
 # Portable tmp directory creation inspired by the Autoconf team.
 
@@ -155,6 +155,9 @@ Linux|GNU|GNU/*)
 
 	set_cc_for_build
 	cat <<-EOF > "$dummy.c"
+	#if defined(__ANDROID__)
+	LIBC=android
+	#else
 	#include <features.h>
 	#if defined(__UCLIBC__)
 	LIBC=uclibc
@@ -162,6 +165,8 @@ Linux|GNU|GNU/*)
 	LIBC=dietlibc
 	#elif defined(__GLIBC__)
 	LIBC=gnu
+	#elif defined(__LLVM_LIBC__)
+	LIBC=llvm
 	#else
 	#include <stdarg.h>
 	/* First heuristic to detect musl libc.  */
@@ -169,6 +174,7 @@ Linux|GNU|GNU/*)
 	LIBC=musl
 	#endif
 	#endif
+	#endif
 	EOF
 	cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
 	eval "$cc_set_libc"
@@ -459,7 +465,7 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
 		UNAME_RELEASE=`uname -v`
 		;;
 	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
+	# Japanese Language versions have a version number like '4.1.3-JL'.
 	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'`
 	GUESS=sparc-sun-sunos$SUN_REL
 	;;
@@ -904,7 +910,7 @@ EOF
 	fi
 	;;
     *:FreeBSD:*:*)
-	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	UNAME_PROCESSOR=`uname -p`
 	case $UNAME_PROCESSOR in
 	    amd64)
 		UNAME_PROCESSOR=x86_64 ;;
@@ -966,11 +972,37 @@ EOF
 	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
 	GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC
 	;;
+    x86_64:[Mm]anagarm:*:*|i?86:[Mm]anagarm:*:*)
+	GUESS="$UNAME_MACHINE-pc-managarm-mlibc"
+	;;
+    *:[Mm]anagarm:*:*)
+	GUESS="$UNAME_MACHINE-unknown-managarm-mlibc"
+	;;
     *:Minix:*:*)
 	GUESS=$UNAME_MACHINE-unknown-minix
 	;;
     aarch64:Linux:*:*)
-	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	set_cc_for_build
+	CPU=$UNAME_MACHINE
+	LIBCABI=$LIBC
+	if test "$CC_FOR_BUILD" != no_compiler_found; then
+	    ABI=64
+	    sed 's/^	    //' << EOF > "$dummy.c"
+	    #ifdef __ARM_EABI__
+	    #ifdef __ARM_PCS_VFP
+	    ABI=eabihf
+	    #else
+	    ABI=eabi
+	    #endif
+	    #endif
+EOF
+	    cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'`
+	    eval "$cc_set_abi"
+	    case $ABI in
+		eabi | eabihf) CPU=armv8l; LIBCABI=$LIBC$ABI ;;
+	    esac
+	fi
+	GUESS=$CPU-unknown-linux-$LIBCABI
 	;;
     aarch64_be:Linux:*:*)
 	UNAME_MACHINE=aarch64_be
@@ -1036,7 +1068,16 @@ EOF
     k1om:Linux:*:*)
 	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
 	;;
-    loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
+    kvx:Linux:*:*)
+	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
+	;;
+    kvx:cos:*:*)
+	GUESS=$UNAME_MACHINE-unknown-cos
+	;;
+    kvx:mbr:*:*)
+	GUESS=$UNAME_MACHINE-unknown-mbr
+	;;
+    loongarch32:Linux:*:* | loongarch64:Linux:*:*)
 	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
 	;;
     m32r*:Linux:*:*)
@@ -1191,7 +1232,7 @@ EOF
 	GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION
 	;;
     i*86:OS/2:*:*)
-	# If we were able to find `uname', then EMX Unix compatibility
+	# If we were able to find 'uname', then EMX Unix compatibility
 	# is probably installed.
 	GUESS=$UNAME_MACHINE-pc-os2-emx
 	;;
@@ -1332,7 +1373,7 @@ EOF
 		GUESS=ns32k-sni-sysv
 	fi
 	;;
-    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+    PENTIUM:*:4.0*:*)	# Unisys 'ClearPath HMP IX 4000' SVR4/MP effort
 			# says <Richard.M.Bartel at ccMail.Census.GOV>
 	GUESS=i586-unisys-sysv4
 	;;
@@ -1554,6 +1595,9 @@ EOF
     *:Unleashed:*:*)
 	GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE
 	;;
+    *:Ironclad:*:*)
+	GUESS=$UNAME_MACHINE-unknown-ironclad
+	;;
 esac
 
 # Do we have a guess based on uname results?


=====================================
config.sub
=====================================
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Configuration validation subroutine script.
-#   Copyright 1992-2022 Free Software Foundation, Inc.
+#   Copyright 1992-2024 Free Software Foundation, Inc.
 
 # shellcheck disable=SC2006,SC2268 # see below for rationale
 
-timestamp='2022-01-03'
+timestamp='2024-01-01'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -76,13 +76,13 @@ Report bugs and patches to <config-patches at gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright 1992-2022 Free Software Foundation, Inc.
+Copyright 1992-2024 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
 
 help="
-Try \`$me --help' for more information."
+Try '$me --help' for more information."
 
 # Parse command line
 while test $# -gt 0 ; do
@@ -130,7 +130,7 @@ IFS=$saved_IFS
 # Separate into logical components for further validation
 case $1 in
 	*-*-*-*-*)
-		echo Invalid configuration \`"$1"\': more than four components >&2
+		echo "Invalid configuration '$1': more than four components" >&2
 		exit 1
 		;;
 	*-*-*-*)
@@ -145,7 +145,8 @@ case $1 in
 			nto-qnx* | linux-* | uclinux-uclibc* \
 			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
 			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
-			| storm-chaos* | os2-emx* | rtmk-nova*)
+			| storm-chaos* | os2-emx* | rtmk-nova* | managarm-* \
+			| windows-* )
 				basic_machine=$field1
 				basic_os=$maybe_os
 				;;
@@ -943,7 +944,7 @@ $basic_machine
 EOF
 		IFS=$saved_IFS
 		;;
-	# We use `pc' rather than `unknown'
+	# We use 'pc' rather than 'unknown'
 	# because (1) that's what they normally are, and
 	# (2) the word "unknown" tends to confuse beginning users.
 	i*86 | x86_64)
@@ -1075,7 +1076,7 @@ case $cpu-$vendor in
 	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
 		cpu=i586
 		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
+	pentiumpro-* | p6-* | 6x86-* | athlon-* | athlon_*-*)
 		cpu=i686
 		;;
 	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
@@ -1180,7 +1181,7 @@ case $cpu-$vendor in
 		case $cpu in
 			1750a | 580 \
 			| a29k \
-			| aarch64 | aarch64_be \
+			| aarch64 | aarch64_be | aarch64c | arm64ec \
 			| abacus \
 			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
 			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
@@ -1190,7 +1191,7 @@ case $cpu-$vendor in
 			| arc | arceb | arc32 | arc64 \
 			| arm | arm[lb]e | arme[lb] | armv* \
 			| avr | avr32 \
-			| asmjs | javascript \
+			| asmjs \
 			| ba \
 			| be32 | be64 \
 			| bfin | bpf | bs2000 \
@@ -1199,50 +1200,29 @@ case $cpu-$vendor in
 			| d10v | d30v | dlx | dsp16xx \
 			| e2k | elxsi | epiphany \
 			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
+			| javascript \
 			| h8300 | h8500 \
 			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
 			| hexagon \
 			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
 			| ip2k | iq2000 \
 			| k1om \
+			| kvx \
 			| le32 | le64 \
 			| lm32 \
-			| loongarch32 | loongarch64 | loongarchx32 \
+			| loongarch32 | loongarch64 \
 			| m32c | m32r | m32rle \
 			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
 			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
 			| m88110 | m88k | maxq | mb | mcore | mep | metag \
 			| microblaze | microblazeel \
-			| mips | mipsbe | mipseb | mipsel | mipsle \
-			| mips16 \
-			| mips64 | mips64eb | mips64el \
-			| mips64octeon | mips64octeonel \
-			| mips64orion | mips64orionel \
-			| mips64r5900 | mips64r5900el \
-			| mips64vr | mips64vrel \
-			| mips64vr4100 | mips64vr4100el \
-			| mips64vr4300 | mips64vr4300el \
-			| mips64vr5000 | mips64vr5000el \
-			| mips64vr5900 | mips64vr5900el \
-			| mipsisa32 | mipsisa32el \
-			| mipsisa32r2 | mipsisa32r2el \
-			| mipsisa32r3 | mipsisa32r3el \
-			| mipsisa32r5 | mipsisa32r5el \
-			| mipsisa32r6 | mipsisa32r6el \
-			| mipsisa64 | mipsisa64el \
-			| mipsisa64r2 | mipsisa64r2el \
-			| mipsisa64r3 | mipsisa64r3el \
-			| mipsisa64r5 | mipsisa64r5el \
-			| mipsisa64r6 | mipsisa64r6el \
-			| mipsisa64sb1 | mipsisa64sb1el \
-			| mipsisa64sr71k | mipsisa64sr71kel \
-			| mipsr5900 | mipsr5900el \
-			| mipstx39 | mipstx39el \
+			| mips* \
 			| mmix \
 			| mn10200 | mn10300 \
 			| moxie \
 			| mt \
 			| msp430 \
+			| nanomips* \
 			| nds32 | nds32le | nds32be \
 			| nfp \
 			| nios | nios2 | nios2eb | nios2el \
@@ -1274,6 +1254,7 @@ case $cpu-$vendor in
 			| ubicom32 \
 			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
 			| vax \
+			| vc4 \
 			| visium \
 			| w65 \
 			| wasm32 | wasm64 \
@@ -1285,7 +1266,7 @@ case $cpu-$vendor in
 				;;
 
 			*)
-				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
+				echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2
 				exit 1
 				;;
 		esac
@@ -1306,11 +1287,12 @@ esac
 
 # Decode manufacturer-specific aliases for certain operating systems.
 
-if test x$basic_os != x
+if test x"$basic_os" != x
 then
 
 # First recognize some ad-hoc cases, or perhaps split kernel-os, or else just
 # set os.
+obj=
 case $basic_os in
 	gnu/linux*)
 		kernel=linux
@@ -1341,6 +1323,10 @@ EOF
 		kernel=linux
 		os=`echo "$basic_os" | sed -e 's|linux|gnu|'`
 		;;
+	managarm*)
+		kernel=managarm
+		os=`echo "$basic_os" | sed -e 's|managarm|mlibc|'`
+		;;
 	*)
 		kernel=
 		os=$basic_os
@@ -1506,10 +1492,16 @@ case $os in
 			os=eabi
 			;;
 		    *)
-			os=elf
+			os=
+			obj=elf
 			;;
 		esac
 		;;
+	aout* | coff* | elf* | pe*)
+		# These are machine code file formats, not OSes
+		obj=$os
+		os=
+		;;
 	*)
 		# No normalization, but not necessarily accepted, that comes below.
 		;;
@@ -1528,12 +1520,15 @@ else
 # system, and we'll never get to this point.
 
 kernel=
+obj=
 case $cpu-$vendor in
 	score-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	spu-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	*-acorn)
 		os=riscix1.2
@@ -1543,28 +1538,35 @@ case $cpu-$vendor in
 		os=gnu
 		;;
 	arm*-semi)
-		os=aout
+		os=
+		obj=aout
 		;;
 	c4x-* | tic4x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	c8051-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	clipper-intergraph)
 		os=clix
 		;;
 	hexagon-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	tic54x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	tic55x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	tic6x-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
@@ -1586,19 +1588,24 @@ case $cpu-$vendor in
 		os=sunos3
 		;;
 	m68*-cisco)
-		os=aout
+		os=
+		obj=aout
 		;;
 	mep-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	mips*-cisco)
-		os=elf
+		os=
+		obj=elf
 		;;
-	mips*-*)
-		os=elf
+	mips*-*|nanomips*-*)
+		os=
+		obj=elf
 		;;
 	or32-*)
-		os=coff
+		os=
+		obj=coff
 		;;
 	*-tti)	# must be before sparc entry or we get the wrong os.
 		os=sysv3
@@ -1607,7 +1614,8 @@ case $cpu-$vendor in
 		os=sunos4.1.1
 		;;
 	pru-*)
-		os=elf
+		os=
+		obj=elf
 		;;
 	*-be)
 		os=beos
@@ -1688,10 +1696,12 @@ case $cpu-$vendor in
 		os=uxpv
 		;;
 	*-rom68k)
-		os=coff
+		os=
+		obj=coff
 		;;
 	*-*bug)
-		os=coff
+		os=
+		obj=coff
 		;;
 	*-apple)
 		os=macos
@@ -1709,14 +1719,11 @@ esac
 
 fi
 
-# Now, validate our (potentially fixed-up) OS.
-case $os in
-        # GHC specific: added for JS backend support
-        js | ghcjs)
-                ;;
+# Now, validate our (potentially fixed-up) individual pieces (OS, OBJ).
 
-	# Sometimes we do "kernel-abi", so those need to count as OSes.
-	musl* | newlib* | relibc* | uclibc*)
+case $os in
+	# Sometimes we do "kernel-libc", so those need to count as OSes.
+	llvm* | musl* | newlib* | relibc* | uclibc*)
 		;;
 	# Likewise for "kernel-abi"
 	eabi* | gnueabi*)
@@ -1724,6 +1731,9 @@ case $os in
 	# VxWorks passes extra cpu info in the 4th filed.
 	simlinux | simwindows | spe)
 		;;
+	# See `case $cpu-$os` validation below
+	ghcjs)
+		;;
 	# Now accept the basic system types.
 	# The portable systems comes first.
 	# Each alternative MUST end in a * to match a version number.
@@ -1732,7 +1742,7 @@ case $os in
 	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
 	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
 	     | hiux* | abug | nacl* | netware* | windows* \
-	     | os9* | macos* | osx* | ios* \
+	     | os9* | macos* | osx* | ios* | tvos* | watchos* \
 	     | mpw* | magic* | mmixware* | mon960* | lnews* \
 	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
 	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
@@ -1741,11 +1751,11 @@ case $os in
 	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
 	     | bitrig* | openbsd* | secbsd* | solidbsd* | libertybsd* | os108* \
 	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
-	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
-	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
+	     | bosx* | nextstep* | cxux* | oabi* \
+	     | ptx* | ecoff* | winnt* | domain* | vsta* \
 	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
 	     | chorusrdb* | cegcc* | glidix* | serenity* \
-	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
+	     | cygwin* | msys* | moss* | proelf* | rtems* \
 	     | midipix* | mingw32* | mingw64* | mint* \
 	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
 	     | interix* | uwin* | mks* | rhapsody* | darwin* \
@@ -1758,49 +1768,116 @@ case $os in
 	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
 	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
 	     | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \
-	     | fiwix* )
+	     | fiwix* | mlibc* | cos* | mbr* | ironclad* )
 		;;
 	# This one is extra strict with allowed versions
 	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
 		# Don't forget version if it is 3.2v4 or newer.
 		;;
+	# This refers to builds using the UEFI calling convention
+	# (which depends on the architecture) and PE file format.
+	# Note that this is both a different calling convention and
+	# different file format than that of GNU-EFI
+	# (x86_64-w64-mingw32).
+	uefi)
+		;;
 	none)
 		;;
+	kernel* | msvc* )
+		# Restricted further below
+		;;
+	'')
+		if test x"$obj" = x
+		then
+			echo "Invalid configuration '$1': Blank OS only allowed with explicit machine code file format" 1>&2
+		fi
+		;;
 	*)
-		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
+		echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2
+		exit 1
+		;;
+esac
+
+case $obj in
+	aout* | coff* | elf* | pe*)
+		;;
+	'')
+		# empty is fine
+		;;
+	*)
+		echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2
+		exit 1
+		;;
+esac
+
+# Here we handle the constraint that a (synthetic) cpu and os are
+# valid only in combination with each other and nowhere else.
+case $cpu-$os in
+	# The "javascript-unknown-ghcjs" triple is used by GHC; we
+	# accept it here in order to tolerate that, but reject any
+	# variations.
+	javascript-ghcjs)
+		;;
+	javascript-* | *-ghcjs)
+		echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2
 		exit 1
 		;;
 esac
 
 # As a final step for OS-related things, validate the OS-kernel combination
 # (given a valid OS), if there is a kernel.
-case $kernel-$os in
-	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \
-		   | linux-musl* | linux-relibc* | linux-uclibc* )
+case $kernel-$os-$obj in
+	linux-gnu*- | linux-android*- | linux-dietlibc*- | linux-llvm*- \
+		    | linux-mlibc*- | linux-musl*- | linux-newlib*- \
+		    | linux-relibc*- | linux-uclibc*- )
+		;;
+	uclinux-uclibc*- )
+		;;
+	managarm-mlibc*- | managarm-kernel*- )
 		;;
-	uclinux-uclibc* )
+	windows*-msvc*-)
 		;;
-	-dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* )
+	-dietlibc*- | -llvm*- | -mlibc*- | -musl*- | -newlib*- | -relibc*- \
+		    | -uclibc*- )
 		# These are just libc implementations, not actual OSes, and thus
 		# require a kernel.
-		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
+		echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 1>&2
 		exit 1
 		;;
-	kfreebsd*-gnu* | kopensolaris*-gnu*)
+	-kernel*- )
+		echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2
+		exit 1
 		;;
-	vxworks-simlinux | vxworks-simwindows | vxworks-spe)
+	*-kernel*- )
+		echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2
+		exit 1
 		;;
-	nto-qnx*)
+	*-msvc*- )
+		echo "Invalid configuration '$1': '$os' needs 'windows'." 1>&2
+		exit 1
 		;;
-	os2-emx)
+	kfreebsd*-gnu*- | kopensolaris*-gnu*-)
+		;;
+	vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-)
+		;;
+	nto-qnx*-)
+		;;
+	os2-emx-)
 		;;
-	*-eabi* | *-gnueabi*)
+	*-eabi*- | *-gnueabi*-)
 		;;
-	-*)
+	none--*)
+		# None (no kernel, i.e. freestanding / bare metal),
+		# can be paired with an machine code file format
+		;;
+	-*-)
 		# Blank kernel with real OS is always fine.
 		;;
-	*-*)
-		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
+	--*)
+		# Blank kernel and OS with real machine code file format is always fine.
+		;;
+	*-*-*)
+		echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 1>&2
 		exit 1
 		;;
 esac
@@ -1883,7 +1960,7 @@ case $vendor in
 		;;
 esac
 
-echo "$cpu-$vendor-${kernel:+$kernel-}$os"
+echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}"
 exit
 
 # Local variables:


=====================================
docs/users_guide/bugs.rst
=====================================
@@ -445,15 +445,15 @@ In ``Prelude`` support
 
 ``Read``\ ing integers
     GHC's implementation of the ``Read`` class for integral types
-    accepts hexadecimal and octal literals (the code in the Haskell 98
+    accepts hexadecimal, octal and binary literals (the code in the Haskell 98
     report doesn't). So, for example, ::
 
         read "0xf00" :: Int
 
     works in GHC.
 
-    A possible reason for this is that ``readLitChar`` accepts hex and
-    octal escapes, so it seems inconsistent not to do so for integers
+    This is to maintain consistency with the language's syntax. Haskell98
+    accepts hexadecimal and octal formats, and GHC2021 accepts binary formats
     too.
 
 ``isAlpha``


=====================================
hadrian/src/Rules/Compile.hs
=====================================
@@ -218,6 +218,9 @@ compileHsObjectAndHi rs objpath = do
   ctxPath <- contextPath ctx
   (src, deps) <- lookupDependencies (ctxPath -/- ".dependencies") objpath
   need (src:deps)
+  -- The .conf file is needed when template-haskell is implicitly added as a dependency
+  -- when a module in the template-haskell package is compiled. (See #24737)
+  when  (isLibrary (C.package ctx)) (need . (:[]) =<< pkgConfFile ctx)
 
   -- The .dependencies file lists indicating inputs. ghc will
   -- generally read more *.hi and *.hi-boot files (direct inputs).


=====================================
libraries/base/changelog.md
=====================================
@@ -3,6 +3,7 @@
 ## 4.21.0.0 *TBA*
   * Add the `MonadFix` instance for `(,) a`, similar to the one for `Writer a` ([CLC proposal #238](https://github.com/haskell/core-libraries-committee/issues/238))
   * Improve `toInteger :: Word32 -> Integer` on 64-bit platforms ([CLC proposal #259](https://github.com/haskell/core-libraries-committee/issues/259))
+  * Make `read` accept binary integer notation ([CLC proposal #177](https://github.com/haskell/core-libraries-committee/issues/177))
 
 ## 4.20.0.0 *TBA*
   * Deprecate `GHC.Pack` ([#21461](https://gitlab.haskell.org/ghc/ghc/-/issues/21461))


=====================================
libraries/base/tests/char001.hs
=====================================
@@ -1,7 +1,8 @@
 -- !!! Testing the behaviour of Char.lexLitChar a little..

--- [March 2003]  We now allow \X and \O as escapes although the 
--- spec only permits \x and \o.  Seems more consistent. 
+-- [March 2003]  We now allow \X and \O as escapes although the
+-- spec only permits \x and \o.  Seems more consistent.
+-- [January 2024]  Binary character literals, something like '\b100' are not permitted.
 
 module Main where
 
@@ -33,9 +34,15 @@ octs = do
   lex' "'\\o14b'"
   lex' "'\\0a4bg'"
 
+-- Binaries are NOT supported. '\b' stands for backspace.
+bins = do
+  lex' "'\\b'"
+  lex' "'\\b00'"
+
 main = do
   hexes
   octs
+  bins
 
 
 


=====================================
libraries/base/tests/char001.stdout
=====================================
@@ -16,3 +16,5 @@ lex '\O000024' = [("'\\O000024'","")]
 lex '\024b' = []
 lex '\o14b' = []
 lex '\0a4bg' = []
+lex '\b' = [("'\\b'","")]
+lex '\b00' = []


=====================================
libraries/base/tests/lex001.hs
=====================================
@@ -27,7 +27,23 @@ testStrings
         "035e-3x",
         "35e+3y",
         "83.3e-22",
-        "083.3e-22"
+        "083.3e-22",
+
+        "0b001",
+        "0b100",
+        "0b110",
+        "0B001",
+        "0B100",
+        "0B110",
+
+        "78_91",
+        "678_346",
+        "0x23d_fa4",
+        "0X23d_fa4",
+        "0o01_253",
+        "0O304_367",
+        "0b0101_0110",
+        "0B11_010_0110"
    ]
 
 main = mapM test testStrings


=====================================
libraries/base/tests/lex001.stdout
=====================================
@@ -82,3 +82,58 @@
 [("083.3e-22","")]
 [(Number (MkDecimal [0,8,3] (Just [3]) (Just (-22))),"")]
 
+"0b001"
+[("0b001","")]
+[(Number (MkNumber 2 [0,0,1]),"")]
+
+"0b100"
+[("0b100","")]
+[(Number (MkNumber 2 [1,0,0]),"")]
+
+"0b110"
+[("0b110","")]
+[(Number (MkNumber 2 [1,1,0]),"")]
+
+"0B001"
+[("0B001","")]
+[(Number (MkNumber 2 [0,0,1]),"")]
+
+"0B100"
+[("0B100","")]
+[(Number (MkNumber 2 [1,0,0]),"")]
+
+"0B110"
+[("0B110","")]
+[(Number (MkNumber 2 [1,1,0]),"")]
+
+"78_91"
+[("78","_91")]
+[(Number (MkDecimal [7,8] Nothing Nothing),"_91")]
+
+"678_346"
+[("678","_346")]
+[(Number (MkDecimal [6,7,8] Nothing Nothing),"_346")]
+
+"0x23d_fa4"
+[("0x23d","_fa4")]
+[(Number (MkNumber 16 [2,3,13]),"_fa4")]
+
+"0X23d_fa4"
+[("0X23d","_fa4")]
+[(Number (MkNumber 16 [2,3,13]),"_fa4")]
+
+"0o01_253"
+[("0o01","_253")]
+[(Number (MkNumber 8 [0,1]),"_253")]
+
+"0O304_367"
+[("0O304","_367")]
+[(Number (MkNumber 8 [3,0,4]),"_367")]
+
+"0b0101_0110"
+[("0b0101","_0110")]
+[(Number (MkNumber 2 [0,1,0,1]),"_0110")]
+
+"0B11_010_0110"
+[("0B11","_010_0110")]
+[(Number (MkNumber 2 [1,1]),"_010_0110")]


=====================================
libraries/ghc-internal/src/GHC/Internal/Text/Read/Lex.hs
=====================================
@@ -300,6 +300,17 @@ lexCharE =
        n    <- lexInteger base
        guard (n <= toInteger (ord maxBound))
        return (chr (fromInteger n))
+    where
+      -- Slightly different variant of lexBaseChar that denies binary format.
+      -- Binary formats are not allowed for character/string literal.
+      lexBaseChar = do
+        c <- get
+        case c of
+          'o' -> return 8
+          'O' -> return 8
+          'x' -> return 16
+          'X' -> return 16
+          _   -> pfail
 
   lexCntrlChar =
     do _ <- char '^'
@@ -415,27 +426,28 @@ type Digits = [Int]
 
 lexNumber :: ReadP Lexeme
 lexNumber
-  = lexHexOct  <++      -- First try for hex or octal 0x, 0o etc
+  = lexHexOctBin  <++   -- First try for hex, octal or binary 0x, 0o, 0b etc
                         -- If that fails, try for a decimal number
     lexDecNumber        -- Start with ordinary digits
 
-lexHexOct :: ReadP Lexeme
-lexHexOct
+lexHexOctBin :: ReadP Lexeme
+lexHexOctBin
   = do  _ <- char '0'
         base <- lexBaseChar
         digits <- lexDigits base
         return (Number (MkNumber base digits))
-
-lexBaseChar :: ReadP Int
--- Lex a single character indicating the base; fail if not there
-lexBaseChar = do
-  c <- get
-  case c of
-    'o' -> return 8
-    'O' -> return 8
-    'x' -> return 16
-    'X' -> return 16
-    _   -> pfail
+  where
+    -- Lex a single character indicating the base; fail if not there
+    lexBaseChar = do
+      c <- get
+      case c of
+        'b' -> return 2
+        'B' -> return 2
+        'o' -> return 8
+        'O' -> return 8
+        'x' -> return 16
+        'X' -> return 16
+        _   -> pfail
 
 lexDecNumber :: ReadP Lexeme
 lexDecNumber =


=====================================
rts/Exception.cmm
=====================================
@@ -495,7 +495,7 @@ retry_pop_stack:
       W_ trec, outer;
       W_ r;
       trec = StgTSO_trec(CurrentTSO);
-      (r) = ccall stmValidateNestOfTransactions(MyCapability() "ptr", trec "ptr");
+      (r) = ccall stmValidateNestOfTransactions(MyCapability() "ptr", trec "ptr", 0);
       outer  = StgTRecHeader_enclosing_trec(trec);
       ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
       ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");


=====================================
rts/STM.c
=====================================
@@ -31,10 +31,8 @@
  * interface.  In the Haskell RTS this means it is suitable only for
  * non-THREADED_RTS builds.
  *
- * STM_CG_LOCK uses coarse-grained locking -- a single 'stm lock' is acquired
- * during an invocation on the STM interface.  Note that this does not mean that
- * transactions are simply serialized -- the lock is only held *within* the
- * implementation of stmCommitTransaction, stmWait etc.
+ * STM_CG_LOCK was a historic locking mode using coarse-grained locking
+ * It has been removed, look at the git history if you are interest in it.
  *
  * STM_FG_LOCKS uses fine-grained locking -- locking is done on a per-TVar basis
  * and, when committing a transaction, no locks are acquired for TVars that have
@@ -42,19 +40,14 @@
  *
  * Concurrency control is implemented in the functions:
  *
- *    lock_stm
- *    unlock_stm
  *    lock_tvar / cond_lock_tvar
  *    unlock_tvar
  *
- * The choice between STM_UNIPROC / STM_CG_LOCK / STM_FG_LOCKS affects the
+ * The choice between STM_UNIPROC / STM_FG_LOCKS affects the
  * implementation of these functions.
  *
- * lock_stm & unlock_stm are straightforward : they acquire a simple spin-lock
- * using STM_CG_LOCK, and otherwise they are no-ops.
- *
  * lock_tvar / cond_lock_tvar and unlock_tvar are more complex because they have
- * other effects (present in STM_UNIPROC and STM_CG_LOCK builds) as well as the
+ * other effects (present in STM_UNIPROC builds) as well as the
  * actual business of manipulating a lock (present only in STM_FG_LOCKS builds).
  * This is because locking a TVar is implemented by writing the lock holder's
  * TRec into the TVar's current_value field:
@@ -167,7 +160,6 @@ static int shake(void) {
 /*......................................................................*/
 
 #define IF_STM_UNIPROC(__X)  do { } while (0)
-#define IF_STM_CG_LOCK(__X)  do { } while (0)
 #define IF_STM_FG_LOCKS(__X) do { } while (0)
 
 #if defined(STM_UNIPROC)
@@ -175,14 +167,6 @@ static int shake(void) {
 #define IF_STM_UNIPROC(__X)  do { __X } while (0)
 static const StgBool config_use_read_phase = false;
 
-static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-}
-
 static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
                              StgTRecHeader *trec STG_UNUSED,
                              StgTVar *s STG_UNUSED) {
@@ -210,64 +194,9 @@ static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
                               StgTVar *s STG_UNUSED,
                               StgClosure *expected) {
   StgClosure *result;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
-  result = ACQUIRE_LOAD(&s->current_value);
-  TRACE("%p : %s", trec, (result == expected) ? "success" : "failure");
-  return (result == expected);
-}
-#endif
-
-#if defined(STM_CG_LOCK) /*........................................*/
-
-#undef IF_STM_CG_LOCK
-#define IF_STM_CG_LOCK(__X)  do { __X } while (0)
-static const StgBool config_use_read_phase = false;
-static volatile StgTRecHeader *smp_locked = NULL;
-
-static void lock_stm(StgTRecHeader *trec) {
-  while (cas(&smp_locked, NULL, trec) != NULL) { }
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-  ASSERT(smp_locked == trec);
-  RELEASE_STORE(&smp_locked, 0);
-}
-
-static StgClosure *lock_tvar(Capability *cap STG_UNUSED,
-                             StgTRecHeader *trec STG_UNUSED,
-                             StgTVar *s STG_UNUSED) {
-  StgClosure *result;
-  TRACE("%p : lock_tvar(%p)", trec, s);
-  ASSERT(smp_locked == trec);
+  // TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
   result = ACQUIRE_LOAD(&s->current_value);
-  return result;
-}
-
-static void *unlock_tvar(Capability *cap,
-                         StgTRecHeader *trec STG_UNUSED,
-                         StgTVar *s,
-                         StgClosure *c,
-                         StgBool force_update) {
-  TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
-  ASSERT(smp_locked == trec);
-  if (force_update) {
-    StgClosure *old_value = ACQUIRE_LOAD(&s->current_value);
-    RELEASE_STORE(&s->current_value, c);
-    dirty_TVAR(cap, s, old_value);
-  }
-}
-
-static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
-                              StgTRecHeader *trec STG_UNUSED,
-                               StgTVar *s STG_UNUSED,
-                               StgClosure *expected) {
-  StgClosure *result;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
-  ASSERT(smp_locked == trec);
-  result = ACQUIRE_LOAD(&s->current_value);
-  TRACE("%p : %d", result ? "success" : "failure");
+  // TRACE("%p : %s", trec, (result == expected) ? "success" : "failure");
   return (result == expected);
 }
 #endif
@@ -278,19 +207,11 @@ static StgBool cond_lock_tvar(Capability *cap STG_UNUSED,
 #define IF_STM_FG_LOCKS(__X) do { __X } while (0)
 static const StgBool config_use_read_phase = true;
 
-static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : lock_stm()", trec);
-}
-
-static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
-  TRACE("%p : unlock_stm()", trec);
-}
-
 static StgClosure *lock_tvar(Capability *cap,
                              StgTRecHeader *trec,
                              StgTVar *s STG_UNUSED) {
   StgClosure *result;
-  TRACE("%p : lock_tvar(%p)", trec, s);
+  // TRACE("%p : lock_tvar(%p)", trec, s);
   do {
     const StgInfoTable *info;
     do {
@@ -313,7 +234,7 @@ static void unlock_tvar(Capability *cap,
                         StgTVar *s,
                         StgClosure *c,
                         StgBool force_update STG_UNUSED) {
-  TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
+  // TRACE("%p : unlock_tvar(%p, %p)", trec, s, c);
   ASSERT(ACQUIRE_LOAD(&s->current_value) == (StgClosure *)trec);
   RELEASE_STORE(&s->current_value, c);
   dirty_TVAR(cap, s, (StgClosure *) trec);
@@ -325,14 +246,14 @@ static StgBool cond_lock_tvar(Capability *cap,
                               StgClosure *expected) {
   StgClosure *result;
   StgWord w;
-  TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
+  // TRACE("%p : cond_lock_tvar(%p, %p)", trec, s, expected);
   w = cas((void *)&(s -> current_value), (StgWord)expected, (StgWord)trec);
   result = (StgClosure *)w;
   IF_NONMOVING_WRITE_BARRIER_ENABLED {
       if (result)
           updateRemembSetPushClosure(cap, expected);
   }
-  TRACE("%p : %s", trec, result ? "success" : "failure");
+  // TRACE("%p : %s", trec, result ? "success" : "failure");
   return (result == expected);
 }
 #endif
@@ -438,6 +359,8 @@ static StgTRecHeader *new_stg_trec_header(Capability *cap,
 // Allocation / deallocation functions that retain per-capability lists
 // of closures that can be re-used
 
+//TODO: I think some of these lack write barriers required by the non-moving gc.
+
 static StgTVarWatchQueue *alloc_stg_tvar_watch_queue(Capability *cap,
                                                      StgClosure *closure) {
   StgTVarWatchQueue *result = NULL;
@@ -760,6 +683,44 @@ static void revert_ownership(Capability *cap STG_UNUSED,
 
 /*......................................................................*/
 
+// validate_optimistic()
+StgBool validate_trec_optimistic (Capability *cap, StgTRecHeader *trec);
+
+StgBool validate_trec_optimistic (Capability *cap, StgTRecHeader *trec) {
+  StgBool result;
+  TRACE("cap %d, trec %p : validate_trec_optimistic",
+         cap->no, trec);
+
+  if (shake()) {
+    TRACE("%p : shake, pretending trec is invalid when it may not be", trec);
+    return false;
+  }
+
+  ASSERT((trec -> state == TREC_ACTIVE) ||
+         (trec -> state == TREC_WAITING) ||
+         (trec -> state == TREC_CONDEMNED));
+  result = !((trec -> state) == TREC_CONDEMNED);
+  if (result) {
+    FOR_EACH_ENTRY(trec, e, {
+      StgTVar *s;
+      s = e -> tvar;
+      StgClosure *current = RELAXED_LOAD(&s->current_value);
+      if(current != e->expected_value &&
+          //If the trec is locked we optimistically assume our trec will still be valid after it's unlocked.
+         (GET_INFO(UNTAG_CLOSURE(current)) != &stg_TREC_HEADER_info))
+      {   TRACE("%p : failed optimistic validate %p", trec, s);
+          result = false;
+          BREAK_FOR_EACH;
+      }
+    });
+  }
+
+
+  TRACE("%p : validate_trec_optimistic, result: %d", trec, result);
+  return result;
+}
+
+
 // validate_and_acquire_ownership : this performs the twin functions
 // of checking that the TVars referred to by entries in trec hold the
 // expected values and:
@@ -778,6 +739,8 @@ static StgBool validate_and_acquire_ownership (Capability *cap,
                                                int acquire_all,
                                                int retain_ownership) {
   StgBool result;
+  TRACE("cap %d, trec %p : validate_and_acquire_ownership, all: %d, retrain: %d",
+         cap->no, trec, acquire_all, retain_ownership);
 
   if (shake()) {
     TRACE("%p : shake, pretending trec is invalid when it may not be", trec);
@@ -828,6 +791,7 @@ static StgBool validate_and_acquire_ownership (Capability *cap,
       revert_ownership(cap, trec, acquire_all);
   }
 
+  TRACE("%p : validate_and_acquire_ownership, result: %d", trec, result);
   return result;
 }
 
@@ -878,12 +842,10 @@ static StgBool check_read_only(StgTRecHeader *trec STG_UNUSED) {
 /************************************************************************/
 
 void stmPreGCHook (Capability *cap) {
-  lock_stm(NO_TREC);
   TRACE("stmPreGCHook");
   cap->free_tvar_watch_queues = END_STM_WATCH_QUEUE;
   cap->free_trec_chunks = END_STM_CHUNK_LIST;
   cap->free_trec_headers = NO_TREC;
-  unlock_stm(NO_TREC);
 }
 
 /************************************************************************/
@@ -959,8 +921,6 @@ void stmAbortTransaction(Capability *cap,
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   et = trec -> enclosing_trec;
   if (et == NO_TREC) {
     // We're a top-level transaction: remove any watch queue entries that
@@ -984,8 +944,6 @@ void stmAbortTransaction(Capability *cap,
   }
 
   trec -> state = TREC_ABORTED;
-  unlock_stm(trec);
-
   TRACE("%p : stmAbortTransaction done", trec);
 }
 
@@ -1013,35 +971,210 @@ void stmCondemnTransaction(Capability *cap,
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   if (trec -> state == TREC_WAITING) {
     ASSERT(trec -> enclosing_trec == NO_TREC);
     TRACE("%p : stmCondemnTransaction condemning waiting transaction", trec);
     remove_watch_queue_entries_for_trec(cap, trec);
   }
   trec -> state = TREC_CONDEMNED;
-  unlock_stm(trec);
 
   TRACE("%p : stmCondemnTransaction done", trec);
 }
 
-/*......................................................................*/
-
-StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec) {
+/*......................................................................
+
+Note [STM Validation]
+~~~~~~~~~~~~~~~~~~~~~
+To "validate" a transaction means to check that the transaction's log (Trec) is
+consistent with the current state of memory; specifically, that any variable
+observed (through reads AND writes) by the transaction has the same value in
+memory as it did when the transaction read it.
+
+In some situations we can give ourself some leeway by allowing:
+* False positives - The validation check claims the memory state is consistent when it isn't.
+* False negatives - The validation check claims memory state is inconsistent when it
+  is in fact consistent.
+
+We validate a STM transaction for two purposes:
+
+(A) Post-run validation runs /after/ the transaction has completed, either during
+    commit or after an exception has occurred.
+
+    This is done by validate_and_acquire_ownership.  The commit process
+    /absolutely must/ be transactional: that is, it must read a consistent
+    snapshot of memory, compare with the log, and then atomically commit all the
+    writes in the log.  We do this by locking the TVars.
+
+    For post-run validation we must *never* allow false-positives for correctness
+    reasons. But we allow for false-negatives, trading occasional spurious retries
+    for performance in the average case.
+
+    The implementation of performing this update atomically is mostly based on
+    the 2002 paper "A Practical Multi-Word Compare-and-Swap Operation"
+
+(B) In-flight validation runs /during/ the execution of the transaction. Suppose a transaction
+    is long-running, and memory has /already/ changed so that it is inconsistent with the
+    transaction's log. It is just conceivable that memory might change back again to be
+    consistent, but very unlikely. It is better to terminate and retry the transaction,
+    rather than let it run potentially forever as a zombie, and only retry when it attempts to commit.
+
+    This is done by validate_trec_optimistic. Since in-flight validation at most results in early
+    termination of a transaction we may accept both
+      * a "false negative" (causing the transaction to retry unnecessarily), and
+      * a "false positive" (allowing the transaction to continue as a zombie).
+
+    We want to run in-flight validation somewhat frequently to detect invalid
+    transactions early. We perform in-flight validation whenever a thread returns to
+    the scheduler, a convenient and regular opportunity.
+
+Note that in-flight validation is not merely a optimization. Consider transactions
+that are in an infinite loop as a result of seeing an inconsistent view of
+memory, e.g.
+
+      atomically $ do
+          [a,b] <- mapM readTVar [ta,tb]
+          -- a is never equal to b given a consistent view of memory.
+          when (a == b) loop
+
+As noted above, post-run validation and commit /must/ be transactional, involving expensive locking.
+But in-flight validation can accept false positives and false negatives. While we could lock TVars
+during in-flight validation to rule out false positives, we don't have to:
+it is much cheaper and very nearly as good simply to read them without locking allowing for
+false-postive results.
+
+Moreover, locking during in-flight validation can cause lack of progress, or livelock (#24446)
+through false-negative results. Suppose we have two long-running transactions, each doing successive
+in-flight validation using locking. If the validation discovers a locked TVar it aborts and retries.
+Now they can each abort the other, forever.
+This *can* also happen with post-run validation. But since post-run validation occurs less
+frequently it's incredibly unlikely to happen repeatedly compared to in-flight validation.
+
+Hence: locking during in-flight validation is
+ * Expensive
+ * Can lead to livelock-like conditions.
+
+Conclusion:
+  * don't use locking during in-flight validation.
+  * Use locking during post-run validation, where the risk of livelock is comparatively small
+    compared to the cost of ruling out live-lock completely.
+
+See below for other design alternatives.
+
+Design considerations about locking during in flight validation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All else being equal we would always want to get a precise result for validation.
+And indeed for the non-threaded runtime this is reasonably easy to achieve
+(see STM paper "Composable Memory Transactions").
+However for SMP things are more difficult, and ruling out false negatives/positives
+would come at significant cost in the average case.
+
+The easiest way to avoid false positives is to lock all relevant tvars during
+validation. And indeed that is what we use for post-run validation.
+The trade off being that it can lead to false negatives during validation when multiple
+threads perform validation in parallel. As long as the false-negative rate is
+is reasonably low this is not problematic.
+
+However in-flight validation can happen multiple times per transaction.
+So even a fairly low rate of spurious validation failures will result in a large
+performance hit. In the worst case preventing progress alltogether (See #24446).
+
+We don't want to reduce validation frequency too much to detect invalid
+transactions early. So we simply stick with the frequency "on return to scheduler"
+that's described in the stm paper.
+
+However we can improve in-flight validation perf by allowing false positives.
+This removes the need for tacking locks which means:
+
+Benefits
+* No lock contention between post-run and in-flight validations operating on the
+  same tvars. This reduces the false negative rate significantly for both.
+* Concurrent in-flight validations won't cause each other to fail spuriously
+  through lock contention.
+* No cas operations for in-flight validation reduces it's overhead significantly.
+
+Drawbacks:
+* We will sometimes fail to recognize invalid trecs as such by assuming locked
+  tvars contain valid values.
+
+Why can we simply not lock tvars for in-flight validations? Unlike with post-run
+validation if we miss part of an update which would invalidate the trec it will
+be either seen by a later validation (at the latest in the post-run validation
+which still locks). However there is one exception: Looping transactions.
+
+If a transaction loops it will *only* be validated optimistically.
+The only way for in-flight validation to constantly
+result in false-positives is for the conflicting tvar(s) to get constantly locked
+for updates by post-run validations. Which seems impossibly unlikely over a long
+period of time. So we accept this behaviour.
+
+Design alternatives to improve in-flight false-postive rate:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All of these primarily revolve around ways to ensure that we can recognize invalid
+looping transactions. However without proof this is a real problem implementing
+those seems not worthwhile.
+
+A1:
+Take locks for in-flight validation but don't fail if in-flight validation
+encounters already locked tvars.
+This would solve lock contention/false positives caused by concurrent in-flight validations.
+
+But it would still result in in-flight validation causing some false-negatives
+during post-run validation by holding locks post-run validation is trying to take.
+
+It also doesn't *guaranteed* that we recognize looping transaction as invalid.
+As the relevant tvars might be locked by other validations when we try to lock
+them. So while this would improve over using regular lock tacking for in-flight
+transactions it seems straight up worse than not taking locks to me in most
+situations.
+
+A2:
+Perform occasional locking in-flight validation for long running transactions.
+This would solve the theoretical looping transaction recognition issue at the
+cost of some performance and complexity. This could done by adding a counter to
+the trec, counting the number of validations it has endured.
+
+A2.1:
+Like A2, but instead of counting the number of validations count the number of
+locked tvars we encountered, as these are the only sources of false-positives.
+This would give a hard upper bound on the number of false-positives while keeping
+the impact on post-run validations lower.
+
+If the looping transaction issue turns out to be a real problem this might be worth
+doing.
+
+A3:
+When locking a tvar for a potential update keep the old value accessible. Then
+in-flight validations should never return false-positives. However compared to A2
+this seems like it would come with a non-trivial overhead relative to the likelyhood
+of these false-positives causing actual issues.
+
+
+*/
+
+// Check if a transaction is possibly invalid by this point.
+// Pessimistically - Currently we use this if an exception occured inside a transaction.
+//    To decide weither or not to abort by checking if the transaction was valid.
+// Optimistically - Currently we use this to eagerly abort invalid transactions from the scheduler.
+// See Note [STM Validation]
+StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec, StgBool optimistically) {
   StgTRecHeader *t;
 
-  TRACE("%p : stmValidateNestOfTransactions", trec);
+  TRACE("%p : stmValidateNestOfTransactions, %b", trec, optimistically);
   ASSERT(trec != NO_TREC);
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   t = trec;
   StgBool result = true;
   while (t != NO_TREC) {
-    result &= validate_and_acquire_ownership(cap, t, true, false);
+    if(optimistically) {
+      result &= validate_trec_optimistic(cap, t);
+
+    } else {
+      // TODO: I don't think there is a need to lock all tvars here.
+      result &= validate_and_acquire_ownership(cap, t, true, false);
+    }
     t = t -> enclosing_trec;
   }
 
@@ -1049,12 +1182,9 @@ StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec) {
     trec -> state = TREC_CONDEMNED;
   }
 
-  unlock_stm(trec);
-
   TRACE("%p : stmValidateNestOfTransactions()=%d", trec, result);
   return result;
 }
-
 /*......................................................................*/
 
 static TRecEntry *get_entry_for(StgTRecHeader *trec, StgTVar *tvar, StgTRecHeader **in) {
@@ -1087,8 +1217,6 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
   TRACE("%p : stmCommitTransaction()", trec);
   ASSERT(trec != NO_TREC);
 
-  lock_stm(trec);
-
   ASSERT(trec -> enclosing_trec == NO_TREC);
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));
@@ -1112,6 +1240,7 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
       max_concurrent_commits = ((max_commits_at_end - max_commits_at_start) +
                                 (getNumCapabilities() * TOKEN_BATCH_SIZE));
       if (((max_concurrent_commits >> 32) > 0) || shake()) {
+        TRACE("STM - Max commit number exceeded");
         result = false;
       }
     }
@@ -1145,8 +1274,6 @@ StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
     }
   }
 
-  unlock_stm(trec);
-
   free_stg_trec_header(cap, trec);
 
   TRACE("%p : stmCommitTransaction()=%d", trec, result);
@@ -1162,8 +1289,6 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec) {
   TRACE("%p : stmCommitNestedTransaction() into %p", trec, trec -> enclosing_trec);
   ASSERT((trec -> state == TREC_ACTIVE) || (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
-
   et = trec -> enclosing_trec;
   bool result = validate_and_acquire_ownership(cap, trec, (!config_use_read_phase), true);
   if (result) {
@@ -1196,8 +1321,6 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec) {
     }
   }
 
-  unlock_stm(trec);
-
   free_stg_trec_header(cap, trec);
 
   TRACE("%p : stmCommitNestedTransaction()=%d", trec, result);
@@ -1214,7 +1337,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
   ASSERT((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   bool result = validate_and_acquire_ownership(cap, trec, true, true);
   if (result) {
     // The transaction is valid so far so we can actually start waiting.
@@ -1237,7 +1359,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
     // TRec.
 
   } else {
-    unlock_stm(trec);
     free_stg_trec_header(cap, trec);
   }
 
@@ -1249,7 +1370,6 @@ StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
 void
 stmWaitUnlock(Capability *cap, StgTRecHeader *trec) {
     revert_ownership(cap, trec, true);
-    unlock_stm(trec);
 }
 
 /*......................................................................*/
@@ -1263,7 +1383,6 @@ StgBool stmReWait(Capability *cap, StgTSO *tso) {
   ASSERT((trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
 
-  lock_stm(trec);
   bool result = validate_and_acquire_ownership(cap, trec, true, true);
   TRACE("%p : validation %s", trec, result ? "succeeded" : "failed");
   if (result) {
@@ -1280,7 +1399,6 @@ StgBool stmReWait(Capability *cap, StgTSO *tso) {
     }
     free_stg_trec_header(cap, trec);
   }
-  unlock_stm(trec);
 
   TRACE("%p : stmReWait()=%d", trec, result);
   return result;


=====================================
rts/STM.h
=====================================
@@ -6,24 +6,21 @@
  *
  *----------------------------------------------------------------------
 
-  STM.h defines the C-level interface to the STM.  
+  STM.h defines the C-level interface to the STM.
 
   The design follows that of the PPoPP 2005 paper "Composable memory
   transactions" extended to include fine-grained locking of TVars.
 
   Three different implementations can be built.  In overview:
-  
+
   STM_UNIPROC  -- no locking at all: not safe for concurrent invocations
- 
-  STM_CG_LOCK  -- coarse-grained locking : a single mutex protects all
-                  TVars
- 
+
   STM_FG_LOCKS -- per-TVar exclusion : each TVar can be owned by at
                   most one TRec at any time.  This allows dynamically
                   non-conflicting transactions to commit in parallel.
                   The implementation treats reads optimistically --
-                  extra versioning information is retained in the 
-                  saw_update_by field of the TVars so that they do not 
+                  extra versioning information is retained in the
+                  num_updates field of the TVars so that they do not
                   need to be locked for reading.
 
   STM.C contains more details about the locking schemes used.
@@ -72,7 +69,7 @@ void stmAbortTransaction(Capability *cap, StgTRecHeader *trec);
 void stmFreeAbortedTRec(Capability *cap, StgTRecHeader *trec);
 
 /*
- * Ensure that a subsequent commit / validation will fail.  We use this 
+ * Ensure that a subsequent commit / validation will fail.  We use this
  * in our current handling of transactions that may have become invalid
  * and started looping.  We strip their stack back to the ATOMICALLY_FRAME,
  * and, when the thread is next scheduled, discover it to be invalid and
@@ -87,16 +84,23 @@ void stmCondemnTransaction(Capability *cap, StgTRecHeader *trec);
    Validation
    ----------
 
-  Test whether the specified transaction record, and all those within which
-  it is nested, are still valid.
+   Test whether the specified transaction record, and all those within which
+   it is nested, are still valid.
+
+   stmValidateNestOfTransactions - optimistically
+      - Can return false positives when tvars are locked.
+      - Faster
+      - Does not take any locks
+
+   stmValidateNestOfTransactions - pessimistic
+      - Can return false negatives.
+      - Slower
+      - Takes locks, negatively affecting performance of other threads.
+      - Most importantly - no false positives!
 
-  Note: the caller can assume that once stmValidateTransaction has
-  returned false for a given trec then that transaction will never
-  again be valid -- we rely on this in Schedule.c when kicking invalid
-  threads at GC (in case they are stuck looping)
 */
 
-StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec);
+StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec, StgBool optimistically);
 
 /*----------------------------------------------------------------------
 
@@ -106,14 +110,14 @@ StgBool stmValidateNestOfTransactions(Capability *cap, StgTRecHeader *trec);
    These four operations return boolean results which should be interpreted
    as follows:
 
-   true  => The transaction record was definitely valid 
+   true  => The transaction record was definitely valid
 
    false => The transaction record may not have been valid
 
    Note that, for nested operations, validity here is solely in terms
    of the specified trec: it does not say whether those that it may be
-   nested are themselves valid.  Callers can check this with 
-   stmValidateNestOfTransactions.
+   nested are themselves valid.  Callers can check this with
+   stmValidateNestOfTransactionsPessimistic.
 
    The user of the STM should ensure that it is always safe to assume that a
    transaction context is not valid when in fact it is (i.e. to return false in
@@ -151,7 +155,7 @@ StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec);
  * Test whether the current transaction context is valid and, if so,
  * start the thread waiting for updates to any of the tvars it has
  * ready from and mark it as blocked.  It is an error to call stmWait
- * if the thread is already waiting.  
+ * if the thread is already waiting.
  */
 
 StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec);
@@ -180,7 +184,7 @@ StgBool stmReWait(Capability *cap, StgTSO *tso);
  */
 
 StgClosure *stmReadTVar(Capability *cap,
-                        StgTRecHeader *trec, 
+                        StgTRecHeader *trec,
                         StgTVar *tvar);
 
 /* Update the logical contents of 'tvar' within the context of the
@@ -189,7 +193,7 @@ StgClosure *stmReadTVar(Capability *cap,
 
 void stmWriteTVar(Capability *cap,
                   StgTRecHeader *trec,
-                  StgTVar *tvar, 
+                  StgTVar *tvar,
                   StgClosure *new_value);
 
 /*----------------------------------------------------------------------*/


=====================================
rts/Schedule.c
=====================================
@@ -1106,7 +1106,7 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
     // and a is never equal to b given a consistent view of memory.
     //
     if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
-        if (!stmValidateNestOfTransactions(cap, t -> trec)) {
+        if (!stmValidateNestOfTransactions(cap, t -> trec, true)) {
             debugTrace(DEBUG_sched | DEBUG_stm,
                        "trec %p found wasting its time", t);
 


=====================================
rts/include/stg/SMP.h
=====================================
@@ -201,14 +201,15 @@ EXTERN_INLINE void busy_wait_nop(void);
  *   - StgWeak: finalizer
  *   - StgMVar: head, tail, value
  *   - StgMVarTSOQueue: link
- *   - StgTVar: current_value, first_watch_queue_entry
- *   - StgTVarWatchQueue: {next,prev}_queue_entry
- *   - StgTRecChunk: TODO
  *   - StgMutArrPtrs: payload
  *   - StgSmallMutArrPtrs: payload
  *   - StgThunk although this is a somewhat special case; see below
  *   - StgInd: indirectee
  *   - StgTSO: block_info
+
+ *   - StgTVar: current_value, first_watch_queue_entry
+ *   - StgTVarWatchQueue: {next,prev}_queue_entry
+ *   - StgTRecChunk: TODO
  *
  * Finally, non-pointer fields can be safely mutated without barriers as
  * they do not refer to other memory locations. Technically, concurrent


=====================================
testsuite/tests/printer/Makefile
=====================================
@@ -831,3 +831,8 @@ PprLetIn:
 CaseAltComments:
 	$(CHECK_PPR)   $(LIBDIR) CaseAltComments.hs
 	$(CHECK_EXACT) $(LIBDIR) CaseAltComments.hs
+
+.PHONY: MatchPatComments
+MatchPatComments:
+	$(CHECK_PPR)   $(LIBDIR) MatchPatComments.hs
+	$(CHECK_EXACT) $(LIBDIR) MatchPatComments.hs


=====================================
testsuite/tests/printer/MatchPatComments.hs
=====================================
@@ -0,0 +1,16 @@
+module MatchPatComments where
+
+expandProcess
+        outCHAs -- c0
+        locationDescr =
+    blah
+
+next
+    ( steps  -- c1
+    , ys     -- c2
+    ) x      -- c3
+    = (steps, x, ys)
+
+makeProjection
+    Function{funMutual = VV, -- c4
+             funAbstr = ConcreteDef} = undefined


=====================================
testsuite/tests/printer/all.T
=====================================
@@ -199,3 +199,4 @@ test('AnnotationNoListTuplePuns', [ignore_stderr, req_ppr_deps], makefile_test,
 test('Test24533', [ignore_stderr, req_ppr_deps], makefile_test, ['Test24533'])
 test('PprLetIn', [ignore_stderr, req_ppr_deps], makefile_test, ['PprLetIn'])
 test('CaseAltComments', [ignore_stderr, req_ppr_deps], makefile_test, ['CaseAltComments'])
+test('MatchPatComments', [ignore_stderr, req_ppr_deps], makefile_test, ['MatchPatComments'])


=====================================
testsuite/tests/rts/T24142.hs
=====================================
@@ -0,0 +1,63 @@
+{- This test constructs a program that used to trigger an excessive amount of STM retries. -}
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import GHC.Conc
+import Control.Concurrent (newMVar, newEmptyMVar, takeMVar, putMVar)
+import Control.Monad
+import Control.Concurrent.STM.TArray
+import Data.Array.MArray
+import Data.IORef
+
+
+main :: IO ()
+main =
+    forM_ [2..40] $ \i -> do
+        -- Run the test with an increasing number of tvars
+        let tvar_count = i * 10
+        -- print $ "Tvars: " ++ show tvar_count
+        provokeLivelock tvar_count
+
+
+-- Forks two threads running a STM transactions, both accessing the same tvars but in opposite order.
+provokeLivelock :: Int -> IO ()
+provokeLivelock n = do
+    -- Use tvar array as a convenient way to bundle up n Tvars.
+    tvarArray <- atomically $ do
+        newListArray (0,n) [0.. fromIntegral n :: Integer] :: STM (TArray Int Integer)
+    m1 <- newEmptyMVar
+    m2 <- newEmptyMVar
+    updateCount <- newIORef (0 :: Int)
+
+    let useTvars :: [Int] -> Bool -> IO ()
+        useTvars tvar_order use_writes = atomically $ do
+            -- Walk the array once in the given order to add all tvars to the transaction log.
+            unsafeIOToSTM $ atomicModifyIORef' updateCount (\i -> (i+1,()))
+            mapM_ (\i -> readArray tvarArray i >>= \(!_n) -> return ()) tvar_order
+
+
+            -- Then we just enter the scheduler a lot
+            forM_ tvar_order $ \i -> do
+                -- when use_writes $
+                --     readArray tvarArray i >>= \(!n) -> writeArray tvarArray i (n+1 :: Integer)
+                unsafeIOToSTM yield
+
+    _ <- forkIO $ do
+                    useTvars [0..n] False
+                    -- print "Thread1 done."
+                    putMVar m1 True
+    _ <- forkIO $ do
+                    useTvars (reverse [0..n]) False
+                    -- print "Thread1 done."
+                    putMVar m2 True
+    -- Wait for forked threads.
+    _ <- takeMVar m1
+    _ <- takeMVar m2
+    updates <- readIORef updateCount
+    if updates > n
+        then putStrLn $ "TVars: " ++ show n ++ ", ERROR: more than " ++ show n ++ " transaction attempts. (" ++ show updates ++")\n"
+        else putStrLn $ "TVars: " ++ show n ++ ", OK: no more than " ++ show n ++ " transaction attempts."
+
+    return ()
+


=====================================
testsuite/tests/rts/T24142.stdout
=====================================
@@ -0,0 +1,39 @@
+TVars: 20, OK: no more than 20 transaction attempts.
+TVars: 30, OK: no more than 30 transaction attempts.
+TVars: 40, OK: no more than 40 transaction attempts.
+TVars: 50, OK: no more than 50 transaction attempts.
+TVars: 60, OK: no more than 60 transaction attempts.
+TVars: 70, OK: no more than 70 transaction attempts.
+TVars: 80, OK: no more than 80 transaction attempts.
+TVars: 90, OK: no more than 90 transaction attempts.
+TVars: 100, OK: no more than 100 transaction attempts.
+TVars: 110, OK: no more than 110 transaction attempts.
+TVars: 120, OK: no more than 120 transaction attempts.
+TVars: 130, OK: no more than 130 transaction attempts.
+TVars: 140, OK: no more than 140 transaction attempts.
+TVars: 150, OK: no more than 150 transaction attempts.
+TVars: 160, OK: no more than 160 transaction attempts.
+TVars: 170, OK: no more than 170 transaction attempts.
+TVars: 180, OK: no more than 180 transaction attempts.
+TVars: 190, OK: no more than 190 transaction attempts.
+TVars: 200, OK: no more than 200 transaction attempts.
+TVars: 210, OK: no more than 210 transaction attempts.
+TVars: 220, OK: no more than 220 transaction attempts.
+TVars: 230, OK: no more than 230 transaction attempts.
+TVars: 240, OK: no more than 240 transaction attempts.
+TVars: 250, OK: no more than 250 transaction attempts.
+TVars: 260, OK: no more than 260 transaction attempts.
+TVars: 270, OK: no more than 270 transaction attempts.
+TVars: 280, OK: no more than 280 transaction attempts.
+TVars: 290, OK: no more than 290 transaction attempts.
+TVars: 300, OK: no more than 300 transaction attempts.
+TVars: 310, OK: no more than 310 transaction attempts.
+TVars: 320, OK: no more than 320 transaction attempts.
+TVars: 330, OK: no more than 330 transaction attempts.
+TVars: 340, OK: no more than 340 transaction attempts.
+TVars: 350, OK: no more than 350 transaction attempts.
+TVars: 360, OK: no more than 360 transaction attempts.
+TVars: 370, OK: no more than 370 transaction attempts.
+TVars: 380, OK: no more than 380 transaction attempts.
+TVars: 390, OK: no more than 390 transaction attempts.
+TVars: 400, OK: no more than 400 transaction attempts.


=====================================
testsuite/tests/rts/all.T
=====================================
@@ -609,3 +609,5 @@ test('T23400', [], compile_and_run, ['-with-rtsopts -A8k'])
 test('IOManager', [js_skip, when(arch('wasm32'), skip), when(opsys('mingw32'), skip),
                    pre_cmd('$MAKE -s --no-print-directory IOManager.hs')],
                   compile_and_run, [''])
+
+test('T24142', [req_target_smp], compile_and_run, ['-threaded -with-rtsopts "-N2"'])


=====================================
utils/check-exact/Main.hs
=====================================
@@ -128,7 +128,7 @@ _tt = testOneFile changers "/home/alanz/mysrc/git.haskell.org/ghc/_build/stage1/
  -- "../../testsuite/tests/printer/Ppr034.hs" Nothing
  -- "../../testsuite/tests/printer/Ppr035.hs" Nothing
  -- "../../testsuite/tests/printer/Ppr036.hs" Nothing
- "../../testsuite/tests/printer/Ppr037.hs" Nothing
+ "../../testsuite/tests/printer/MatchPatComments.hs" Nothing
  -- "../../testsuite/tests/printer/Ppr038.hs" Nothing
  -- "../../testsuite/tests/printer/Ppr039.hs" Nothing
  -- "../../testsuite/tests/printer/Ppr040.hs" Nothing


=====================================
utils/vagrant/bootstrap-deb.sh deleted
=====================================
@@ -1,3 +0,0 @@
-#!/bin/sh
-apt-get update
-apt-get build-dep -y ghc


=====================================
utils/vagrant/bootstrap-rhel.sh deleted
=====================================
@@ -1,4 +0,0 @@
-#!/bin/sh
-yum update -y
-yum install -y glibc-devel ncurses-devel gmp-devel autoconf automake libtool \
- gcc make python ghc git



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/53327d1e7d829cc1bd911f9beccdb3ebd11e3fe9...eec83538359e59c03d73edbaaa287140fb40959a

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/53327d1e7d829cc1bd911f9beccdb3ebd11e3fe9...eec83538359e59c03d73edbaaa287140fb40959a
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240430/a47c6eab/attachment-0001.html>