[Git][ghc/ghc][wip/perf-ci] 5 commits: testsuite: Add support to capture performance metrics via 'perf'

Fri May 31 07:48:33 UTC 2024


Hannes Siebenhandl pushed to branch wip/perf-ci at Glasgow Haskell Compiler / GHC


Commits:
20e80498 by Fendor at 2024-05-31T09:38:20+02:00
testsuite: Add support to capture performance metrics via 'perf'

Performance metrics collected via 'perf' can be more accurate for
run-time performance than GHC's rts, due to the usage of hardware
counters.

We allow performance tests to also record PMU events according to 'perf
list'.

- - - - -
335083de by Fendor at 2024-05-31T09:38:20+02:00
hadrian: Pass 'perf' program to the test suite if it can be found

Currently, we only look for 'perf' on '$PATH' with no way of
customisation. This might change in the future.

- - - - -
99a7b240 by Fendor at 2024-05-31T09:38:20+02:00
gitlab-ci: Add nightly job for running the testsuite with perf profiling support

- - - - -
cd41791f by Fendor at 2024-05-31T09:38:20+02:00
Enable perf profiling for compiler performance tests

- - - - -
d9d6f7d1 by Fendor at 2024-05-31T09:38:20+02:00
FIXME: bump ci-images commit to modified version

- - - - -


9 changed files:

- .gitlab-ci.yml
- .gitlab/generate-ci/gen_ci.hs
- .gitlab/jobs.yaml
- hadrian/src/Settings/Builders/RunTest.hs
- testsuite/driver/perf_notes.py
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/driver/testutil.py
- testsuite/tests/perf/compiler/all.T


Changes:

=====================================
.gitlab-ci.yml
=====================================
@@ -2,7 +2,7 @@ variables:
   GIT_SSL_NO_VERIFY: "1"
 
   # Commit of ghc/ci-images repository from which to pull Docker images
-  DOCKER_REV: 064e90c26dffe5709bd5b87dbd211b9a8b21fc5b
+  DOCKER_REV: da1db60cf83e2720c2d347b1cc9128ed330836a9
 
   # Sequential version number of all cached things.
   # Bump to invalidate GitLab CI cache.


=====================================
.gitlab/generate-ci/gen_ci.hs
=====================================
@@ -153,6 +153,7 @@ data BuildConfig
                 , threadSanitiser :: Bool
                 , noSplitSections :: Bool
                 , validateNonmovingGc :: Bool
+                , testsuiteUsePerf :: Bool
                 }
 
 -- Extra arguments to pass to ./configure due to the BuildConfig
@@ -211,6 +212,7 @@ vanilla = BuildConfig
   , threadSanitiser = False
   , noSplitSections = False
   , validateNonmovingGc = False
+  , testsuiteUsePerf = False
   }
 
 splitSectionsBroken :: BuildConfig -> BuildConfig
@@ -263,6 +265,9 @@ tsan = vanilla { threadSanitiser = True }
 noTntc :: BuildConfig
 noTntc = vanilla { tablesNextToCode = False }
 
+usePerfProfilingTestsuite :: BuildConfig -> BuildConfig
+usePerfProfilingTestsuite bc = bc { testsuiteUsePerf = True }
+
 -----------------------------------------------------------------------------
 -- Platform specific variables
 -----------------------------------------------------------------------------
@@ -283,6 +288,9 @@ runnerTag _ _ = error "Invalid arch/opsys"
 tags :: Arch -> Opsys -> BuildConfig -> [String]
 tags arch opsys _bc = [runnerTag arch opsys] -- Tag for which runners we can use
 
+runnerPerfTag :: Arch -> Opsys -> String
+runnerPerfTag arch sys = runnerTag arch sys ++ "-perf"
+
 -- These names are used to find the docker image so they have to match what is
 -- in the docker registry.
 distroName :: LinuxDistro -> String
@@ -761,6 +769,7 @@ job arch opsys buildConfig = NamedJob { name = jobName, jobInfo = Job {..} }
                 | validateNonmovingGc buildConfig
                 ]
         in "RUNTEST_ARGS" =: unwords runtestArgs
+      , if testsuiteUsePerf buildConfig then "RUNTEST_ARGS" =: "--config perf_path=perf" else mempty
       ]
 
     jobArtifacts = Artifacts
@@ -883,6 +892,12 @@ highCompression = addVariable "XZ_OPT" "-9"
 useHashUnitIds :: Job -> Job
 useHashUnitIds = addVariable "HADRIAN_ARGS" "--hash-unit-ids"
 
+-- | Change the tag of the job to make sure the job is scheduled on a
+-- runner that has the necessary capabilties to run the job with 'perf'
+-- profiling counters.
+perfProfilingJobTag :: Arch -> Opsys -> Job -> Job
+perfProfilingJobTag arch opsys j = j { jobTags = [ runnerPerfTag arch opsys ] }
+
 -- | Mark the validate job to run in fast-ci mode
 -- This is default way, to enable all jobs you have to apply the `full-ci` label.
 fastCI :: JobGroup Job -> JobGroup Job
@@ -1022,6 +1037,8 @@ job_groups =
          make_wasm_jobs wasm_build_config {unregisterised = True}
      , onlyRule NonmovingGc (validateBuilds Amd64 (Linux Debian11) vanilla {validateNonmovingGc = True})
      , onlyRule IpeData (validateBuilds Amd64 (Linux Debian10) zstdIpe)
+     -- Run the 'perf' profiling nightly job in the release config.
+     , perfProfilingJob Amd64 (Linux Debian12) releaseConfig
      ]
 
   where
@@ -1034,6 +1051,12 @@ job_groups =
     -- (see Note [Object unloading]).
     fullyStaticBrokenTests = modifyJobs (addVariable "BROKEN_TESTS" "ghcilink002 linker_unload_native")
 
+    perfProfilingJob arch sys buildConfig =
+        -- Rename the job to avoid conflicts
+        rename (<> "-perf")
+          $ modifyJobs (perfProfilingJobTag arch sys)
+          $ disableValidate (validateBuilds arch sys $ usePerfProfilingTestsuite buildConfig)
+
     hackage_doc_job = rename (<> "-hackage") . modifyJobs (addVariable "HADRIAN_ARGS" "--haddock-for-hackage")
 
     tsan_jobs =


=====================================
.gitlab/jobs.yaml
=====================================
@@ -1895,6 +1895,68 @@
       "XZ_OPT": "-9"
     }
   },
+  "nightly-x86_64-linux-deb12-release-perf": {
+    "after_script": [
+      ".gitlab/ci.sh save_cache",
+      ".gitlab/ci.sh save_test_output",
+      ".gitlab/ci.sh clean",
+      "cat ci_timings"
+    ],
+    "allow_failure": false,
+    "artifacts": {
+      "expire_in": "8 weeks",
+      "paths": [
+        "ghc-x86_64-linux-deb12-release.tar.xz",
+        "junit.xml",
+        "unexpected-test-output.tar.gz"
+      ],
+      "reports": {
+        "junit": "junit.xml"
+      },
+      "when": "always"
+    },
+    "cache": {
+      "key": "x86_64-linux-deb12-$CACHE_REV",
+      "paths": [
+        "cabal-cache",
+        "toolchain"
+      ]
+    },
+    "dependencies": [],
+    "image": "registry.gitlab.haskell.org/ghc/ci-images/x86_64-linux-deb12:$DOCKER_REV",
+    "needs": [
+      {
+        "artifacts": false,
+        "job": "hadrian-ghc-in-ghci"
+      }
+    ],
+    "rules": [
+      {
+        "if": "(\"true\" == \"true\") && ($RELEASE_JOB != \"yes\") && ($NIGHTLY)",
+        "when": "on_success"
+      }
+    ],
+    "script": [
+      "sudo chown ghc:ghc -R .",
+      ".gitlab/ci.sh setup",
+      ".gitlab/ci.sh configure",
+      ".gitlab/ci.sh build_hadrian",
+      ".gitlab/ci.sh test_hadrian"
+    ],
+    "stage": "full-build",
+    "tags": [
+      "x86_64-linux-perf"
+    ],
+    "variables": {
+      "BIGNUM_BACKEND": "gmp",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-deb12-release",
+      "BUILD_FLAVOUR": "release",
+      "CONFIGURE_ARGS": "--enable-strict-ghc-toolchain-check",
+      "RUNTEST_ARGS": " --config perf_path=perf",
+      "TEST_ENV": "x86_64-linux-deb12-release",
+      "XZ_OPT": "-9"
+    }
+  },
   "nightly-x86_64-linux-deb12-validate": {
     "after_script": [
       ".gitlab/ci.sh save_cache",


=====================================
hadrian/src/Settings/Builders/RunTest.hs
=====================================
@@ -245,6 +245,9 @@ runTestBuilderArgs = builder Testsuite ? do
     let asBool :: String -> Bool -> String
         asBool s b = s ++ show b
 
+
+    perfPathM <- expr $ liftIO (findExecutable "perf")
+
     -- TODO: set CABAL_MINIMAL_BUILD/CABAL_PLUGIN_BUILD
     mconcat [ arg "-Wdefault"  -- see #22727
             , arg $ "testsuite/driver/runtests.py"
@@ -293,6 +296,7 @@ runTestBuilderArgs = builder Testsuite ? do
             , arg "-e", arg $ "config.platform=" ++ show platform
             , arg "-e", arg $ "config.stage="    ++ show (stageNumber (C.stage ctx))
 
+            , emitWhenSet perfPathM $ \perf -> arg "--config" <> arg ("perf_path=" ++ perf)
             , arg "--config", arg $ "gs=gs"                           -- Use the default value as in test.mk
             , arg "--config", arg $ "timeout_prog=" ++ show (top -/- timeoutProg)
             , arg "--config", arg $ "stats_files_dir=" ++ statsFilesDir


=====================================
testsuite/driver/perf_notes.py
=====================================
@@ -128,6 +128,41 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
                                 ('opts', Dict[str, str])
                                 ])
 
+class MetricAcceptanceWindow:
+    """
+    A strategy for computing an acceptance window for a metric measurement
+    given a baseline value.
+    """
+    def get_bounds(self, baseline: float) -> Tuple[float, float]:
+        raise NotImplemented
+    def describe(self) -> str:
+        raise NotImplemented
+
+class AlwaysAccept(MetricAcceptanceWindow):
+    def get_bounds(self, baseline: float) -> Tuple[float, float]:
+        return (-1/0, +1/0)
+
+    def describe(self) -> str:
+        raise NotImplemented
+
+class RelativeMetricAcceptanceWindow(MetricAcceptanceWindow):
+    """
+    A MetricAcceptanceWindow which accepts measurements within tol-percent of
+    the baseline.
+    """
+    def __init__(self, tol: float):
+        """ Accept any metric within tol-percent of the baseline """
+        self.tol = tol / 100
+
+    def get_bounds(self, baseline: float) -> Tuple[float, float]:
+        lowerBound = trunc(           int(baseline) * ((100 - float(self.tol))/100))
+        upperBound = trunc(0.5 + ceil(int(baseline) * ((100 + float(self.tol))/100)))
+
+        return (lowerBound, upperBound)
+
+    def describe(self) -> str:
+        return '+/- %1.1f%%' % (100*self.tol)
+
 def parse_perf_stat(stat_str: str) -> PerfStat:
     field_vals = stat_str.strip('\t').split('\t')
     stat = PerfStat(*field_vals) # type: ignore
@@ -558,26 +593,32 @@ def get_commit_metric(gitNoteRef,
     _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
     return baseline_by_cache_key_b.get(cacheKeyB)
 
-# Check test stats. This prints the results for the user.
-# actual: the PerfStat with actual value.
-# baseline: the expected Baseline value (this should generally be derived from baseline_metric())
-# tolerance_dev: allowed deviation of the actual value from the expected value.
-# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
-# force_print: Print stats even if the test stat was in the tolerance range.
-# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
 def check_stats_change(actual: PerfStat,
                        baseline: Baseline,
-                       tolerance_dev,
+                       acceptance_window: MetricAcceptanceWindow,
                        allowed_perf_changes: Dict[TestName, List[AllowedPerfChange]] = {},
                        force_print = False
                        ) -> Tuple[MetricChange, Any]:
+    """
+    Check test stats. This prints the results for the user.
+
+    Parameters:
+    actual: the PerfStat with actual value
+    baseline: the expected Baseline value (this should generally be derived
+        from baseline_metric())
+    acceptance_window: allowed deviation of the actual value from the expected
+        value.
+    allowed_perf_changes: allowed changes in stats. This is a dictionary as
+        returned by get_allowed_perf_changes().
+    force_print: Print stats even if the test stat was in the tolerance range.
+
+    Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
+    """
     expected_val = baseline.perfStat.value
     full_name = actual.test + ' (' + actual.way + ')'
 
-    lowerBound = trunc(           int(expected_val) * ((100 - float(tolerance_dev))/100))
-    upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))
 
-    actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)
+    lowerBound, upperBound = acceptance_window.get_bounds(expected_val)
 
     # Find the direction of change.
     change = MetricChange.NoChange
@@ -613,11 +654,12 @@ def check_stats_change(actual: PerfStat,
         def display(descr, val, extra):
             print(descr, str(val).rjust(length), extra)
 
-        display('    Expected    ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
+        display('    Expected    ' + full_name + ' ' + actual.metric + ':', expected_val, acceptance_window.describe())
         display('    Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
         display('    Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
         display('    Actual      ' + full_name + ' ' + actual.metric + ':', actual.value, '')
         if actual.value != expected_val:
+            actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)
             display('    Deviation   ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')
 
     return (change, result)


=====================================
testsuite/driver/testglobals.py
=====================================
@@ -49,6 +49,9 @@ class TestConfig:
         # Path to Ghostscript
         self.gs = None # type: Optional[Path]
 
+        # Path to Linux `perf` tool
+        self.perf_path = None # type: Optional[Path]
+
         # Run tests requiring Haddock
         self.haddock = False
 
@@ -469,6 +472,9 @@ class TestOptions:
        # The extra hadrian dependencies we need for this particular test
        self.hadrian_deps = set(["test:ghc"]) # type: Set[str]
 
+       # Record these `perf-events` counters when compiling this test, if `perf` is available
+       self.compiler_perf_counters = [] # type: List[str]
+
    @property
    def testdir(self) -> Path:
        if self.testdir_raw is None:


=====================================
testsuite/driver/testlib.py
=====================================
@@ -3,6 +3,7 @@
 # (c) Simon Marlow 2002
 #
 
+import csv
 import io
 import shutil
 import os
@@ -23,12 +24,13 @@ from testglobals import config, ghc_env, default_testopts, brokens, t, \
                         TestRun, TestResult, TestOptions, PerfMetric
 from testutil import strip_quotes, lndir, link_or_copy_file, passed, \
                      failBecause, testing_metrics, residency_testing_metrics, \
+                     stable_perf_counters, \
                      PassFail, badResult, memoize
 from term_color import Color, colored
 import testutil
 from cpu_features import have_cpu_feature
 import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, StatsException
+from perf_notes import MetricChange, PerfStat, StatsException, AlwaysAccept, RelativeMetricAcceptanceWindow
 extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
 
 from my_typing import *
@@ -755,9 +757,14 @@ def find_so(lib):
 def find_non_inplace_so(lib):
     return _find_so(lib,path_from_ghcPkg(lib, "dynamic-library-dirs"),False)
 
-# Define a generic stat test, which computes the statistic by calling the function
-# given as the third argument.
-def collect_generic_stat ( metric, deviation, get_stat ):
+
+def collect_generic_stat ( metric, deviation: Optional[int], get_stat: Callable[[WayName], str]):
+    """
+    Define a generic stat test, which computes the statistic by calling the function
+    given as the third argument.
+
+    If no deviation is given, the test cannot fail, but the metric will be recorded nevertheless.
+    """
     return collect_generic_stats ( { metric: { 'deviation': deviation, 'current': get_stat } } )
 
 def _collect_generic_stat(name : TestName, opts, metric_infos):
@@ -804,16 +811,27 @@ def collect_stats(metric='all', deviation=20, static_stats_file=None):
 def statsFile(comp_test: bool, name: str) -> str:
     return name + ('.comp' if comp_test else '') + '.stats'
 
+def perfStatsFile(comp_test: bool, name: str) -> str:
+    return name + ('.comp' if comp_test else '') + '.perf.csv'
+
 # This is an internal function that is used only in the implementation.
 # 'is_compiler_stats_test' is somewhat of an unfortunate name.
 # If the boolean is set to true, it indicates that this test is one that
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False):
+def _collect_stats(name: TestName, opts, metrics, deviation: Optional[int],
+                    static_stats_file: Optional[Union[Path,str]],
+                    is_compiler_stats_test: bool = False, is_compiler_perf_test: bool = False) -> None:
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
+    if is_compiler_perf_test and config.perf_path is None:
+        # If we are doing a 'perf' run but no 'perf' is configured,
+        # don't try to read the results.
+        # This is a bit weird, though.
+        return
+
     # Normalize metrics to a list of strings.
     if isinstance(metrics, str):
         if metrics == 'all':
@@ -868,11 +886,47 @@ def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file,
             assert val is not None
             return int(val)
 
+    # How to read the result of the performance test
+    def read_perf_stats_file(way, metric_name):
+        FIELDS = ['value','unit','event','runtime','percent']
+        # Confusingly compile time ghci tests are actually runtime tests, so we have
+        # to go and look for the name.stats file rather than name.comp.stats file.
+        compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+
+        perf_stats_file = Path(in_testdir(perfStatsFile(compiler_stats_test, name)))
+        perf_metrics = {}
+        try:
+            perf_csv_lines = perf_stats_file.read_text().splitlines()
+            # Output looks like:
+            # """
+            # # Started on <date>
+            #
+            # <value>,<unit>,<event>,<runtime>,<percent>,...
+            # """
+            #
+            # Ignore empty lines and lines starting with '#'
+            perf_csv = [l for l in perf_csv_lines if l and not l.startswith('#')]
+
+            perf_stats_csv_reader = csv.DictReader(perf_csv, fieldnames=FIELDS, delimiter=";", quotechar="\"")
+            for fields in perf_stats_csv_reader:
+                perf_metrics[fields['event']] = float(fields['value'])
+
+        except IOError as e:
+            raise StatsException(str(e))
+
+        val = perf_metrics[metric_name]
+        if val is None:
+            print('Failed to find metric: ', metric_name)
+            raise StatsException("No such metric")
+        else:
+            assert val is not None
+            return int(val)
 
     collect_stat = {}
     for metric_name in metrics:
         def action_generator(mn):
-            return lambda way: read_stats_file(way, mn)
+            read_stats = read_perf_stats_file if is_compiler_perf_test else read_stats_file
+            return lambda way: read_stats(way, mn)
         metric = '{}/{}'.format(tag, metric_name)
         collect_stat[metric] = { "deviation": deviation
                                 , "current": action_generator(metric_name) }
@@ -1007,7 +1061,32 @@ def have_thread_sanitizer( ) -> bool:
 def gcc_as_cmmp() -> bool:
     return config.cmm_cpp_is_gcc
 
-# ---
+# -----
+
+def collect_compiler_perf(deviation: Optional[int] = None):
+    """
+    Record stable performance counters using `perf stat` when available.
+    """
+    return [
+        _collect_compiler_perf_counters(stable_perf_counters(), deviation)
+    ]
+
+def collect_compiler_perf_counters(counters: List[str], deviation: Optional[int] = None):
+    """
+    Record the given event counters using `perf stat` when available.
+    """
+    return [
+        _collect_compiler_perf_counters(set(counters), deviation)
+    ]
+
+def _collect_compiler_perf_counters(counters: Set[str], deviation: Optional[int] = None):
+    def f(name, opts):
+        # Slightly hacky, we need the requested perf_counters in 'simple_run'.
+        # Thus, we have to globally register these counters
+        opts.compiler_perf_counters += list(counters)
+        _collect_stats(name, opts, counters, deviation, False, True, True)
+    return f
+
 
 # Note [Measuring residency]
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1060,6 +1139,12 @@ def collect_compiler_residency(tolerance_pct: float):
         collect_compiler_stats(residency_testing_metrics(), tolerance_pct)
     ]
 
+def collect_compiler_runtime(tolerance_pct: float):
+    return [
+        collect_compiler_stats('bytes allocated', tolerance_pct),
+        _collect_compiler_perf_counters(stable_perf_counters())
+    ]
+
 # ---
 
 def high_memory_usage(name, opts):
@@ -1602,7 +1687,7 @@ async def do_test(name: TestName,
                            stdout = stdout_path,
                            stderr = stderr_path,
                            print_output = config.verbose >= 3,
-                           timeout_multiplier = opts.pre_cmd_timeout_multiplier,
+                           timeout_multiplier = opts.pre_cmd_timeout_multiplier
                            )
 
         # If user used expect_broken then don't record failures of pre_cmd
@@ -1792,6 +1877,8 @@ async def do_compile(name: TestName,
     result = await extras_build( way, extra_mods, extra_hc_opts )
     if badResult(result):
        return result
+
+    assert result.hc_opts is not None
     extra_hc_opts = result.hc_opts
 
     result = await simple_build(name, way, extra_hc_opts, should_fail, top_mod, units, should_link, True, **kwargs)
@@ -1917,6 +2004,7 @@ async def compile_and_run__(name: TestName,
     result = await extras_build( way, extra_mods, extra_hc_opts )
     if badResult(result):
        return result
+    assert result.hc_opts is not None
     extra_hc_opts = result.hc_opts
     assert extra_hc_opts is not None
 
@@ -2010,10 +2098,15 @@ def report_stats(name, way, metric, gen_stat):
         metric_result = passed()
         perf_change = MetricChange.NewMetric
     else:
+        deviation = gen_stat["deviation"]
+        if deviation:
+            tolerance_metric = RelativeMetricAcceptanceWindow(deviation)
+        else:
+            tolerance_metric = AlwaysAccept()
         (perf_change, metric_result) = Perf.check_stats_change(
             perf_stat,
             baseline,
-            gen_stat["deviation"],
+            tolerance_metric,
             config.allowed_perf_changes,
             config.verbose >= 4)
 
@@ -2034,9 +2127,14 @@ def report_stats(name, way, metric, gen_stat):
 # -----------------------------------------------------------------------------
 # Build a single-module program
 
-async def extras_build( way, extra_mods, extra_hc_opts ):
+async def extras_build(way: WayName, extra_mods, extra_hc_opts) -> PassFail:
     for mod, opts in extra_mods:
-        result = await simple_build(mod, way, opts + ' ' + extra_hc_opts, False, None, [], False, False)
+        result = await simple_build(mod, way, opts + ' ' + extra_hc_opts,
+                should_fail=False,
+                top_mod=None,
+                units=[],
+                link=False,
+                addsuf=False)
         if not (mod.endswith('.hs') or mod.endswith('.lhs')):
             extra_hc_opts += ' %s' % Path(mod).with_suffix('.o')
         if badResult(result):
@@ -2118,14 +2216,22 @@ async def simple_build(name: Union[TestName, str],
 
     flags = ' '.join(get_compiler_flags() + config.way_flags[way])
 
-    cmd = ('cd "{opts.testdir}" && {cmd_prefix} '
+    cmd = ('{cmd_prefix} '
            '{{compiler}} {to_do} {srcname} {flags} {extra_hc_opts}'
           ).format(**locals())
 
     if filter_with != '':
         cmd = cmd + ' | ' + filter_with
 
-    exit_code = await runCmd(cmd, None, stdout, stderr, opts.compile_timeout_multiplier)
+    output_file = perfStatsFile(True, name)
+
+    exit_code = await runCmdPerf(
+            opts.compiler_perf_counters,
+            cmd,
+            output_file,
+            working_dir=opts.testdir,
+            stdin=None, stdout=stdout, stderr=stderr,
+            timeout_multiplier=opts.compile_timeout_multiplier)
 
     actual_stderr_path = in_testdir(name, 'comp.stderr')
 
@@ -2145,7 +2251,6 @@ async def simple_build(name: Union[TestName, str],
             stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
             return failBecause('exit code non-0', stderr=stderr_contents)
 
-
     return passed()
 
 # -----------------------------------------------------------------------------
@@ -2197,10 +2302,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     if opts.cmd_wrapper is not None:
         cmd = opts.cmd_wrapper(cmd)
 
-    cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
+    output_file = perfStatsFile(False, name)
 
     # run the command
-    exit_code = await runCmd(cmd, stdin_arg, stdout_arg, stderr_arg, opts.run_timeout_multiplier)
+    exit_code = await runCmdPerf(opts.compiler_perf_counters, cmd, output_file,
+        working_dir=opts.testdir,
+        stdin=stdin_arg, stdout=stdout_arg, stderr=stderr_arg,
+        timeout_multiplier=opts.run_timeout_multiplier)
 
     # check the exit code
     if exit_code != opts.exit_code:
@@ -2298,7 +2406,7 @@ async def interpreter_run(name: TestName,
 
     cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
 
-    exit_code = await runCmd(cmd, script, stdout, stderr, opts.run_timeout_multiplier)
+    exit_code = await runCmd(cmd, script, stdout, stderr, timeout_multiplier=opts.run_timeout_multiplier)
 
     # split the stdout into compilation/program output
     split_file(stdout, delimiter,
@@ -2956,12 +3064,56 @@ def dump_file(f: Path):
     except Exception:
         print('')
 
+# -----------------------------------------------------------------------------
+# Run a program in the interpreter and check its output
+
+async def runCmdPerf(
+        perf_counters: List[str],
+        cmd: str,
+        output_file: str,
+        working_dir: Optional[Path] = None,
+        **kwargs)  -> int:
+    """
+    Run a command under `perf stat`, collecting the given counters.
+
+    Returns the exit code and a dictionary of the collected counter values.
+
+    If given a 'working_dir', we generate a command looking like:
+
+    .. code-block:: text
+
+        cd $working_dir && perf stat ... $cmd
+
+    This allows users to find the test directory by looking at the execution logs,
+    and allows us to write 'perf' output files in the test directory.
+    """
+    if len(perf_counters) == 0 or config.perf_path is None:
+        if working_dir:
+            cmd = f"cd \"{working_dir}\" && {cmd}"
+
+        exit_code = await runCmd(cmd, **kwargs)
+        return exit_code
+
+    perf_cmd_args: List[str] = [str(config.perf_path), 'stat', '-x\\;', '-o', output_file, '-e', ','.join(perf_counters), cmd]
+    cmd = ' '.join(perf_cmd_args)
+    if working_dir:
+        cmd = f"cd \"{working_dir}\" && {cmd}"
+
+    exit_code = await runCmd(cmd, **kwargs)
+    return exit_code
+
 async def runCmd(cmd: str,
            stdin: Union[None, Path]=None,
            stdout: Union[None, Path]=None,
            stderr: Union[None, int, Path]=None,
            timeout_multiplier=1.0,
            print_output=False) -> int:
+    """
+    Run a command enforcing a timeout and returning the exit code.
+
+    The process's working directory is changed to 'working_dir'.
+    """
+
     timeout_prog = strip_quotes(config.timeout_prog)
     timeout = str(int(ceil(config.timeout * timeout_multiplier)))
 
@@ -2983,7 +3135,12 @@ async def runCmd(cmd: str,
         # Hence it must ultimately be run by a Bourne shell. It's timeout's job
         # to invoke the Bourne shell
 
-        proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd, stdin=stdin_file, stdout=asyncio.subprocess.PIPE, stderr=hStdErr, env=ghc_env)
+        proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd,
+                                                   stdin=stdin_file,
+                                                   stdout=asyncio.subprocess.PIPE,
+                                                   stderr=hStdErr,
+                                                   env=ghc_env
+                                                   )
 
         stdout_buffer, stderr_buffer = await proc.communicate()
     finally:


=====================================
testsuite/driver/testutil.py
=====================================
@@ -77,6 +77,11 @@ def lndir(srcdir: Path, dstdir: Path, force_copy=False):
 def testing_metrics():
     return { 'bytes allocated', 'peak_megabytes_allocated', 'max_bytes_used' }
 
+# All performance counters we consider to be stable enough in CI to
+# test for.
+def stable_perf_counters():
+    return { 'instructions:u' }
+
 # Metrics which are testing residency information
 def residency_testing_metrics():
     return { 'peak_megabytes_allocated', 'max_bytes_used' }


=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -8,7 +8,7 @@ test('T1969',
       extra_run_opts('+RTS -A64k -RTS'),
           # The default RESIDENCY_OPTS is 256k and we need higher sampling
           # frequency. Incurs a slow-down by about 2.
-      collect_compiler_stats('bytes allocated', 1),
+      collect_compiler_runtime(1),
       only_ways(['normal']),
 
       extra_hc_opts('-dcore-lint -static'),
@@ -32,14 +32,14 @@ else:
 
 test('T3294',
      [collect_compiler_residency(15),
-      collect_compiler_stats('bytes allocated', 1),
+      collect_compiler_runtime(1),
       conf_3294,
       ],
      compile,
      [''])
 
 test('T4801',
-     [collect_compiler_stats('bytes allocated',2),
+     [collect_compiler_runtime(2),
       only_ways(['normal']),
       extra_hc_opts('-static'),
       when(arch('wasm32') and unregisterised(), fragile(23290))
@@ -49,7 +49,7 @@ test('T4801',
 
 test('T3064',
      [collect_compiler_residency(20),
-      collect_compiler_stats('bytes allocated',2),
+      collect_compiler_runtime(2),
       only_ways(['normal']),
       ],
      compile,
@@ -59,7 +59,7 @@ test('T3064',
 test('T4007', normal, makefile_test, ['T4007'])
 
 test('T5030',
-     [collect_compiler_stats('bytes allocated', 2),
+     [collect_compiler_runtime(2),
 
        only_ways(['normal'])
       ],
@@ -67,14 +67,14 @@ test('T5030',
      ['-freduction-depth=300'])
 
 test('T5631',
-     [collect_compiler_stats('bytes allocated',2),
+     [collect_compiler_runtime(2),
       only_ways(['normal'])
       ],
      compile,
      [''])
 
 test('parsing001',
-     [collect_compiler_stats('bytes allocated',2),
+     [collect_compiler_runtime(2),
        only_ways(['normal']),
       ],
      compile_fail, [''])
@@ -82,27 +82,27 @@ test('parsing001',
 
 test('T783',
      [ only_ways(['normal']),  # no optimisation for this one
-      collect_compiler_stats('bytes allocated',2),
+      collect_compiler_runtime(2),
       extra_hc_opts('-static')
       ],
       compile,[''])
 
 test('T5321Fun',
      [ only_ways(['normal']),  # no optimisation for this one
-       collect_compiler_stats('bytes allocated',2)
+       collect_compiler_runtime(2),
       ],
       compile,[''])
 
 test('T5321FD',
      [ only_ways(['normal']),  # no optimisation for this one
-      collect_compiler_stats('bytes allocated',2)
+      collect_compiler_runtime(2),
       ],
       compile,[''])
 
 test('T5642',
      [ only_ways(['normal']),
        normal,
-       collect_compiler_stats('bytes allocated',2)
+       collect_compiler_runtime(2),
       ],
       compile,['-O'])
 
@@ -114,7 +114,7 @@ test('T5837',
 
 test('T6048',
      [ only_ways(['optasm']),
-      collect_compiler_stats('bytes allocated',2)
+      collect_compiler_runtime(2),
       ],
       compile,[''])
 
@@ -134,7 +134,7 @@ test('T9675',
 
 test('T9872a',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1),
+       collect_compiler_runtime(1),
        high_memory_usage
       ],
      compile_fail,
@@ -142,28 +142,28 @@ test('T9872a',
 
 test('T9872b',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1),
+       collect_compiler_runtime(1),
        high_memory_usage
       ],
      compile_fail,
      [''])
 test('T9872b_defer',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1),
+       collect_compiler_runtime(1),
        high_memory_usage
       ],
      compile,
      ['-fdefer-type-errors'])
 test('T9872c',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1),
+       collect_compiler_runtime(1),
        high_memory_usage
       ],
      compile_fail,
      [''])
 test('T9872d',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1)
+       collect_compiler_runtime(1)
       ],
      compile,
      [''])
@@ -227,14 +227,14 @@ test ('LargeRecord',
 
 test('T9961',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1)
+       collect_compiler_runtime(1)
       ],
      compile,
      ['-O'])
 
 test('T9233',
     [ only_ways(['normal']),
-      collect_compiler_stats('bytes allocated', 1)
+      collect_compiler_runtime(1)
     ],
     multimod_compile,
     ['T9233', '-v0 -O2 -fno-spec-constr'])
@@ -249,14 +249,14 @@ test('T10370',
 test('T11068', normal, makefile_test, ['T11068'])
 
 test('T10547',
-     [ collect_compiler_stats('bytes allocated', 4),
+     [ collect_compiler_runtime(4),
      ],
      compile_fail,
      ['-fprint-expanded-synonyms'])
 
 test('T12227',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1)
+       collect_compiler_runtime(1)
      ],
      compile,
      # Use `-M1G` to prevent memory thrashing with ghc-8.0.1.
@@ -264,14 +264,14 @@ test('T12227',
 
 test('T12425',
      [ only_ways(['optasm']),
-       collect_compiler_stats('bytes allocated', 1)
+       collect_compiler_runtime(1)
      ],
      compile,
      [''])
 
 test('T12234',
      [ only_ways(['optasm']),
-       collect_compiler_stats('bytes allocated', 2),
+       collect_compiler_runtime(2),
      ],
      compile,
      [''])
@@ -279,14 +279,14 @@ test('T12234',
 # See Note [Sensitivity to unique increment] in T12545.hs; spread was 4.8%
 test('T12545',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 10), #
+       collect_compiler_runtime(10), #
      ],
      multimod_compile,
      ['T12545', '-v0'] )
 
 test('T13035',
      [ only_ways(['normal']),
-       collect_compiler_stats('bytes allocated', 1),
+       collect_compiler_runtime(1),
      ],
      compile,
      [''] )
@@ -299,7 +299,7 @@ test('T13056',
      ['-O1'])
 
 test('T12707',
-     [ collect_compiler_stats('bytes allocated', 1),
+     [ collect_compiler_runtime(1),
      ],
      compile,
      [''])
@@ -311,7 +311,7 @@ test('T12707',
 # to avoid spurious errors.
 test('T12150',
      [ only_ways(['optasm']),
-       collect_compiler_stats('bytes allocated', 2)
+       collect_compiler_runtime(2)
      ],
     compile,
      [''])
@@ -483,7 +483,7 @@ test('MultiLayerModulesNoCode',
      ['MultiLayerModulesNoCode.script'])
 
 test('MultiComponentModulesRecomp',
-     [ collect_compiler_stats('bytes allocated', 2),
+     [ collect_compiler_runtime(2),
        pre_cmd('$MAKE -s --no-print-directory MultiComponentModulesRecomp'),
        extra_files(['genMultiComp.py']),
        compile_timeout_multiplier(5)
@@ -492,7 +492,7 @@ test('MultiComponentModulesRecomp',
      [['unitp%d' % n for n in range(20)], '-fno-code -fwrite-interface -v0'])
 
 test('MultiComponentModules',
-     [ collect_compiler_stats('bytes allocated', 2),
+     [ collect_compiler_runtime(2),
        pre_cmd('$PYTHON ./genMultiComp.py'),
        extra_files(['genMultiComp.py']),
        compile_timeout_multiplier(5)
@@ -565,7 +565,7 @@ test('T14683',
 
 test ('T9630',
       [ collect_compiler_residency(15),
-        collect_compiler_stats('bytes allocated', 2),
+        collect_compiler_runtime(2),
       ],
       multimod_compile,
       ['T9630', '-v0 -O'])
@@ -611,7 +611,7 @@ test ('T16473',
       ['-O2 -flate-specialise'])
 
 test('T17516',
-      [ collect_compiler_stats('bytes allocated', 5),
+      [ collect_compiler_runtime(5),
       ],
       multimod_compile,
       ['T17516', '-O -v0'])
@@ -635,13 +635,13 @@ test ('T18140',
       ['-v0 -O'])
 test('T10421',
     [ only_ways(['normal']),
-      collect_compiler_stats('bytes allocated', 1)
+      collect_compiler_runtime(1)
     ],
     multimod_compile,
     ['T10421', '-v0 -O'])
 test('T10421a',
     [ only_ways(['normal']),
-      collect_compiler_stats('bytes allocated', 10)
+      collect_compiler_runtime(10)
     ],
     multimod_compile,
     ['T10421a', '-v0 -O'])
@@ -700,13 +700,13 @@ test ('T19695',
       ['-v0 -O2'])
 
 test('hard_hole_fits', # Testing multiple hole-fits with lots in scope for #16875
-     collect_compiler_stats('bytes allocated', 2), # 1 is 300s, 0.010 is 3s. Without  hole-fits it takes 1s
+     collect_compiler_runtime(2), # 1 is 300s, 0.010 is 3s. Without  hole-fits it takes 1s
      compile, ['-fdefer-type-errors -fno-max-valid-hole-fits -package ghc'])
 
 test('T16875',  # Testing one hole-fit with a lot in scope for #16875
      # This test is very sensitive to environmental differences.. we should fix
      # that but for now the failure threshold is 4% (see #21557)
-     collect_compiler_stats('bytes allocated', 4),
+     collect_compiler_runtime(4),
      compile, ['-fdefer-type-errors -fno-max-valid-hole-fits -package ghc'])
 
 test ('T20261',
@@ -720,7 +720,7 @@ test ('T20261',
 # a compile-time and a run-time performance test
 test('T21839c',
     [   collect_compiler_stats('all', 10),
-        collect_compiler_stats('bytes allocated', 1),
+        collect_compiler_runtime(1),
         only_ways(['normal'])],
     compile,
     ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d19f62bba1af2784bb04c44436ae8ee5bcbf1690...d9d6f7d181ef73996740a43f95ae772a56cdd8fa

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d19f62bba1af2784bb04c44436ae8ee5bcbf1690...d9d6f7d181ef73996740a43f95ae772a56cdd8fa
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240531/1e3a7aa8/attachment-0001.html>