[Git][ghc/ghc][wip/perf-ci] 3 commits: testsuite: Add support to capture performance metrics via 'perf'

Fri May 17 11:38:43 UTC 2024


Hannes Siebenhandl pushed to branch wip/perf-ci at Glasgow Haskell Compiler / GHC


Commits:
517ec2d7 by Fendor at 2024-05-17T13:38:29+02:00
testsuite: Add support to capture performance metrics via 'perf'

Performance metrics collected via 'perf' can be more accurate for
run-time performance than GHC's rts, due to the usage of hardware
counters.

We allow performance tests to also record PMU events according to 'perf
list'.

- - - - -
de906ec1 by Fendor at 2024-05-17T13:38:29+02:00
hadrian: Pass 'perf' program to the test suite if it can be found

Currently, we only look for 'perf' on '$PATH' with no way of
customisation. This might change in the future.

- - - - -
61c48bb3 by Fendor at 2024-05-17T13:38:29+02:00
gitlab-ci: Hack in 'perf' counters support

- - - - -


5 changed files:

- .gitlab/generate-ci/gen_ci.hs
- hadrian/src/Settings/Builders/RunTest.hs
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/tests/perf/compiler/all.T


Changes:

=====================================
.gitlab/generate-ci/gen_ci.hs
=====================================
@@ -153,6 +153,7 @@ data BuildConfig
                 , threadSanitiser :: Bool
                 , noSplitSections :: Bool
                 , validateNonmovingGc :: Bool
+                , testsuiteUsePerf :: Bool
                 }
 
 -- Extra arguments to pass to ./configure due to the BuildConfig
@@ -211,6 +212,7 @@ vanilla = BuildConfig
   , threadSanitiser = False
   , noSplitSections = False
   , validateNonmovingGc = False
+  , testsuiteUsePerf = False
   }
 
 splitSectionsBroken :: BuildConfig -> BuildConfig
@@ -761,6 +763,7 @@ job arch opsys buildConfig = NamedJob { name = jobName, jobInfo = Job {..} }
                 | validateNonmovingGc buildConfig
                 ]
         in "RUNTEST_ARGS" =: unwords runtestArgs
+      , if testsuiteUsePerf buildConfig then "RUNTEST_ARGS" =: "--config perf_path=perf" else mempty
       ]
 
     jobArtifacts = Artifacts


=====================================
hadrian/src/Settings/Builders/RunTest.hs
=====================================
@@ -245,6 +245,9 @@ runTestBuilderArgs = builder Testsuite ? do
     let asBool :: String -> Bool -> String
         asBool s b = s ++ show b
 
+
+    perfPathM <- expr $ liftIO (findExecutable "perf")
+
     -- TODO: set CABAL_MINIMAL_BUILD/CABAL_PLUGIN_BUILD
     mconcat [ arg "-Wdefault"  -- see #22727
             , arg $ "testsuite/driver/runtests.py"
@@ -293,6 +296,7 @@ runTestBuilderArgs = builder Testsuite ? do
             , arg "-e", arg $ "config.platform=" ++ show platform
             , arg "-e", arg $ "config.stage="    ++ show (stageNumber (C.stage ctx))
 
+            , emitWhenSet perfPathM $ \perf -> arg "--config" <> arg ("perf_path=" ++ perf)
             , arg "--config", arg $ "gs=gs"                           -- Use the default value as in test.mk
             , arg "--config", arg $ "timeout_prog=" ++ show (top -/- timeoutProg)
             , arg "--config", arg $ "stats_files_dir=" ++ statsFilesDir


=====================================
testsuite/driver/testglobals.py
=====================================
@@ -49,6 +49,9 @@ class TestConfig:
         # Path to Ghostscript
         self.gs = None # type: Optional[Path]
 
+        # Path to Linux `perf` tool
+        self.perf_path = None # type: Optional[Path]
+
         # Run tests requiring Haddock
         self.haddock = False
 
@@ -469,6 +472,9 @@ class TestOptions:
        # The extra hadrian dependencies we need for this particular test
        self.hadrian_deps = set(["test:ghc"]) # type: Set[str]
 
+       # Record these `perf-events` counters when compiling this test, if `perf` is available
+       self.compiler_perf_counters = [] # type: List[str]
+
    @property
    def testdir(self) -> Path:
        if self.testdir_raw is None:


=====================================
testsuite/driver/testlib.py
=====================================
@@ -3,6 +3,7 @@
 # (c) Simon Marlow 2002
 #
 
+import csv
 import io
 import shutil
 import os
@@ -748,7 +749,7 @@ def find_so(lib):
 def find_non_inplace_so(lib):
     return _find_so(lib,path_from_ghcPkg(lib, "dynamic-library-dirs"),False)
 
-# Define a generic stat test, which computes the statistic by calling the function
+# Define the a generic stat test, which computes the statistic by calling the function
 # given as the third argument.
 def collect_generic_stat ( metric, deviation, get_stat ):
     return collect_generic_stats ( { metric: { 'deviation': deviation, 'current': get_stat } } )
@@ -797,16 +798,25 @@ def collect_stats(metric='all', deviation=20, static_stats_file=None):
 def statsFile(comp_test: bool, name: str) -> str:
     return name + ('.comp' if comp_test else '') + '.stats'
 
+def perfStatsFile(comp_test: bool, name: str) -> str:
+    return name + ('.comp' if comp_test else '') + '.perf.csv'
+
 # This is an internal function that is used only in the implementation.
 # 'is_compiler_stats_test' is somewhat of an unfortunate name.
 # If the boolean is set to true, it indicates that this test is one that
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False):
+def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False, is_compiler_perf_test=False):
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
+    if is_compiler_perf_test and config.perf_path is None:
+        # If we are doing a 'perf' run but no 'perf' is configured,
+        # don't try to read the results.
+        # This is a bit weird, though.
+        return
+
     # Normalize metrics to a list of strings.
     if isinstance(metrics, str):
         if metrics == 'all':
@@ -861,11 +871,47 @@ def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file,
             assert val is not None
             return int(val)
 
+    # How to read the result of the performance test
+    def read_perf_stats_file(way, metric_name):
+        FIELDS = ['value','unit','event','runtime','percent']
+        # Confusingly compile time ghci tests are actually runtime tests, so we have
+        # to go and look for the name.stats file rather than name.comp.stats file.
+        compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+
+        perf_stats_file = Path(in_testdir(perfStatsFile(compiler_stats_test, name)))
+        perf_metrics = {}
+        try:
+            perf_csv_lines = perf_stats_file.read_text().splitlines()
+            # Output looks like:
+            # """
+            # # Started on <date>
+            #
+            # <value>,<unit>,<event>,<runtime>,<percent>,...
+            # """
+            #
+            # Ignore empty lines and lines starting with '#'
+            perf_csv = [l for l in perf_csv_lines if l and not l.startswith('#')]
+
+            perf_stats_csv_reader = csv.DictReader(perf_csv, fieldnames=FIELDS, delimiter=";", quotechar="\"")
+            for fields in perf_stats_csv_reader:
+                perf_metrics[fields['event']] = float(fields['value'])
+
+        except IOError as e:
+            raise StatsException(str(e))
+
+        val = perf_metrics[metric_name]
+        if val is None:
+            print('Failed to find metric: ', metric_name)
+            raise StatsException("No such metric")
+        else:
+            assert val is not None
+            return int(val)
 
     collect_stat = {}
     for metric_name in metrics:
         def action_generator(mn):
-            return lambda way: read_stats_file(way, mn)
+            read_stats = read_perf_stats_file if is_compiler_perf_test else read_stats_file
+            return lambda way: read_stats(way, mn)
         metric = '{}/{}'.format(tag, metric_name)
         collect_stat[metric] = { "deviation": deviation
                                 , "current": action_generator(metric_name) }
@@ -1000,7 +1046,17 @@ def have_thread_sanitizer( ) -> bool:
 def gcc_as_cmmp() -> bool:
     return config.cmm_cpp_is_gcc
 
-# ---
+# -----
+
+def collect_compiler_perf_counters(counters: List[str], deviation: int = 20):
+    """
+    Record the given event counters using `perf stat` when available.
+    """
+    def f(name, opts):
+        opts.compiler_perf_counters += counters
+        _collect_stats(name, opts, set(counters), deviation, False, True, True)
+    return f
+
 
 # Note [Measuring residency]
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1595,7 +1651,7 @@ async def do_test(name: TestName,
                            stdout = stdout_path,
                            stderr = stderr_path,
                            print_output = config.verbose >= 3,
-                           timeout_multiplier = opts.pre_cmd_timeout_multiplier,
+                           timeout_multiplier = opts.pre_cmd_timeout_multiplier
                            )
 
         # If user used expect_broken then don't record failures of pre_cmd
@@ -2027,9 +2083,14 @@ def report_stats(name, way, metric, gen_stat):
 # -----------------------------------------------------------------------------
 # Build a single-module program
 
-async def extras_build( way, extra_mods, extra_hc_opts ):
+async def extras_build(way: WayName, extra_mods: List[str], extra_hc_opts: str) -> PassFail:
     for mod, opts in extra_mods:
-        result = await simple_build(mod, way, opts + ' ' + extra_hc_opts, False, None, [], False, False)
+        result = await simple_build(mod, way, opts + ' ' + extra_hc_opts,
+                should_fail=False,
+                top_mod=None,
+                units=[],
+                link=False,
+                addsuf=False)
         if not (mod.endswith('.hs') or mod.endswith('.lhs')):
             extra_hc_opts += ' %s' % Path(mod).with_suffix('.o')
         if badResult(result):
@@ -2056,7 +2117,9 @@ async def simple_build(name: Union[TestName, str],
     stdout = in_testdir(name, 'comp.stderr')
     stderr = subprocess.STDOUT if not suppress_stdout else None
 
-    if top_mod is not None:
+    if mode is not None:
+        to_do = mode
+    elif top_mod is not None:
         srcname = top_mod
     elif addsuf:
         if backpack:
@@ -2066,9 +2129,7 @@ async def simple_build(name: Union[TestName, str],
     else:
         srcname = Path(name)
 
-    if mode is not None:
-        to_do = mode
-    elif top_mod is not None:
+    if top_mod is not None:
         to_do = '--make '
         if link:
             to_do = to_do + '-o ' + name
@@ -2111,14 +2172,22 @@ async def simple_build(name: Union[TestName, str],
 
     flags = ' '.join(get_compiler_flags() + config.way_flags[way])
 
-    cmd = ('cd "{opts.testdir}" && {cmd_prefix} '
+    cmd = ('{cmd_prefix} '
            '{{compiler}} {to_do} {srcname} {flags} {extra_hc_opts}'
           ).format(**locals())
 
     if filter_with != '':
         cmd = cmd + ' | ' + filter_with
 
-    exit_code = await runCmd(cmd, None, stdout, stderr, opts.compile_timeout_multiplier)
+    output_file = perfStatsFile(True, name)
+
+    exit_code = await runCmdPerf(
+            opts.compiler_perf_counters,
+            cmd,
+            output_file,
+            working_dir=opts.testdir,
+            stdin=None, stdout=stdout, stderr=stderr,
+            timeout_multiplier=opts.compile_timeout_multiplier)
 
     actual_stderr_path = in_testdir(name, 'comp.stderr')
 
@@ -2138,7 +2207,6 @@ async def simple_build(name: Union[TestName, str],
             stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
             return failBecause('exit code non-0', stderr=stderr_contents)
 
-
     return passed()
 
 # -----------------------------------------------------------------------------
@@ -2190,10 +2258,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     if opts.cmd_wrapper is not None:
         cmd = opts.cmd_wrapper(cmd)
 
-    cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
+    output_file = perfStatsFile(False, name)
 
     # run the command
-    exit_code = await runCmd(cmd, stdin_arg, stdout_arg, stderr_arg, opts.run_timeout_multiplier)
+    exit_code = await runCmdPerf(opts.compiler_perf_counters, cmd, output_file,
+        working_dir=opts.testdir,
+        stdin=stdin_arg, stdout=stdout_arg, stderr=stderr_arg,
+        timeout_multiplier=opts.run_timeout_multiplier)
 
     # check the exit code
     if exit_code != opts.exit_code:
@@ -2291,7 +2362,7 @@ async def interpreter_run(name: TestName,
 
     cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
 
-    exit_code = await runCmd(cmd, script, stdout, stderr, opts.run_timeout_multiplier)
+    exit_code = await runCmd(cmd, script, stdout, stderr, timeout_multiplier=opts.run_timeout_multiplier, working_dir=opts.testdir)
 
     # split the stdout into compilation/program output
     split_file(stdout, delimiter,
@@ -2949,12 +3020,57 @@ def dump_file(f: Path):
     except Exception:
         print('')
 
+# -----------------------------------------------------------------------------
+# Run a program in the interpreter and check its output
+
+async def runCmdPerf(
+        perf_counters: List[str],
+        cmd: str,
+        output_file: str,
+        working_dir: Optional[Path] = None,
+        **kwargs)  -> int:
+    """
+    Run a command under `perf stat`, collecting the given counters.
+
+    Returns the exit code and a dictionary of the collected counter values.
+
+    If given a 'working_dir', we generate a command looking like:
+
+    .. code-block:: text
+
+        cd $working_dir && perf stat ... $cmd
+
+    This allows users to find the test directory by looking at the execution logs,
+    and allows us to write 'perf' output files in the test directory.
+    """
+    if len(perf_counters) == 0 or config.perf_path is None:
+        if working_dir:
+            cmd = f"cd \"{working_dir}\" && {cmd}"
+
+        exit_code = await runCmd(cmd, **kwargs)
+        return exit_code
+
+    perf_cmd_args: List[str] = [str(config.perf_path), 'stat', '-x\;', '-o', output_file, '-e', ','.join(perf_counters), cmd]
+    cmd = ' '.join(perf_cmd_args)
+    if working_dir:
+        cmd = f"cd \"{working_dir}\" && {cmd}"
+
+    exit_code = await runCmd(cmd, **kwargs)
+    return exit_code
+
 async def runCmd(cmd: str,
            stdin: Union[None, Path]=None,
            stdout: Union[None, Path]=None,
            stderr: Union[None, int, Path]=None,
            timeout_multiplier=1.0,
+           working_dir : Union[None, Path]=None,
            print_output=False) -> int:
+    """
+    Run a command enforcing a timeout and returning the exit code.
+
+    The process's working directory is changed to 'working_dir'.
+    """
+
     timeout_prog = strip_quotes(config.timeout_prog)
     timeout = str(int(ceil(config.timeout * timeout_multiplier)))
 
@@ -2976,7 +3092,13 @@ async def runCmd(cmd: str,
         # Hence it must ultimately be run by a Bourne shell. It's timeout's job
         # to invoke the Bourne shell
 
-        proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd, stdin=stdin_file, stdout=asyncio.subprocess.PIPE, stderr=hStdErr, env=ghc_env)
+        proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd,
+                                                   stdin=stdin_file,
+                                                   stdout=asyncio.subprocess.PIPE,
+                                                   stderr=hStdErr,
+                                                   env=ghc_env,
+                                                   cwd=working_dir
+                                                   )
 
         stdout_buffer, stderr_buffer = await proc.communicate()
     finally:


=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -5,6 +5,7 @@ setTestOpts(no_lint)
 test('T1969',
      [# expect_broken(12437),
       collect_compiler_residency(20),
+      collect_compiler_perf_counters(['instructions:u', 'cycles:u']),
       extra_run_opts('+RTS -A64k -RTS'),
           # The default RESIDENCY_OPTS is 256k and we need higher sampling
           # frequency. Incurs a slow-down by about 2.



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1b7b460ee775907213ecf349757e559b96968bc1...61c48bb309bfe76b6adad0cbd958ca0bf89d7ad4

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1b7b460ee775907213ecf349757e559b96968bc1...61c48bb309bfe76b6adad0cbd958ca0bf89d7ad4
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240517/27cbfa65/attachment-0001.html>