[Git][ghc/ghc][master] 3 commits: testsuite: Add support to capture performance metrics via 'perf'
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Thu Sep 5 00:22:33 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
a45f1488 by Fendor at 2024-09-04T20:22:00-04:00
testsuite: Add support to capture performance metrics via 'perf'
Performance metrics collected via 'perf' can be more accurate for
run-time performance than the figures reported by GHC's RTS, since they
are based on hardware counters.
Performance tests may now also record any PMU event listed by 'perf
list'.
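As a usage sketch (the test name, the extra 'cycles:u' event and the 5%
window are illustrative, not part of this change), a compiler
performance test can opt into PMU counters like so:

    # all.T sketch: record PMU events for a compiler performance test.
    # Event names are those printed by 'perf list'; the deviation is a
    # percentage window around the recorded baseline.
    test('MyTest',
         [ collect_compiler_perf_counters(['instructions:u', 'cycles:u'], 5),
           only_ways(['normal']) ],
         compile, [''])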
- - - - -
ce61fca5 by Fendor at 2024-09-04T20:22:00-04:00
gitlab-ci: Add nightly job for running the testsuite with perf profiling support
- - - - -
6dfb9471 by Fendor at 2024-09-04T20:22:00-04:00
Enable perf profiling for compiler performance tests
- - - - -
7 changed files:
- .gitlab/generate-ci/gen_ci.hs
- .gitlab/jobs.yaml
- testsuite/driver/perf_notes.py
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/driver/testutil.py
- testsuite/tests/perf/compiler/all.T
Changes:
=====================================
.gitlab/generate-ci/gen_ci.hs
=====================================
@@ -155,6 +155,7 @@ data BuildConfig
, noSplitSections :: Bool
, validateNonmovingGc :: Bool
, textWithSIMDUTF :: Bool
+ , testsuiteUsePerf :: Bool
}
-- Extra arguments to pass to ./configure due to the BuildConfig
@@ -216,6 +217,7 @@ vanilla = BuildConfig
, noSplitSections = False
, validateNonmovingGc = False
, textWithSIMDUTF = False
+ , testsuiteUsePerf = False
}
splitSectionsBroken :: BuildConfig -> BuildConfig
@@ -268,6 +270,9 @@ tsan = vanilla { threadSanitiser = True }
noTntc :: BuildConfig
noTntc = vanilla { tablesNextToCode = False }
+usePerfProfilingTestsuite :: BuildConfig -> BuildConfig
+usePerfProfilingTestsuite bc = bc { testsuiteUsePerf = True }
+
-----------------------------------------------------------------------------
-- Platform specific variables
-----------------------------------------------------------------------------
@@ -288,6 +293,9 @@ runnerTag _ _ = error "Invalid arch/opsys"
tags :: Arch -> Opsys -> BuildConfig -> [String]
tags arch opsys _bc = [runnerTag arch opsys] -- Tag for which runners we can use
+runnerPerfTag :: Arch -> Opsys -> String
+runnerPerfTag arch sys = runnerTag arch sys ++ "-perf"
+
-- These names are used to find the docker image so they have to match what is
-- in the docker registry.
distroName :: LinuxDistro -> String
@@ -775,6 +783,7 @@ job arch opsys buildConfig = NamedJob { name = jobName, jobInfo = Job {..} }
| validateNonmovingGc buildConfig
]
in "RUNTEST_ARGS" =: unwords runtestArgs
+ , if testsuiteUsePerf buildConfig then "RUNTEST_ARGS" =: "--config perf_path=perf" else mempty
]
jobArtifacts = Artifacts
@@ -897,6 +906,12 @@ highCompression = addVariable "XZ_OPT" "-9"
useHashUnitIds :: Job -> Job
useHashUnitIds = addVariable "HADRIAN_ARGS" "--hash-unit-ids"
+-- | Change the tag of the job to make sure the job is scheduled on a
+-- runner that has the necessary capabilities to run the job with 'perf'
+-- profiling counters.
+perfProfilingJobTag :: Arch -> Opsys -> Job -> Job
+perfProfilingJobTag arch opsys j = j { jobTags = [ runnerPerfTag arch opsys ] }
+
-- | Mark the validate job to run in fast-ci mode
-- This is default way, to enable all jobs you have to apply the `full-ci` label.
fastCI :: JobGroup Job -> JobGroup Job
@@ -1000,6 +1015,8 @@ debian_x86 =
, modifyNightlyJobs allowFailure (modifyValidateJobs (allowFailure . manual) tsan_jobs)
, -- Nightly allowed to fail: #22343
modifyNightlyJobs allowFailure (modifyValidateJobs manual (validateBuilds Amd64 (Linux validate_debian) noTntc))
+ -- Run the 'perf' profiling nightly job in the release config.
+ , perfProfilingJob Amd64 (Linux Debian12) releaseConfig
, onlyRule LLVMBackend (validateBuilds Amd64 (Linux validate_debian) llvm)
, addValidateRule TestPrimops (standardBuilds Amd64 (Linux validate_debian))
@@ -1010,6 +1027,12 @@ debian_x86 =
where
validate_debian = Debian12
+ perfProfilingJob arch sys buildConfig =
+ -- Rename the job to avoid conflicts
+ rename (<> "-perf")
+ $ modifyJobs (perfProfilingJobTag arch sys)
+ $ disableValidate (validateBuilds arch sys $ usePerfProfilingTestsuite buildConfig)
+
tsan_jobs =
modifyJobs
( addVariable "TSAN_OPTIONS" "suppressions=$CI_PROJECT_DIR/rts/.tsan-suppressions"
=====================================
.gitlab/jobs.yaml
=====================================
@@ -1791,6 +1791,69 @@
"XZ_OPT": "-9"
}
},
+ "nightly-x86_64-linux-deb12-release-perf": {
+ "after_script": [
+ ".gitlab/ci.sh save_cache",
+ ".gitlab/ci.sh save_test_output",
+ ".gitlab/ci.sh clean",
+ "cat ci_timings"
+ ],
+ "allow_failure": false,
+ "artifacts": {
+ "expire_in": "8 weeks",
+ "paths": [
+ "ghc-x86_64-linux-deb12-release.tar.xz",
+ "junit.xml",
+ "unexpected-test-output.tar.gz"
+ ],
+ "reports": {
+ "junit": "junit.xml"
+ },
+ "when": "always"
+ },
+ "cache": {
+ "key": "x86_64-linux-deb12-$CACHE_REV",
+ "paths": [
+ "cabal-cache",
+ "toolchain"
+ ]
+ },
+ "dependencies": [],
+ "image": "registry.gitlab.haskell.org/ghc/ci-images/x86_64-linux-deb12:$DOCKER_REV",
+ "needs": [
+ {
+ "artifacts": false,
+ "job": "hadrian-ghc-in-ghci"
+ }
+ ],
+ "rules": [
+ {
+ "if": "(\"true\" == \"true\") && ($RELEASE_JOB != \"yes\") && ($NIGHTLY)",
+ "when": "on_success"
+ }
+ ],
+ "script": [
+ "sudo chown ghc:ghc -R .",
+ ".gitlab/ci.sh setup",
+ ".gitlab/ci.sh configure",
+ ".gitlab/ci.sh build_hadrian",
+ ".gitlab/ci.sh test_hadrian"
+ ],
+ "stage": "full-build",
+ "tags": [
+ "x86_64-linux-perf"
+ ],
+ "variables": {
+ "BIGNUM_BACKEND": "gmp",
+ "BIN_DIST_NAME": "ghc-x86_64-linux-deb12-release",
+ "BUILD_FLAVOUR": "release",
+ "CONFIGURE_ARGS": "--enable-strict-ghc-toolchain-check",
+ "INSTALL_CONFIGURE_ARGS": "--enable-strict-ghc-toolchain-check",
+ "RUNTEST_ARGS": " --config perf_path=perf",
+ "TEST_ENV": "x86_64-linux-deb12-release",
+ "XZ_OPT": "-9"
+ }
+ },
"nightly-x86_64-linux-deb12-unreg-validate": {
"after_script": [
".gitlab/ci.sh save_cache",
=====================================
testsuite/driver/perf_notes.py
=====================================
@@ -128,6 +128,41 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
('opts', Dict[str, str])
])
+class MetricAcceptanceWindow:
+ """
+ A strategy for computing an acceptance window for a metric measurement
+ given a baseline value.
+ """
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ raise NotImplementedError
+ def describe(self) -> str:
+ raise NotImplementedError
+
+class AlwaysAccept(MetricAcceptanceWindow):
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ return (float('-inf'), float('inf'))
+
+ def describe(self) -> str:
+ return 'always accepted'
+
+class RelativeMetricAcceptanceWindow(MetricAcceptanceWindow):
+ """
+ A MetricAcceptanceWindow which accepts measurements within tol-percent of
+ the baseline.
+ """
+ def __init__(self, tol: float):
+ """ Accept any metric within tol-percent of the baseline """
+ self.__tol = tol
+
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ lowerBound = trunc( int(baseline) * ((100 - float(self.__tol))/100))
+ upperBound = trunc(0.5 + ceil(int(baseline) * ((100 + float(self.__tol))/100)))
+
+ return (lowerBound, upperBound)
+
+ def describe(self) -> str:
+ return '+/- %1.1f%%' % float(self.__tol)
+
def parse_perf_stat(stat_str: str) -> PerfStat:
field_vals = stat_str.strip('\t').split('\t')
stat = PerfStat(*field_vals) # type: ignore
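For intuition, a minimal sketch of how the acceptance windows above
behave (the baseline and tolerance values are illustrative):

    from perf_notes import AlwaysAccept, RelativeMetricAcceptanceWindow

    # A 1% window around a baseline of 1,000,000 counted events.
    window = RelativeMetricAcceptanceWindow(1)
    lo, hi = window.get_bounds(1000000)   # roughly (990000, 1010000)
    assert lo <= 1004000 <= hi            # a 0.4% increase is accepted
    # AlwaysAccept().get_bounds(...) returns infinite bounds: the metric
    # is recorded but can never fail the test.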
@@ -558,26 +593,32 @@ def get_commit_metric(gitNoteRef,
_commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
return baseline_by_cache_key_b.get(cacheKeyB)
-# Check test stats. This prints the results for the user.
-# actual: the PerfStat with actual value.
-# baseline: the expected Baseline value (this should generally be derived from baseline_metric())
-# tolerance_dev: allowed deviation of the actual value from the expected value.
-# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
-# force_print: Print stats even if the test stat was in the tolerance range.
-# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
def check_stats_change(actual: PerfStat,
baseline: Baseline,
- tolerance_dev,
+ acceptance_window: MetricAcceptanceWindow,
allowed_perf_changes: Dict[TestName, List[AllowedPerfChange]] = {},
force_print = False
) -> Tuple[MetricChange, Any]:
+ """
+ Check test stats. This prints the results for the user.
+
+ Parameters:
+ actual: the PerfStat with actual value
+ baseline: the expected Baseline value (this should generally be derived
+ from baseline_metric())
+ acceptance_window: the strategy used to decide how far the actual value
+ may deviate from the baseline.
+ allowed_perf_changes: allowed changes in stats. This is a dictionary as
+ returned by get_allowed_perf_changes().
+ force_print: Print stats even if the test stat was in the tolerance range.
+
+ Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
+ """
expected_val = baseline.perfStat.value
full_name = actual.test + ' (' + actual.way + ')'
- lowerBound = trunc( int(expected_val) * ((100 - float(tolerance_dev))/100))
- upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))
- actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)
+ lowerBound, upperBound = acceptance_window.get_bounds(expected_val)
# Find the direction of change.
change = MetricChange.NoChange
@@ -613,11 +654,12 @@ def check_stats_change(actual: PerfStat,
def display(descr, val, extra):
print(descr, str(val).rjust(length), extra)
- display(' Expected ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
+ display(' Expected ' + full_name + ' ' + actual.metric + ':', expected_val, acceptance_window.describe())
display(' Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
display(' Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
display(' Actual ' + full_name + ' ' + actual.metric + ':', actual.value, '')
if actual.value != expected_val:
+ actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)
display(' Deviation ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')
return (change, result)
=====================================
testsuite/driver/testglobals.py
=====================================
@@ -49,6 +49,9 @@ class TestConfig:
# Path to Ghostscript
self.gs = None # type: Optional[Path]
+ # Path to Linux `perf` tool
+ self.perf_path = None # type: Optional[Path]
+
# Run tests requiring Haddock
self.haddock = False
@@ -472,6 +475,9 @@ class TestOptions:
# The extra hadrian dependencies we need for this particular test
self.hadrian_deps = set(["test:ghc"]) # type: Set[str]
+ # Record these `perf` event counters when compiling this test, if `perf` is available
+ self.compiler_perf_counters = [] # type: List[str]
+
@property
def testdir(self) -> Path:
if self.testdir_raw is None:
=====================================
testsuite/driver/testlib.py
=====================================
@@ -3,6 +3,7 @@
# (c) Simon Marlow 2002
#
+import csv
import io
import shutil
import os
@@ -23,12 +24,13 @@ from testglobals import config, ghc_env, default_testopts, brokens, t, \
TestRun, TestResult, TestOptions, PerfMetric
from testutil import strip_quotes, lndir, link_or_copy_file, passed, \
failBecause, testing_metrics, residency_testing_metrics, \
+ stable_perf_counters, \
PassFail, badResult, memoize
from term_color import Color, colored
import testutil
from cpu_features import have_cpu_feature
import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, StatsException
+from perf_notes import MetricChange, PerfStat, StatsException, AlwaysAccept, RelativeMetricAcceptanceWindow
extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
from my_typing import *
@@ -752,9 +754,14 @@ def find_so(lib):
def find_non_inplace_so(lib):
return _find_so(lib,path_from_ghcPkg(lib, "dynamic-library-dirs"),False)
-# Define a generic stat test, which computes the statistic by calling the function
-# given as the third argument.
-def collect_generic_stat ( metric, deviation, get_stat ):
+
+def collect_generic_stat ( metric, deviation: Optional[int], get_stat: Callable[[WayName], str]):
+ """
+ Define a generic stat test, which computes the statistic by calling the function
+ given as the third argument.
+
+ If no deviation is given, the test cannot fail, but the metric is still recorded.
+ """
return collect_generic_stats ( { metric: { 'deviation': deviation, 'current': get_stat } } )
def _collect_generic_stat(name : TestName, opts, metric_infos):
@@ -801,16 +808,27 @@ def collect_stats(metric='all', deviation=20, static_stats_file=None):
def statsFile(comp_test: bool, name: str) -> str:
return name + ('.comp' if comp_test else '') + '.stats'
+def perfStatsFile(comp_test: bool, name: str) -> str:
+ return name + ('.comp' if comp_test else '') + '.perf.csv'
+
# This is an internal function that is used only in the implementation.
# 'is_compiler_stats_test' is somewhat of an unfortunate name.
# If the boolean is set to true, it indicates that this test is one that
# measures the performance numbers of the compiler.
# As this is a fairly rare case in the testsuite, it defaults to false to
# indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False):
+def _collect_stats(name: TestName, opts, metrics, deviation: Optional[int],
+ static_stats_file: Optional[Union[Path,str]],
+ is_compiler_stats_test: bool = False, is_compiler_perf_test: bool = False) -> None:
if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
failBecause('This test has an invalid name.')
+ if is_compiler_perf_test and config.perf_path is None:
+ # If this is a 'perf' test but no 'perf' executable is configured,
+ # don't register the stat collection: there will be no results to
+ # read.
+ return
+
# Normalize metrics to a list of strings.
if isinstance(metrics, str):
if metrics == 'all':
@@ -865,11 +883,47 @@ def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file,
assert val is not None
return int(val)
+ # How to read the result of the performance test
+ def read_perf_stats_file(way, metric_name):
+ FIELDS = ['value','unit','event','runtime','percent']
+ # Confusingly compile time ghci tests are actually runtime tests, so we have
+ # to go and look for the name.stats file rather than name.comp.stats file.
+ compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+
+ perf_stats_file = Path(in_testdir(perfStatsFile(compiler_stats_test, name)))
+ perf_metrics = {}
+ try:
+ perf_csv_lines = perf_stats_file.read_text().splitlines()
+ # Output looks like:
+ # """
+ # # Started on <date>
+ #
+ # <value>,<unit>,<event>,<runtime>,<percent>,...
+ # """
+ #
+ # Ignore empty lines and lines starting with '#'
+ perf_csv = [l for l in perf_csv_lines if l and not l.startswith('#')]
+
+ perf_stats_csv_reader = csv.DictReader(perf_csv, fieldnames=FIELDS, delimiter=";", quotechar="\"")
+ for fields in perf_stats_csv_reader:
+ perf_metrics[fields['event']] = float(fields['value'])
+
+ except IOError as e:
+ raise StatsException(str(e))
+
+ val = perf_metrics.get(metric_name)
+ if val is None:
+ print('Failed to find metric: ', metric_name)
+ raise StatsException("No such metric")
+ return int(val)
collect_stat = {}
for metric_name in metrics:
def action_generator(mn):
- return lambda way: read_stats_file(way, mn)
+ read_stats = read_perf_stats_file if is_compiler_perf_test else read_stats_file
+ return lambda way: read_stats(way, mn)
metric = '{}/{}'.format(tag, metric_name)
collect_stat[metric] = { "deviation": deviation
, "current": action_generator(metric_name) }
@@ -1009,7 +1063,32 @@ def have_thread_sanitizer( ) -> bool:
def gcc_as_cmmp() -> bool:
return config.cmm_cpp_is_gcc
-# ---
+# -----
+
+def collect_compiler_perf(deviation: Optional[int] = None):
+ """
+ Record stable performance counters using `perf stat` when available.
+ """
+ return [
+ _collect_compiler_perf_counters(stable_perf_counters(), deviation)
+ ]
+
+def collect_compiler_perf_counters(counters: List[str], deviation: Optional[int] = None):
+ """
+ Record the given event counters using `perf stat` when available.
+ """
+ return [
+ _collect_compiler_perf_counters(set(counters), deviation)
+ ]
+
+def _collect_compiler_perf_counters(counters: Set[str], deviation: Optional[int] = None):
+ def f(name, opts):
+ # Slightly hacky: 'simple_run' needs the requested perf counters, so we
+ # register them globally on the test options.
+ opts.compiler_perf_counters += list(counters)
+ _collect_stats(name, opts, counters, deviation, None,
+ is_compiler_stats_test=True, is_compiler_perf_test=True)
+ return f
+
# Note [Measuring residency]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1062,6 +1141,12 @@ def collect_compiler_residency(tolerance_pct: float):
collect_compiler_stats(residency_testing_metrics(), tolerance_pct)
]
+def collect_compiler_runtime(tolerance_pct: float):
+ return [
+ collect_compiler_stats('bytes allocated', tolerance_pct),
+ _collect_compiler_perf_counters(stable_perf_counters())
+ ]
+
# ---
def high_memory_usage(name, opts):
@@ -1619,7 +1704,7 @@ async def do_test(name: TestName,
stdout = stdout_path,
stderr = stderr_path,
print_output = config.verbose >= 3,
- timeout_multiplier = opts.pre_cmd_timeout_multiplier,
+ timeout_multiplier = opts.pre_cmd_timeout_multiplier
)
# If user used expect_broken then don't record failures of pre_cmd
@@ -1809,6 +1894,8 @@ async def do_compile(name: TestName,
result = await extras_build( way, extra_mods, extra_hc_opts )
if badResult(result):
return result
+
+ assert result.hc_opts is not None
extra_hc_opts = result.hc_opts
result = await simple_build(name, way, extra_hc_opts, should_fail, top_mod, units, should_link, True, **kwargs)
@@ -1934,6 +2021,7 @@ async def compile_and_run__(name: TestName,
result = await extras_build( way, extra_mods, extra_hc_opts )
if badResult(result):
return result
+ assert result.hc_opts is not None
extra_hc_opts = result.hc_opts
assert extra_hc_opts is not None
@@ -2027,10 +2115,15 @@ def report_stats(name, way, metric, gen_stat):
metric_result = passed()
perf_change = MetricChange.NewMetric
else:
+ deviation = gen_stat["deviation"]
+ if deviation is not None:
+ tolerance_metric = RelativeMetricAcceptanceWindow(deviation)
+ else:
+ tolerance_metric = AlwaysAccept()
(perf_change, metric_result) = Perf.check_stats_change(
perf_stat,
baseline,
- gen_stat["deviation"],
+ tolerance_metric,
config.allowed_perf_changes,
config.verbose >= 4)
@@ -2051,9 +2144,14 @@ def report_stats(name, way, metric, gen_stat):
# -----------------------------------------------------------------------------
# Build a single-module program
-async def extras_build( way, extra_mods, extra_hc_opts ):
+async def extras_build(way: WayName, extra_mods, extra_hc_opts) -> PassFail:
for mod, opts in extra_mods:
- result = await simple_build(mod, way, opts + ' ' + extra_hc_opts, False, None, [], False, False)
+ result = await simple_build(mod, way, opts + ' ' + extra_hc_opts,
+ should_fail=False,
+ top_mod=None,
+ units=[],
+ link=False,
+ addsuf=False)
if not (mod.endswith('.hs') or mod.endswith('.lhs')):
extra_hc_opts += ' %s' % Path(mod).with_suffix('.o')
if badResult(result):
@@ -2135,14 +2233,22 @@ async def simple_build(name: Union[TestName, str],
flags = ' '.join(get_compiler_flags() + config.way_flags[way])
- cmd = ('cd "{opts.testdir}" && {cmd_prefix} '
+ cmd = ('{cmd_prefix} '
'{{compiler}} {to_do} {srcname} {flags} {extra_hc_opts}'
).format(**locals())
if filter_with != '':
cmd = cmd + ' | ' + filter_with
- exit_code = await runCmd(cmd, None, stdout, stderr, opts.compile_timeout_multiplier)
+ output_file = perfStatsFile(True, name)
+
+ exit_code = await runCmdPerf(
+ opts.compiler_perf_counters,
+ cmd,
+ output_file,
+ working_dir=opts.testdir,
+ stdin=None, stdout=stdout, stderr=stderr,
+ timeout_multiplier=opts.compile_timeout_multiplier)
actual_stderr_path = in_testdir(name, 'comp.stderr')
@@ -2162,7 +2268,6 @@ async def simple_build(name: Union[TestName, str],
stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
return failBecause('exit code non-0', stderr=stderr_contents)
-
return passed()
# -----------------------------------------------------------------------------
@@ -2214,10 +2319,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
if opts.cmd_wrapper is not None:
cmd = opts.cmd_wrapper(cmd)
- cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
+ output_file = perfStatsFile(False, name)
# run the command
- exit_code = await runCmd(cmd, stdin_arg, stdout_arg, stderr_arg, opts.run_timeout_multiplier)
+ exit_code = await runCmdPerf(opts.compiler_perf_counters, cmd, output_file,
+ working_dir=opts.testdir,
+ stdin=stdin_arg, stdout=stdout_arg, stderr=stderr_arg,
+ timeout_multiplier=opts.run_timeout_multiplier)
# check the exit code
if exit_code != opts.exit_code:
@@ -2315,7 +2423,7 @@ async def interpreter_run(name: TestName,
cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
- exit_code = await runCmd(cmd, script, stdout, stderr, opts.run_timeout_multiplier)
+ exit_code = await runCmd(cmd, script, stdout, stderr, timeout_multiplier=opts.run_timeout_multiplier)
# split the stdout into compilation/program output
split_file(stdout, delimiter,
@@ -2973,12 +3081,56 @@ def dump_file(f: Path):
except Exception:
print('')
+# -----------------------------------------------------------------------------
+# Run a command, optionally under 'perf stat'
+
+async def runCmdPerf(
+ perf_counters: List[str],
+ cmd: str,
+ output_file: str,
+ working_dir: Optional[Path] = None,
+ **kwargs) -> int:
+ """
+ Run a command under `perf stat`, collecting the given counters.
+
+ Returns the exit code; the collected counter values are written to 'output_file'.
+
+ If given a 'working_dir', we generate a command looking like:
+
+ .. code-block:: text
+
+ cd $working_dir && perf stat ... $cmd
+
+ This allows users to find the test directory by looking at the execution logs,
+ and allows us to write 'perf' output files in the test directory.
+ """
+ if len(perf_counters) == 0 or config.perf_path is None:
+ if working_dir:
+ cmd = f"cd \"{working_dir}\" && {cmd}"
+
+ exit_code = await runCmd(cmd, **kwargs)
+ return exit_code
+
+ perf_cmd_args: List[str] = [str(config.perf_path), 'stat', '-x\\;', '-o', output_file, '-e', ','.join(perf_counters), cmd]
+ cmd = ' '.join(perf_cmd_args)
+ if working_dir:
+ cmd = f"cd \"{working_dir}\" && {cmd}"
+
+ exit_code = await runCmd(cmd, **kwargs)
+ return exit_code
+
async def runCmd(cmd: str,
stdin: Union[None, Path]=None,
stdout: Union[None, Path]=None,
stderr: Union[None, int, Path]=None,
timeout_multiplier=1.0,
print_output=False) -> int:
+ """
+ Run a command enforcing a timeout and returning the exit code.
+
+ The process's working directory is changed to 'working_dir'.
+ """
+
timeout_prog = strip_quotes(config.timeout_prog)
timeout = str(int(ceil(config.timeout * timeout_multiplier)))
@@ -3000,7 +3152,12 @@ async def runCmd(cmd: str,
# Hence it must ultimately be run by a Bourne shell. It's timeout's job
# to invoke the Bourne shell
- proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd, stdin=stdin_file, stdout=asyncio.subprocess.PIPE, stderr=hStdErr, env=ghc_env)
+ proc = await asyncio.create_subprocess_exec(timeout_prog, timeout, cmd,
+ stdin=stdin_file,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=hStdErr,
+ env=ghc_env
+ )
stdout_buffer, stderr_buffer = await proc.communicate()
finally:
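Concretely, when counters are requested and 'perf' is configured,
runCmdPerf assembles a shell line of the following shape (the directory
and compile command below are illustrative):

    # Sketch of the command construction in runCmdPerf.
    counters = ['instructions:u']
    cmd = '"$TEST_HC" -c T1969.hs'  # stands in for the real compile command
    perf_cmd = ' '.join(['perf', 'stat', '-x\\;', '-o', 'T1969.comp.perf.csv',
                         '-e', ','.join(counters), cmd])
    shell_line = 'cd "/path/to/testdir" && ' + perf_cmd
    # -> cd "/path/to/testdir" && perf stat -x\; -o T1969.comp.perf.csv
    #        -e instructions:u "$TEST_HC" -c T1969.hs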
=====================================
testsuite/driver/testutil.py
=====================================
@@ -77,6 +77,11 @@ def lndir(srcdir: Path, dstdir: Path, force_copy=False):
def testing_metrics():
return { 'bytes allocated', 'peak_megabytes_allocated', 'max_bytes_used' }
+# All performance counters we consider to be stable enough in CI to
+# test for.
+def stable_perf_counters():
+ return { 'instructions:u' }
+
# Metrics which are testing residency information
def residency_testing_metrics():
return { 'peak_megabytes_allocated', 'max_bytes_used' }
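The ':u' modifier restricts counting to user space, which keeps
kernel-side noise out of CI comparisons. A quick way to check that such
a counter is usable on a given machine (a sketch, not part of the
driver):

    import subprocess

    # Exit code 0 means the PMU event could be opened and counted.
    ok = subprocess.run(['perf', 'stat', '-e', 'instructions:u', '--', 'true'],
                        capture_output=True).returncode == 0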
=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -8,7 +8,7 @@ test('T1969',
extra_run_opts('+RTS -A64k -RTS'),
# The default RESIDENCY_OPTS is 256k and we need higher sampling
# frequency. Incurs a slow-down by about 2.
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
only_ways(['normal']),
extra_hc_opts('-dcore-lint -static'),
@@ -32,14 +32,14 @@ else:
test('T3294',
[collect_compiler_residency(15),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
conf_3294,
],
compile,
[''])
test('T4801',
- [collect_compiler_stats('bytes allocated',2),
+ [collect_compiler_runtime(2),
only_ways(['normal']),
extra_hc_opts('-static'),
when(arch('wasm32') and unregisterised(), fragile(23290))
@@ -49,7 +49,7 @@ test('T4801',
test('T3064',
[collect_compiler_residency(20),
- collect_compiler_stats('bytes allocated',2),
+ collect_compiler_runtime(2),
only_ways(['normal']),
],
compile,
@@ -59,7 +59,7 @@ test('T3064',
test('T4007', normal, makefile_test, ['T4007'])
test('T5030',
- [collect_compiler_stats('bytes allocated', 2),
+ [collect_compiler_runtime(2),
only_ways(['normal'])
],
@@ -67,14 +67,14 @@ test('T5030',
['-freduction-depth=300'])
test('T5631',
- [collect_compiler_stats('bytes allocated',2),
+ [collect_compiler_runtime(2),
only_ways(['normal'])
],
compile,
[''])
test('parsing001',
- [collect_compiler_stats('bytes allocated',2),
+ [collect_compiler_runtime(2),
only_ways(['normal']),
],
compile_fail, [''])
@@ -82,27 +82,27 @@ test('parsing001',
test('T783',
[ only_ways(['normal']), # no optimisation for this one
- collect_compiler_stats('bytes allocated',2),
+ collect_compiler_runtime(2),
extra_hc_opts('-static')
],
compile,[''])
test('T5321Fun',
[ only_ways(['normal']), # no optimisation for this one
- collect_compiler_stats('bytes allocated',2)
+ collect_compiler_runtime(2),
],
compile,[''])
test('T5321FD',
[ only_ways(['normal']), # no optimisation for this one
- collect_compiler_stats('bytes allocated',2)
+ collect_compiler_runtime(2),
],
compile,[''])
test('T5642',
[ only_ways(['normal']),
normal,
- collect_compiler_stats('bytes allocated',2)
+ collect_compiler_runtime(2),
],
compile,['-O'])
@@ -114,7 +114,7 @@ test('T5837',
test('T6048',
[ only_ways(['optasm']),
- collect_compiler_stats('bytes allocated',2)
+ collect_compiler_runtime(2),
],
compile,[''])
@@ -134,7 +134,7 @@ test('T9675',
test('T9872a',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
high_memory_usage
],
compile_fail,
@@ -142,28 +142,28 @@ test('T9872a',
test('T9872b',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
high_memory_usage
],
compile_fail,
[''])
test('T9872b_defer',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
high_memory_usage
],
compile,
['-fdefer-type-errors'])
test('T9872c',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
high_memory_usage
],
compile_fail,
[''])
test('T9872d',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
compile,
[''])
@@ -227,14 +227,14 @@ test ('LargeRecord',
test('T9961',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
compile,
['-O'])
test('T9233',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
multimod_compile,
['T9233', '-v0 -O2 -fno-spec-constr'])
@@ -249,14 +249,14 @@ test('T10370',
test('T11068', normal, makefile_test, ['T11068'])
test('T10547',
- [ collect_compiler_stats('bytes allocated', 4),
+ [ collect_compiler_runtime(4),
],
compile_fail,
['-fprint-expanded-synonyms'])
test('T12227',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
compile,
# Use `-M1G` to prevent memory thrashing with ghc-8.0.1.
@@ -264,14 +264,14 @@ test('T12227',
test('T12425',
[ only_ways(['optasm']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
compile,
[''])
test('T12234',
[ only_ways(['optasm']),
- collect_compiler_stats('bytes allocated', 2),
+ collect_compiler_runtime(2),
],
compile,
[''])
@@ -279,14 +279,14 @@ test('T12234',
# See Note [Sensitivity to unique increment] in T12545.hs; spread was 4.8%
test('T12545',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 10), #
+ collect_compiler_runtime(10), #
],
multimod_compile,
['T12545', '-v0'] )
test('T13035',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
],
compile,
[''] )
@@ -299,7 +299,7 @@ test('T13056',
['-O1'])
test('T12707',
- [ collect_compiler_stats('bytes allocated', 1),
+ [ collect_compiler_runtime(1),
],
compile,
[''])
@@ -311,7 +311,7 @@ test('T12707',
# to avoid spurious errors.
test('T12150',
[ only_ways(['optasm']),
- collect_compiler_stats('bytes allocated', 2)
+ collect_compiler_runtime(2)
],
compile,
[''])
@@ -483,7 +483,7 @@ test('MultiLayerModulesNoCode',
['MultiLayerModulesNoCode.script'])
test('MultiComponentModulesRecomp',
- [ collect_compiler_stats('bytes allocated', 2),
+ [ collect_compiler_runtime(2),
pre_cmd('$MAKE -s --no-print-directory MultiComponentModulesRecomp'),
extra_files(['genMultiComp.py']),
compile_timeout_multiplier(5)
@@ -492,7 +492,7 @@ test('MultiComponentModulesRecomp',
[['unitp%d' % n for n in range(20)], '-fno-code -fwrite-interface -v0'])
test('MultiComponentModules',
- [ collect_compiler_stats('bytes allocated', 2),
+ [ collect_compiler_runtime(2),
pre_cmd('$PYTHON ./genMultiComp.py'),
extra_files(['genMultiComp.py']),
compile_timeout_multiplier(5)
@@ -565,7 +565,7 @@ test('T14683',
test ('T9630',
[ collect_compiler_residency(15),
- collect_compiler_stats('bytes allocated', 2),
+ collect_compiler_runtime(2),
],
multimod_compile,
['T9630', '-v0 -O'])
@@ -611,7 +611,7 @@ test ('T16473',
['-O2 -flate-specialise'])
test('T17516',
- [ collect_compiler_stats('bytes allocated', 5),
+ [ collect_compiler_runtime(5),
],
multimod_compile,
['T17516', '-O -v0'])
@@ -635,13 +635,13 @@ test ('T18140',
['-v0 -O'])
test('T10421',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 1)
+ collect_compiler_runtime(1)
],
multimod_compile,
['T10421', '-v0 -O'])
test('T10421a',
[ only_ways(['normal']),
- collect_compiler_stats('bytes allocated', 10)
+ collect_compiler_runtime(10)
],
multimod_compile,
['T10421a', '-v0 -O'])
@@ -700,13 +700,13 @@ test ('T19695',
['-v0 -O2'])
test('hard_hole_fits', # Testing multiple hole-fits with lots in scope for #16875
- collect_compiler_stats('bytes allocated', 2), # 1 is 300s, 0.010 is 3s. Without hole-fits it takes 1s
+ collect_compiler_runtime(2), # 1 is 300s, 0.010 is 3s. Without hole-fits it takes 1s
compile, ['-fdefer-type-errors -fno-max-valid-hole-fits -package ghc'])
test('T16875', # Testing one hole-fit with a lot in scope for #16875
# This test is very sensitive to environmental differences.. we should fix
# that but for now the failure threshold is 4% (see #21557)
- collect_compiler_stats('bytes allocated', 4),
+ collect_compiler_runtime(4),
compile, ['-fdefer-type-errors -fno-max-valid-hole-fits -package ghc'])
test ('T20261',
@@ -720,7 +720,7 @@ test ('T20261',
# a compile-time and a run-time performance test
test('T21839c',
[ collect_compiler_stats('all', 10),
- collect_compiler_stats('bytes allocated', 1),
+ collect_compiler_runtime(1),
only_ways(['normal'])],
compile,
['-O'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fb0a4e5cc545d8d995b6631138f40495917e795a...6dfb9471822660c6a90a018c4615407e4a5469ba