[Git][ghc/ghc][wip/perf-ci] 2 commits: testsuite: Add infrastructure for collecting perf-events counters
Ben Gamari (@bgamari)
gitlab at gitlab.haskell.org
Thu Jan 19 18:41:32 UTC 2023
Ben Gamari pushed to branch wip/perf-ci at Glasgow Haskell Compiler / GHC
Commits:
2b304b40 by Ben Gamari at 2023-01-19T13:41:24-05:00
testsuite: Add infrastructure for collecting perf-events counters
- - - - -
4e7d3385 by Ben Gamari at 2023-01-19T13:41:24-05:00
gitlab-ci: Hack it in
- - - - -
5 changed files:
- .gitlab/gen_ci.hs
- testsuite/driver/perf_notes.py
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/tests/perf/compiler/all.T
Changes:
=====================================
.gitlab/gen_ci.hs
=====================================
@@ -141,6 +141,7 @@ data BuildConfig
, tablesNextToCode :: Bool
, threadSanitiser :: Bool
, noSplitSections :: Bool
+ , testsuiteUsePerf :: Bool
}
-- Extra arguments to pass to ./configure due to the BuildConfig
@@ -188,6 +189,7 @@ vanilla = BuildConfig
, tablesNextToCode = True
, threadSanitiser = False
, noSplitSections = False
+ , testsuiteUsePerf = False
}
splitSectionsBroken :: BuildConfig -> BuildConfig
@@ -663,6 +665,7 @@ job arch opsys buildConfig = NamedJob { name = jobName, jobInfo = Job {..} }
Emulator s -> "CROSS_EMULATOR" =: s
NoEmulatorNeeded -> mempty
, if withNuma buildConfig then "ENABLE_NUMA" =: "1" else mempty
+ , if testsuiteUsePerf buildConfig then "RUNTEST_ARGS" =: "--config perf_path=perf" else mempty
]
jobArtifacts = Artifacts
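
(Illustration, not part of the patch.) The RUNTEST_ARGS value above hands
"--config perf_path=perf" to the testsuite driver, which is expected to turn
such key=value pairs into fields of its TestConfig (the perf_path field is
added in testglobals.py below). A minimal sketch of that kind of override;
the helper name apply_config_arg is hypothetical:

    # Hypothetical helper: how a "--config key=value" argument could
    # populate a field on the driver's TestConfig, e.g. perf_path = 'perf'.
    def apply_config_arg(config, arg: str) -> None:
        field, value = arg.split('=', 1)
        setattr(config, field, value)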
=====================================
testsuite/driver/perf_notes.py
=====================================
@@ -123,10 +123,37 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
('opts', Dict[str, str])
])
-MetricBaselineOracle = Callable[[WayName, GitHash], Baseline]
-MetricDeviationOracle = Callable[[WayName, GitHash], Optional[float]]
-MetricOracles = NamedTuple("MetricOracles", [("baseline", MetricBaselineOracle),
- ("deviation", MetricDeviationOracle)])
+class MetricAcceptanceWindow:
+ """
+ A strategy for computing an acceptance window for a metric measurement
+ given a baseline value.
+ """
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ raise NotImplementedError
+ def describe(self) -> str:
+ raise NotImplementedError
+
+class AlwaysAccept(MetricAcceptanceWindow):
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ return (float('-inf'), float('inf'))
+
+ def describe(self) -> str:
+ raise NotImplementedError
+
+class RelativeMetricAcceptanceWindow(MetricAcceptanceWindow):
+ """
+ A MetricAcceptanceWindow which accepts measurements within tol-percent of
+ the baseline.
+ """
+ def __init__(self, tol: float):
+ """ Accept any metric within tol-percent of the baseline """
+ self.tol = tol / 100
+
+ def get_bounds(self, baseline: float) -> Tuple[float, float]:
+ return (baseline * (1-self.tol), baseline * (1+self.tol))
+
+ def describe(self) -> str:
+ return '+/- %1.1f%%' % (100*self.tol)
def parse_perf_stat(stat_str: str) -> PerfStat:
field_vals = stat_str.strip('\t').split('\t')
@@ -558,32 +585,38 @@ def get_commit_metric(gitNoteRef,
_commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
return baseline_by_cache_key_b.get(cacheKeyB)
-# Check test stats. This prints the results for the user.
-# actual: the PerfStat with actual value.
-# baseline: the expected Baseline value (this should generally be derived from baseline_metric())
-# tolerance_dev: allowed deviation of the actual value from the expected value.
-# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
-# force_print: Print stats even if the test stat was in the tolerance range.
-# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
def check_stats_change(actual: PerfStat,
baseline: Baseline,
- tolerance_dev,
+ acceptance_window: MetricAcceptanceWindow,
allowed_perf_changes: Dict[TestName, List[AllowedPerfChange]] = {},
force_print = False
) -> Tuple[MetricChange, Any]:
+ """
+ Check test stats. This prints the results for the user.
+
+ Parameters:
+ actual: the PerfStat with actual value
+ baseline: the expected Baseline value (this should generally be derived
+ from baseline_metric())
+ acceptance_window: strategy determining the range of acceptable deviation
+ of the actual value from the baseline value.
+ allowed_perf_changes: allowed changes in stats. This is a dictionary as
+ returned by get_allowed_perf_changes().
+ force_print: Print stats even if the test stat was in the tolerance range.
+
+ Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
+ """
expected_val = baseline.perfStat.value
full_name = actual.test + ' (' + actual.way + ')'
- lowerBound = trunc( int(expected_val) * ((100 - float(tolerance_dev))/100))
- upperBound = trunc(0.5 + ceil(int(expected_val) * ((100 + float(tolerance_dev))/100)))
-
- actual_dev = round(((float(actual.value) * 100)/ int(expected_val)) - 100, 1)
+ lower_bound, upper_bound = acceptance_window.get_bounds(expected_val)
+ actual_dev = round(((float(actual.value) * 100)/ expected_val) - 100, 1)
# Find the direction of change.
change = MetricChange.NoChange
- if actual.value < lowerBound:
+ if actual.value < lower_bound:
change = MetricChange.Decrease
- elif actual.value > upperBound:
+ elif actual.value > upper_bound:
change = MetricChange.Increase
# Is the change allowed?
@@ -608,14 +641,14 @@ def check_stats_change(actual: PerfStat,
result = failBecause('stat ' + error, tag='stat')
if not change_allowed or force_print:
- length = max(len(str(x)) for x in [expected_val, lowerBound, upperBound, actual.value])
+ length = max(len(str(x)) for x in [expected_val, lower_bound, upper_bound, actual.value])
def display(descr, val, extra):
print(descr, str(val).rjust(length), extra)
- display(' Expected ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
- display(' Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
- display(' Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
+ display(' Expected ' + full_name + ' ' + actual.metric + ':', expected_val, acceptance_window.describe())
+ display(' Lower bound ' + full_name + ' ' + actual.metric + ':', lower_bound, '')
+ display(' Upper bound ' + full_name + ' ' + actual.metric + ':', upper_bound, '')
display(' Actual ' + full_name + ' ' + actual.metric + ':', actual.value, '')
if actual.value != expected_val:
display(' Deviation ' + full_name + ' ' + actual.metric + ':', actual_dev, '%')
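
(Illustration, not part of the patch; assumes the classes above are imported
from perf_notes.) How the new acceptance-window classes behave for a made-up
baseline:

    # A 1% window around a hypothetical baseline of 1,000,000:
    window = RelativeMetricAcceptanceWindow(tol=1)
    lower, upper = window.get_bounds(1_000_000)   # (990000.0, 1010000.0)
    print(window.describe())                      # '+/- 1.0%'

    # AlwaysAccept never rejects a measurement, whatever the baseline:
    assert AlwaysAccept().get_bounds(1_000_000) == (float('-inf'), float('inf'))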
=====================================
testsuite/driver/testglobals.py
=====================================
@@ -4,7 +4,7 @@
from my_typing import *
from pathlib import Path
-from perf_notes import MetricChange, PerfStat, Baseline, MetricOracles, GitRef
+from perf_notes import MetricChange, PerfStat, Baseline, MetricAcceptanceWindow, GitRef
from datetime import datetime
# -----------------------------------------------------------------------------
@@ -49,6 +49,9 @@ class TestConfig:
# Path to Ghostscript
self.gs = None # type: Optional[Path]
+ # Path to Linux `perf` tool
+ self.perf_path = None # type: Optional[Path]
+
# Run tests requiring Haddock
self.haddock = False
@@ -377,7 +380,7 @@ class TestOptions:
# , 10) }
# This means no baseline is available for way1. For way 2, allow a 10%
# deviation from 9300000000.
- self.stats_range_fields = {} # type: Dict[MetricName, MetricOracles]
+ self.stats_range_fields = {} # type: Dict[MetricName, MetricAcceptanceWindow]
# Is the test testing performance?
self.is_stats_test = False
@@ -449,6 +452,9 @@ class TestOptions:
# The extra hadrian dependencies we need for this particular test
self.hadrian_deps = set(["test:ghc"]) # type: Set[str]
+ # Record these `perf-events` counters when compiling this test, if `perf` is available
+ self.compiler_perf_counters = [] # type: List[str]
+
# The default set of options
global default_testopts
default_testopts = TestOptions()
=====================================
testsuite/driver/testlib.py
=====================================
@@ -3,10 +3,12 @@
# (c) Simon Marlow 2002
#
+import csv
import io
import shutil
import os
import re
+import tempfile
import traceback
import time
import datetime
@@ -28,7 +30,7 @@ from term_color import Color, colored
import testutil
from cpu_features import have_cpu_feature
import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, MetricOracles
+from perf_notes import MetricChange, PerfStat, MetricAcceptanceWindow
extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
from my_typing import *
@@ -477,6 +479,17 @@ def _run_timeout_multiplier( name, opts, v ):
# -----
+def collect_compiler_perf_counters( counters: List[str] ):
+ """
+ Record the given event counters using `perf stat` when available.
+ """
+ def f(name, opts):
+ opts.compiler_perf_counters += counters
+ return f
+
+
+# -----
+
def extra_run_opts( val ):
return lambda name, opts, v=val: _extra_run_opts(name, opts, v);
@@ -522,10 +535,10 @@ def _extra_files(name, opts, files):
# are about the performance of the runtime code generated by the compiler.
def collect_compiler_stats(metric='all',deviation=20):
setTestOpts(no_lint)
- return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m,d, True)
+ return lambda name, opts: _collect_rts_stats(name, opts, metric, deviation, True)
def collect_stats(metric='all', deviation=20):
- return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m, d)
+ return lambda name, opts: _collect_rts_stats(name, opts, metric, deviation, False)
# This is an internal function that is used only in the implementation.
# 'is_compiler_stats_test' is somewhat of an unfortunate name.
@@ -533,7 +546,11 @@ def collect_stats(metric='all', deviation=20):
# measures the performance numbers of the compiler.
# As this is a fairly rare case in the testsuite, it defaults to false to
# indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_test=False):
+def _collect_rts_stats(
+ name: TestName,
+ opts, metrics: List[MetricName],
+ deviation: float,
+ is_compiler_stats_test: bool):
if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
failBecause('This test has an invalid name.')
@@ -573,8 +590,7 @@ def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_t
target_commit, name, config.test_env, metric, way, \
config.baseline_commit )
- opts.stats_range_fields[metric] = MetricOracles(baseline=baselineByWay,
- deviation=deviation)
+ opts.stats_range_fields[metric] = Perf.RelativeMetricAcceptanceWindow(deviation)
# -----
@@ -1458,7 +1474,7 @@ def do_compile(name: TestName,
return result
extra_hc_opts = result.hc_opts
- result = simple_build(name, way, extra_hc_opts, should_fail, top_mod, units, False, True, **kwargs)
+ result = simple_build(name, way, extra_hc_opts, should_fail, top_mod, units, False, True, compiler_perf_counters = getTestOpts().compiler_perf_counters, **kwargs)
if badResult(result):
return result
@@ -1580,7 +1596,7 @@ def compile_and_run__(name: TestName,
if way.startswith('ghci'): # interpreted...
return interpreter_run(name, way, extra_hc_opts, top_mod)
else: # compiled...
- result = simple_build(name, way, extra_hc_opts, False, top_mod, [], True, True, backpack = backpack)
+ result = simple_build(name, way, extra_hc_opts, False, top_mod, [], True, True, backpack = backpack, compiler_perf_counters = getTestOpts().compiler_perf_counters)
if badResult(result):
return result
@@ -1615,6 +1631,49 @@ def metric_dict(name, way, metric, value) -> PerfStat:
value = value)
# -----------------------------------------------------------------------------
+
+def check_stat(
+ name: TestName,
+ way: WayName,
+ metric: MetricName,
+ acceptance_window: MetricAcceptanceWindow,
+ value: float) -> PassFail:
+ if not Perf.inside_git_repo():
+ return passed()
+
+ head_commit = Perf.commit_hash(GitRef('HEAD')) if Perf.inside_git_repo() else None
+ if head_commit is None:
+ return passed()
+
+ # Store the metric so it can later be stored in a git note.
+ perf_stat = metric_dict(name, way, metric, value)
+
+ # Find baseline; If this is the first time running the benchmark, then pass.
+ baseline = Perf.baseline_metric(head_commit, name, config.test_env, metric, way, config.baseline_commit)
+ if baseline is None:
+ metric_result = passed()
+ perf_change = MetricChange.NewMetric
+ else:
+ (perf_change, metric_result) = Perf.check_stats_change(
+ perf_stat,
+ baseline,
+ acceptance_window,
+ config.allowed_perf_changes,
+ config.verbose >= 4)
+
+ t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
+
+ # If any metric fails then the test fails.
+ # Note, the remaining metrics are still run so that
+ # a complete list of changes can be presented to the user.
+ if not metric_result.passed:
+ if config.ignore_perf_increases and perf_change == MetricChange.Increase:
+ metric_result = passed()
+ elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
+ metric_result = passed()
+
+ return metric_result
+
# Check test stats. This prints the results for the user.
# name: name of the test.
# way: the way.
@@ -1622,14 +1681,11 @@ def metric_dict(name, way, metric, value) -> PerfStat:
# range_fields: see TestOptions.stats_range_fields
# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
# This prints the results for the user.
-def check_stats(name: TestName,
+def check_rts_stats(name: TestName,
way: WayName,
stats_file: Path,
- range_fields: Dict[MetricName, MetricOracles]
+ range_fields: Dict[MetricName, MetricAcceptanceWindow]
) -> PassFail:
- head_commit = Perf.commit_hash(GitRef('HEAD')) if Perf.inside_git_repo() else None
- if head_commit is None:
- return passed()
result = passed()
if range_fields:
@@ -1638,7 +1694,7 @@ def check_stats(name: TestName,
except IOError as e:
return failBecause(str(e))
- for (metric, baseline_and_dev) in range_fields.items():
+ for (metric, acceptance_window) in range_fields.items():
# Remove any metric prefix e.g. "runtime/" and "compile_time/"
stat_file_metric = metric.split("/")[-1]
perf_change = None
@@ -1651,46 +1707,27 @@ def check_stats(name: TestName,
val = field_match.group(1)
assert val is not None
actual_val = int(val)
-
- # Store the metric so it can later be stored in a git note.
- perf_stat = metric_dict(name, way, metric, actual_val)
-
- # If this is the first time running the benchmark, then pass.
- baseline = baseline_and_dev.baseline(way, head_commit) \
- if Perf.inside_git_repo() else None
- if baseline is None:
- metric_result = passed()
- perf_change = MetricChange.NewMetric
- else:
- tolerance_dev = baseline_and_dev.deviation
- (perf_change, metric_result) = Perf.check_stats_change(
- perf_stat,
- baseline,
- tolerance_dev,
- config.allowed_perf_changes,
- config.verbose >= 4)
-
- t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
-
- # If any metric fails then the test fails.
- # Note, the remaining metrics are still run so that
- # a complete list of changes can be presented to the user.
- if not metric_result.passed:
- if config.ignore_perf_increases and perf_change == MetricChange.Increase:
- metric_result = passed()
- elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
- metric_result = passed()
-
- result = metric_result
+ r = check_stat(name, way, metric, acceptance_window, actual_val)
+ if badResult(r):
+ result = r
return result
# -----------------------------------------------------------------------------
# Build a single-module program
-def extras_build( way, extra_mods, extra_hc_opts ):
+def extras_build(
+ way: WayName,
+ extra_mods: List[str],
+ extra_hc_opts: str
+ ) -> PassFail:
for mod, opts in extra_mods:
- result = simple_build(mod, way, opts + ' ' + extra_hc_opts, False, None, [], False, False)
+ result = simple_build(mod, way, opts + ' ' + extra_hc_opts,
+ should_fail = False,
+ top_mod = None,
+ units = [],
+ link = False,
+ addsuf = False)
if not (mod.endswith('.hs') or mod.endswith('.lhs')):
extra_hc_opts += ' %s' % Path(mod).with_suffix('.o')
if badResult(result):
@@ -1708,7 +1745,8 @@ def simple_build(name: Union[TestName, str],
addsuf: bool,
backpack: bool = False,
suppress_stdout: bool = False,
- filter_with: str = '') -> Any:
+ filter_with: str = '',
+ compiler_perf_counters: List[str] = []) -> Any:
opts = getTestOpts()
# Redirect stdout and stderr to the same file
@@ -1763,14 +1801,19 @@ def simple_build(name: Union[TestName, str],
flags = ' '.join(get_compiler_flags() + config.way_flags[way])
- cmd = ('cd "{opts.testdir}" && {cmd_prefix} '
+ cmd = ('{cmd_prefix} '
'{{compiler}} {to_do} {srcname} {flags} {extra_hc_opts}'
).format(**locals())
if filter_with != '':
cmd = cmd + ' | ' + filter_with
- exit_code = runCmd(cmd, None, stdout, stderr, opts.compile_timeout_multiplier)
+ (exit_code, perf_counts) = runCmdPerf(
+ compiler_perf_counters,
+ cmd,
+ stdin=None, stdout=stdout, stderr=stderr,
+ working_dir=opts.testdir,
+ timeout_multiplier=opts.compile_timeout_multiplier)
actual_stderr_path = in_testdir(name, 'comp.stderr')
@@ -1791,10 +1834,15 @@ def simple_build(name: Union[TestName, str],
return failBecause('exit code non-0', stderr=stderr_contents)
if isCompilerStatsTest():
- statsResult = check_stats(TestName(name), way, in_testdir(stats_file), opts.stats_range_fields)
+ statsResult = check_rts_stats(TestName(name), way, in_testdir(stats_file), opts.stats_range_fields)
if badResult(statsResult):
return statsResult
+ for k,v in perf_counts.items():
+ r = check_stat(TestName(name), way, MetricName('compile_time/perf/%s' % k), Perf.AlwaysAccept(), v)
+ if badResult(r):
+ return r
+
return passed()
# -----------------------------------------------------------------------------
@@ -1841,10 +1889,10 @@ def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: str) ->
if opts.cmd_wrapper is not None:
cmd = opts.cmd_wrapper(cmd)
- cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
-
# run the command
- exit_code = runCmd(cmd, stdin_arg, stdout_arg, stderr_arg, opts.run_timeout_multiplier)
+ exit_code = runCmd(cmd, stdin_arg, stdout_arg, stderr_arg,
+ timeout_multiplier=opts.run_timeout_multiplier,
+ working_dir=opts.testdir)
# check the exit code
if exit_code != opts.exit_code:
@@ -1875,7 +1923,7 @@ def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: str) ->
# Check runtime stats if desired.
if stats_file is not None:
- return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
+ return check_rts_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
else:
return passed()
@@ -1935,9 +1983,9 @@ def interpreter_run(name: TestName,
if opts.cmd_wrapper is not None:
cmd = opts.cmd_wrapper(cmd);
- cmd = 'cd "{opts.testdir}" && {cmd}'.format(**locals())
-
- exit_code = runCmd(cmd, script, stdout, stderr, opts.run_timeout_multiplier)
+ exit_code = runCmd(cmd, script, stdout, stderr,
+ timeout_multiplier=opts.run_timeout_multiplier,
+ working_dir=opts.testdir)
# split the stdout into compilation/program output
split_file(stdout, delimiter,
@@ -2095,9 +2143,9 @@ def check_hp_ok(name: TestName) -> bool:
opts = getTestOpts()
# do not qualify for hp2ps because we should be in the right directory
- hp2psCmd = 'cd "{opts.testdir}" && {{hp2ps}} {name}'.format(**locals())
+ hp2psCmd = '{{hp2ps}} {name}'.format(**locals())
- hp2psResult = runCmd(hp2psCmd, print_output=True)
+ hp2psResult = runCmd(hp2psCmd, print_output=True, working_dir=opts.testdir)
actual_ps_path = in_testdir(name, 'ps')
@@ -2532,12 +2580,45 @@ def dump_file(f: Path):
except Exception:
print('')
+def runCmdPerf(
+ perf_counters: List[str],
+ cmd: str,
+ **kwargs) -> Tuple[int, Dict[str,float]]:
+ """
+ Run a command under `perf stat`, collecting the given counters.
+
+ Returns the exit code and a dictionary of the collected counter values.
+ """
+ FIELDS = ['value','unit','event','runtime','percent']
+ if len(perf_counters) == 0 or config.perf_path is None:
+ return (runCmd(cmd, **kwargs), {})
+
+ with tempfile.NamedTemporaryFile('rt') as perf_out:
+ args = [config.perf_path, 'stat', '-x,', '-o', perf_out.name, '-e', ','.join(perf_counters), cmd]
+ exit_code = runCmd(' '.join(args), **kwargs)
+
+ perf_out.readline() # drop initial comment line
+ perf_metrics = {}
+ for line in perf_out:
+ line = line.strip()
+ if line == '' or line.startswith('#'):
+ continue
+ fields = { k: v for k,v in zip(FIELDS, line.split(',')) }
+ perf_metrics[fields['event']] = float(fields['value'])
+
+ return (exit_code, perf_metrics)
+
def runCmd(cmd: str,
stdin: Union[None, Path]=None,
stdout: Union[None, Path]=None,
stderr: Union[None, int, Path]=None,
+ working_dir: Optional[Path]=None,
timeout_multiplier=1.0,
- print_output=False) -> int:
+ print_output=False,
+ ) -> int:
+ """
+ Run a command enforcing a timeout and returning the exit code.
+ """
timeout_prog = strip_quotes(config.timeout_prog)
timeout = str(int(ceil(config.timeout * timeout_multiplier)))
@@ -2563,7 +2644,8 @@ def runCmd(cmd: str,
stdin=stdin_file,
stdout=subprocess.PIPE,
stderr=hStdErr,
- env=ghc_env)
+ env=ghc_env,
+ cwd=working_dir)
stdout_buffer, stderr_buffer = r.communicate()
finally:
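
(Illustration, not part of the patch.) runCmdPerf above parses the
comma-separated output written by "perf stat -x, -o <file>"; roughly, each
record carries the counter value, unit, event name, run time and percentage,
matching the FIELDS list in the diff. A self-contained sketch of the same
parsing applied to a sample output (the sample itself is made up, and the
exact trailing fields can vary between perf versions):

    # Stand-alone sketch of the CSV parsing done in runCmdPerf.
    FIELDS = ['value', 'unit', 'event', 'runtime', 'percent']
    sample = """# started on Thu Jan 19 13:41:24 2023

    1234567890,,instructions,2000000,100.00
    """
    metrics = {}
    for line in sample.splitlines():
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        fields = dict(zip(FIELDS, line.split(',')))
        metrics[fields['event']] = float(fields['value'])
    print(metrics)   # {'instructions': 1234567890.0}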
=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -5,6 +5,7 @@ setTestOpts(no_lint)
test('T1969',
[# expect_broken(12437),
collect_compiler_residency(20),
+ collect_compiler_perf_counters(['instructions']),
extra_run_opts('+RTS -A64k -RTS'),
# The default RESIDENCY_OPTS is 256k and we need higher sampling
# frequency. Incurs a slow-down by about 2.
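
(Illustration, not part of the patch.) With the modifier above, each requested
counter is reported through check_stat under a "compile_time/perf/<event>"
metric with an AlwaysAccept window, so for now the values are recorded and
stored in git notes rather than gated on. A hypothetical all.T entry opting
another test into collection (test name and counters chosen for illustration
only):

    test('MyPerfTest',
         [ collect_compiler_stats('bytes allocated', 10)
         , collect_compiler_perf_counters(['instructions', 'cycles'])
         ],
         compile, [''])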
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/311866fca6827361be2193301a78b0162cca4347...4e7d338583e957256b18bd88ca852389024ca6fc