[Git][ghc/ghc][wip/testsuite-generic-stats] testsuite: Add mechanism to collect generic metrics
Matthew Pickering (@mpickering)
gitlab at gitlab.haskell.org
Thu Nov 16 18:35:04 UTC 2023
Matthew Pickering pushed to branch wip/testsuite-generic-stats at Glasgow Haskell Compiler / GHC
Commits:
f51e88d1 by Matthew Pickering at 2023-11-16T18:34:47+00:00
testsuite: Add mechanism to collect generic metrics
* Generalise the metric logic by adding an additional field which
  allows you to specify how to query the actual value of a metric.
  Previously only the method of querying the baseline value was
  abstracted (and it was always set to the same thing).
* This requires slightly rejigging how stat collection works, but the
  result is more uniform and hopefully simpler.
* Introduce some new "generic" helper functions for writing generic
  stats tests (a usage sketch follows this list).
  - collect_size ( deviation, path )
    Record the size of the file at the given path as a metric.
  - stat_from_file ( metric, deviation, path )
    Read an integer from the file at the given path and record it as a metric.
- collect_generic_stat ( metric, deviation, get_stat )
Provide your own `get_stat` function, `lambda way: <Int>`, which
can be used to establish the value of the metric.
  - collect_generic_stats ( get_stats )
    Like collect_generic_stat, but provide the whole dictionary of metric
    definitions:
    { metric: {
        deviation: <Int>
      , action: lambda way: <Int>
    } }
* Introduce two new "size" tests for keeping track of build products:
  - `size_hello` - The size of `size_hello.o` from compiling size_hello.hs
  - `libdir` - The total size of the `libdir` folder.
* Track the number of module dependencies in the count-deps tests:
  - CountDepsAst
  - CountDepsParser
  This lays the infrastructure for #24191, #22256 and #17129.
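  A usage sketch, purely illustrative and not part of this commit: the first
  two entries mirror the tests added in testsuite/tests/perf/size/all.T below,
  while the last entry (its test name, the metric name 'iface_size' and the
  interface file it measures) is hypothetical and only shows how a custom
  `lambda way: <Int>` function plugs into collect_generic_stat.

  # Record the size of the object file produced by compiling the test.
  test('size_hello', [collect_size(3, 'size_hello.o')], compile, [''])

  # Record an integer that a Makefile rule has written to the file SIZE,
  # under the metric name 'size'.
  test('libdir', [stat_from_file('size', 3, 'SIZE')],
       makefile_test, ['libdir_size'])

  # Hypothetical: compute the metric with an arbitrary function of the way.
  test('size_hello_iface',
       [collect_generic_stat('iface_size', 5,
            lambda way: os.path.getsize(in_testdir('size_hello_iface.hi')))],
       compile, [''])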
- - - - -
8 changed files:
- testsuite/driver/perf_notes.py
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/tests/count-deps/Makefile
- testsuite/tests/count-deps/all.T
- + testsuite/tests/perf/size/Makefile
- + testsuite/tests/perf/size/all.T
- + testsuite/tests/perf/size/size_hello.hs
Changes:
=====================================
testsuite/driver/perf_notes.py
=====================================
@@ -123,11 +123,6 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
('opts', Dict[str, str])
])
-MetricBaselineOracle = Callable[[WayName, GitHash], Baseline]
-MetricDeviationOracle = Callable[[WayName, GitHash], Optional[float]]
-MetricOracles = NamedTuple("MetricOracles", [("baseline", MetricBaselineOracle),
- ("deviation", MetricDeviationOracle)])
-
def parse_perf_stat(stat_str: str) -> PerfStat:
field_vals = stat_str.strip('\t').split('\t')
stat = PerfStat(*field_vals) # type: ignore
=====================================
testsuite/driver/testglobals.py
=====================================
@@ -4,7 +4,7 @@
from my_typing import *
from pathlib import Path
-from perf_notes import MetricChange, PerfStat, Baseline, MetricOracles, GitRef
+from perf_notes import MetricChange, PerfStat, Baseline, GitRef
from datetime import datetime
# -----------------------------------------------------------------------------
@@ -378,24 +378,20 @@ class TestOptions:
# extra files to copy to the testdir
self.extra_files = [] # type: List[str]
- # Map from metric to (function from way and commit to baseline value, allowed percentage deviation) e.g.
- # { 'bytes allocated': (
- # lambda way commit:
- # ...
- # if way1: return None ...
- # elif way2:return 9300000000 ...
- # ...
- # , 10) }
- # This means no baseline is available for way1. For way 2, allow a 10%
- # deviation from 9300000000.
- self.stats_range_fields = {} # type: Dict[MetricName, MetricOracles]
-
# Is the test testing performance?
self.is_stats_test = False
# Does this test the compiler's performance as opposed to the generated code.
self.is_compiler_stats_test = False
+ # Map from metric to information about that metric
+ # { metric: { "deviation": <int>
+ # The action to run to get the current value of the test
+ # , "action": lambda way: <Int>
+ # The action to run to get the baseline value of the test
+ # , "oracle": lambda way commit: baseline value } }
+ self.generic_stats_test: Dict = {}
+
# should we run this test alone, i.e. not run it in parallel with
# any other threads
self.alone = False
=====================================
testsuite/driver/testlib.py
=====================================
@@ -28,7 +28,7 @@ from term_color import Color, colored
import testutil
from cpu_features import have_cpu_feature
import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, MetricOracles
+from perf_notes import MetricChange, PerfStat
extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
from my_typing import *
@@ -99,6 +99,10 @@ def isCompilerStatsTest() -> bool:
opts = getTestOpts()
return bool(opts.is_compiler_stats_test)
+def isGenericStatsTest() -> bool:
+ opts = getTestOpts()
+ return bool(opts.generic_stats_test)
+
def isStatsTest() -> bool:
opts = getTestOpts()
return opts.is_stats_test
@@ -599,6 +603,44 @@ def extra_files(files):
def _extra_files(name, opts, files):
opts.extra_files.extend(files)
+# Record the size of a specific file
+def collect_size ( deviation, path ):
+ return collect_generic_stat ( 'size', deviation, lambda way: os.path.getsize(in_testdir(path)) )
+
+# Read a number from a specific file
+def stat_from_file ( metric, deviation, path ):
+ def read_file (way):
+ with open(in_testdir(path)) as f:
+ return int(f.read())
+ return collect_generic_stat ( metric, deviation, read_file )
+
+
+# Define a set of generic stat tests
+def collect_generic_stats ( get_stats ):
+ def f(name, opts, f=get_stats):
+ return _collect_generic_stat(name, opts, get_stats)
+ return f
+
+# Define a generic stat test, which computes the statistic by calling the function
+# given as the third argument.
+def collect_generic_stat ( metric, deviation, get_stat ):
+ return collect_generic_stats ( { metric: { 'deviation': deviation, 'action': get_stat } } )
+
+def _collect_generic_stat(name : TestName, opts, get_stat):
+
+
+ # Add new stats to the stat list
+ opts.generic_stats_test.update(get_stat)
+
+ # Add the "oracle" which determines the stat baseline
+ for (metric, info) in get_stat.items():
+ def baselineByWay(way, target_commit, metric=metric):
+ return Perf.baseline_metric( \
+ target_commit, name, config.test_env, metric, way, \
+ config.baseline_commit )
+ opts.generic_stats_test[metric]["oracle"] = baselineByWay
+
+
# -----
# Defaults to "test everything, and only break on extreme cases"
@@ -625,6 +667,9 @@ def collect_compiler_stats(metric='all',deviation=20):
def collect_stats(metric='all', deviation=20):
return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m, d)
+def statsFile(comp_test: bool, name: str) -> str:
+ return name + ('.comp' if comp_test else '') + '.stats'
+
# This is an internal function that is used only in the implementation.
# 'is_compiler_stats_test' is somewhat of an unfortunate name.
# If the boolean is set to true, it indicates that this test is one that
@@ -664,15 +709,33 @@ def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_t
# The nonmoving collector does not support -G1
_omit_ways(name, opts, [WayName(name) for name in ['nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']])
+ # How to read the result of the performance test
+ def read_stats_file(way, metric_name):
+ # Confusingly, compile-time ghci tests are actually runtime tests, so we have
+ # to go and look for the name.stats file rather than the name.comp.stats file.
+ compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+ stats_file = Path(in_testdir(statsFile(compiler_stats_test, name)))
+ try:
+ stats_file_contents = stats_file.read_text()
+ except IOError as e:
+ raise Exception(failBecause(str(e)))
+ field_match = re.search('\\("' + metric_name + '", "([0-9]+)"\\)', stats_file_contents)
+ if field_match is None:
+ print('Failed to find metric: ', metric_name)
+ raise Exception(failBecause("No such metric"))
+ else:
+ val = field_match.group(1)
+ assert val is not None
+ return int(val)
+
+
+ collect_stat = {}
for metric_name in metrics:
metric = '{}/{}'.format(tag, metric_name)
- def baselineByWay(way, target_commit, metric=metric):
- return Perf.baseline_metric( \
- target_commit, name, config.test_env, metric, way, \
- config.baseline_commit )
+ collect_stat[metric] = { "deviation": deviation
+ , "action": lambda way: read_stats_file(way, metric_name) }
- opts.stats_range_fields[metric] = MetricOracles(baseline=baselineByWay,
- deviation=deviation)
+ _collect_generic_stat(name, opts, collect_stat)
# -----
@@ -1581,6 +1644,11 @@ async def do_compile(name: TestName,
diff_file_name.unlink()
return failBecause('stderr mismatch', stderr=stderr)
+ opts = getTestOpts()
+ if isGenericStatsTest():
+ statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+ if badResult(statsResult):
+ return statsResult
# no problems found, this test passed
return passed()
@@ -1717,13 +1785,9 @@ async def multi_compile_and_run( name, way, top_mod, extra_mods, extra_hc_opts )
async def warn_and_run( name, way, extra_hc_opts ):
return await compile_and_run__( name, way, None, [], extra_hc_opts, compile_stderr = True)
-def stats( name, way, stats_file ):
- opts = getTestOpts()
- return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
-
async def static_stats( name, way, stats_file ):
opts = getTestOpts()
- return check_stats(name, way, in_statsdir(stats_file), opts.stats_range_fields)
+ return check_generic_stats(name, way, opts.generic_stats_test)
def metric_dict(name, way, metric, value) -> PerfStat:
return Perf.PerfStat(
@@ -1733,75 +1797,58 @@ def metric_dict(name, way, metric, value) -> PerfStat:
metric = metric,
value = value)
-# -----------------------------------------------------------------------------
-# Check test stats. This prints the results for the user.
-# name: name of the test.
-# way: the way.
-# stats_file: the path of the stats_file containing the stats for the test.
-# range_fields: see TestOptions.stats_range_fields
-# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
-# This prints the results for the user.
-def check_stats(name: TestName,
- way: WayName,
- stats_file: Path,
- range_fields: Dict[MetricName, MetricOracles]
- ) -> PassFail:
+
+
+def check_generic_stats(name, way, get_stats):
+ for (metric, gen_stat) in get_stats.items():
+ res = report_stats(name, way, metric, gen_stat)
+ if badResult(res):
+ return res
+ return passed()
+
+def report_stats(name, way, metric, gen_stat):
+ try:
+ actual_val = gen_stat['action'](way)
+ # Metrics can exit early by throwing an Exception with the desired result.
+ # This is used both for failures and for skipping the computation of a metric.
+ except Exception as e:
+ result = e.args[0]
+ return result
+
head_commit = Perf.commit_hash(GitRef('HEAD')) if Perf.inside_git_repo() else None
if head_commit is None:
return passed()
result = passed()
- if range_fields:
- try:
- stats_file_contents = stats_file.read_text()
- except IOError as e:
- return failBecause(str(e))
-
- for (metric, baseline_and_dev) in range_fields.items():
- # Remove any metric prefix e.g. "runtime/" and "compile_time/"
- stat_file_metric = metric.split("/")[-1]
- perf_change = None
-
- field_match = re.search('\\("' + stat_file_metric + '", "([0-9]+)"\\)', stats_file_contents)
- if field_match is None:
- print('Failed to find metric: ', stat_file_metric)
- result = failBecause('no such stats metric')
- else:
- val = field_match.group(1)
- assert val is not None
- actual_val = int(val)
-
- # Store the metric so it can later be stored in a git note.
- perf_stat = metric_dict(name, way, metric, actual_val)
-
- # If this is the first time running the benchmark, then pass.
- baseline = baseline_and_dev.baseline(way, head_commit) \
- if Perf.inside_git_repo() else None
- if baseline is None:
- metric_result = passed()
- perf_change = MetricChange.NewMetric
- else:
- tolerance_dev = baseline_and_dev.deviation
- (perf_change, metric_result) = Perf.check_stats_change(
- perf_stat,
- baseline,
- tolerance_dev,
- config.allowed_perf_changes,
- config.verbose >= 4)
-
- t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
-
- # If any metric fails then the test fails.
- # Note, the remaining metrics are still run so that
- # a complete list of changes can be presented to the user.
- if not metric_result.passed:
- if config.ignore_perf_increases and perf_change == MetricChange.Increase:
- metric_result = passed()
- elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
- metric_result = passed()
-
- result = metric_result
-
+ # Store the metric so it can later be stored in a git note.
+ perf_stat = metric_dict(name, way, metric, actual_val)
+
+ # If this is the first time running the benchmark, then pass.
+ baseline = gen_stat['oracle'](way, head_commit) \
+ if Perf.inside_git_repo() else None
+ if baseline is None:
+ metric_result = passed()
+ perf_change = MetricChange.NewMetric
+ else:
+ (perf_change, metric_result) = Perf.check_stats_change(
+ perf_stat,
+ baseline,
+ gen_stat["deviation"],
+ config.allowed_perf_changes,
+ config.verbose >= 4)
+
+ t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
+
+ # If any metric fails then the test fails.
+ # Note, the remaining metrics are still run so that
+ # a complete list of changes can be presented to the user.
+ if not metric_result.passed:
+ if config.ignore_perf_increases and perf_change == MetricChange.Increase:
+ metric_result = passed()
+ elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
+ metric_result = passed()
+
+ result = metric_result
return result
# -----------------------------------------------------------------------------
@@ -1863,8 +1910,8 @@ async def simple_build(name: Union[TestName, str],
else:
to_do = '-c' # just compile
- stats_file = name + '.comp.stats'
if isCompilerStatsTest():
+ stats_file = statsFile(True, name)
# Set a bigger chunk size to reduce variation due to additional under/overflowing
# The tests are attempting to test how much work the compiler is doing by proxy of
# bytes allocated. The additional allocations caused by stack overflow can cause
@@ -1913,10 +1960,6 @@ async def simple_build(name: Union[TestName, str],
stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
return failBecause('exit code non-0', stderr=stderr_contents)
- if isCompilerStatsTest():
- statsResult = check_stats(TestName(name), way, in_testdir(stats_file), opts.stats_range_fields)
- if badResult(statsResult):
- return statsResult
return passed()
@@ -1953,7 +1996,7 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
# assume we are running a program via ghci. Collect stats
stats_file = None # type: Optional[str]
if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci' or way == 'ghci-opt'):
- stats_file = name + '.stats'
+ stats_file = statsFile(False, name)
stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
else:
stats_args = ''
@@ -1999,11 +2042,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
if check_prof and not await check_prof_ok(name, way):
return failBecause('bad profile')
- # Check runtime stats if desired.
- if stats_file is not None:
- return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
- else:
- return passed()
+ # Check the results of stats tests
+ if isGenericStatsTest():
+ statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+ if badResult(statsResult):
+ return statsResult
+
+ return passed()
def rts_flags(way: WayName) -> str:
args = config.way_rts_flags.get(way, [])
=====================================
testsuite/tests/count-deps/Makefile
=====================================
@@ -16,8 +16,10 @@ LIBDIR := "`'$(TEST_HC)' $(TEST_HC_OPTS) --print-libdir | tr -d '\r'`"
.PHONY: count-deps-parser
count-deps-parser:
- $(COUNT_DEPS) $(LIBDIR) "GHC.Parser"
+ $(COUNT_DEPS) $(LIBDIR) "GHC.Parser" | tee out
+ cat out | tail -n +2 | wc -l > SIZE
.PHONY: count-deps-ast
count-deps-ast:
- $(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax"
+ $(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax" | tee out
+ cat out | tail -n +2 | wc -l > SIZE
=====================================
testsuite/tests/count-deps/all.T
=====================================
@@ -1,2 +1,2 @@
-test('CountDepsAst', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
-test('CountDepsParser', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])
+test('CountDepsAst', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
+test('CountDepsParser', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])
=====================================
testsuite/tests/perf/size/Makefile
=====================================
@@ -0,0 +1,7 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+libdir_size:
+ du -s `$(TEST_HC) --print-libdir` | cut -f1 > SIZE
+
=====================================
testsuite/tests/perf/size/all.T
=====================================
@@ -0,0 +1,3 @@
+test('size_hello', [collect_size(3, 'size_hello.o')], compile, [''])
+
+test('libdir',[stat_from_file('size', 3, 'SIZE')], makefile_test, ['libdir_size'] )
=====================================
testsuite/tests/perf/size/size_hello.hs
=====================================
@@ -0,0 +1,3 @@
+module Main where
+
+main = print "Hello World!"
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/f51e88d1641ea9f9e276e15c70ffcbfdc4beef84