[Git][ghc/ghc][master] testsuite: Add mechanism to collect generic metrics

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon Nov 27 17:54:31 UTC 2023



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
b5213542 by Matthew Pickering at 2023-11-27T12:53:59-05:00
testsuite: Add mechanism to collect generic metrics

* Generalise the metric logic by adding an additional field which
  allows you to specify how to query the current value of a metric.
  Previously, only the method of querying the baseline value was
  abstracted (but it was always set to the same thing).

* This requires slightly rejigging how the stat collection works, but it
  is now more uniform and hopefully simpler.

* Introduce some new "generic" helper functions for writing generic
  stats tests.

  - collect_size ( deviation, path )
    Record the size of the file as a metric

  - stat_from_file ( metric, deviation, path )
    Read a value from the given path, and store that as a metric

  - collect_generic_stat ( metric, deviation, get_stat)
    Provide your own `get_stat` function, `lambda way: <Int>`, which
    can be used to establish the current value of the metric.

  - collect_generic_stats ( metric_info )
    Like collect_generic_stat but provide the whole dictionary of metric
    definitions.

    { metric: {
        deviation: <Int>
        current: lambda way: <Int>
        } }

* Introduce two new "size" metrics for keeping track of build products.
    - `size_hello_obj` - The size of `size_hello_obj.o` from compiling size_hello_obj.hs
    - `libdir` - The total size of the `libdir` folder.

* Track the number of module dependencies (the `deps` metric) in the count-deps tests
   - CountDepsAst
   - CountDepsParser

This lays the infrastructure for #24191, #22256 and #17129.
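
For illustration, the new helpers are used in all.T files along the
following lines. The first two tests are taken from this patch; the third
is a hypothetical sketch of collect_generic_stat with a custom query
function (its test name and object file name are made up, and it assumes
`os` and `in_testdir` are in scope as they are for the driver helpers):

    # Record the size of the produced object file, allowing a 3% deviation
    # (testsuite/tests/perf/size/all.T)
    test('size_hello_obj', [collect_size(3, 'size_hello_obj.o')], compile, [''])

    # Read the number written by the Makefile into the file SIZE and record
    # it as the 'deps' metric, allowing a 100% deviation
    # (testsuite/tests/count-deps/all.T)
    test('CountDepsAst', [stat_from_file('deps', 100, 'SIZE'),
                          req_hadrian_deps(["test:count-deps"])],
         makefile_test, ['count-deps-ast'])

    # Hypothetical: compute the current value with a custom function
    test('my_size_test', [collect_generic_stat('size', 5,
             lambda way: os.path.getsize(in_testdir('my_size_test.o')))],
         compile, [''])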

- - - - -


9 changed files:

- testsuite/driver/perf_notes.py
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/tests/count-deps/Makefile
- testsuite/tests/count-deps/all.T
- testsuite/tests/perf/haddock/all.T
- + testsuite/tests/perf/size/Makefile
- + testsuite/tests/perf/size/all.T
- + testsuite/tests/perf/size/size_hello_obj.hs


Changes:

=====================================
testsuite/driver/perf_notes.py
=====================================
@@ -86,6 +86,11 @@ PerfStat = NamedTuple('PerfStat', [('test_env', TestEnv),
 Baseline = NamedTuple('Baseline', [('perfStat', PerfStat),
                                    ('commit', GitHash)])
 
+# The type of exceptions which are thrown when computing the current stat value
+# fails.
+class StatsException(Exception):
+    pass
+
 class MetricChange(Enum):
     # The metric appears to have no baseline and is presumably a new test.
     NewMetric = 'NewMetric'
@@ -123,11 +128,6 @@ AllowedPerfChange = NamedTuple('AllowedPerfChange',
                                 ('opts', Dict[str, str])
                                 ])
 
-MetricBaselineOracle = Callable[[WayName, GitHash], Baseline]
-MetricDeviationOracle = Callable[[WayName, GitHash], Optional[float]]
-MetricOracles = NamedTuple("MetricOracles", [("baseline", MetricBaselineOracle),
-                                             ("deviation", MetricDeviationOracle)])
-
 def parse_perf_stat(stat_str: str) -> PerfStat:
     field_vals = stat_str.strip('\t').split('\t')
     stat = PerfStat(*field_vals) # type: ignore

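A 'current' callback can abort a metric with a message by raising this
StatsException; report_stats in testlib.py (further down in this diff)
catches it and turns the message into a test failure. A minimal sketch of
such a callback, assuming `in_testdir` and `os` are in scope and using a
made-up MODULES output file:

    def read_module_count(way):
        # Fail this metric with a clear message if the expected file
        # was not produced, instead of crashing the driver.
        path = in_testdir('MODULES')   # hypothetical output file
        if not os.path.exists(path):
            raise StatsException('MODULES file was not produced')
        with open(path) as f:
            return int(f.read())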

=====================================
testsuite/driver/testglobals.py
=====================================
@@ -4,7 +4,7 @@
 
 from my_typing import *
 from pathlib import Path
-from perf_notes import MetricChange, PerfStat, Baseline, MetricOracles, GitRef
+from perf_notes import MetricChange, PerfStat, Baseline, GitRef
 from datetime import datetime
 
 # -----------------------------------------------------------------------------
@@ -378,24 +378,20 @@ class TestOptions:
        # extra files to copy to the testdir
        self.extra_files = [] # type: List[str]
 
-       # Map from metric to (function from way and commit to baseline value, allowed percentage deviation) e.g.
-       #     { 'bytes allocated': (
-       #              lambda way commit:
-       #                    ...
-       #                    if way1: return None ...
-       #                    elif way2:return 9300000000 ...
-       #                    ...
-       #              , 10) }
-       # This means no baseline is available for way1. For way 2, allow a 10%
-       # deviation from 9300000000.
-       self.stats_range_fields = {} # type: Dict[MetricName, MetricOracles]
-
        # Is the test testing performance?
        self.is_stats_test = False
 
        # Does this test the compiler's performance as opposed to the generated code.
        self.is_compiler_stats_test = False
 
+       # Map from metric to information about that metric
+       #    { metric: { "deviation": <int>
+       #              , "current": lambda way: <Int>
+       #                (the action to run to get the current value of the metric)
+       #              , "baseline": lambda way, commit: <baseline value>
+       #                (the action to run to get the baseline value of the metric) } }
+       self.generic_stats_test: Dict = {}
+
        # should we run this test alone, i.e. not run it in parallel with
        # any other threads
        self.alone = False


=====================================
testsuite/driver/testlib.py
=====================================
@@ -28,7 +28,7 @@ from term_color import Color, colored
 import testutil
 from cpu_features import have_cpu_feature
 import perf_notes as Perf
-from perf_notes import MetricChange, PerfStat, MetricOracles
+from perf_notes import MetricChange, PerfStat, StatsException
 extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223
 
 from my_typing import *
@@ -99,6 +99,10 @@ def isCompilerStatsTest() -> bool:
     opts = getTestOpts()
     return bool(opts.is_compiler_stats_test)
 
+def isGenericStatsTest() -> bool:
+    opts = getTestOpts()
+    return bool(opts.generic_stats_test)
+
 def isStatsTest() -> bool:
     opts = getTestOpts()
     return opts.is_stats_test
@@ -599,6 +603,44 @@ def extra_files(files):
 def _extra_files(name, opts, files):
     opts.extra_files.extend(files)
 
+# Record the size of a specific file
+def collect_size ( deviation, path ):
+    return collect_generic_stat ( 'size', deviation, lambda way: os.path.getsize(in_testdir(path)) )
+
+# Read a number from a specific file
+def stat_from_file ( metric, deviation, path ):
+    def read_file (way):
+        with open(in_testdir(path)) as f:
+            return int(f.read())
+    return collect_generic_stat ( metric, deviation, read_file )
+
+
+# Define a set of generic stat tests
+def collect_generic_stats ( metric_info ):
+    def f(name, opts, metric_info=metric_info):
+        return _collect_generic_stat(name, opts, metric_info)
+    return f
+
+# Define a generic stat test, which computes the statistic by calling the function
+# given as the third argument.
+def collect_generic_stat ( metric, deviation, get_stat ):
+    return collect_generic_stats ( { metric: { 'deviation': deviation, 'current': get_stat } } )
+
+def _collect_generic_stat(name : TestName, opts, metric_infos):
+
+
+    # Add new stats to the stat list
+    opts.generic_stats_test.update(metric_infos)
+
+    # Add the way to determine the baseline
+    for (metric, info) in metric_infos.items():
+        def baselineByWay(way, target_commit, metric=metric):
+            return Perf.baseline_metric( \
+                              target_commit, name, config.test_env, metric, way, \
+                              config.baseline_commit )
+        opts.generic_stats_test[metric]["baseline"] = baselineByWay
+
+
 # -----
 
 # Defaults to "test everything, and only break on extreme cases"
@@ -619,11 +661,14 @@ def _extra_files(name, opts, files):
 def collect_compiler_stats(metric='all',deviation=20):
     def f(name, opts, m=metric, d=deviation):
         no_lint(name, opts)
-        return _collect_stats(name, opts, m, d, True)
+        return _collect_stats(name, opts, m, d, None, True)
     return f
 
-def collect_stats(metric='all', deviation=20):
-    return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m, d)
+def collect_stats(metric='all', deviation=20, static_stats_file=None):
+    return lambda name, opts, m=metric, d=deviation, s=static_stats_file: _collect_stats(name, opts, m, d, s)
+
+def statsFile(comp_test: bool, name: str) -> str:
+    return name + ('.comp' if comp_test else '') + '.stats'
 
 # This is an internal function that is used only in the implementation.
 # 'is_compiler_stats_test' is somewhat of an unfortunate name.
@@ -631,7 +676,7 @@ def collect_stats(metric='all', deviation=20):
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_test=False):
+def _collect_stats(name: TestName, opts, metrics, deviation, static_stats_file, is_compiler_stats_test=False):
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
@@ -664,15 +709,41 @@ def _collect_stats(name: TestName, opts, metrics, deviation, is_compiler_stats_t
         # The nonmoving collector does not support -G1
         _omit_ways(name, opts, [WayName(name) for name in ['nonmoving', 'nonmoving_thr', 'nonmoving_thr_ghc']])
 
+    # How to read the result of the performance test
+    def read_stats_file(way, metric_name):
+        # Confusingly, compile-time ghci tests are actually runtime tests, so we have
+        # to go and look for the name.stats file rather than the name.comp.stats file.
+        compiler_stats_test = is_compiler_stats_test and not (way == "ghci" or way == "ghci-opt")
+
+        if static_stats_file:
+            stats_file = in_statsdir(static_stats_file)
+        else:
+            stats_file = Path(in_testdir(statsFile(compiler_stats_test, name)))
+
+
+        try:
+            stats_file_contents = stats_file.read_text()
+        except IOError as e:
+            raise StatsException(str(e))
+        field_match = re.search('\\("' + metric_name + '", "([0-9]+)"\\)', stats_file_contents)
+        if field_match is None:
+            print('Failed to find metric: ', metric_name)
+            raise StatsException("No such metric")
+        else:
+            val = field_match.group(1)
+            assert val is not None
+            return int(val)
+
+
+    collect_stat = {}
     for metric_name in metrics:
+        def action_generator(mn):
+            return lambda way: read_stats_file(way, mn)
         metric = '{}/{}'.format(tag, metric_name)
-        def baselineByWay(way, target_commit, metric=metric):
-            return Perf.baseline_metric( \
-                              target_commit, name, config.test_env, metric, way, \
-                              config.baseline_commit )
+        collect_stat[metric] = { "deviation": deviation
+                                , "current": action_generator(metric_name) }
 
-        opts.stats_range_fields[metric] = MetricOracles(baseline=baselineByWay,
-                                                        deviation=deviation)
+    _collect_generic_stat(name, opts, collect_stat)
 
 # -----
 
@@ -1581,6 +1652,11 @@ async def do_compile(name: TestName,
         diff_file_name.unlink()
         return failBecause('stderr mismatch', stderr=stderr)
 
+    opts = getTestOpts()
+    if isGenericStatsTest():
+        statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+        if badResult(statsResult):
+            return statsResult
 
     # no problems found, this test passed
     return passed()
@@ -1717,13 +1793,9 @@ async def multi_compile_and_run( name, way, top_mod, extra_mods, extra_hc_opts )
 async def warn_and_run( name, way, extra_hc_opts ):
     return await compile_and_run__( name, way, None, [], extra_hc_opts, compile_stderr = True)
 
-def stats( name, way, stats_file ):
-    opts = getTestOpts()
-    return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
-
-async def static_stats( name, way, stats_file ):
+async def static_stats( name, way ):
     opts = getTestOpts()
-    return check_stats(name, way, in_statsdir(stats_file), opts.stats_range_fields)
+    return check_generic_stats(name, way, opts.generic_stats_test)
 
 def metric_dict(name, way, metric, value) -> PerfStat:
     return Perf.PerfStat(
@@ -1733,75 +1805,57 @@ def metric_dict(name, way, metric, value) -> PerfStat:
         metric   = metric,
         value    = value)
 
-# -----------------------------------------------------------------------------
-# Check test stats. This prints the results for the user.
-# name: name of the test.
-# way: the way.
-# stats_file: the path of the stats_file containing the stats for the test.
-# range_fields: see TestOptions.stats_range_fields
-# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
-# This prints the results for the user.
-def check_stats(name: TestName,
-                way: WayName,
-                stats_file: Path,
-                range_fields: Dict[MetricName, MetricOracles]
-                ) -> PassFail:
+
+
+def check_generic_stats(name, way, get_stats):
+    for (metric, gen_stat) in get_stats.items():
+        res = report_stats(name, way, metric, gen_stat)
+        print(res)
+        if badResult(res):
+            return res
+    return passed()
+
+def report_stats(name, way, metric, gen_stat):
+    try:
+        actual_val = gen_stat['current'](way)
+    # Metrics can exit early by throwing a StatsException with the failure string.
+    except StatsException as e:
+        return failBecause(e.args[0])
+
     head_commit = Perf.commit_hash(GitRef('HEAD')) if Perf.inside_git_repo() else None
     if head_commit is None:
         return passed()
 
     result = passed()
-    if range_fields:
-        try:
-            stats_file_contents = stats_file.read_text()
-        except IOError as e:
-            return failBecause(str(e))
-
-        for (metric, baseline_and_dev) in range_fields.items():
-            # Remove any metric prefix e.g. "runtime/" and "compile_time/"
-            stat_file_metric = metric.split("/")[-1]
-            perf_change = None
-
-            field_match = re.search('\\("' + stat_file_metric + '", "([0-9]+)"\\)', stats_file_contents)
-            if field_match is None:
-                print('Failed to find metric: ', stat_file_metric)
-                result = failBecause('no such stats metric')
-            else:
-                val = field_match.group(1)
-                assert val is not None
-                actual_val = int(val)
-
-                # Store the metric so it can later be stored in a git note.
-                perf_stat = metric_dict(name, way, metric, actual_val)
-
-                # If this is the first time running the benchmark, then pass.
-                baseline = baseline_and_dev.baseline(way, head_commit) \
-                    if Perf.inside_git_repo() else None
-                if baseline is None:
-                    metric_result = passed()
-                    perf_change = MetricChange.NewMetric
-                else:
-                    tolerance_dev = baseline_and_dev.deviation
-                    (perf_change, metric_result) = Perf.check_stats_change(
-                        perf_stat,
-                        baseline,
-                        tolerance_dev,
-                        config.allowed_perf_changes,
-                        config.verbose >= 4)
-
-                t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
-
-                # If any metric fails then the test fails.
-                # Note, the remaining metrics are still run so that
-                # a complete list of changes can be presented to the user.
-                if not metric_result.passed:
-                    if config.ignore_perf_increases and perf_change == MetricChange.Increase:
-                        metric_result = passed()
-                    elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
-                        metric_result = passed()
-
-                    result = metric_result
-
+    # Store the metric so it can later be stored in a git note.
+    perf_stat = metric_dict(name, way, metric, actual_val)
+
+    # If this is the first time running the benchmark, then pass.
+    baseline = gen_stat['baseline'](way, head_commit) \
+        if Perf.inside_git_repo() else None
+    if baseline is None:
+        metric_result = passed()
+        perf_change = MetricChange.NewMetric
+    else:
+        (perf_change, metric_result) = Perf.check_stats_change(
+            perf_stat,
+            baseline,
+            gen_stat["deviation"],
+            config.allowed_perf_changes,
+            config.verbose >= 4)
+
+    t.metrics.append(PerfMetric(change=perf_change, stat=perf_stat, baseline=baseline))
+
+    # If any metric fails then the test fails.
+    # Note, the remaining metrics are still run so that
+    # a complete list of changes can be presented to the user.
+    if not metric_result.passed:
+        if config.ignore_perf_increases and perf_change == MetricChange.Increase:
+            metric_result = passed()
+        elif config.ignore_perf_decreases and perf_change == MetricChange.Decrease:
+            metric_result = passed()
+
+        result = metric_result
     return result
 
 # -----------------------------------------------------------------------------
@@ -1863,8 +1917,8 @@ async def simple_build(name: Union[TestName, str],
     else:
         to_do = '-c' # just compile
 
-    stats_file = name + '.comp.stats'
     if isCompilerStatsTest():
+        stats_file = statsFile(True, name)
         # Set a bigger chunk size to reduce variation due to additional under/overflowing
         # The tests are attempting to test how much work the compiler is doing by proxy of
         # bytes allocated. The additional allocations caused by stack overflow can cause
@@ -1913,10 +1967,6 @@ async def simple_build(name: Union[TestName, str],
             stderr_contents = actual_stderr_path.read_text(encoding='UTF-8', errors='replace')
             return failBecause('exit code non-0', stderr=stderr_contents)
 
-    if isCompilerStatsTest():
-        statsResult = check_stats(TestName(name), way, in_testdir(stats_file), opts.stats_range_fields)
-        if badResult(statsResult):
-            return statsResult
 
     return passed()
 
@@ -1953,7 +2003,7 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     #   assume we are running a program via ghci. Collect stats
     stats_file = None # type: Optional[str]
     if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci' or way == 'ghci-opt'):
-        stats_file = name + '.stats'
+        stats_file = statsFile(False, name)
         stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
     else:
         stats_args = ''
@@ -1999,11 +2049,13 @@ async def simple_run(name: TestName, way: WayName, prog: str, extra_run_opts: st
     if check_prof and not await check_prof_ok(name, way):
         return failBecause('bad profile')
 
-    # Check runtime stats if desired.
-    if stats_file is not None:
-        return check_stats(name, way, in_testdir(stats_file), opts.stats_range_fields)
-    else:
-        return passed()
+    # Check the results of stats tests
+    if isGenericStatsTest():
+        statsResult = check_generic_stats(TestName(name), way, opts.generic_stats_test)
+        if badResult(statsResult):
+            return statsResult
+
+    return passed()
 
 def rts_flags(way: WayName) -> str:
     args = config.way_rts_flags.get(way, [])

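Putting the pieces together: after _collect_generic_stat has attached the
baseline oracle, the entry that check_generic_stats consumes has roughly
this shape (a sketch reconstructed from the code above, with an
illustrative metric name and deviation):

    opts.generic_stats_test = {
        'compile_time/bytes allocated': {
            'deviation': 5,   # allowed percentage deviation from the baseline
            'current':   lambda way: read_stats_file(way, 'bytes allocated'),
            'baseline':  lambda way, commit: Perf.baseline_metric(
                             commit, name, config.test_env,
                             'compile_time/bytes allocated', way,
                             config.baseline_commit),
        },
    }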

=====================================
testsuite/tests/count-deps/Makefile
=====================================
@@ -16,8 +16,10 @@ LIBDIR := "`'$(TEST_HC)' $(TEST_HC_OPTS) --print-libdir | tr -d '\r'`"
 
 .PHONY: count-deps-parser
 count-deps-parser:
-	$(COUNT_DEPS) $(LIBDIR) "GHC.Parser"
+	$(COUNT_DEPS) $(LIBDIR) "GHC.Parser" | tee out
+	cat out | tail -n +2 | wc -l > SIZE
 
 .PHONY: count-deps-ast
 count-deps-ast:
-	$(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax"
+	$(COUNT_DEPS) $(LIBDIR) "Language.Haskell.Syntax" | tee out
+	cat out | tail -n +2 | wc -l > SIZE


=====================================
testsuite/tests/count-deps/all.T
=====================================
@@ -1,2 +1,2 @@
-test('CountDepsAst', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
-test('CountDepsParser', [req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])
+test('CountDepsAst', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-ast'])
+test('CountDepsParser', [stat_from_file('deps', 100, 'SIZE'), req_hadrian_deps(["test:count-deps"])], makefile_test, ['count-deps-parser'])


=====================================
testsuite/tests/perf/haddock/all.T
=====================================
@@ -19,21 +19,21 @@
 
 test('haddock.base',
      [unless(in_tree_compiler(), skip), req_haddock
-     ,collect_stats('bytes allocated',5)
+     ,collect_stats('bytes allocated',5, static_stats_file='base.t')
       ],
      static_stats,
-     ['base.t'])
+     [])
 
 test('haddock.Cabal',
      [unless(in_tree_compiler(), skip), req_haddock
-     ,collect_stats('bytes allocated',5)
+     ,collect_stats('bytes allocated',5, static_stats_file='Cabal.t')
       ],
      static_stats,
-     ['Cabal.t'])
+     [])
 
 test('haddock.compiler',
      [unless(in_tree_compiler(), skip), req_haddock
-     ,collect_stats('bytes allocated',10)
+     ,collect_stats('bytes allocated',10, static_stats_file='ghc.t')
       ],
      static_stats,
-     ['ghc.t'])
+     [])


=====================================
testsuite/tests/perf/size/Makefile
=====================================
@@ -0,0 +1,7 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+libdir_size:
+	du -s `$(TEST_HC) --print-libdir` | cut -f1 > SIZE
+


=====================================
testsuite/tests/perf/size/all.T
=====================================
@@ -0,0 +1,3 @@
+test('size_hello_obj', [collect_size(3, 'size_hello_obj.o')], compile, [''])
+
+test('libdir',[stat_from_file('size', 3, 'SIZE')], makefile_test, ['libdir_size'] )


=====================================
testsuite/tests/perf/size/size_hello_obj.hs
=====================================
@@ -0,0 +1,3 @@
+module Main where
+
+main = print "Hello World!"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/b521354216f2821e00d75f088d74081d8b236810
