From 94645972f466261eeb88326651ee51fec6f23787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Wed, 1 May 2019 11:49:38 +0100 Subject: [PATCH 1/7] instruments/perf: Support ALL perf subcommands Introduce an implementation of the PerfInstrument that is more generic than the previous one and which is expected to be able to handle all potential calls to perf (irrespective of the subcommand, flags, options or arguments being used) but which maintains backward compatibility with the previous implementation, targeting perf-stat. --- wa/instruments/perf.py | 277 +++++++++++++++++++++++++++++------------ 1 file changed, 197 insertions(+), 80 deletions(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index c5179cdfa..549fd5b00 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -1,4 +1,4 @@ -# Copyright 2013-2015 ARM Limited +# Copyright 2013-2019 ARM Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,74 +13,170 @@ # limitations under the License. # - -# pylint: disable=unused-argument +import collections import os -import re +from devlib.utils.cli import Command from devlib.trace.perf import PerfCollector - from wa import Instrument, Parameter from wa.utils.types import list_or_string, list_of_strs -PERF_COUNT_REGEX = re.compile(r'^(CPU\d+)?\s*(\d+)\s*(.*?)\s*(\[\s*\d+\.\d+%\s*\])?\s*$') - +__all__ = [ + 'PerfInstrument', +] -class PerfInstrument(Instrument): - name = 'perf' - description = """ - Perf is a Linux profiling with performance counters. +class YamlCommandDescriptor(collections.OrderedDict): - Performance counters are CPU hardware registers that count hardware events - such as instructions executed, cache-misses suffered, or branches - mispredicted. They form a basis for profiling applications to trace dynamic - control flow and identify hotspots. 
+ def __init__(self, yaml_dict): + super(YamlCommandDescriptor, self).__init__() + if isinstance(yaml_dict, YamlCommandDescriptor): + for k, v in yaml_dict.items(): + self[k] = v + return + yaml_dict_copy = yaml_dict.copy() + for label, parameters in yaml_dict_copy.items(): + self[label] = str(Command(kwflags_join=',', + kwflags_sep='=', + end_of_options='--', + **parameters)) - pref accepts options and events. If no option is given the default '-a' is - used. For events, the default events are migrations and cs. They both can - be specified in the config file. - Events must be provided as a list that contains them and they will look like - this :: +DEFAULT_EVENTS = ['migration', 'cs'] +DEFAULT_OPTIONSTRING = '-a' - perf_events = ['migrations', 'cs'] - Events can be obtained by typing the following in the command line on the - device :: - - perf list - - Whereas options, they can be provided as a single string as following :: - - perf_options = '-a -i' - - Options can be obtained by running the following in the command line :: +class PerfInstrument(Instrument): - man perf-stat + name = 'perf' + description = """ + Perf is a Linux profiling tool based on performance counters. + + Performance counters are typically CPU hardware registers (found in the + Performance Monitoring Unit) that count hardware events such as + instructions executed, cache-misses suffered, or branches mispredicted. + Because each ``event`` corresponds to a hardware counter, the maximum + number of events that can be tracked is imposed by the available hardware. + + By extension, performance counters, in the context of ``perf``, also refer + to so-called "software counters" representing events that can be tracked by + the OS kernel (e.g. context switches). As these are software events, the + counters are kept in RAM and the hardware virtually imposes no limit on the + number that can be used. 
+ + This instrument allows a straight-forward way of calling ``perf stat`` + through the named parameters ``optionstring`` and ``events``, which is the + default behaviour (see the defaults of these parameters). However, it can + also be used through the more advanced ``commands`` dictionary which + provides a flexible access to all ways ``perf`` can be used. + + The ``pre_commands`` and ``post_commands`` are provided to suit those + ``perf`` commands that don't actually capture data (``list``, ``config``, + ``report``, ...). + + Commands are tagged with _labels_ which are used to define in which + directory they run. Therefore, a pair of commands (_e.g._ a `record` + followed by a `report`) sharing the same label can access the same files + while commands with different labels can use the same filename with the + guarantee of avoiding clashes. + + Depending on the subcommand used, ``perf`` might require setting: + + - ``/proc/sys/kernel/printk`` to ``4`` + - ``/proc/sys/kernel/kptr_restrict`` to ``0`` + + Please refer to the ``sysfile_values`` runtime parameter to do so from an + agenda. """ parameters = [ - Parameter('events', kind=list_of_strs, default=['migrations', 'cs'], - global_alias='perf_events', - constraint=(lambda x: x, 'must not be empty.'), - description="""Specifies the events to be counted."""), - Parameter('optionstring', kind=list_or_string, default='-a', - global_alias='perf_options', - description="""Specifies options to be used for the perf command. This - may be a list of option strings, in which case, multiple instances of perf - will be kicked off -- one for each option string. This may be used to e.g. - collected different events from different big.LITTLE clusters. + Parameter('force_install', kind=bool, default=False, + description=""" + Always install ``perf`` binary even if ``perf`` is already + present on the device. 
"""), + Parameter('events', kind=list_of_strs, default=None, + description=""" + List of events the default ``perf stat`` should capture. + Valid events can be obtained from ``perf list`` and + ``perf --help``. + This parameter is ignored if ``commands`` is passed. + + default: {} + """.format( + ','.join('``{}``'.format(e) for e in DEFAULT_EVENTS)) + ), + Parameter('optionstring', kind=list_or_string, default=None, + description=""" + String of options the default ``perf stat`` should use. + For backward compatibility, this may be be a list of strings. + In that case, a ``perf stat`` command will be launched for + each string. This parameter is ignored if ``commands`` is + passed. + This parameter is ignored if ``commands`` is passed. + + default: ``{}`` + """.format(DEFAULT_OPTIONSTRING) + ), Parameter('labels', kind=list_of_strs, default=None, - global_alias='perf_labels', - description="""Provides labels for pref output. If specified, the number of - labels must match the number of ``optionstring``\ s. + description=r""" + These labels act like the keys of the ``commands`` parameter. + They are provided for backward compatibility. If specified, + the number of labels must match the number of + ``optionstring``\ s. This parameter is ignored if + ``commands`` is passed. """), - Parameter('force_install', kind=bool, default=False, + Parameter('pre_commands', kind=YamlCommandDescriptor, default=None, description=""" - always install perf binary even if perf is already present on the device. - """), + Dictionary of commands to be run before the workloads run + (same format as ``commands``). 
+ """), + Parameter('commands', kind=YamlCommandDescriptor, default=None, + description=""" + Dictionary in which keys are considered as *labels* and + values are themselves dictionaries with the following + entries: + + - ``command`` (``str``): The ``perf`` subcommand + (``stat``, ``record``, ...); + - ``flags`` (``str`` or ``list``): Switch flags without + their leading hyphens (``no-inherit``, ``all-cpus``, + ``a``, ...); + - ``kwflags`` (``dict``): Dictionary of flag names (no + hyphen) as keys and their corresponding values. + These values can be ``list``s for flags taking CSV + inputs (``event``, ``pid``, ...); + - ``args`` (``str`` or valid command): the post-``--`` + arguments. This is typically the command ``perf`` will + launch and monitor. Therefore, a valid command + dictionary (same as this one) is accepted; + + As an example, the default behaviour can be replicated + through:: + + :language: yaml + + perf: + commands: + default_behaviour: + command: stat + flags: + - all-cpus + kwflags: + event: + - migrations + - cs + args: + command: sleep + args: 1000 + stderr: '&1' + stdout: stat.out + """), + Parameter('post_commands', kind=YamlCommandDescriptor, default=None, + description=""" + Dictionary of commands to be run after the workloads run + (same format as ``commands``). 
+ """), ] def __init__(self, target, **kwargs): @@ -88,51 +184,72 @@ def __init__(self, target, **kwargs): self.collector = None def initialize(self, context): + # pylint: disable=unused-argument + # pylint: disable=access-member-before-definition + # pylint: disable=attribute-defined-outside-init + if self.commands is None: + if self.optionstring is None: + self.optionstring = DEFAULT_OPTIONSTRING + + if self.events is None: + self.events = DEFAULT_EVENTS + + if isinstance(self.optionstring, str): + self.optionstring = [self.optionstring] + + if not self.labels: + self.labels = ['default{}'.format(i) + for i, _ in enumerate(self.optionstring)] + elif isinstance(self.labels, str): + self.labels = [self.labels] + + if len(self.labels) != len(self.optionstring): + raise ValueError('Lengths of labels and optionstring differ') + + self.commands = YamlCommandDescriptor({ + label: { + 'command': 'stat', + 'kwflags': {'event': self.events}, + 'options': options, + 'args': { + 'command': 'sleep', + 'args': 1000, + }, + 'stderr': '&1', + 'stdout': 'stat.out', + } + for label, options in zip(self.labels, self.optionstring) + }) + else: + for name in ['optionstring', 'events', 'labels']: + if self.__dict__[name] is not None: + raise ValueError( + '{} should not be passed if commands is'.format(name)) + self.collector = PerfCollector(self.target, - self.events, - self.optionstring, - self.labels, - self.force_install) + self.force_install, + self.pre_commands, + self.commands, + self.post_commands) def setup(self, context): + # pylint: disable=unused-argument self.collector.reset() def start(self, context): + # pylint: disable=unused-argument self.collector.start() def stop(self, context): + # pylint: disable=unused-argument self.collector.stop() def update_output(self, context): - self.logger.info('Extracting reports from target...') outdir = os.path.join(context.output_directory, 'perf') - self.collector.get_trace(outdir) - - for host_file in os.listdir(outdir): - label = 
host_file.split('.out')[0] - host_file_path = os.path.join(outdir, host_file) - context.add_artifact(label, host_file_path, 'raw') - with open(host_file_path) as fh: - in_results_section = False - for line in fh: - if 'Performance counter stats' in line: - in_results_section = True - next(fh) # skip the following blank line - if in_results_section: - if not line.strip(): # blank line - in_results_section = False - break - else: - line = line.split('#')[0] # comment - match = PERF_COUNT_REGEX.search(line) - if match: - classifiers = {} - cpu = match.group(1) - if cpu is not None: - classifiers['cpu'] = int(cpu.replace('CPU', '')) - count = int(match.group(2)) - metric = '{}_{}'.format(label, match.group(3)) - context.add_metric(metric, count, classifiers=classifiers) + self.collector.get_traces(outdir) + # HUGE TODO: add parsers for supported post_commands + # (or should these be in devlib?) def teardown(self, context): + # pylint: disable=unused-argument self.collector.reset() From e8f036006485999a83406c9d7597de12bd69a835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Tue, 11 Jun 2019 14:32:10 +0100 Subject: [PATCH 2/7] fixup! instruments/perf: Support ALL perf subcommands --- wa/instruments/perf.py | 200 +++++++++++++++++++++++++++++++++++------ 1 file changed, 171 insertions(+), 29 deletions(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index 549fd5b00..b69cb101e 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -13,11 +13,12 @@ # limitations under the License. 
# -import collections +import itertools import os +import re +import shlex -from devlib.utils.cli import Command -from devlib.trace.perf import PerfCollector +from devlib.trace.perf import PerfCollector, PerfCommandDict from wa import Instrument, Parameter from wa.utils.types import list_or_string, list_of_strs @@ -25,23 +26,6 @@ 'PerfInstrument', ] - -class YamlCommandDescriptor(collections.OrderedDict): - - def __init__(self, yaml_dict): - super(YamlCommandDescriptor, self).__init__() - if isinstance(yaml_dict, YamlCommandDescriptor): - for k, v in yaml_dict.items(): - self[k] = v - return - yaml_dict_copy = yaml_dict.copy() - for label, parameters in yaml_dict_copy.items(): - self[label] = str(Command(kwflags_join=',', - kwflags_sep='=', - end_of_options='--', - **parameters)) - - DEFAULT_EVENTS = ['migration', 'cs'] DEFAULT_OPTIONSTRING = '-a' @@ -70,6 +54,10 @@ class PerfInstrument(Instrument): also be used through the more advanced ``commands`` dictionary which provides a flexible access to all ways ``perf`` can be used. + In both cases, if a ``stat`` command is issued, this workload will + automatically parse its output into run ``metrics``. For this reason, + please avoid the ``-x`` ``stat`` flag. + The ``pre_commands`` and ``post_commands`` are provided to suit those ``perf`` commands that don't actually capture data (``list``, ``config``, ``report``, ...). @@ -126,12 +114,12 @@ class PerfInstrument(Instrument): ``optionstring``\ s. This parameter is ignored if ``commands`` is passed. """), - Parameter('pre_commands', kind=YamlCommandDescriptor, default=None, + Parameter('pre_commands', kind=PerfCommandDict, default=None, description=""" Dictionary of commands to be run before the workloads run (same format as ``commands``). 
"""), - Parameter('commands', kind=YamlCommandDescriptor, default=None, + Parameter('commands', kind=PerfCommandDict, default=None, description=""" Dictionary in which keys are considered as *labels* and values are themselves dictionaries with the following @@ -172,7 +160,7 @@ class PerfInstrument(Instrument): stderr: '&1' stdout: stat.out """), - Parameter('post_commands', kind=YamlCommandDescriptor, default=None, + Parameter('post_commands', kind=PerfCommandDict, default=None, description=""" Dictionary of commands to be run after the workloads run (same format as ``commands``). @@ -187,6 +175,10 @@ def initialize(self, context): # pylint: disable=unused-argument # pylint: disable=access-member-before-definition # pylint: disable=attribute-defined-outside-init + if self.pre_commands is None: + self.pre_commands = PerfCommandDict({}) + if self.post_commands is None: + self.post_commands = PerfCommandDict({}) if self.commands is None: if self.optionstring is None: self.optionstring = DEFAULT_OPTIONSTRING @@ -206,11 +198,11 @@ def initialize(self, context): if len(self.labels) != len(self.optionstring): raise ValueError('Lengths of labels and optionstring differ') - self.commands = YamlCommandDescriptor({ + self.commands = PerfCommandDict({ label: { 'command': 'stat', 'kwflags': {'event': self.events}, - 'options': options, + 'options': shlex.split(options), 'args': { 'command': 'sleep', 'args': 1000, @@ -233,8 +225,9 @@ def initialize(self, context): self.post_commands) def setup(self, context): - # pylint: disable=unused-argument self.collector.reset() + version = self.collector.execute('--version').strip() + context.update_metadata('versions', self.name, version) def start(self, context): # pylint: disable=unused-argument @@ -245,11 +238,160 @@ def stop(self, context): self.collector.stop() def update_output(self, context): - outdir = os.path.join(context.output_directory, 'perf') + outdir = os.path.join(context.output_directory, self.name) 
self.collector.get_traces(outdir) - # HUGE TODO: add parsers for supported post_commands - # (or should these be in devlib?) + all_commands = itertools.chain(self.pre_commands.items(), + self.commands.items(), + self.post_commands.items()) + for label, cmd in all_commands: + if 'stat' in cmd.command: + # perf stat supports redirecting its stdout to --output/-o: + stat_file = (cmd.kwflags.get('o', None) or + cmd.kwflags.get('output', None) or + cmd.stdout) + with open(os.path.join(outdir, label, stat_file)) as f: + for metric in self._extract_stat_metrics(label, f.read()): + context.add_metric(**metric) def teardown(self, context): # pylint: disable=unused-argument self.collector.reset() + + @classmethod + def _extract_stat_metrics(cls, label, stdout): + """ + When running ``perf stat``, this instrument reports the captured + counters as unitless :class:`Metrics` with the following classifiers: + + - ``'name'``: The name of the event as reported by ``perf``. This name + may not be unique when aggregation is disabled as the same counter is + then captured for multiple hardware threads; + - ``'label'``: Label given to the run of ``perf stat``; + - ``'target'``: The target ``perf`` reports for the captured events. + This is shared across all events of a run and is further specialized + by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable; + - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run; + - ``'count_error'``: A string containing the error that + prevented the counter from being captured. Only available if an error + occurred. In this case the value of the metric is always ``0``; + - ``'hw_thread_count'``: Number of **hardware** threads that were + contributing to the counter. Only available when the automatic + aggregation done by ``perf stat`` is disabled. 
See ``'hw_thread'``, + ``'core'`` and ``'cluster'``; + - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the + index of the hardware thread that incremented the counter. In this + case, ``'hw_thread_count'`` is always ``1``. For backward + compatibility, the ``'cpu'`` classifier is provided as a synonym of + ``'hw_thread'`` (unlike what its name might suggest, on systems + supporting hardware multithreading, ``'cpu'`` is not a synonym of + ``'core'``!); + - ``'cluster'``: When the ``--per-socket`` option is used, holds the + index of the cluster (_i.e._ "socket" in ``perf`` terminology) that + incremented the counter and ``'hw_thread_count'`` holds the number of + hardware threads in the cluster. When the ``--per-core`` option is + used, this classifier gives the index of the cluster of the core. + - ``'core'``: When the ``--per-core`` option is used, holds the index + (within its cluster) of the core that incremented the counter and + ``'hw_thread_count'`` holds the number of hardware threads in the + core. + - ``'enabled'``: When ``perf`` needs to capture more hardware events + than there are hardware counters, it shares the hardware counters + among the events through time-slicing. This classifier holds the + percentage (between ``0.0`` and ``100.0``) of the run that a hardware + counter was allocated to the event. Available only for hardware + events and only when time-slicing was required. + - ``'comment_value'``, ``'comment_units'``: Some counters may come with + an extra "comment" (following a ``#``) added by ``perf``. The + ``'comment_value'`` holds the numeric (``int`` or ``float``) value of + the comment while ``'comment_units'`` holds the rest of the comment + (typically the units). Only available for the events for which + ``perf`` added a comment. 
+ """ + match = cls._stat_regex.search(stdout) + if match is None: + return + base_classifiers = { + 'label': label, + 'target': match['target'], + 'duration': float(match['duration'].replace(',', '')), + 'duration_units': match['duration_units'], + } + for m in cls._stat_counter_regex.finditer(match['counters']): + classifiers = base_classifiers.copy() + name, count = cls._extract_stat_count(m, classifiers) + yield { + 'name': name, + 'units': None, + 'value': count, + 'classifiers': classifiers, + } + + _stat_regex = re.compile( + r'Performance counter stats for (?P<target>.*?)\s*:\s*$' + r'^(?P<counters>.*)$' + r'^\s*(?P<duration>[0-9.,]+)\s*(?P<duration_units>\S+)\s*time elapsed', + flags=(re.S | re.M)) + + _stat_counter_regex = re.compile( + r'^\s*{aggr}?\s*{count}\s*{name}\s*{comment}?(?:{enabled}|$)'.format( + aggr=r'(?:{hw_thread}|(?:{cluster}{core}?\s*{thread_cnt}))'.format( + hw_thread=r'(?:CPU-?(?P<hw_thread>\d+))', + cluster=r'S(?P<cluster>\d+)', + core=r'(?:-C(?P<core>\d+))', + thread_cnt=r'(?P<hw_thread_count>\d+)'), + count=r'(?P<count>[0-9.,]+|\<not supported\>|\<not counted\>)', + name=r'(?P<name>.*?)', + comment=r'(?:#\s*{value}\s*{units}\s*)'.format( + value=r'(?P<comment_value>[0-9,.]+)', + units=r'(?P<comment_units>.*?)'), + enabled=r'(?:[\[\(](?P<enabled>[0-9.]+)%[\)\]])'), + flags=re.M) + + @staticmethod + def _extract_stat_count(match, classifiers): + """Extracts the counter classifiers and count from a counter_match. + + Parameters: + match A :class:`re.Match` from :attr:`_stat_counter_regex` + classifiers A dictionary to be completed for the matched counter + + Returns: + A (name, value) tuple for the matched counter (value is 0 if an + error occurred). + """ + name = f'{classifiers["label"]}_{match["name"]}'.replace(' ', '_') + classifiers['name'] = match['name'] + # But metrics need a unique name (classifiers not enough) so this + # name might be specialized by the following: + try: + count = int(match['count'].replace(',', '')) + except ValueError: + try: + # some "counters" return a float (e.g. 
"task-clock"): + count = float(match['count'].replace(',', '')) + except ValueError: + # perf may report "not supported" or "not counted": + count = 0 # as metrics have to be numeric, can't use None + classifiers['count_error'] = match['count'] + if match['hw_thread']: # --no-aggr + classifiers['hw_thread'] = int(match['hw_thread']) + classifiers['hw_thread_count'] = 1 + classifiers['cpu'] = int(match['hw_thread']) # deprecated! + name += f'_T{classifiers["hw_thread"]}' + elif match['cluster']: # --per-core or --per-socket + classifiers['cluster'] = int(match['cluster']) + classifiers['hw_thread_count'] = int(match['hw_thread_count']) + name += f'_S{classifiers["cluster"]}' + if match['core']: # --per-core + classifiers['core'] = int(match['core']) + name += f'_C{classifiers["core"]}' + if match['comment_value']: + try: + classifiers['comment_value'] = int(match['comment_value']) + except ValueError: + classifiers['comment_value'] = float(match['comment_value']) + if match['comment_units']: + classifiers['comment_units'] = match['comment_units'] + if match['enabled']: + classifiers['enabled'] = float(match['enabled']) + return (name, count) From 55359d8402ca11baad58d6aa7200d902fb67daf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Tue, 11 Jun 2019 14:32:34 +0100 Subject: [PATCH 3/7] instruments/perf: Add tests for perf stat parser Add tests with parser inputs (i.e. perf stat stdout outputs) and parser outputs (i.e. arrays of WA metrics) for the `perf stat` parser of PerfInstrument. This will be useful when modifying the code of the parser, to verify its robustness. NB: These tests are not exhaustive. 
--- tests/test_instrument_perf.py | 2062 +++++++++++++++++++++++++++++++++ 1 file changed, 2062 insertions(+) create mode 100644 tests/test_instrument_perf.py diff --git a/tests/test_instrument_perf.py b/tests/test_instrument_perf.py new file mode 100644 index 000000000..60ea64fc1 --- /dev/null +++ b/tests/test_instrument_perf.py @@ -0,0 +1,2062 @@ +# copyright 2019 Arm limited +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2. +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +import unittest + +from wa.instruments.perf import PerfInstrument + +STAT_PAIRS = { + + '-a -e r1,r2,r3,r4,r5,r6,r7,r8': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + + 1139 migrations [100.00%] + 6141 cs + 14648295 r1 [74.87%] + 2966422 r2 [74.96%] + 11872707 r3 [74.94%] + 8184054637 r4 [75.11%] + 2409014 r5 [75.30%] + 86957873 r6 [75.27%] + 34552449 r7 [75.14%] + 15730113018 r8 [74.88%] + + 1.681693229 seconds time elapsed +""", +[ + { + "name": "default0_migrations", + "value": 1139, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "migrations", + "enabled": 100.0 + } + }, + { + "name": "default0_cs", + "value": 6141, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "cs" + } + }, + { + "name": "default0_r1", + "value": 14648295, + "units": None, + 
"classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r1", + "enabled": 74.87 + } + }, + { + "name": "default0_r2", + "value": 2966422, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r2", + "enabled": 74.96 + } + }, + { + "name": "default0_r3", + "value": 11872707, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r3", + "enabled": 74.94 + } + }, + { + "name": "default0_r4", + "value": 8184054637, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r4", + "enabled": 75.11 + } + }, + { + "name": "default0_r5", + "value": 2409014, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r5", + "enabled": 75.3 + } + }, + { + "name": "default0_r6", + "value": 86957873, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r6", + "enabled": 75.27 + } + }, + { + "name": "default0_r7", + "value": 34552449, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r7", + "enabled": 75.14 + } + }, + { + "name": "default0_r8", + "value": 15730113018, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r8", + "enabled": 74.88 + } + } +], +), +], + + '-a -A': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + + 
CPU0 201 migrations (100.00%) + CPU1 217 migrations (100.00%) + CPU2 241 migrations (100.00%) + CPU3 216 migrations (100.00%) + CPU4 79 migrations (100.00%) + CPU5 40 migrations (100.00%) + CPU6 55 migrations (100.00%) + CPU7 70 migrations (100.00%) + CPU0 2285 cs + CPU1 1454 cs + CPU2 2704 cs + CPU3 2085 cs + CPU4 1790 cs + CPU5 1240 cs + CPU6 636 cs + CPU7 1557 cs + + 2.494999050 seconds time elapsed +""", +[ + { + "name": "default0_migrations_T0", + "value": 201, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 0, + "hw_thread_count": 1, + "cpu": 0, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T1", + "value": 217, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 1, + "hw_thread_count": 1, + "cpu": 1, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T2", + "value": 241, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 2, + "hw_thread_count": 1, + "cpu": 2, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T3", + "value": 216, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 3, + "hw_thread_count": 1, + "cpu": 3, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T4", + "value": 79, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 4, + "hw_thread_count": 1, + "cpu": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T5", + "value": 40, 
+ "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 5, + "hw_thread_count": 1, + "cpu": 5, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T6", + "value": 55, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 6, + "hw_thread_count": 1, + "cpu": 6, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T7", + "value": 70, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 7, + "hw_thread_count": 1, + "cpu": 7, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_T0", + "value": 2285, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 0, + "hw_thread_count": 1, + "cpu": 0 + } + }, + { + "name": "default0_cs_T1", + "value": 1454, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 1, + "hw_thread_count": 1, + "cpu": 1 + } + }, + { + "name": "default0_cs_T2", + "value": 2704, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 2, + "hw_thread_count": 1, + "cpu": 2 + } + }, + { + "name": "default0_cs_T3", + "value": 2085, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 3, + "hw_thread_count": 1, + "cpu": 3 + } + }, + { + "name": "default0_cs_T4", + "value": 1790, + 
"units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 4, + "hw_thread_count": 1, + "cpu": 4 + } + }, + { + "name": "default0_cs_T5", + "value": 1240, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 5, + "hw_thread_count": 1, + "cpu": 5 + } + }, + { + "name": "default0_cs_T6", + "value": 636, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 6, + "hw_thread_count": 1, + "cpu": 6 + } + }, + { + "name": "default0_cs_T7", + "value": 1557, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 7, + "hw_thread_count": 1, + "cpu": 7 + } + } +], +), +], + + '-a -A --per-socket': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + +S0 4 697 migrations (100.00%) +S0 4 7801 cs +S1 4 203 migrations (100.00%) +S1 4 4408 cs + + 2.262571267 seconds time elapsed +""", +[ + { + "name": "default0_migrations_S0", + "value": 697, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "migrations", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S0", + "value": 7801, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "cs", + "cluster": 0, + "hw_thread_count": 4 + } + }, + { + "name": "default0_migrations_S1", + "value": 203, + "units": None, + "classifiers": { + "label": "default0", 
+ "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "migrations", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S1", + "value": 4408, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "cs", + "cluster": 1, + "hw_thread_count": 4 + } + } +], +), +], + "-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket": [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + +S0 4 725 migrations (100.00%) +S0 4 7202 cs +S0 4 9439048 r1 (37.55%) +S0 4 179650 r2 (37.54%) +S0 4 3856583 r3 (37.56%) +S0 4 71399486 r4 (37.49%) +S0 4 251669 r5 (37.48%) +S0 4 39189196 r6 (37.46%) +S0 4 19239860 r7 (37.47%) +S0 4 288165417 r8 (37.47%) +S1 4 222 migrations (100.00%) +S1 4 5225 cs +S1 4 8222810 r1 (37.55%) +S1 4 2852407 r2 (37.55%) +S1 4 5519117 r3 (37.55%) +S1 4 7193421718 r4 (37.49%) +S1 4 3236589 r5 (37.47%) +S1 4 0 r6 (37.47%) +S1 4 0 r7 (37.47%) +S1 4 13821910139 r8 (37.47%) + + 2.256465902 seconds time elapsed +""", +[ + { + "name": "default0_migrations_S0", + "value": 725, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "migrations", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S0", + "value": 7202, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "cs", + "cluster": 0, + "hw_thread_count": 4 + } + }, + { + "name": "default0_r1_S0", + "value": 9439048, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r1", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.55 
+ } + }, + { + "name": "default0_r2_S0", + "value": 179650, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r2", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.54 + } + }, + { + "name": "default0_r3_S0", + "value": 3856583, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r3", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.56 + } + }, + { + "name": "default0_r4_S0", + "value": 71399486, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r4", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.49 + } + }, + { + "name": "default0_r5_S0", + "value": 251669, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r5", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.48 + } + }, + { + "name": "default0_r6_S0", + "value": 39189196, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r6", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.46 + } + }, + { + "name": "default0_r7_S0", + "value": 19239860, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r7", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r8_S0", + "value": 288165417, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r8", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": 
"default0_migrations_S1", + "value": 222, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "migrations", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S1", + "value": 5225, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "cs", + "cluster": 1, + "hw_thread_count": 4 + } + }, + { + "name": "default0_r1_S1", + "value": 8222810, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r1", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r2_S1", + "value": 2852407, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r2", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r3_S1", + "value": 5519117, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r3", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r4_S1", + "value": 7193421718, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r4", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.49 + } + }, + { + "name": "default0_r5_S1", + "value": 3236589, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r5", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r6_S1", + "value": 0, + 
"units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r6", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r7_S1", + "value": 0, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r7", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r8_S1", + "value": 13821910139, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r8", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + } +], +), +], + + '-a -A --per-core': [ +( +# Ubuntu 18.04.2 LTS - OS 4.15.0-50-generic +# perf version 4.15.18 +""" + Performance counter stats for 'system wide': + +S0-C0 2 2003.008100 cpu-clock (msec) # 1.998 CPUs utilized +S0-C0 2 38 context-switches # 0.019 K/sec +S0-C0 2 3 cpu-migrations # 0.001 K/sec +S0-C0 2 73 page-faults # 0.036 K/sec +S0-C0 2 15,750,905 cycles # 0.008 GHz +S0-C0 2 4,042,693 instructions # 0.26 insn per cycle +S0-C0 2 860,481 branches # 0.430 M/sec +S0-C0 2 166,940 branch-misses # 19.40% of all branches +S0-C1 2 2003.042586 cpu-clock (msec) # 1.998 CPUs utilized +S0-C1 2 155 context-switches # 0.077 K/sec +S0-C1 2 1 cpu-migrations # 0.000 K/sec +S0-C1 2 386 page-faults # 0.193 K/sec +S0-C1 2 407,532,423 cycles # 0.203 GHz +S0-C1 2 73,526,057 instructions # 0.18 insn per cycle +S0-C1 2 22,478,777 branches # 11.222 M/sec +S0-C1 2 293,815 branch-misses # 1.31% of all branches +S0-C2 2 2003.076028 cpu-clock (msec) # 1.998 CPUs utilized +S0-C2 2 213 context-switches # 0.106 K/sec +S0-C2 2 2 cpu-migrations # 0.001 K/sec +S0-C2 2 1 page-faults # 0.000 K/sec +S0-C2 2 18,605,672 cycles # 0.009 GHz +S0-C2 2 4,406,356 instructions # 0.24 insn per cycle +S0-C2 2 1,088,504 branches 
# 0.543 M/sec +S0-C2 2 142,203 branch-misses # 13.06% of all branches +S0-C3 2 2003.109192 cpu-clock (msec) # 1.998 CPUs utilized +S0-C3 2 245 context-switches # 0.122 K/sec +S0-C3 2 6 cpu-migrations # 0.003 K/sec +S0-C3 2 0 page-faults # 0.000 K/sec +S0-C3 2 23,626,131 cycles # 0.012 GHz +S0-C3 2 7,714,748 instructions # 0.33 insn per cycle +S0-C3 2 1,805,933 branches # 0.902 M/sec +S0-C3 2 193,243 branch-misses # 10.70% of all branches +S0-C4 2 2003.143584 cpu-clock (msec) # 1.998 CPUs utilized +S0-C4 2 596 context-switches # 0.298 K/sec +S0-C4 2 6 cpu-migrations # 0.003 K/sec +S0-C4 2 113 page-faults # 0.056 K/sec +S0-C4 2 53,837,367 cycles # 0.027 GHz +S0-C4 2 23,264,962 instructions # 0.43 insn per cycle +S0-C4 2 4,975,165 branches # 2.484 M/sec +S0-C4 2 301,069 branch-misses # 6.05% of all branches +S0-C5 2 2003.151837 cpu-clock (msec) # 1.998 CPUs utilized +S0-C5 2 172 context-switches # 0.086 K/sec +S0-C5 2 4 cpu-migrations # 0.002 K/sec +S0-C5 2 37 page-faults # 0.018 K/sec +S0-C5 2 24,086,889 cycles # 0.012 GHz +S0-C5 2 7,219,194 instructions # 0.30 insn per cycle +S0-C5 2 1,537,648 branches # 0.768 M/sec +S0-C5 2 177,565 branch-misses # 11.55% of all branches +S0-C6 2 2003.160900 cpu-clock (msec) # 1.998 CPUs utilized +S0-C6 2 146 context-switches # 0.073 K/sec +S0-C6 2 3 cpu-migrations # 0.001 K/sec +S0-C6 2 69 page-faults # 0.034 K/sec +S0-C6 2 27,327,018 cycles # 0.014 GHz +S0-C6 2 7,956,363 instructions # 0.29 insn per cycle +S0-C6 2 1,834,119 branches # 0.916 M/sec +S0-C6 2 210,607 branch-misses # 11.48% of all branches +S0-C7 2 2003.187967 cpu-clock (msec) # 1.998 CPUs utilized +S0-C7 2 91 context-switches # 0.045 K/sec +S0-C7 2 1 cpu-migrations # 0.000 K/sec +S0-C7 2 32 page-faults # 0.016 K/sec +S0-C7 2 26,120,485 cycles # 0.013 GHz +S0-C7 2 10,457,563 instructions # 0.40 insn per cycle +S0-C7 2 2,337,445 branches # 1.167 M/sec +S0-C7 2 238,864 branch-misses # 10.22% of all branches + + 1.002350964 seconds time elapsed +""", +[ + { + "name": 
"default0_cpu-clock_(msec)_S0_C0", + "units": None, + "value": 2003.0081, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C0", + "units": None, + "value": 38, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.019, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C0", + "units": None, + "value": 3, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C0", + "units": None, + "value": 73, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.036, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C0", + "units": None, + "value": 15750905, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.008, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C0", + "units": None, + "value": 4042693, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + 
"hw_thread_count": 2, + "core": 0, + "comment_value": 0.26, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C0", + "units": None, + "value": 860481, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.43, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C0", + "units": None, + "value": 166940, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 19.4, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C1", + "units": None, + "value": 2003.042586, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C1", + "units": None, + "value": 155, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.077, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C1", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C1", + "units": None, + "value": 386, + "classifiers": { + "label": 
"default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.193, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C1", + "units": None, + "value": 407532423, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.203, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C1", + "units": None, + "value": 73526057, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.18, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C1", + "units": None, + "value": 22478777, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 11.222, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C1", + "units": None, + "value": 293815, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 1.31, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C2", + "units": None, + "value": 2003.076028, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 1.998, + "comment_units": 
"CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C2", + "units": None, + "value": 213, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.106, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C2", + "units": None, + "value": 2, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C2", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C2", + "units": None, + "value": 18605672, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.009, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C2", + "units": None, + "value": 4406356, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.24, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C2", + "units": None, + "value": 1088504, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": 
"branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.543, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C2", + "units": None, + "value": 142203, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 13.06, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C3", + "units": None, + "value": 2003.109192, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C3", + "units": None, + "value": 245, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.122, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C3", + "units": None, + "value": 6, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.003, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C3", + "units": None, + "value": 0, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C3", + "units": None, + "value": 23626131, + 
"classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.012, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C3", + "units": None, + "value": 7714748, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.33, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C3", + "units": None, + "value": 1805933, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.902, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C3", + "units": None, + "value": 193243, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 10.7, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C4", + "units": None, + "value": 2003.143584, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C4", + "units": None, + "value": 596, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + 
"comment_value": 0.298, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C4", + "units": None, + "value": 6, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.003, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C4", + "units": None, + "value": 113, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.056, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C4", + "units": None, + "value": 53837367, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.027, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C4", + "units": None, + "value": 23264962, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.43, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C4", + "units": None, + "value": 4975165, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 2.484, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C4", + "units": None, + "value": 301069, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": 
"seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 6.05, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C5", + "units": None, + "value": 2003.151837, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C5", + "units": None, + "value": 172, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.086, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C5", + "units": None, + "value": 4, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.002, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C5", + "units": None, + "value": 37, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.018, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C5", + "units": None, + "value": 24086889, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.012, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C5", + "units": None, + "value": 
7219194, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.3, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C5", + "units": None, + "value": 1537648, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.768, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C5", + "units": None, + "value": 177565, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 11.55, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C6", + "units": None, + "value": 2003.1609, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C6", + "units": None, + "value": 146, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.073, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C6", + "units": None, + "value": 3, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + 
"core": 6, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C6", + "units": None, + "value": 69, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.034, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C6", + "units": None, + "value": 27327018, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.014, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C6", + "units": None, + "value": 7956363, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.29, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C6", + "units": None, + "value": 1834119, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.916, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C6", + "units": None, + "value": 210607, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 11.48, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C7", + "units": None, + "value": 2003.187967, + "classifiers": { + "label": "default0", + "target": "'system wide'", + 
"duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C7", + "units": None, + "value": 91, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.045, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C7", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C7", + "units": None, + "value": 32, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.016, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C7", + "units": None, + "value": 26120485, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.013, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C7", + "units": None, + "value": 10457563, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.4, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C7", + 
"units": None, + "value": 2337445, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 1.167, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C7", + "units": None, + "value": 238864, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 10.22, + "comment_units": "% of all branches" + } + } +], +), +], + +} + + + +class StatParserTest(unittest.TestCase): + + maxDiff = None + + def _test_pair(self, stdout, metrics): + metrics_dut = PerfInstrument._extract_stat_metrics('default0', stdout) + count = 0 + for metric_dut in metrics_dut: + # metric names are guaranteed to be unique by the documentation + metric = next(m for m in metrics if m['name'] == metric_dut['name']) + self.assertEqual(metric, metric_dut) + count += 1 + self.assertEqual(count, len(metrics)) + + def _test_key(self, key): + for stdout, metrics in STAT_PAIRS[key]: + self._test_pair(stdout, metrics) + + def test_all_cpus_many_events(self): + self._test_key('-a -e r1,r2,r3,r4,r5,r6,r7,r8') + + def test_all_cpus_no_aggregate_per_core(self): + self._test_key('-a -A --per-core') + + def test_all_cpus_no_aggregate_per_socket(self): + self._test_key('-a -A --per-socket') + + def test_all_cpus_no_aggregate_many_events_per_socket(self): + self._test_key('-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket') From 4e3859556d826b23942f43b7c0471595760aaa63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Wed, 19 Jun 2019 15:05:33 +0100 Subject: [PATCH 4/7] fixup! 
instruments/perf: Support ALL perf subcommands --- wa/instruments/perf.py | 95 +++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index b69cb101e..9da3b09d0 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -75,6 +75,53 @@ class PerfInstrument(Instrument): Please refer to the ``sysfile_values`` runtime parameter to do so from an agenda. + + When running ``perf stat``, this instrument reports the captured + counters as unitless :class:`Metrics` with the following classifiers: + + - ``'name'``: The name of the event as reported by ``perf``. This name + may not be unique when aggregation is disabled as the same counter is + then captured for multiple hardware threads; + - ``'label'``: Label given to the run of ``perf stat``; + - ``'target'``: The target ``perf`` reports for the captured events. + This is shared across all events of a run and is further specialized + by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable; + - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run; + - ``'count_error'``: A string containing the error that + prevented the counter from being captured. Only available if an error + occurred. In this case the value of the metric is always ``0``; + - ``'hw_thread_count'``: Number of **hardware** threads that were + contributing to the counter. Only available when the automatic + aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``, + ``'core'`` and ``'cluster'``; + - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the + index of the hardware thread that incremented the counter. In this + case, ``'hw_thread_count'`` is always ``1``.
For backward + compatibility, the ``'cpu'`` classifier is provided as a synonym of + ``'hw_thread'`` (unlike what its name might suggest, on systems + supporting hardware multithreading, ``'cpu'`` is not a synonym of + ``'core'``!); + - ``'cluster'``: When the ``--per-socket`` option is used, holds the + index of the cluster (_i.e._ "socket" in ``perf`` terminology) that + incremented the counter and ``'hw_thread_count'`` holds the number of + hardware threads in the cluster. When the ``--per-core`` option is + used, this classifier gives the index of the cluster of the core. + - ``'core'``: When the ``--per-core`` option is used, holds the index + (within its cluster) of the core that incremented the counter and + ``'hw_thread_count'`` holds the number of hardware threads in the + core. + - ``'enabled'``: When ``perf`` needs to capture more hardware events + than there are hardware counters, it shares the hardware counters + among the events through time-slicing. This classifier holds the + fraction (between ``0.0`` and ``100.0``) of the run that a hardware + counter was allocated to the event. Available only for hardware + events and only when time-slicing was required. + - ``'comment_value'``, ``'comment_units'``: Some counters may come with + an extra "comment" (following a ``#``) added by ``perf``. The + ``'comment_value'`` holds the numeric (``int`` or ``float``) value of + the comment while ``'comment_units'`` holds the rest of the comment + (typically the units). Only available for the events for which + ``perf`` added a comment. """ parameters = [ @@ -259,54 +306,6 @@ def teardown(self, context): @classmethod def _extract_stat_metrics(cls, label, stdout): - """ - When running ``perf stat``, this instrument reports the captured - counters as unitless :class:`Metrics` with the following classifiers: - - - ``'name'``: The name of the event as reported by ``perf``.
This name - may not be unique when aggregation is disabled as the same counter is - then captured for multiple hardware threads; - - ``'label'``: Label given to the run of ``perf stat``; - - ``'target'``: The target ``perf`` reports for the captured events. - This is shared across all events of a run and is further specialized - by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable; - - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run; - - ``'count_error'``: A string containing the error corresponding that - prevented the counter from being captured. Only available if an error - occured. In this case the value of the metric is always ``0``; - - ``'hw_thread_count'``: Number of **hardware** threads that were - contributing to the counter. Only available when the automatic - aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``, - ``'core'`` and ``'cluster'``; - - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the - index of the hardware thread that incremented the counter. In this - case, ``'hw_thread_count'`` is always ``1``. For backward - compatibility, the ``'cpu'`` classifier is provided as a synonym of - ``'hw_thread'`` (unlike what its name might suggest, on systems - supporting hardware multithreading, ``'cpu'`` is not a synonym of - ``'core'``!); - - ``'cluster'``: When the ``--per-socket`` option is used, holds the - index of the cluster (_i.e._ "socket" in ``perf`` terminology) that - incremented the counter and ``'hw_thread_count'`` holds the number of - hardware threads in the cluster. When the ``--per-core`` option is - used, this classifier gives the index of the cluster of the core. - - ``'core'``: When the ``--per-core`` option is used, holds the index - (within its cluster) of the core that incremented the counter and - ``'hw_thread_count'`` holds the number of hardware threads in the - core. 
- - ``'enabled'``: When ``perf`` needs to capture more hardware events - than there are hardware counters, it shares the hardware counters - among the events through time-slicing. This classifier holds the - fraction (between ``0.0`` and ``100.0``) of the run that a hardware - counter was allocated to the the event. Available only for hardware - events and only when time-slicing was required. - - ``'comment_value'``, ``'comment_units'``: Some counters may come with - an extra "comment" (following a ``#``) added by ``perf``. The - ``'comment_value'`` holds the numeric (``int`` or ``float``) value of - the comment while ``'comment_units'`` holds the rest of the comment - (typically the units). Only available for the events for which - ``perf`` added a comment. - """ match = cls._stat_regex.search(stdout) if match is None: return From 651cf49b055bbf074bc07776b1d3581bab200fe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Wed, 19 Jun 2019 15:05:44 +0100 Subject: [PATCH 5/7] fixup! instruments/perf: Support ALL perf subcommands --- wa/instruments/perf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index 9da3b09d0..b255abdc0 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -358,7 +358,8 @@ def _extract_stat_count(match, classifiers): A (name, value) tuple for the matched counter (value is 0 if an error occurred). """ - name = f'{classifiers["label"]}_{match["name"]}'.replace(' ', '_') + name = '{}_{}'.format(classifiers['label'], + match['name']).replace(' ', '_') classifiers['name'] = match['name'] # But metrics need a unique name (classifiers not enough) so this # name might be specialized by the following: @@ -376,14 +377,14 @@ def _extract_stat_count(match, classifiers): classifiers['hw_thread'] = int(match['hw_thread']) classifiers['hw_thread_count'] = 1 classifiers['cpu'] = int(match['hw_thread']) # deprecated! 
- name += f'_T{classifiers["hw_thread"]}' + name += '_T{}'.format(classifiers["hw_thread"]) elif match['cluster']: # --per-core or --per-socket classifiers['cluster'] = int(match['cluster']) classifiers['hw_thread_count'] = int(match['hw_thread_count']) - name += f'_S{classifiers["cluster"]}' + name += '_S{}'.format(classifiers["cluster"]) if match['core']: # --per-core classifiers['core'] = int(match['core']) - name += f'_C{classifiers["core"]}' + name += '_C{}'.format(classifiers["core"]) if match['comment_value']: try: classifiers['comment_value'] = int(match['comment_value']) From 84470ddbc892e349b5a00b221938b5aa66d8ac8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Tue, 25 Jun 2019 20:55:30 +0100 Subject: [PATCH 6/7] fixup! instruments/perf: Support ALL perf subcommands --- wa/instruments/perf.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index b255abdc0..8fe8d7d0f 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -291,14 +291,35 @@ def update_output(self, context): self.commands.items(), self.post_commands.items()) for label, cmd in all_commands: - if 'stat' in cmd.command: + classifiers = { + 'label': label, + 'command': cmd.command, + } + + outputs = [] + if 'o' in cmd.kwflags: + outputs.append((cmd.kwflags['o'], 'data')) + if 'output' in cmd.kwflags: + outputs.append((cmd.kwflags['output'], 'data')) + if cmd.stdout and not cmd.stdout.startswith('&'): + outputs.append((cmd.stdout, 'data')) + if cmd.stderr and not cmd.stderr.startswith('&'): + outputs.append((cmd.stderr, 'log')) + + metrics_done = False + for output, kind in outputs: # perf stat supports redirecting its stdout to --output/-o: - stat_file = (cmd.kwflags.get('o', None) or - cmd.kwflags.get('output', None) or - cmd.stdout) - with open(os.path.join(outdir, label, stat_file)) as f: - for metric in self._extract_stat_metrics(label, f.read()): - 
context.add_metric(**metric) + output_path = os.path.join(outdir, label, output) + if 'stat' in cmd.command and not metrics_done: + metrics_done = True + with open(output_path) as f: + for metric in self._extract_stat_metrics(label, + f.read()): + context.add_metric(**metric) + kind = 'raw' + context.add_artifact(os.path.join(label, output), + output_path, kind, + classifiers=classifiers) def teardown(self, context): # pylint: disable=unused-argument From 4cd05297e4bde3b67787c2e81633ed7d821e0932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= Date: Wed, 26 Jun 2019 11:20:17 +0100 Subject: [PATCH 7/7] fixup! instruments/perf: Support ALL perf subcommands --- wa/instruments/perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py index 8fe8d7d0f..b9ba3af50 100644 --- a/wa/instruments/perf.py +++ b/wa/instruments/perf.py @@ -26,7 +26,7 @@ 'PerfInstrument', ] -DEFAULT_EVENTS = ['migration', 'cs'] +DEFAULT_EVENTS = ['migrations', 'cs'] DEFAULT_OPTIONSTRING = '-a'