From 94645972f466261eeb88326651ee51fec6f23787 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Wed, 1 May 2019 11:49:38 +0100
Subject: [PATCH 1/7] instruments/perf: Support ALL perf subcommands

Introduce an implementation of the PerfInstrument that is more generic
than the previous one and which is expected to be able to handle all
potential calls to perf (irrespective of the subcommand, flags, options
or arguments being used) but which maintains backward compatibility with
the previous implementation, targeting perf-stat.
---
 wa/instruments/perf.py | 277 +++++++++++++++++++++++++++++------------
 1 file changed, 197 insertions(+), 80 deletions(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index c5179cdfa..549fd5b00 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -1,4 +1,4 @@
-#    Copyright 2013-2015 ARM Limited
+#    Copyright 2013-2019 ARM Limited
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,74 +13,170 @@
 # limitations under the License.
 #
 
-
-# pylint: disable=unused-argument
+import collections
 import os
-import re
 
+from devlib.utils.cli import Command
 from devlib.trace.perf import PerfCollector
-
 from wa import Instrument, Parameter
 from wa.utils.types import list_or_string, list_of_strs
 
-PERF_COUNT_REGEX = re.compile(r'^(CPU\d+)?\s*(\d+)\s*(.*?)\s*(\[\s*\d+\.\d+%\s*\])?\s*$')
-
+__all__ = [
+    'PerfInstrument',
+]
 
-class PerfInstrument(Instrument):
 
-    name = 'perf'
-    description = """
-    Perf is a Linux profiling with performance counters.
+class YamlCommandDescriptor(collections.OrderedDict):
 
-    Performance counters are CPU hardware registers that count hardware events
-    such as instructions executed, cache-misses suffered, or branches
-    mispredicted. They form a basis for profiling applications to trace dynamic
-    control flow and identify hotspots.
+    def __init__(self, yaml_dict):
+        super(YamlCommandDescriptor, self).__init__()
+        if isinstance(yaml_dict, YamlCommandDescriptor):
+            for k, v in yaml_dict.items():
+                self[k] = v
+            return
+        yaml_dict_copy = yaml_dict.copy()
+        for label, parameters in yaml_dict_copy.items():
+            self[label] = str(Command(kwflags_join=',',
+                                      kwflags_sep='=',
+                                      end_of_options='--',
+                                      **parameters))
 
-    pref accepts options and events. If no option is given the default '-a' is
-    used. For events, the default events are migrations and cs. They both can
-    be specified in the config file.
 
-    Events must be provided as a list that contains them and they will look like
-    this ::
+DEFAULT_EVENTS = ['migration', 'cs']
+DEFAULT_OPTIONSTRING = '-a'
 
-        perf_events = ['migrations', 'cs']
 
-    Events can be obtained by typing the following in the command line on the
-    device ::
-
-        perf list
-
-    Whereas options, they can be provided as a single string as following ::
-
-        perf_options = '-a -i'
-
-    Options can be obtained by running the following in the command line ::
+class PerfInstrument(Instrument):
 
-        man perf-stat
+    name = 'perf'
+    description = """
+    Perf is a Linux profiling tool based on performance counters.
+
+    Performance counters are typically CPU hardware registers (found in the
+    Performance Monitoring Unit) that count hardware events such as
+    instructions executed, cache-misses suffered, or branches mispredicted.
+    Because each ``event`` corresponds to a hardware counter, the maximum
+    number of events that can be tracked is imposed by the available hardware.
+
+    By extension, performance counters, in the context of ``perf``, also refer
+    to so-called "software counters" representing events that can be tracked by
+    the OS kernel (e.g. context switches). As these are software events, the
+    counters are kept in RAM and the hardware virtually imposes no limit on the
+    number that can be used.
+
+    This instrument allows a straight-forward way of calling ``perf stat``
+    through the named parameters ``optionstring`` and ``events``, which is the
+    default behaviour (see the defaults of these parameters).  However, it can
+    also be used through the more advanced ``commands`` dictionary which
+    provides a flexible access to all ways ``perf`` can be used.
+
+    The ``pre_commands`` and ``post_commands`` are provided to suit those
+    ``perf`` commands that don't actually capture data (``list``, ``config``,
+    ``report``, ...).
+
+    Commands are tagged with _labels_ which are used to define in which
+    directory they run. Therefore, a pair of commands (_e.g._ a `record`
+    followed by a `report`) sharing the same label can access the same files
+    while commands with different labels can use the same filename with the
+    guarantee of avoiding clashes.
+
+    Depending on the subcommand used, ``perf`` might require setting:
+
+        - ``/proc/sys/kernel/printk`` to ``4``
+        - ``/proc/sys/kernel/kptr_restrict`` to ``0``
+
+    Please refer to the ``sysfile_values`` runtime parameter to do so from an
+    agenda.
     """
 
     parameters = [
-        Parameter('events', kind=list_of_strs, default=['migrations', 'cs'],
-                  global_alias='perf_events',
-                  constraint=(lambda x: x, 'must not be empty.'),
-                  description="""Specifies the events to be counted."""),
-        Parameter('optionstring', kind=list_or_string, default='-a',
-                  global_alias='perf_options',
-                  description="""Specifies options to be used for the perf command. This
-                  may be a list of option strings, in which case, multiple instances of perf
-                  will be kicked off -- one for each option string. This may be used to e.g.
-                  collected different events from different big.LITTLE clusters.
+        Parameter('force_install', kind=bool, default=False,
+                  description="""
+                  Always install ``perf`` binary even if ``perf`` is already
+                  present on the device.
                   """),
+        Parameter('events', kind=list_of_strs, default=None,
+                  description="""
+                  List of events the default ``perf stat`` should capture.
+                  Valid events can be obtained from ``perf list`` and
+                  ``perf --help``.
+                  This parameter is ignored if ``commands`` is passed.
+
+                  default: {}
+                  """.format(
+                      ','.join('``{}``'.format(e) for e in DEFAULT_EVENTS))
+                  ),
+        Parameter('optionstring', kind=list_or_string, default=None,
+                  description="""
+                  String of options the default ``perf stat`` should use.
+                  For backward compatibility, this may be be a list of strings.
+                  In that case, a ``perf stat`` command will be launched for
+                  each string. This parameter is ignored if ``commands`` is
+                  passed.
+                  This parameter is ignored if ``commands`` is passed.
+
+                  default: ``{}``
+                  """.format(DEFAULT_OPTIONSTRING)
+                  ),
         Parameter('labels', kind=list_of_strs, default=None,
-                  global_alias='perf_labels',
-                  description="""Provides labels for pref output. If specified, the number of
-                  labels must match the number of ``optionstring``\ s.
+                  description=r"""
+                  These labels act like the keys of the ``commands`` parameter.
+                  They are provided for backward compatibility. If specified,
+                  the number of labels must match the number of
+                  ``optionstring``\ s. This parameter is ignored if
+                  ``commands`` is passed.
                   """),
-        Parameter('force_install', kind=bool, default=False,
+        Parameter('pre_commands', kind=YamlCommandDescriptor, default=None,
                   description="""
-                  always install perf binary even if perf is already present on the device.
-                  """),
+                  Dictionary of commands to be run before the workloads run
+                  (same format as ``commands``).
+                 """),
+        Parameter('commands', kind=YamlCommandDescriptor, default=None,
+                  description="""
+                  Dictionary in which keys are considered as *labels* and
+                  values are themselves dictionaries with the following
+                  entries:
+
+                      - ``command`` (``str``): The ``perf`` subcommand
+                        (``stat``, ``record``, ...);
+                      - ``flags`` (``str`` or ``list``): Switch flags without
+                        their leading hyphens (``no-inherit``, ``all-cpus``,
+                        ``a``, ...);
+                      - ``kwflags`` (``dict``): Dictionary of flag names (no
+                        hyphen) as keys and their corresponding values.
+                        These values can be ``list``s for flags taking CSV
+                        inputs (``event``, ``pid``, ...);
+                      - ``args`` (``str`` or valid command): the post-``--``
+                        arguments. This is typically the command ``perf`` will
+                        launch and monitor. Therefore, a valid command
+                        dictionary (same as this one) is accepted;
+
+                  As an example, the default behaviour can be replicated
+                  through::
+
+                      :language: yaml
+
+                      perf:
+                          commands:
+                              default_behaviour:
+                                  command: stat
+                                  flags:
+                                      - all-cpus
+                                  kwflags:
+                                      event:
+                                          - migrations
+                                          - cs
+                                  args:
+                                      command: sleep
+                                      args: 1000
+                                  stderr: '&1'
+                                  stdout: stat.out
+                 """),
+        Parameter('post_commands', kind=YamlCommandDescriptor, default=None,
+                  description="""
+                  Dictionary of commands to be run after the workloads run
+                  (same format as ``commands``).
+                 """),
     ]
 
     def __init__(self, target, **kwargs):
@@ -88,51 +184,72 @@ def __init__(self, target, **kwargs):
         self.collector = None
 
     def initialize(self, context):
+        # pylint: disable=unused-argument
+        # pylint: disable=access-member-before-definition
+        # pylint: disable=attribute-defined-outside-init
+        if self.commands is None:
+            if self.optionstring is None:
+                self.optionstring = DEFAULT_OPTIONSTRING
+
+            if self.events is None:
+                self.events = DEFAULT_EVENTS
+
+            if isinstance(self.optionstring, str):
+                self.optionstring = [self.optionstring]
+
+            if not self.labels:
+                self.labels = ['default{}'.format(i)
+                               for i, _ in enumerate(self.optionstring)]
+            elif isinstance(self.labels, str):
+                self.labels = [self.labels]
+
+            if len(self.labels) != len(self.optionstring):
+                raise ValueError('Lengths of labels and optionstring differ')
+
+            self.commands = YamlCommandDescriptor({
+                label: {
+                    'command': 'stat',
+                    'kwflags': {'event': self.events},
+                    'options': options,
+                    'args': {
+                        'command': 'sleep',
+                        'args': 1000,
+                    },
+                    'stderr': '&1',
+                    'stdout': 'stat.out',
+                }
+                for label, options in zip(self.labels, self.optionstring)
+            })
+        else:
+            for name in ['optionstring', 'events', 'labels']:
+                if self.__dict__[name] is not None:
+                    raise ValueError(
+                        '{} should not be passed if commands is'.format(name))
+
         self.collector = PerfCollector(self.target,
-                                       self.events,
-                                       self.optionstring,
-                                       self.labels,
-                                       self.force_install)
+                                       self.force_install,
+                                       self.pre_commands,
+                                       self.commands,
+                                       self.post_commands)
 
     def setup(self, context):
+        # pylint: disable=unused-argument
         self.collector.reset()
 
     def start(self, context):
+        # pylint: disable=unused-argument
         self.collector.start()
 
     def stop(self, context):
+        # pylint: disable=unused-argument
         self.collector.stop()
 
     def update_output(self, context):
-        self.logger.info('Extracting reports from target...')
         outdir = os.path.join(context.output_directory, 'perf')
-        self.collector.get_trace(outdir)
-
-        for host_file in os.listdir(outdir):
-            label = host_file.split('.out')[0]
-            host_file_path = os.path.join(outdir, host_file)
-            context.add_artifact(label, host_file_path, 'raw')
-            with open(host_file_path) as fh:
-                in_results_section = False
-                for line in fh:
-                    if 'Performance counter stats' in line:
-                        in_results_section = True
-                        next(fh)  # skip the following blank line
-                    if in_results_section:
-                        if not line.strip():  # blank line
-                            in_results_section = False
-                            break
-                        else:
-                            line = line.split('#')[0]  # comment
-                            match = PERF_COUNT_REGEX.search(line)
-                            if match:
-                                classifiers = {}
-                                cpu = match.group(1)
-                                if cpu is not None:
-                                    classifiers['cpu'] = int(cpu.replace('CPU', ''))
-                                count = int(match.group(2))
-                                metric = '{}_{}'.format(label, match.group(3))
-                                context.add_metric(metric, count, classifiers=classifiers)
+        self.collector.get_traces(outdir)
+        # HUGE TODO: add parsers for supported post_commands
+        #    (or should these be in devlib?)
 
     def teardown(self, context):
+        # pylint: disable=unused-argument
         self.collector.reset()

From e8f036006485999a83406c9d7597de12bd69a835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Tue, 11 Jun 2019 14:32:10 +0100
Subject: [PATCH 2/7] fixup! instruments/perf: Support ALL perf subcommands

---
 wa/instruments/perf.py | 200 +++++++++++++++++++++++++++++++++++------
 1 file changed, 171 insertions(+), 29 deletions(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index 549fd5b00..b69cb101e 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -13,11 +13,12 @@
 # limitations under the License.
 #
 
-import collections
+import itertools
 import os
+import re
+import shlex
 
-from devlib.utils.cli import Command
-from devlib.trace.perf import PerfCollector
+from devlib.trace.perf import PerfCollector, PerfCommandDict
 from wa import Instrument, Parameter
 from wa.utils.types import list_or_string, list_of_strs
 
@@ -25,23 +26,6 @@
     'PerfInstrument',
 ]
 
-
-class YamlCommandDescriptor(collections.OrderedDict):
-
-    def __init__(self, yaml_dict):
-        super(YamlCommandDescriptor, self).__init__()
-        if isinstance(yaml_dict, YamlCommandDescriptor):
-            for k, v in yaml_dict.items():
-                self[k] = v
-            return
-        yaml_dict_copy = yaml_dict.copy()
-        for label, parameters in yaml_dict_copy.items():
-            self[label] = str(Command(kwflags_join=',',
-                                      kwflags_sep='=',
-                                      end_of_options='--',
-                                      **parameters))
-
-
 DEFAULT_EVENTS = ['migration', 'cs']
 DEFAULT_OPTIONSTRING = '-a'
 
@@ -70,6 +54,10 @@ class PerfInstrument(Instrument):
     also be used through the more advanced ``commands`` dictionary which
     provides a flexible access to all ways ``perf`` can be used.
 
+    In both cases, if a ``stat`` command is issued, this workload will
+    automatically parse its output into run ``metrics``. For this reason,
+    please avoid the ``-x`` ``stat`` flag.
+
     The ``pre_commands`` and ``post_commands`` are provided to suit those
     ``perf`` commands that don't actually capture data (``list``, ``config``,
     ``report``, ...).
@@ -126,12 +114,12 @@ class PerfInstrument(Instrument):
                   ``optionstring``\ s. This parameter is ignored if
                   ``commands`` is passed.
                   """),
-        Parameter('pre_commands', kind=YamlCommandDescriptor, default=None,
+        Parameter('pre_commands', kind=PerfCommandDict, default=None,
                   description="""
                   Dictionary of commands to be run before the workloads run
                   (same format as ``commands``).
                  """),
-        Parameter('commands', kind=YamlCommandDescriptor, default=None,
+        Parameter('commands', kind=PerfCommandDict, default=None,
                   description="""
                   Dictionary in which keys are considered as *labels* and
                   values are themselves dictionaries with the following
@@ -172,7 +160,7 @@ class PerfInstrument(Instrument):
                                   stderr: '&1'
                                   stdout: stat.out
                  """),
-        Parameter('post_commands', kind=YamlCommandDescriptor, default=None,
+        Parameter('post_commands', kind=PerfCommandDict, default=None,
                   description="""
                   Dictionary of commands to be run after the workloads run
                   (same format as ``commands``).
@@ -187,6 +175,10 @@ def initialize(self, context):
         # pylint: disable=unused-argument
         # pylint: disable=access-member-before-definition
         # pylint: disable=attribute-defined-outside-init
+        if self.pre_commands is None:
+            self.pre_commands = PerfCommandDict({})
+        if self.post_commands is None:
+            self.post_commands = PerfCommandDict({})
         if self.commands is None:
             if self.optionstring is None:
                 self.optionstring = DEFAULT_OPTIONSTRING
@@ -206,11 +198,11 @@ def initialize(self, context):
             if len(self.labels) != len(self.optionstring):
                 raise ValueError('Lengths of labels and optionstring differ')
 
-            self.commands = YamlCommandDescriptor({
+            self.commands = PerfCommandDict({
                 label: {
                     'command': 'stat',
                     'kwflags': {'event': self.events},
-                    'options': options,
+                    'options': shlex.split(options),
                     'args': {
                         'command': 'sleep',
                         'args': 1000,
@@ -233,8 +225,9 @@ def initialize(self, context):
                                        self.post_commands)
 
     def setup(self, context):
-        # pylint: disable=unused-argument
         self.collector.reset()
+        version = self.collector.execute('--version').strip()
+        context.update_metadata('versions', self.name, version)
 
     def start(self, context):
         # pylint: disable=unused-argument
@@ -245,11 +238,160 @@ def stop(self, context):
         self.collector.stop()
 
     def update_output(self, context):
-        outdir = os.path.join(context.output_directory, 'perf')
+        outdir = os.path.join(context.output_directory, self.name)
         self.collector.get_traces(outdir)
-        # HUGE TODO: add parsers for supported post_commands
-        #    (or should these be in devlib?)
+        all_commands = itertools.chain(self.pre_commands.items(),
+                                       self.commands.items(),
+                                       self.post_commands.items())
+        for label, cmd in all_commands:
+            if 'stat' in cmd.command:
+                # perf stat supports redirecting its stdout to --output/-o:
+                stat_file = (cmd.kwflags.get('o', None) or
+                             cmd.kwflags.get('output', None) or
+                             cmd.stdout)
+                with open(os.path.join(outdir, label, stat_file)) as f:
+                    for metric in self._extract_stat_metrics(label, f.read()):
+                        context.add_metric(**metric)
 
     def teardown(self, context):
         # pylint: disable=unused-argument
         self.collector.reset()
+
+    @classmethod
+    def _extract_stat_metrics(cls, label, stdout):
+        """
+        When running ``perf stat``, this instrument reports the captured
+        counters as unitless :class:`Metrics` with the following classifiers:
+
+        - ``'name'``: The name of the event as reported by ``perf``. This name
+          may not be unique when aggregation is disabled as the same counter is
+          then captured for multiple hardware threads;
+        - ``'label'``: Label given to the run of ``perf stat``;
+        - ``'target'``: The target ``perf`` reports for the captured events.
+          This is shared across all events of a run and is further specialized
+          by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable;
+        - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run;
+        - ``'count_error'``: A string containing the error that prevented
+          the counter from being captured. Only available if an error
+          occurred. In this case the value of the metric is always ``0``;
+        - ``'hw_thread_count'``: Number of **hardware** threads that were
+          contributing to the counter. Only available when the automatic
+          aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``,
+          ``'core'`` and ``'cluster'``;
+        - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the
+          index of the hardware thread that incremented the counter. In this
+          case, ``'hw_thread_count'`` is always ``1``. For backward
+          compatibility, the ``'cpu'`` classifier is provided as a synonym of
+          ``'hw_thread'`` (unlike what its name might suggest, on systems
+          supporting hardware multithreading, ``'cpu'`` is not a synonym of
+          ``'core'``!);
+        - ``'cluster'``: When the ``--per-socket`` option is used, holds the
+          index of the cluster (_i.e._ "socket" in ``perf`` terminology) that
+          incremented the counter and ``'hw_thread_count'`` holds the number of
+          hardware threads in the cluster. When the ``--per-core`` option is
+          used, this classifier gives the index of the cluster of the core.
+        - ``'core'``: When the ``--per-core`` option is used, holds the index
+          (within its cluster) of the core that incremented the counter and
+          ``'hw_thread_count'`` holds the number of hardware threads in the
+          core.
+        - ``'enabled'``: When ``perf`` needs to capture more hardware events
+          than there are hardware counters, it shares the hardware counters
+          among the events through time-slicing. This classifier holds the
+          fraction (between ``0.0`` and ``100.0``) of the run that a hardware
+          counter was allocated to the event. Available only for hardware
+          events and only when time-slicing was required.
+        - ``'comment_value'``, ``'comment_units'``: Some counters may come with
+          an extra "comment" (following a ``#``) added by ``perf``. The
+          ``'comment_value'`` holds the numeric (``int`` or ``float``) value of
+          the comment while ``'comment_units'`` holds the rest of the comment
+          (typically the units). Only available for the events for which
+          ``perf`` added a comment.
+        """
+        match = cls._stat_regex.search(stdout)
+        if match is None:
+            return
+        base_classifiers = {
+            'label': label,
+            'target': match['target'],
+            'duration': float(match['duration'].replace(',', '')),
+            'duration_units': match['duration_units'],
+        }
+        for m in cls._stat_counter_regex.finditer(match['counters']):
+            classifiers = base_classifiers.copy()
+            name, count = cls._extract_stat_count(m, classifiers)
+            yield {
+                'name': name,
+                'units': None,
+                'value': count,
+                'classifiers': classifiers,
+            }
+
+    _stat_regex = re.compile(
+        r'Performance counter stats for (?P<target>.*?)\s*:\s*$'
+        r'^(?P<counters>.*)$'
+        r'^\s*(?P<duration>[0-9.,]+)\s*(?P<duration_units>\S+)\s*time elapsed',
+        flags=(re.S | re.M))
+
+    _stat_counter_regex = re.compile(
+        r'^\s*{aggr}?\s*{count}\s*{name}\s*{comment}?(?:{enabled}|$)'.format(
+            aggr=r'(?:{hw_thread}|(?:{cluster}{core}?\s*{thread_cnt}))'.format(
+                hw_thread=r'(?:CPU-?(?P<hw_thread>\d+))',
+                cluster=r'S(?P<cluster>\d+)',
+                core=r'(?:-C(?P<core>\d+))',
+                thread_cnt=r'(?P<hw_thread_count>\d+)'),
+            count=r'(?P<count>[0-9.,]+|\<not supported\>|\<not counted\>)',
+            name=r'(?P<name>.*?)',
+            comment=r'(?:#\s*{value}\s*{units}\s*)'.format(
+                value=r'(?P<comment_value>[0-9,.]+)',
+                units=r'(?P<comment_units>.*?)'),
+            enabled=r'(?:[\[\(](?P<enabled>[0-9.]+)%[\)\]])'),
+        flags=re.M)
+
+    @staticmethod
+    def _extract_stat_count(match, classifiers):
+        """Extracts the counter classifiers and count from a counter_match.
+
+        Parameters:
+            match        A :class:`re.Match` from :attr:`_stat_counter_regex`
+            classifiers  A dictionary to be completed for the matched counter
+
+        Returns:
+            A (name, value) tuple for the matched counter (value is 0 if an
+            error occurred).
+        """
+        name = f'{classifiers["label"]}_{match["name"]}'.replace(' ', '_')
+        classifiers['name'] = match['name']
+        # But metrics need a unique name (classifiers not enough) so this
+        # name might be specialized by the following:
+        try:
+            count = int(match['count'].replace(',', ''))
+        except ValueError:
+            try:
+                # some "counters" return a float (e.g. "task-clock"):
+                count = float(match['count'].replace(',', ''))
+            except ValueError:
+                # perf may report "not supported" or "not counted":
+                count = 0  # as metrics have to be numeric, can't use None
+                classifiers['count_error'] = match['count']
+        if match['hw_thread']:  # --no-aggr
+            classifiers['hw_thread'] = int(match['hw_thread'])
+            classifiers['hw_thread_count'] = 1
+            classifiers['cpu'] = int(match['hw_thread'])  # deprecated!
+            name += f'_T{classifiers["hw_thread"]}'
+        elif match['cluster']:  # --per-core or --per-socket
+            classifiers['cluster'] = int(match['cluster'])
+            classifiers['hw_thread_count'] = int(match['hw_thread_count'])
+            name += f'_S{classifiers["cluster"]}'
+            if match['core']:  # --per-core
+                classifiers['core'] = int(match['core'])
+                name += f'_C{classifiers["core"]}'
+        if match['comment_value']:
+            try:
+                classifiers['comment_value'] = int(match['comment_value'])
+            except ValueError:
+                classifiers['comment_value'] = float(match['comment_value'])
+        if match['comment_units']:
+            classifiers['comment_units'] = match['comment_units']
+        if match['enabled']:
+            classifiers['enabled'] = float(match['enabled'])
+        return (name, count)

From 55359d8402ca11baad58d6aa7200d902fb67daf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Tue, 11 Jun 2019 14:32:34 +0100
Subject: [PATCH 3/7] instruments/perf: Add tests for perf stat parser

Add tests with parser inputs (i.e. perf stat stdout outputs) and parser
outputs (i.e. arrays of WA metrics) for the `perf stat` parser of
PerfInstrument. This will be useful when modifying the code of the
parser, to verify its robustness.

NB: These tests are not exhaustive.
---
 tests/test_instrument_perf.py | 2062 +++++++++++++++++++++++++++++++++
 1 file changed, 2062 insertions(+)
 create mode 100644 tests/test_instrument_perf.py

diff --git a/tests/test_instrument_perf.py b/tests/test_instrument_perf.py
new file mode 100644
index 000000000..60ea64fc1
--- /dev/null
+++ b/tests/test_instrument_perf.py
@@ -0,0 +1,2062 @@
+#    Copyright 2019 Arm Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from wa.instruments.perf import PerfInstrument
+
+STAT_PAIRS = {
+
+    '-a -e r1,r2,r3,r4,r5,r6,r7,r8': [
+(
+# Pixel 2 - OS 4.4.88-ga1592dc22912
+# perf version 3.9.rc8.ge9aa1d6
+"""
+ Performance counter stats for 'sleep 1000':
+
+              1139 migrations                                                   [100.00%]
+              6141 cs
+          14648295 r1                                                           [74.87%]
+           2966422 r2                                                           [74.96%]
+          11872707 r3                                                           [74.94%]
+        8184054637 r4                                                           [75.11%]
+           2409014 r5                                                           [75.30%]
+          86957873 r6                                                           [75.27%]
+          34552449 r7                                                           [75.14%]
+       15730113018 r8                                                           [74.88%]
+
+       1.681693229 seconds time elapsed
+""",
+[
+    {
+	"name": "default0_migrations",
+	"value": 1139,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs",
+	"value": 6141,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "cs"
+	}
+    },
+    {
+	"name": "default0_r1",
+	"value": 14648295,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r1",
+	    "enabled": 74.87
+	}
+    },
+    {
+	"name": "default0_r2",
+	"value": 2966422,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r2",
+	    "enabled": 74.96
+	}
+    },
+    {
+	"name": "default0_r3",
+	"value": 11872707,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r3",
+	    "enabled": 74.94
+	}
+    },
+    {
+	"name": "default0_r4",
+	"value": 8184054637,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r4",
+	    "enabled": 75.11
+	}
+    },
+    {
+	"name": "default0_r5",
+	"value": 2409014,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r5",
+	    "enabled": 75.3
+	}
+    },
+    {
+	"name": "default0_r6",
+	"value": 86957873,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r6",
+	    "enabled": 75.27
+	}
+    },
+    {
+	"name": "default0_r7",
+	"value": 34552449,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r7",
+	    "enabled": 75.14
+	}
+    },
+    {
+	"name": "default0_r8",
+	"value": 15730113018,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 1.681693229,
+	    "duration_units": "seconds",
+	    "name": "r8",
+	    "enabled": 74.88
+	}
+    }
+],
+),
+],
+
+    '-a -A': [
+(
+# Pixel 2 - OS 4.4.88-ga1592dc22912
+# perf version 3.9.rc8.ge9aa1d6
+"""
+ Performance counter stats for 'sleep 1000':
+
+ CPU0                   201 migrations                                                    (100.00%)
+ CPU1                   217 migrations                                                    (100.00%)
+ CPU2                   241 migrations                                                    (100.00%)
+ CPU3                   216 migrations                                                    (100.00%)
+ CPU4                    79 migrations                                                    (100.00%)
+ CPU5                    40 migrations                                                    (100.00%)
+ CPU6                    55 migrations                                                    (100.00%)
+ CPU7                    70 migrations                                                    (100.00%)
+ CPU0                  2285 cs
+ CPU1                  1454 cs
+ CPU2                  2704 cs
+ CPU3                  2085 cs
+ CPU4                  1790 cs
+ CPU5                  1240 cs
+ CPU6                   636 cs
+ CPU7                  1557 cs
+
+        2.494999050 seconds time elapsed
+""",
+[
+    {
+	"name": "default0_migrations_T0",
+	"value": 201,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 0,
+	    "hw_thread_count": 1,
+	    "cpu": 0,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T1",
+	"value": 217,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 1,
+	    "hw_thread_count": 1,
+	    "cpu": 1,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T2",
+	"value": 241,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 2,
+	    "hw_thread_count": 1,
+	    "cpu": 2,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T3",
+	"value": 216,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 3,
+	    "hw_thread_count": 1,
+	    "cpu": 3,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T4",
+	"value": 79,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 4,
+	    "hw_thread_count": 1,
+	    "cpu": 4,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T5",
+	"value": 40,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 5,
+	    "hw_thread_count": 1,
+	    "cpu": 5,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T6",
+	"value": 55,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 6,
+	    "hw_thread_count": 1,
+	    "cpu": 6,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_migrations_T7",
+	"value": 70,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "hw_thread": 7,
+	    "hw_thread_count": 1,
+	    "cpu": 7,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs_T0",
+	"value": 2285,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 0,
+	    "hw_thread_count": 1,
+	    "cpu": 0
+	}
+    },
+    {
+	"name": "default0_cs_T1",
+	"value": 1454,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 1,
+	    "hw_thread_count": 1,
+	    "cpu": 1
+	}
+    },
+    {
+	"name": "default0_cs_T2",
+	"value": 2704,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 2,
+	    "hw_thread_count": 1,
+	    "cpu": 2
+	}
+    },
+    {
+	"name": "default0_cs_T3",
+	"value": 2085,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 3,
+	    "hw_thread_count": 1,
+	    "cpu": 3
+	}
+    },
+    {
+	"name": "default0_cs_T4",
+	"value": 1790,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 4,
+	    "hw_thread_count": 1,
+	    "cpu": 4
+	}
+    },
+    {
+	"name": "default0_cs_T5",
+	"value": 1240,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 5,
+	    "hw_thread_count": 1,
+	    "cpu": 5
+	}
+    },
+    {
+	"name": "default0_cs_T6",
+	"value": 636,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 6,
+	    "hw_thread_count": 1,
+	    "cpu": 6
+	}
+    },
+    {
+	"name": "default0_cs_T7",
+	"value": 1557,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.49499905,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "hw_thread": 7,
+	    "hw_thread_count": 1,
+	    "cpu": 7
+	}
+    }
+],
+),
+],
+
+    '-a -A --per-socket': [
+(
+# Pixel 2 - OS 4.4.88-ga1592dc22912
+# perf version 3.9.rc8.ge9aa1d6
+"""
+ Performance counter stats for 'sleep 1000':
+
+S0        4                697 migrations                                                    (100.00%)
+S0        4               7801 cs
+S1        4                203 migrations                                                    (100.00%)
+S1        4               4408 cs
+
+       2.262571267 seconds time elapsed
+""",
+[
+    {
+	"name": "default0_migrations_S0",
+	"value": 697,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.262571267,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs_S0",
+	"value": 7801,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.262571267,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "cluster": 0,
+	    "hw_thread_count": 4
+	}
+    },
+    {
+	"name": "default0_migrations_S1",
+	"value": 203,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.262571267,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs_S1",
+	"value": 4408,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.262571267,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "cluster": 1,
+	    "hw_thread_count": 4
+	}
+    }
+],
+),
+],
+    "-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket": [
+(
+# Pixel 2 - OS 4.4.88-ga1592dc22912
+# perf version 3.9.rc8.ge9aa1d6
+"""
+ Performance counter stats for 'sleep 1000':
+
+S0        4                725 migrations                                                    (100.00%)
+S0        4               7202 cs
+S0        4            9439048 r1                                                            (37.55%)
+S0        4             179650 r2                                                            (37.54%)
+S0        4            3856583 r3                                                            (37.56%)
+S0        4           71399486 r4                                                            (37.49%)
+S0        4             251669 r5                                                            (37.48%)
+S0        4           39189196 r6                                                            (37.46%)
+S0        4           19239860 r7                                                            (37.47%)
+S0        4          288165417 r8                                                            (37.47%)
+S1        4                222 migrations                                                    (100.00%)
+S1        4               5225 cs
+S1        4            8222810 r1                                                            (37.55%)
+S1        4            2852407 r2                                                            (37.55%)
+S1        4            5519117 r3                                                            (37.55%)
+S1        4         7193421718 r4                                                            (37.49%)
+S1        4            3236589 r5                                                            (37.47%)
+S1        4                  0 r6                                                            (37.47%)
+S1        4                  0 r7                                                            (37.47%)
+S1        4        13821910139 r8                                                            (37.47%)
+
+       2.256465902 seconds time elapsed
+""",
+[
+    {
+	"name": "default0_migrations_S0",
+	"value": 725,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs_S0",
+	"value": 7202,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "cluster": 0,
+	    "hw_thread_count": 4
+	}
+    },
+    {
+	"name": "default0_r1_S0",
+	"value": 9439048,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r1",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.55
+	}
+    },
+    {
+	"name": "default0_r2_S0",
+	"value": 179650,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r2",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.54
+	}
+    },
+    {
+	"name": "default0_r3_S0",
+	"value": 3856583,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r3",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.56
+	}
+    },
+    {
+	"name": "default0_r4_S0",
+	"value": 71399486,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r4",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.49
+	}
+    },
+    {
+	"name": "default0_r5_S0",
+	"value": 251669,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r5",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.48
+	}
+    },
+    {
+	"name": "default0_r6_S0",
+	"value": 39189196,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r6",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.46
+	}
+    },
+    {
+	"name": "default0_r7_S0",
+	"value": 19239860,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r7",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    },
+    {
+	"name": "default0_r8_S0",
+	"value": 288165417,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r8",
+	    "cluster": 0,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    },
+    {
+	"name": "default0_migrations_S1",
+	"value": 222,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "migrations",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 100.0
+	}
+    },
+    {
+	"name": "default0_cs_S1",
+	"value": 5225,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "cs",
+	    "cluster": 1,
+	    "hw_thread_count": 4
+	}
+    },
+    {
+	"name": "default0_r1_S1",
+	"value": 8222810,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r1",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.55
+	}
+    },
+    {
+	"name": "default0_r2_S1",
+	"value": 2852407,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r2",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.55
+	}
+    },
+    {
+	"name": "default0_r3_S1",
+	"value": 5519117,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r3",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.55
+	}
+    },
+    {
+	"name": "default0_r4_S1",
+	"value": 7193421718,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r4",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.49
+	}
+    },
+    {
+	"name": "default0_r5_S1",
+	"value": 3236589,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r5",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    },
+    {
+	"name": "default0_r6_S1",
+	"value": 0,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r6",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    },
+    {
+	"name": "default0_r7_S1",
+	"value": 0,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r7",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    },
+    {
+	"name": "default0_r8_S1",
+	"value": 13821910139,
+	"units": None,
+	"classifiers": {
+	    "label": "default0",
+	    "target": "'sleep 1000'",
+	    "duration": 2.256465902,
+	    "duration_units": "seconds",
+	    "name": "r8",
+	    "cluster": 1,
+	    "hw_thread_count": 4,
+	    "enabled": 37.47
+	}
+    }
+],
+),
+],
+
+    '-a -A --per-core': [
+(
+# Ubuntu 18.04.2 LTS - OS 4.15.0-50-generic
+# perf version 4.15.18
+"""
+ Performance counter stats for 'system wide':
+
+S0-C0           2        2003.008100      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C0           2                 38      context-switches          #    0.019 K/sec
+S0-C0           2                  3      cpu-migrations            #    0.001 K/sec
+S0-C0           2                 73      page-faults               #    0.036 K/sec
+S0-C0           2         15,750,905      cycles                    #    0.008 GHz
+S0-C0           2          4,042,693      instructions              #    0.26  insn per cycle
+S0-C0           2            860,481      branches                  #    0.430 M/sec
+S0-C0           2            166,940      branch-misses             #   19.40% of all branches
+S0-C1           2        2003.042586      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C1           2                155      context-switches          #    0.077 K/sec
+S0-C1           2                  1      cpu-migrations            #    0.000 K/sec
+S0-C1           2                386      page-faults               #    0.193 K/sec
+S0-C1           2        407,532,423      cycles                    #    0.203 GHz
+S0-C1           2         73,526,057      instructions              #    0.18  insn per cycle
+S0-C1           2         22,478,777      branches                  #   11.222 M/sec
+S0-C1           2            293,815      branch-misses             #    1.31% of all branches
+S0-C2           2        2003.076028      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C2           2                213      context-switches          #    0.106 K/sec
+S0-C2           2                  2      cpu-migrations            #    0.001 K/sec
+S0-C2           2                  1      page-faults               #    0.000 K/sec
+S0-C2           2         18,605,672      cycles                    #    0.009 GHz
+S0-C2           2          4,406,356      instructions              #    0.24  insn per cycle
+S0-C2           2          1,088,504      branches                  #    0.543 M/sec
+S0-C2           2            142,203      branch-misses             #   13.06% of all branches
+S0-C3           2        2003.109192      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C3           2                245      context-switches          #    0.122 K/sec
+S0-C3           2                  6      cpu-migrations            #    0.003 K/sec
+S0-C3           2                  0      page-faults               #    0.000 K/sec
+S0-C3           2         23,626,131      cycles                    #    0.012 GHz
+S0-C3           2          7,714,748      instructions              #    0.33  insn per cycle
+S0-C3           2          1,805,933      branches                  #    0.902 M/sec
+S0-C3           2            193,243      branch-misses             #   10.70% of all branches
+S0-C4           2        2003.143584      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C4           2                596      context-switches          #    0.298 K/sec
+S0-C4           2                  6      cpu-migrations            #    0.003 K/sec
+S0-C4           2                113      page-faults               #    0.056 K/sec
+S0-C4           2         53,837,367      cycles                    #    0.027 GHz
+S0-C4           2         23,264,962      instructions              #    0.43  insn per cycle
+S0-C4           2          4,975,165      branches                  #    2.484 M/sec
+S0-C4           2            301,069      branch-misses             #    6.05% of all branches
+S0-C5           2        2003.151837      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C5           2                172      context-switches          #    0.086 K/sec
+S0-C5           2                  4      cpu-migrations            #    0.002 K/sec
+S0-C5           2                 37      page-faults               #    0.018 K/sec
+S0-C5           2         24,086,889      cycles                    #    0.012 GHz
+S0-C5           2          7,219,194      instructions              #    0.30  insn per cycle
+S0-C5           2          1,537,648      branches                  #    0.768 M/sec
+S0-C5           2            177,565      branch-misses             #   11.55% of all branches
+S0-C6           2        2003.160900      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C6           2                146      context-switches          #    0.073 K/sec
+S0-C6           2                  3      cpu-migrations            #    0.001 K/sec
+S0-C6           2                 69      page-faults               #    0.034 K/sec
+S0-C6           2         27,327,018      cycles                    #    0.014 GHz
+S0-C6           2          7,956,363      instructions              #    0.29  insn per cycle
+S0-C6           2          1,834,119      branches                  #    0.916 M/sec
+S0-C6           2            210,607      branch-misses             #   11.48% of all branches
+S0-C7           2        2003.187967      cpu-clock (msec)          #    1.998 CPUs utilized
+S0-C7           2                 91      context-switches          #    0.045 K/sec
+S0-C7           2                  1      cpu-migrations            #    0.000 K/sec
+S0-C7           2                 32      page-faults               #    0.016 K/sec
+S0-C7           2         26,120,485      cycles                    #    0.013 GHz
+S0-C7           2         10,457,563      instructions              #    0.40  insn per cycle
+S0-C7           2          2,337,445      branches                  #    1.167 M/sec
+S0-C7           2            238,864      branch-misses             #   10.22% of all branches
+
+       1.002350964 seconds time elapsed
+""",
+[
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C0",
+        "units": None,
+        "value": 2003.0081,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C0",
+        "units": None,
+        "value": 38,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.019,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C0",
+        "units": None,
+        "value": 3,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.001,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C0",
+        "units": None,
+        "value": 73,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.036,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C0",
+        "units": None,
+        "value": 15750905,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.008,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C0",
+        "units": None,
+        "value": 4042693,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.26,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C0",
+        "units": None,
+        "value": 860481,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 0.43,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C0",
+        "units": None,
+        "value": 166940,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 0,
+            "comment_value": 19.4,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C1",
+        "units": None,
+        "value": 2003.042586,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C1",
+        "units": None,
+        "value": 155,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 0.077,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C1",
+        "units": None,
+        "value": 1,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 0.0,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C1",
+        "units": None,
+        "value": 386,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 0.193,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C1",
+        "units": None,
+        "value": 407532423,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 0.203,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C1",
+        "units": None,
+        "value": 73526057,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 0.18,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C1",
+        "units": None,
+        "value": 22478777,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 11.222,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C1",
+        "units": None,
+        "value": 293815,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 1,
+            "comment_value": 1.31,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C2",
+        "units": None,
+        "value": 2003.076028,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C2",
+        "units": None,
+        "value": 213,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.106,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C2",
+        "units": None,
+        "value": 2,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.001,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C2",
+        "units": None,
+        "value": 1,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.0,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C2",
+        "units": None,
+        "value": 18605672,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.009,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C2",
+        "units": None,
+        "value": 4406356,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.24,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C2",
+        "units": None,
+        "value": 1088504,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 0.543,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C2",
+        "units": None,
+        "value": 142203,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 2,
+            "comment_value": 13.06,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C3",
+        "units": None,
+        "value": 2003.109192,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C3",
+        "units": None,
+        "value": 245,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.122,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C3",
+        "units": None,
+        "value": 6,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.003,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C3",
+        "units": None,
+        "value": 0,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.0,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C3",
+        "units": None,
+        "value": 23626131,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.012,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C3",
+        "units": None,
+        "value": 7714748,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.33,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C3",
+        "units": None,
+        "value": 1805933,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 0.902,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C3",
+        "units": None,
+        "value": 193243,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 3,
+            "comment_value": 10.7,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C4",
+        "units": None,
+        "value": 2003.143584,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C4",
+        "units": None,
+        "value": 596,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 0.298,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C4",
+        "units": None,
+        "value": 6,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 0.003,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C4",
+        "units": None,
+        "value": 113,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 0.056,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C4",
+        "units": None,
+        "value": 53837367,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 0.027,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C4",
+        "units": None,
+        "value": 23264962,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 0.43,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C4",
+        "units": None,
+        "value": 4975165,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 2.484,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C4",
+        "units": None,
+        "value": 301069,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 4,
+            "comment_value": 6.05,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C5",
+        "units": None,
+        "value": 2003.151837,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C5",
+        "units": None,
+        "value": 172,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.086,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C5",
+        "units": None,
+        "value": 4,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.002,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C5",
+        "units": None,
+        "value": 37,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.018,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C5",
+        "units": None,
+        "value": 24086889,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.012,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C5",
+        "units": None,
+        "value": 7219194,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.3,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C5",
+        "units": None,
+        "value": 1537648,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 0.768,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C5",
+        "units": None,
+        "value": 177565,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 5,
+            "comment_value": 11.55,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C6",
+        "units": None,
+        "value": 2003.1609,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C6",
+        "units": None,
+        "value": 146,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.073,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C6",
+        "units": None,
+        "value": 3,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.001,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C6",
+        "units": None,
+        "value": 69,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.034,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C6",
+        "units": None,
+        "value": 27327018,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.014,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C6",
+        "units": None,
+        "value": 7956363,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.29,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C6",
+        "units": None,
+        "value": 1834119,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 0.916,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C6",
+        "units": None,
+        "value": 210607,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 6,
+            "comment_value": 11.48,
+            "comment_units": "% of all branches"
+        }
+    },
+    {
+        "name": "default0_cpu-clock_(msec)_S0_C7",
+        "units": None,
+        "value": 2003.187967,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-clock (msec)",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 1.998,
+            "comment_units": "CPUs utilized"
+        }
+    },
+    {
+        "name": "default0_context-switches_S0_C7",
+        "units": None,
+        "value": 91,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "context-switches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 0.045,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cpu-migrations_S0_C7",
+        "units": None,
+        "value": 1,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cpu-migrations",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 0.0,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_page-faults_S0_C7",
+        "units": None,
+        "value": 32,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "page-faults",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 0.016,
+            "comment_units": "K/sec"
+        }
+    },
+    {
+        "name": "default0_cycles_S0_C7",
+        "units": None,
+        "value": 26120485,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "cycles",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 0.013,
+            "comment_units": "GHz"
+        }
+    },
+    {
+        "name": "default0_instructions_S0_C7",
+        "units": None,
+        "value": 10457563,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "instructions",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 0.4,
+            "comment_units": "insn per cycle"
+        }
+    },
+    {
+        "name": "default0_branches_S0_C7",
+        "units": None,
+        "value": 2337445,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branches",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 1.167,
+            "comment_units": "M/sec"
+        }
+    },
+    {
+        "name": "default0_branch-misses_S0_C7",
+        "units": None,
+        "value": 238864,
+        "classifiers": {
+            "label": "default0",
+            "target": "'system wide'",
+            "duration": 1.002350964,
+            "duration_units": "seconds",
+            "name": "branch-misses",
+            "cluster": 0,
+            "hw_thread_count": 2,
+            "core": 7,
+            "comment_value": 10.22,
+            "comment_units": "% of all branches"
+        }
+    }
+],
+),
+],
+
+}
+
+
+
+class StatParserTest(unittest.TestCase):
+    """Check ``perf stat`` parsing against the ``STAT_PAIRS`` samples."""
+    maxDiff = None  # report failed comparisons with untruncated diffs
+
+    def _test_pair(self, stdout, metrics):
+        """Assert ``stdout`` parses into ``metrics``, matched up by name."""
+        seen = 0
+        parsed = PerfInstrument._extract_stat_metrics('default0', stdout)
+        for seen, actual in enumerate(parsed, start=1):
+            # metric names are unique (per the documentation): match by name
+            expected = next(m for m in metrics if m['name'] == actual['name'])
+            self.assertEqual(expected, actual)
+        self.assertEqual(seen, len(metrics))
+
+    def _test_key(self, key):
+        for perf_output, expected_metrics in STAT_PAIRS[key]:
+            self._test_pair(perf_output, expected_metrics)
+
+    def test_all_cpus_many_events(self):
+        self._test_key('-a -e r1,r2,r3,r4,r5,r6,r7,r8')
+
+    def test_all_cpus_no_aggregate_per_core(self):
+        self._test_key('-a -A --per-core')
+
+    def test_all_cpus_no_aggregate_per_socket(self):
+        self._test_key('-a -A --per-socket')
+
+    def test_all_cpus_no_aggregate_many_events_per_socket(self):
+        self._test_key('-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket')

From 4e3859556d826b23942f43b7c0471595760aaa63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Wed, 19 Jun 2019 15:05:33 +0100
Subject: [PATCH 4/7] fixup! instruments/perf: Support ALL perf subcommands

---
 wa/instruments/perf.py | 95 +++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 48 deletions(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index b69cb101e..9da3b09d0 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -75,6 +75,53 @@ class PerfInstrument(Instrument):
 
     Please refer to the ``sysfile_values`` runtime parameter to do so from an
     agenda.
+
+    When running ``perf stat``, this instrument reports the captured
+    counters as unitless :class:`Metrics` with the following classifiers:
+
+    - ``'name'``: The name of the event as reported by ``perf``. This name
+      may not be unique when aggregation is disabled as the same counter is
+      then captured for multiple hardware threads;
+    - ``'label'``: Label given to the run of ``perf stat``;
+    - ``'target'``: The target ``perf`` reports for the captured events.
+      This is shared across all events of a run and is further specialized
+      by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable;
+    - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run;
+    - ``'count_error'``: A string containing the error that prevented the
+      counter from being captured. Only available if an error occurred. In
+      this case the value of the metric is always ``0``;
+    - ``'hw_thread_count'``: Number of **hardware** threads that were
+      contributing to the counter. Only available when the automatic
+      aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``,
+      ``'core'`` and ``'cluster'``;
+    - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the
+      index of the hardware thread that incremented the counter. In this
+      case, ``'hw_thread_count'`` is always ``1``. For backward
+      compatibility, the ``'cpu'`` classifier is provided as a synonym of
+      ``'hw_thread'`` (unlike what its name might suggest, on systems
+      supporting hardware multithreading, ``'cpu'`` is not a synonym of
+      ``'core'``!);
+    - ``'cluster'``: When the ``--per-socket`` option is used, holds the
+      index of the cluster (_i.e._ "socket" in ``perf`` terminology) that
+      incremented the counter and ``'hw_thread_count'`` holds the number of
+      hardware threads in the cluster. When the ``--per-core`` option is
+      used, this classifier gives the index of the cluster of the core.
+    - ``'core'``: When the ``--per-core`` option is used, holds the index
+      (within its cluster) of the core that incremented the counter and
+      ``'hw_thread_count'`` holds the number of hardware threads in the
+      core.
+    - ``'enabled'``: When ``perf`` needs to capture more hardware events
+      than there are hardware counters, it shares the hardware counters
+      among the events through time-slicing. This classifier holds the
+      fraction (between ``0.0`` and ``100.0``) of the run that a hardware
+      counter was allocated to the event. Available only for hardware
+      events and only when time-slicing was required.
+    - ``'comment_value'``, ``'comment_units'``: Some counters may come with
+      an extra "comment" (following a ``#``) added by ``perf``. The
+      ``'comment_value'`` holds the numeric (``int`` or ``float``) value of
+      the comment while ``'comment_units'`` holds the rest of the comment
+      (typically the units). Only available for the events for which
+      ``perf`` added a comment.
     """
 
     parameters = [
@@ -259,54 +306,6 @@ def teardown(self, context):
 
     @classmethod
     def _extract_stat_metrics(cls, label, stdout):
-        """
-        When running ``perf stat``, this instrument reports the captured
-        counters as unitless :class:`Metrics` with the following classifiers:
-
-        - ``'name'``: The name of the event as reported by ``perf``. This name
-          may not be unique when aggregation is disabled as the same counter is
-          then captured for multiple hardware threads;
-        - ``'label'``: Label given to the run of ``perf stat``;
-        - ``'target'``: The target ``perf`` reports for the captured events.
-          This is shared across all events of a run and is further specialized
-          by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable;
-        - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run;
-        - ``'count_error'``: A string containing the error corresponding that
-          prevented the counter from being captured. Only available if an error
-          occured. In this case the value of the metric is always ``0``;
-        - ``'hw_thread_count'``: Number of **hardware** threads that were
-          contributing to the counter. Only available when the automatic
-          aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``,
-          ``'core'`` and ``'cluster'``;
-        - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the
-          index of the hardware thread that incremented the counter. In this
-          case, ``'hw_thread_count'`` is always ``1``. For backward
-          compatibility, the ``'cpu'`` classifier is provided as a synonym of
-          ``'hw_thread'`` (unlike what its name might suggest, on systems
-          supporting hardware multithreading, ``'cpu'`` is not a synonym of
-          ``'core'``!);
-        - ``'cluster'``: When the ``--per-socket`` option is used, holds the
-          index of the cluster (_i.e._ "socket" in ``perf`` terminology) that
-          incremented the counter and ``'hw_thread_count'`` holds the number of
-          hardware threads in the cluster. When the ``--per-core`` option is
-          used, this classifier gives the index of the cluster of the core.
-        - ``'core'``: When the ``--per-core`` option is used, holds the index
-          (within its cluster) of the core that incremented the counter and
-          ``'hw_thread_count'`` holds the number of hardware threads in the
-          core.
-        - ``'enabled'``: When ``perf`` needs to capture more hardware events
-          than there are hardware counters, it shares the hardware counters
-          among the events through time-slicing. This classifier holds the
-          fraction (between ``0.0`` and ``100.0``) of the run that a hardware
-          counter was allocated to the the event. Available only for hardware
-          events and only when time-slicing was required.
-        - ``'comment_value'``, ``'comment_units'``: Some counters may come with
-          an extra "comment" (following a ``#``) added by ``perf``. The
-          ``'comment_value'`` holds the numeric (``int`` or ``float``) value of
-          the comment while ``'comment_units'`` holds the rest of the comment
-          (typically the units). Only available for the events for which
-          ``perf`` added a comment.
-        """
         match = cls._stat_regex.search(stdout)
         if match is None:
             return

From 651cf49b055bbf074bc07776b1d3581bab200fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Wed, 19 Jun 2019 15:05:44 +0100
Subject: [PATCH 5/7] fixup! instruments/perf: Support ALL perf subcommands

---
 wa/instruments/perf.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index 9da3b09d0..b255abdc0 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -358,7 +358,8 @@ def _extract_stat_count(match, classifiers):
             A (name, value) tuple for the matched counter (value is 0 if an
             error occurred).
         """
-        name = f'{classifiers["label"]}_{match["name"]}'.replace(' ', '_')
+        name = '{}_{}'.format(classifiers['label'],
+                              match['name']).replace(' ', '_')
         classifiers['name'] = match['name']
         # But metrics need a unique name (classifiers not enough) so this
         # name might be specialized by the following:
@@ -376,14 +377,14 @@ def _extract_stat_count(match, classifiers):
             classifiers['hw_thread'] = int(match['hw_thread'])
             classifiers['hw_thread_count'] = 1
             classifiers['cpu'] = int(match['hw_thread'])  # deprecated!
-            name += f'_T{classifiers["hw_thread"]}'
+            name += '_T{}'.format(classifiers["hw_thread"])
         elif match['cluster']:  # --per-core or --per-socket
             classifiers['cluster'] = int(match['cluster'])
             classifiers['hw_thread_count'] = int(match['hw_thread_count'])
-            name += f'_S{classifiers["cluster"]}'
+            name += '_S{}'.format(classifiers["cluster"])
             if match['core']:  # --per-core
                 classifiers['core'] = int(match['core'])
-                name += f'_C{classifiers["core"]}'
+                name += '_C{}'.format(classifiers["core"])
         if match['comment_value']:
             try:
                 classifiers['comment_value'] = int(match['comment_value'])

From 84470ddbc892e349b5a00b221938b5aa66d8ac8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Tue, 25 Jun 2019 20:55:30 +0100
Subject: [PATCH 6/7] fixup! instruments/perf: Support ALL perf subcommands

---
 wa/instruments/perf.py | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index b255abdc0..8fe8d7d0f 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -291,14 +291,35 @@ def update_output(self, context):
                                        self.commands.items(),
                                        self.post_commands.items())
         for label, cmd in all_commands:
-            if 'stat' in cmd.command:
+            classifiers = {
+                'label': label,
+                'command': cmd.command,
+            }
+
+            outputs = []
+            if 'o' in cmd.kwflags:
+                outputs.append((cmd.kwflags['o'], 'data'))
+            if 'output' in cmd.kwflags:
+                outputs.append((cmd.kwflags['output'], 'data'))
+            if cmd.stdout and not cmd.stdout.startswith('&'):
+                outputs.append((cmd.stdout, 'data'))
+            if cmd.stderr and not cmd.stderr.startswith('&'):
+                outputs.append((cmd.stderr, 'log'))
+
+            metrics_done = False
+            for output, kind in outputs:
                 # perf stat supports redirecting its stdout to --output/-o:
-                stat_file = (cmd.kwflags.get('o', None) or
-                             cmd.kwflags.get('output', None) or
-                             cmd.stdout)
-                with open(os.path.join(outdir, label, stat_file)) as f:
-                    for metric in self._extract_stat_metrics(label, f.read()):
-                        context.add_metric(**metric)
+                output_path = os.path.join(outdir, label, output)
+                if 'stat' in cmd.command and not metrics_done:
+                    metrics_done = True
+                    with open(output_path) as f:
+                        for metric in self._extract_stat_metrics(label,
+                                                                 f.read()):
+                            context.add_metric(**metric)
+                    kind = 'raw'
+                context.add_artifact(os.path.join(label, output),
+                                     output_path, kind,
+                                     classifiers=classifiers)
 
     def teardown(self, context):
         # pylint: disable=unused-argument

From 4cd05297e4bde3b67787c2e81633ed7d821e0932 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre-Cl=C3=A9ment=20Tosi?= <pierre-clement.tosi@arm.com>
Date: Wed, 26 Jun 2019 11:20:17 +0100
Subject: [PATCH 7/7] fixup! instruments/perf: Support ALL perf subcommands

---
 wa/instruments/perf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wa/instruments/perf.py b/wa/instruments/perf.py
index 8fe8d7d0f..b9ba3af50 100644
--- a/wa/instruments/perf.py
+++ b/wa/instruments/perf.py
@@ -26,7 +26,7 @@
     'PerfInstrument',
 ]
 
-DEFAULT_EVENTS = ['migration', 'cs']
+DEFAULT_EVENTS = ['migrations', 'cs']
 DEFAULT_OPTIONSTRING = '-a'