diff --git a/tests/test_instrument_perf.py b/tests/test_instrument_perf.py new file mode 100644 index 000000000..60ea64fc1 --- /dev/null +++ b/tests/test_instrument_perf.py @@ -0,0 +1,2062 @@ +# copyright 2019 Arm limited +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2. +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +import unittest + +from wa.instruments.perf import PerfInstrument + +STAT_PAIRS = { + + '-a -e r1,r2,r3,r4,r5,r6,r7,r8': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + + 1139 migrations [100.00%] + 6141 cs + 14648295 r1 [74.87%] + 2966422 r2 [74.96%] + 11872707 r3 [74.94%] + 8184054637 r4 [75.11%] + 2409014 r5 [75.30%] + 86957873 r6 [75.27%] + 34552449 r7 [75.14%] + 15730113018 r8 [74.88%] + + 1.681693229 seconds time elapsed +""", +[ + { + "name": "default0_migrations", + "value": 1139, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "migrations", + "enabled": 100.0 + } + }, + { + "name": "default0_cs", + "value": 6141, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "cs" + } + }, + { + "name": "default0_r1", + "value": 14648295, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r1", + "enabled": 74.87 + } + 
}, + { + "name": "default0_r2", + "value": 2966422, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r2", + "enabled": 74.96 + } + }, + { + "name": "default0_r3", + "value": 11872707, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r3", + "enabled": 74.94 + } + }, + { + "name": "default0_r4", + "value": 8184054637, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r4", + "enabled": 75.11 + } + }, + { + "name": "default0_r5", + "value": 2409014, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r5", + "enabled": 75.3 + } + }, + { + "name": "default0_r6", + "value": 86957873, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r6", + "enabled": 75.27 + } + }, + { + "name": "default0_r7", + "value": 34552449, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r7", + "enabled": 75.14 + } + }, + { + "name": "default0_r8", + "value": 15730113018, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 1.681693229, + "duration_units": "seconds", + "name": "r8", + "enabled": 74.88 + } + } +], +), +], + + '-a -A': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + + CPU0 201 migrations (100.00%) + CPU1 217 migrations (100.00%) + CPU2 241 migrations (100.00%) + CPU3 216 migrations (100.00%) + CPU4 79 migrations (100.00%) + CPU5 40 
migrations (100.00%) + CPU6 55 migrations (100.00%) + CPU7 70 migrations (100.00%) + CPU0 2285 cs + CPU1 1454 cs + CPU2 2704 cs + CPU3 2085 cs + CPU4 1790 cs + CPU5 1240 cs + CPU6 636 cs + CPU7 1557 cs + + 2.494999050 seconds time elapsed +""", +[ + { + "name": "default0_migrations_T0", + "value": 201, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 0, + "hw_thread_count": 1, + "cpu": 0, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T1", + "value": 217, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 1, + "hw_thread_count": 1, + "cpu": 1, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T2", + "value": 241, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 2, + "hw_thread_count": 1, + "cpu": 2, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T3", + "value": 216, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 3, + "hw_thread_count": 1, + "cpu": 3, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T4", + "value": 79, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 4, + "hw_thread_count": 1, + "cpu": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T5", + "value": 40, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", 
+ "hw_thread": 5, + "hw_thread_count": 1, + "cpu": 5, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T6", + "value": 55, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 6, + "hw_thread_count": 1, + "cpu": 6, + "enabled": 100.0 + } + }, + { + "name": "default0_migrations_T7", + "value": 70, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "migrations", + "hw_thread": 7, + "hw_thread_count": 1, + "cpu": 7, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_T0", + "value": 2285, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 0, + "hw_thread_count": 1, + "cpu": 0 + } + }, + { + "name": "default0_cs_T1", + "value": 1454, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 1, + "hw_thread_count": 1, + "cpu": 1 + } + }, + { + "name": "default0_cs_T2", + "value": 2704, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 2, + "hw_thread_count": 1, + "cpu": 2 + } + }, + { + "name": "default0_cs_T3", + "value": 2085, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 3, + "hw_thread_count": 1, + "cpu": 3 + } + }, + { + "name": "default0_cs_T4", + "value": 1790, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 
4, + "hw_thread_count": 1, + "cpu": 4 + } + }, + { + "name": "default0_cs_T5", + "value": 1240, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 5, + "hw_thread_count": 1, + "cpu": 5 + } + }, + { + "name": "default0_cs_T6", + "value": 636, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 6, + "hw_thread_count": 1, + "cpu": 6 + } + }, + { + "name": "default0_cs_T7", + "value": 1557, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.49499905, + "duration_units": "seconds", + "name": "cs", + "hw_thread": 7, + "hw_thread_count": 1, + "cpu": 7 + } + } +], +), +], + + '-a -A --per-socket': [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + +S0 4 697 migrations (100.00%) +S0 4 7801 cs +S1 4 203 migrations (100.00%) +S1 4 4408 cs + + 2.262571267 seconds time elapsed +""", +[ + { + "name": "default0_migrations_S0", + "value": 697, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "migrations", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S0", + "value": 7801, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "cs", + "cluster": 0, + "hw_thread_count": 4 + } + }, + { + "name": "default0_migrations_S1", + "value": 203, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "migrations", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 100.0 + } 
+ }, + { + "name": "default0_cs_S1", + "value": 4408, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.262571267, + "duration_units": "seconds", + "name": "cs", + "cluster": 1, + "hw_thread_count": 4 + } + } +], +), +], + "-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket": [ +( +# Pixel 2 - OS 4.4.88-ga1592dc22912 +# perf version 3.9.rc8.ge9aa1d6 +""" + Performance counter stats for 'sleep 1000': + +S0 4 725 migrations (100.00%) +S0 4 7202 cs +S0 4 9439048 r1 (37.55%) +S0 4 179650 r2 (37.54%) +S0 4 3856583 r3 (37.56%) +S0 4 71399486 r4 (37.49%) +S0 4 251669 r5 (37.48%) +S0 4 39189196 r6 (37.46%) +S0 4 19239860 r7 (37.47%) +S0 4 288165417 r8 (37.47%) +S1 4 222 migrations (100.00%) +S1 4 5225 cs +S1 4 8222810 r1 (37.55%) +S1 4 2852407 r2 (37.55%) +S1 4 5519117 r3 (37.55%) +S1 4 7193421718 r4 (37.49%) +S1 4 3236589 r5 (37.47%) +S1 4 0 r6 (37.47%) +S1 4 0 r7 (37.47%) +S1 4 13821910139 r8 (37.47%) + + 2.256465902 seconds time elapsed +""", +[ + { + "name": "default0_migrations_S0", + "value": 725, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "migrations", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S0", + "value": 7202, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "cs", + "cluster": 0, + "hw_thread_count": 4 + } + }, + { + "name": "default0_r1_S0", + "value": 9439048, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r1", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r2_S0", + "value": 179650, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, 
+ "duration_units": "seconds", + "name": "r2", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.54 + } + }, + { + "name": "default0_r3_S0", + "value": 3856583, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r3", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.56 + } + }, + { + "name": "default0_r4_S0", + "value": 71399486, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r4", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.49 + } + }, + { + "name": "default0_r5_S0", + "value": 251669, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r5", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.48 + } + }, + { + "name": "default0_r6_S0", + "value": 39189196, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r6", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.46 + } + }, + { + "name": "default0_r7_S0", + "value": 19239860, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r7", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r8_S0", + "value": 288165417, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r8", + "cluster": 0, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_migrations_S1", + "value": 222, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": 
"seconds", + "name": "migrations", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 100.0 + } + }, + { + "name": "default0_cs_S1", + "value": 5225, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "cs", + "cluster": 1, + "hw_thread_count": 4 + } + }, + { + "name": "default0_r1_S1", + "value": 8222810, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r1", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r2_S1", + "value": 2852407, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r2", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r3_S1", + "value": 5519117, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r3", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.55 + } + }, + { + "name": "default0_r4_S1", + "value": 7193421718, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r4", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.49 + } + }, + { + "name": "default0_r5_S1", + "value": 3236589, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r5", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r6_S1", + "value": 0, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r6", + "cluster": 1, + 
"hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r7_S1", + "value": 0, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r7", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + }, + { + "name": "default0_r8_S1", + "value": 13821910139, + "units": None, + "classifiers": { + "label": "default0", + "target": "'sleep 1000'", + "duration": 2.256465902, + "duration_units": "seconds", + "name": "r8", + "cluster": 1, + "hw_thread_count": 4, + "enabled": 37.47 + } + } +], +), +], + + '-a -A --per-core': [ +( +# Ubuntu 18.04.2 LTS - OS 4.15.0-50-generic +# perf version 4.15.18 +""" + Performance counter stats for 'system wide': + +S0-C0 2 2003.008100 cpu-clock (msec) # 1.998 CPUs utilized +S0-C0 2 38 context-switches # 0.019 K/sec +S0-C0 2 3 cpu-migrations # 0.001 K/sec +S0-C0 2 73 page-faults # 0.036 K/sec +S0-C0 2 15,750,905 cycles # 0.008 GHz +S0-C0 2 4,042,693 instructions # 0.26 insn per cycle +S0-C0 2 860,481 branches # 0.430 M/sec +S0-C0 2 166,940 branch-misses # 19.40% of all branches +S0-C1 2 2003.042586 cpu-clock (msec) # 1.998 CPUs utilized +S0-C1 2 155 context-switches # 0.077 K/sec +S0-C1 2 1 cpu-migrations # 0.000 K/sec +S0-C1 2 386 page-faults # 0.193 K/sec +S0-C1 2 407,532,423 cycles # 0.203 GHz +S0-C1 2 73,526,057 instructions # 0.18 insn per cycle +S0-C1 2 22,478,777 branches # 11.222 M/sec +S0-C1 2 293,815 branch-misses # 1.31% of all branches +S0-C2 2 2003.076028 cpu-clock (msec) # 1.998 CPUs utilized +S0-C2 2 213 context-switches # 0.106 K/sec +S0-C2 2 2 cpu-migrations # 0.001 K/sec +S0-C2 2 1 page-faults # 0.000 K/sec +S0-C2 2 18,605,672 cycles # 0.009 GHz +S0-C2 2 4,406,356 instructions # 0.24 insn per cycle +S0-C2 2 1,088,504 branches # 0.543 M/sec +S0-C2 2 142,203 branch-misses # 13.06% of all branches +S0-C3 2 2003.109192 cpu-clock (msec) # 1.998 CPUs utilized +S0-C3 2 245 context-switches # 0.122 K/sec 
+S0-C3 2 6 cpu-migrations # 0.003 K/sec +S0-C3 2 0 page-faults # 0.000 K/sec +S0-C3 2 23,626,131 cycles # 0.012 GHz +S0-C3 2 7,714,748 instructions # 0.33 insn per cycle +S0-C3 2 1,805,933 branches # 0.902 M/sec +S0-C3 2 193,243 branch-misses # 10.70% of all branches +S0-C4 2 2003.143584 cpu-clock (msec) # 1.998 CPUs utilized +S0-C4 2 596 context-switches # 0.298 K/sec +S0-C4 2 6 cpu-migrations # 0.003 K/sec +S0-C4 2 113 page-faults # 0.056 K/sec +S0-C4 2 53,837,367 cycles # 0.027 GHz +S0-C4 2 23,264,962 instructions # 0.43 insn per cycle +S0-C4 2 4,975,165 branches # 2.484 M/sec +S0-C4 2 301,069 branch-misses # 6.05% of all branches +S0-C5 2 2003.151837 cpu-clock (msec) # 1.998 CPUs utilized +S0-C5 2 172 context-switches # 0.086 K/sec +S0-C5 2 4 cpu-migrations # 0.002 K/sec +S0-C5 2 37 page-faults # 0.018 K/sec +S0-C5 2 24,086,889 cycles # 0.012 GHz +S0-C5 2 7,219,194 instructions # 0.30 insn per cycle +S0-C5 2 1,537,648 branches # 0.768 M/sec +S0-C5 2 177,565 branch-misses # 11.55% of all branches +S0-C6 2 2003.160900 cpu-clock (msec) # 1.998 CPUs utilized +S0-C6 2 146 context-switches # 0.073 K/sec +S0-C6 2 3 cpu-migrations # 0.001 K/sec +S0-C6 2 69 page-faults # 0.034 K/sec +S0-C6 2 27,327,018 cycles # 0.014 GHz +S0-C6 2 7,956,363 instructions # 0.29 insn per cycle +S0-C6 2 1,834,119 branches # 0.916 M/sec +S0-C6 2 210,607 branch-misses # 11.48% of all branches +S0-C7 2 2003.187967 cpu-clock (msec) # 1.998 CPUs utilized +S0-C7 2 91 context-switches # 0.045 K/sec +S0-C7 2 1 cpu-migrations # 0.000 K/sec +S0-C7 2 32 page-faults # 0.016 K/sec +S0-C7 2 26,120,485 cycles # 0.013 GHz +S0-C7 2 10,457,563 instructions # 0.40 insn per cycle +S0-C7 2 2,337,445 branches # 1.167 M/sec +S0-C7 2 238,864 branch-misses # 10.22% of all branches + + 1.002350964 seconds time elapsed +""", +[ + { + "name": "default0_cpu-clock_(msec)_S0_C0", + "units": None, + "value": 2003.0081, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + 
"duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C0", + "units": None, + "value": 38, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.019, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C0", + "units": None, + "value": 3, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C0", + "units": None, + "value": 73, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.036, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C0", + "units": None, + "value": 15750905, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.008, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C0", + "units": None, + "value": 4042693, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.26, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C0", + "units": None, + "value": 
860481, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 0.43, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C0", + "units": None, + "value": 166940, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 0, + "comment_value": 19.4, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C1", + "units": None, + "value": 2003.042586, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C1", + "units": None, + "value": 155, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.077, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C1", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C1", + "units": None, + "value": 386, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + 
"comment_value": 0.193, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C1", + "units": None, + "value": 407532423, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.203, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C1", + "units": None, + "value": 73526057, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 0.18, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C1", + "units": None, + "value": 22478777, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 11.222, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C1", + "units": None, + "value": 293815, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 1, + "comment_value": 1.31, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C2", + "units": None, + "value": 2003.076028, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C2", + "units": None, + "value": 213, + "classifiers": { + "label": "default0", + "target": "'system wide'", + 
"duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.106, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C2", + "units": None, + "value": 2, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C2", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C2", + "units": None, + "value": 18605672, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.009, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C2", + "units": None, + "value": 4406356, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.24, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C2", + "units": None, + "value": 1088504, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 0.543, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C2", + "units": None, 
+ "value": 142203, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 2, + "comment_value": 13.06, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C3", + "units": None, + "value": 2003.109192, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C3", + "units": None, + "value": 245, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.122, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C3", + "units": None, + "value": 6, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.003, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C3", + "units": None, + "value": 0, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C3", + "units": None, + "value": 23626131, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 
3, + "comment_value": 0.012, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C3", + "units": None, + "value": 7714748, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.33, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C3", + "units": None, + "value": 1805933, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 0.902, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C3", + "units": None, + "value": 193243, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 3, + "comment_value": 10.7, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C4", + "units": None, + "value": 2003.143584, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C4", + "units": None, + "value": 596, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.298, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C4", + "units": None, + "value": 6, + "classifiers": { + "label": "default0", + "target": "'system 
wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.003, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C4", + "units": None, + "value": 113, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.056, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C4", + "units": None, + "value": 53837367, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.027, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C4", + "units": None, + "value": 23264962, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 0.43, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C4", + "units": None, + "value": 4975165, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 2.484, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C4", + "units": None, + "value": 301069, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 4, + "comment_value": 6.05, + "comment_units": "% of all branches" + } + }, + { + "name": 
"default0_cpu-clock_(msec)_S0_C5", + "units": None, + "value": 2003.151837, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C5", + "units": None, + "value": 172, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.086, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C5", + "units": None, + "value": 4, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.002, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C5", + "units": None, + "value": 37, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.018, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C5", + "units": None, + "value": 24086889, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.012, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C5", + "units": None, + "value": 7219194, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 
0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.3, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C5", + "units": None, + "value": 1537648, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 0.768, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C5", + "units": None, + "value": 177565, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 5, + "comment_value": 11.55, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C6", + "units": None, + "value": 2003.1609, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 1.998, + "comment_units": "CPUs utilized" + } + }, + { + "name": "default0_context-switches_S0_C6", + "units": None, + "value": 146, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.073, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C6", + "units": None, + "value": 3, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.001, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C6", + "units": None, + "value": 69, + "classifiers": { + "label": 
"default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.034, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C6", + "units": None, + "value": 27327018, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.014, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C6", + "units": None, + "value": 7956363, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.29, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C6", + "units": None, + "value": 1834119, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 0.916, + "comment_units": "M/sec" + } + }, + { + "name": "default0_branch-misses_S0_C6", + "units": None, + "value": 210607, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branch-misses", + "cluster": 0, + "hw_thread_count": 2, + "core": 6, + "comment_value": 11.48, + "comment_units": "% of all branches" + } + }, + { + "name": "default0_cpu-clock_(msec)_S0_C7", + "units": None, + "value": 2003.187967, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-clock (msec)", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 1.998, + "comment_units": "CPUs 
utilized" + } + }, + { + "name": "default0_context-switches_S0_C7", + "units": None, + "value": 91, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "context-switches", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.045, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cpu-migrations_S0_C7", + "units": None, + "value": 1, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cpu-migrations", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.0, + "comment_units": "K/sec" + } + }, + { + "name": "default0_page-faults_S0_C7", + "units": None, + "value": 32, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "page-faults", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.016, + "comment_units": "K/sec" + } + }, + { + "name": "default0_cycles_S0_C7", + "units": None, + "value": 26120485, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "cycles", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.013, + "comment_units": "GHz" + } + }, + { + "name": "default0_instructions_S0_C7", + "units": None, + "value": 10457563, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "instructions", + "cluster": 0, + "hw_thread_count": 2, + "core": 7, + "comment_value": 0.4, + "comment_units": "insn per cycle" + } + }, + { + "name": "default0_branches_S0_C7", + "units": None, + "value": 2337445, + "classifiers": { + "label": "default0", + "target": "'system wide'", + "duration": 1.002350964, + "duration_units": "seconds", + "name": "branches", + 
class StatParserTest(unittest.TestCase):
    """Checks ``PerfInstrument._extract_stat_metrics`` against recorded outputs.

    Each entry of ``STAT_PAIRS`` maps a ``perf stat`` option string to a list
    of ``(stdout, expected_metrics)`` pairs; every test method replays the
    pairs stored under one option string.
    """

    # Show complete diffs on failure; the expected dictionaries are large.
    maxDiff = None

    def _test_pair(self, stdout, metrics):
        """Parse ``stdout`` and compare the result with ``metrics``.

        Parameters:
            stdout   Raw text printed by ``perf stat``
            metrics  List of the metric dictionaries expected from the parser
        """
        metrics_dut = list(
            PerfInstrument._extract_stat_metrics('default0', stdout))
        # metric names are guaranteed to be unique by the documentation
        expected = {m['name']: m for m in metrics}
        for metric_dut in metrics_dut:
            # Fail (rather than error out with StopIteration) when the parser
            # yields a metric that is not part of the expectations.
            self.assertIn(metric_dut['name'], expected)
            self.assertEqual(expected[metric_dut['name']], metric_dut)
        # Comparing the sorted names catches missing metrics, extra metrics
        # and duplicated metrics, which a bare count comparison would not.
        self.assertEqual(sorted(m['name'] for m in metrics_dut),
                         sorted(m['name'] for m in metrics))

    def _test_key(self, key):
        """Run :meth:`_test_pair` on every pair recorded under ``key``."""
        for stdout, metrics in STAT_PAIRS[key]:
            self._test_pair(stdout, metrics)

    def test_all_cpus_many_events(self):
        self._test_key('-a -e r1,r2,r3,r4,r5,r6,r7,r8')

    def test_all_cpus_no_aggregate_per_core(self):
        self._test_key('-a -A --per-core')

    def test_all_cpus_no_aggregate_per_socket(self):
        self._test_key('-a -A --per-socket')

    def test_all_cpus_no_aggregate_many_events_per_socket(self):
        self._test_key('-a -A -e r1,r2,r3,r4,r5,r6,r7,r8 --per-socket')
import itertools
import os
import re
import shlex

from devlib.trace.perf import PerfCollector, PerfCommandDict
from wa import Instrument, Parameter
from wa.utils.types import list_or_string, list_of_strs


__all__ = [
    'PerfInstrument',
]

# Values used by the default ``perf stat`` when the corresponding parameters
# are left unset (and ``commands`` is not given).
DEFAULT_EVENTS = ['migrations', 'cs']
DEFAULT_OPTIONSTRING = '-a'


class PerfInstrument(Instrument):

    name = 'perf'
    description = """
    Perf is a Linux profiling tool based on performance counters.

    Performance counters are typically CPU hardware registers (found in the
    Performance Monitoring Unit) that count hardware events such as
    instructions executed, cache-misses suffered, or branches mispredicted.
    Because each ``event`` corresponds to a hardware counter, the maximum
    number of events that can be tracked is imposed by the available hardware.

    By extension, performance counters, in the context of ``perf``, also refer
    to so-called "software counters" representing events that can be tracked by
    the OS kernel (e.g. context switches). As these are software events, the
    counters are kept in RAM and the hardware virtually imposes no limit on the
    number that can be used.

    This instrument allows a straight-forward way of calling ``perf stat``
    through the named parameters ``optionstring`` and ``events``, which is the
    default behaviour (see the defaults of these parameters). However, it can
    also be used through the more advanced ``commands`` dictionary which
    provides a flexible access to all ways ``perf`` can be used.

    In both cases, if a ``stat`` command is issued, this instrument will
    automatically parse its output into run ``metrics``. For this reason,
    please avoid the ``-x`` ``stat`` flag.

    The ``pre_commands`` and ``post_commands`` are provided to suit those
    ``perf`` commands that don't actually capture data (``list``, ``config``,
    ``report``, ...).

    Commands are tagged with _labels_ which are used to define in which
    directory they run. Therefore, a pair of commands (_e.g._ a `record`
    followed by a `report`) sharing the same label can access the same files
    while commands with different labels can use the same filename with the
    guarantee of avoiding clashes.

    Depending on the subcommand used, ``perf`` might require setting:

        - ``/proc/sys/kernel/printk`` to ``4``
        - ``/proc/sys/kernel/kptr_restrict`` to ``0``

    Please refer to the ``sysfile_values`` runtime parameter to do so from an
    agenda.

    When running ``perf stat``, this instrument reports the captured
    counters as unitless :class:`Metrics` with the following classifiers:

        - ``'name'``: The name of the event as reported by ``perf``. This name
          may not be unique when aggregation is disabled as the same counter is
          then captured for multiple hardware threads;
        - ``'label'``: Label given to the run of ``perf stat``;
        - ``'target'``: The target ``perf`` reports for the captured events.
          This is shared across all events of a run and is further specialized
          by ``'hw_thread'``, ``'core'`` and ``'cluster'`` if applicable;
        - ``'duration'``, ``'duration_units'``: duration of the ``perf`` run;
        - ``'count_error'``: A string containing the error that prevented the
          counter from being captured. Only available if an error occurred.
          In this case the value of the metric is always ``0``;
        - ``'hw_thread_count'``: Number of **hardware** threads that were
          contributing to the counter. Only available when the automatic
          aggregation done by ``perf stat`` is disabled. See ``'hw_thread'``,
          ``'core'`` and ``'cluster'``;
        - ``'hw_thread'``: When the ``--no-aggr`` option is used, holds the
          index of the hardware thread that incremented the counter. In this
          case, ``'hw_thread_count'`` is always ``1``. For backward
          compatibility, the ``'cpu'`` classifier is provided as a synonym of
          ``'hw_thread'`` (unlike what its name might suggest, on systems
          supporting hardware multithreading, ``'cpu'`` is not a synonym of
          ``'core'``!);
        - ``'cluster'``: When the ``--per-socket`` option is used, holds the
          index of the cluster (_i.e._ "socket" in ``perf`` terminology) that
          incremented the counter and ``'hw_thread_count'`` holds the number of
          hardware threads in the cluster. When the ``--per-core`` option is
          used, this classifier gives the index of the cluster of the core.
        - ``'core'``: When the ``--per-core`` option is used, holds the index
          (within its cluster) of the core that incremented the counter and
          ``'hw_thread_count'`` holds the number of hardware threads in the
          core.
        - ``'enabled'``: When ``perf`` needs to capture more hardware events
          than there are hardware counters, it shares the hardware counters
          among the events through time-slicing. This classifier holds the
          fraction (between ``0.0`` and ``100.0``) of the run that a hardware
          counter was allocated to the event. Available only for hardware
          events and only when time-slicing was required.
        - ``'comment_value'``, ``'comment_units'``: Some counters may come with
          an extra "comment" (following a ``#``) added by ``perf``. The
          ``'comment_value'`` holds the numeric (``int`` or ``float``) value of
          the comment while ``'comment_units'`` holds the rest of the comment
          (typically the units). Only available for the events for which
          ``perf`` added a comment.
    """

    parameters = [
        Parameter('force_install', kind=bool, default=False,
                  description="""
                  Always install ``perf`` binary even if ``perf`` is already
                  present on the device.
                  """),
        Parameter('events', kind=list_of_strs, default=None,
                  description="""
                  List of events the default ``perf stat`` should capture.
                  Valid events can be obtained from ``perf list`` and
                  ``perf --help``.
                  This parameter is ignored if ``commands`` is passed.

                  default: {}
                  """.format(
                      ','.join('``{}``'.format(e) for e in DEFAULT_EVENTS))
                  ),
        Parameter('optionstring', kind=list_or_string, default=None,
                  description="""
                  String of options the default ``perf stat`` should use.
                  For backward compatibility, this may be a list of strings.
                  In that case, a ``perf stat`` command will be launched for
                  each string.
                  This parameter is ignored if ``commands`` is passed.

                  default: ``{}``
                  """.format(DEFAULT_OPTIONSTRING)
                  ),
        Parameter('labels', kind=list_of_strs, default=None,
                  description=r"""
                  These labels act like the keys of the ``commands`` parameter.
                  They are provided for backward compatibility. If specified,
                  the number of labels must match the number of
                  ``optionstring``\ s. This parameter is ignored if
                  ``commands`` is passed.
                  """),
        Parameter('pre_commands', kind=PerfCommandDict, default=None,
                  description="""
                  Dictionary of commands to be run before the workloads run
                  (same format as ``commands``).
                  """),
        Parameter('commands', kind=PerfCommandDict, default=None,
                  description="""
                  Dictionary in which keys are considered as *labels* and
                  values are themselves dictionaries with the following
                  entries:

                  - ``command`` (``str``): The ``perf`` subcommand
                    (``stat``, ``record``, ...);
                  - ``flags`` (``str`` or ``list``): Switch flags without
                    their leading hyphens (``no-inherit``, ``all-cpus``,
                    ``a``, ...);
                  - ``kwflags`` (``dict``): Dictionary of flag names (no
                    hyphen) as keys and their corresponding values.
                    These values can be ``list``s for flags taking CSV
                    inputs (``event``, ``pid``, ...);
                  - ``args`` (``str`` or valid command): the post-``--``
                    arguments. This is typically the command ``perf`` will
                    launch and monitor. Therefore, a valid command
                    dictionary (same as this one) is accepted;

                  As an example, the default behaviour can be replicated
                  through::

                      :language: yaml

                      perf:
                        commands:
                          default_behaviour:
                            command: stat
                            flags:
                              - all-cpus
                            kwflags:
                              event:
                                - migrations
                                - cs
                            args:
                              command: sleep
                              args: 1000
                            stderr: '&1'
                            stdout: stat.out
                  """),
        Parameter('post_commands', kind=PerfCommandDict, default=None,
                  description="""
                  Dictionary of commands to be run after the workloads run
                  (same format as ``commands``).
                  """),
    ]

    def __init__(self, target, **kwargs):
        super(PerfInstrument, self).__init__(target, **kwargs)
        # Created in initialize(), once the parameters have been resolved.
        self.collector = None

    def initialize(self, context):
        """Resolve the parameters into command dictionaries and build the
        collector.

        When ``commands`` is not given, one ``perf stat`` command is generated
        per ``optionstring`` entry (labelled ``default0``, ``default1``, ...
        unless ``labels`` is given).

        Raises:
            ValueError  if ``labels`` and ``optionstring`` differ in length, or
                        if ``commands`` is combined with ``optionstring``,
                        ``events`` or ``labels``.
        """
        # pylint: disable=unused-argument
        # pylint: disable=access-member-before-definition
        # pylint: disable=attribute-defined-outside-init
        if self.pre_commands is None:
            self.pre_commands = PerfCommandDict({})
        if self.post_commands is None:
            self.post_commands = PerfCommandDict({})
        if self.commands is None:
            if self.optionstring is None:
                self.optionstring = DEFAULT_OPTIONSTRING

            if self.events is None:
                # Copy so that the module-level default is never aliased by
                # (and mutated through) an instance attribute.
                self.events = list(DEFAULT_EVENTS)

            if isinstance(self.optionstring, str):
                self.optionstring = [self.optionstring]

            if not self.labels:
                self.labels = ['default{}'.format(i)
                               for i, _ in enumerate(self.optionstring)]
            elif isinstance(self.labels, str):
                self.labels = [self.labels]

            if len(self.labels) != len(self.optionstring):
                raise ValueError('Lengths of labels and optionstring differ')

            self.commands = PerfCommandDict({
                label: {
                    'command': 'stat',
                    'kwflags': {'event': self.events},
                    'options': shlex.split(options),
                    'args': {
                        'command': 'sleep',
                        'args': 1000,
                    },
                    'stderr': '&1',
                    'stdout': 'stat.out',
                }
                for label, options in zip(self.labels, self.optionstring)
            })
        else:
            # The legacy parameters only describe the default ``perf stat``;
            # silently ignoring them would hide a configuration mistake.
            for name in ['optionstring', 'events', 'labels']:
                if getattr(self, name, None) is not None:
                    raise ValueError(
                        '{} should not be passed if commands is'.format(name))

        self.collector = PerfCollector(self.target,
                                       self.force_install,
                                       self.pre_commands,
                                       self.commands,
                                       self.post_commands)

    def setup(self, context):
        """Reset the collector and record the ``perf`` version as metadata."""
        self.collector.reset()
        version = self.collector.execute('--version').strip()
        context.update_metadata('versions', self.name, version)

    def start(self, context):
        """Start the collector."""
        # pylint: disable=unused-argument
        self.collector.start()

    def stop(self, context):
        """Stop the collector."""
        # pylint: disable=unused-argument
        self.collector.stop()

    def update_output(self, context):
        """Fetch the collected files and register artifacts and metrics.

        Every output file declared by a command (``-o``/``--output`` flags,
        then redirected stdout, then redirected stderr) becomes an artifact.
        For a ``stat`` command, the first of these files is additionally
        parsed into metrics and registered with the ``'raw'`` kind.
        """
        outdir = os.path.join(context.output_directory, self.name)
        self.collector.get_traces(outdir)
        all_commands = itertools.chain(self.pre_commands.items(),
                                       self.commands.items(),
                                       self.post_commands.items())
        for label, cmd in all_commands:
            classifiers = {
                'label': label,
                'command': cmd.command,
            }

            outputs = []
            if 'o' in cmd.kwflags:
                outputs.append((cmd.kwflags['o'], 'data'))
            if 'output' in cmd.kwflags:
                outputs.append((cmd.kwflags['output'], 'data'))
            # Streams redirected to another stream ('&1', ...) are not files.
            if cmd.stdout and not cmd.stdout.startswith('&'):
                outputs.append((cmd.stdout, 'data'))
            if cmd.stderr and not cmd.stderr.startswith('&'):
                outputs.append((cmd.stderr, 'log'))

            metrics_done = False
            for output, kind in outputs:
                # perf stat supports redirecting its stdout to --output/-o:
                output_path = os.path.join(outdir, label, output)
                if 'stat' in cmd.command and not metrics_done:
                    metrics_done = True
                    with open(output_path) as f:
                        for metric in self._extract_stat_metrics(label,
                                                                 f.read()):
                            context.add_metric(**metric)
                    kind = 'raw'
                context.add_artifact(os.path.join(label, output),
                                     output_path, kind,
                                     classifiers=classifiers)

    def teardown(self, context):
        """Reset the collector."""
        # pylint: disable=unused-argument
        self.collector.reset()

    @classmethod
    def _extract_stat_metrics(cls, label, stdout):
        """Parse the output of ``perf stat`` into metric dictionaries.

        Parameters:
            label   Label of the ``perf stat`` run, used as a classifier and
                    as the prefix of the metric names
            stdout  Text printed by ``perf stat``

        Yields:
            One dictionary per counter line with the keys ``'name'``,
            ``'units'`` (always ``None``), ``'value'`` and ``'classifiers'``.
            Nothing is yielded if ``stdout`` does not contain a complete
            ``perf stat`` report.
        """
        match = cls._stat_regex.search(stdout)
        if match is None:
            return
        base_classifiers = {
            'label': label,
            'target': match['target'],
            'duration': float(match['duration'].replace(',', '')),
            'duration_units': match['duration_units'],
        }
        for m in cls._stat_counter_regex.finditer(match['counters']):
            classifiers = base_classifiers.copy()
            name, count = cls._extract_stat_count(m, classifiers)
            yield {
                'name': name,
                'units': None,
                'value': count,
                'classifiers': classifiers,
            }

    # Splits a report into its header (``target``), the block of counter
    # lines (``counters``) and the footer (``duration``, ``duration_units``).
    _stat_regex = re.compile(
        r'Performance counter stats for (?P<target>.*?)\s*:\s*$'
        r'^(?P<counters>.*)$'
        r'^\s*(?P<duration>[0-9.,]+)\s*(?P<duration_units>\S+)\s*time elapsed',
        flags=(re.S | re.M))

    # Matches one counter line:
    #   [CPU<n> | S<n>[-C<n>] <threads>] <count> <name> [# <comment>] [[xx%]]
    # where <count> is a number or one of perf's error markers.
    _stat_counter_regex = re.compile(
        r'^\s*{aggr}?\s*{count}\s*{name}\s*{comment}?(?:{enabled}|$)'.format(
            aggr=r'(?:{hw_thread}|(?:{cluster}{core}?\s*{thread_cnt}))'.format(
                hw_thread=r'(?:CPU-?(?P<hw_thread>\d+))',
                cluster=r'S(?P<cluster>\d+)',
                core=r'(?:-C(?P<core>\d+))',
                thread_cnt=r'(?P<hw_thread_count>\d+)'),
            count=r'(?P<count>[0-9.,]+|\<not supported\>|\<not counted\>)',
            name=r'(?P<name>.*?)',
            comment=r'(?:#\s*{value}\s*{units}\s*)'.format(
                value=r'(?P<comment_value>[0-9,.]+)',
                units=r'(?P<comment_units>.*?)'),
            enabled=r'(?:[\[\(](?P<enabled>[0-9.]+)%[\)\]])'),
        flags=re.M)

    @staticmethod
    def _parse_number(text):
        """Convert ``text`` to an ``int`` (or a ``float`` if it is not an
        integer), ignoring ``,`` thousands separators.

        Raises:
            ValueError  if ``text`` is not a number.
        """
        digits = text.replace(',', '')
        try:
            return int(digits)
        except ValueError:
            # some "counters" return a float (e.g. "task-clock"):
            return float(digits)

    @staticmethod
    def _extract_stat_count(match, classifiers):
        """Extracts the counter classifiers and count from a counter_match.

        Parameters:
            match       A :class:`re.Match` from :attr:`_stat_counter_regex`
            classifiers A dictionary to be completed for the matched counter
                        (must already hold the ``'label'`` of the run)

        Returns:
            A (name, value) tuple for the matched counter (value is 0 if an
            error occurred).
        """
        name = '{}_{}'.format(classifiers['label'],
                              match['name']).replace(' ', '_')
        classifiers['name'] = match['name']
        # But metrics need a unique name (classifiers not enough) so this
        # name might be specialized by the following:
        try:
            count = PerfInstrument._parse_number(match['count'])
        except ValueError:
            # perf may report "not supported" or "not counted":
            count = 0  # as metrics have to be numeric, can't use None
            classifiers['count_error'] = match['count']
        if match['hw_thread']:  # --no-aggr
            classifiers['hw_thread'] = int(match['hw_thread'])
            classifiers['hw_thread_count'] = 1
            classifiers['cpu'] = int(match['hw_thread'])  # deprecated!
            name += '_T{}'.format(classifiers["hw_thread"])
        elif match['cluster']:  # --per-core or --per-socket
            classifiers['cluster'] = int(match['cluster'])
            classifiers['hw_thread_count'] = int(match['hw_thread_count'])
            name += '_S{}'.format(classifiers["cluster"])
            if match['core']:  # --per-core
                classifiers['core'] = int(match['core'])
                name += '_C{}'.format(classifiers["core"])
        if match['comment_value']:
            # The regex accepts thousands separators, so they must be
            # stripped before the conversion (as is done for the count).
            classifiers['comment_value'] = PerfInstrument._parse_number(
                match['comment_value'])
        if match['comment_units']:
            classifiers['comment_units'] = match['comment_units']
        if match['enabled']:
            classifiers['enabled'] = float(match['enabled'])
        return (name, count)