Skip to content

Commit 94c6015

Browse files
committed
wrap the perfevent library by Viktor Leis and improve the interface for counter definition
1 parent 23bb9a5 commit 94c6015

File tree

7 files changed

+338
-0
lines changed

7 files changed

+338
-0
lines changed

src/cpp-utility/perf/Counter.hpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#pragma once
2+
3+
#include "PerfEvent.hpp"
4+
#include <cpp-utility/perf/definition/DurationCounterDefinition.hpp>
5+
#include <cpp-utility/perf/definition/ExtendedCounterDefinition.hpp>
6+
#include <cpp-utility/perf/definition/PerfCounterDefinition.hpp>
7+
#include <cpp-utility/std/New.hpp>
8+
9+
namespace utility::perf {
10+
11+
// Duration Counters
12+
static const Counter DURATION = std::new_nothrow<definition::DurationCounterDefinition<std::nano>>("duration_ms");
13+
static const Counter DURATION_NS = std::new_nothrow<definition::DurationCounterDefinition<std::nano>>("duration_ns");
14+
static const Counter DURATION_MS =
15+
std::new_nothrow<definition::DurationCounterDefinition<std::milli>>("duration_ms");
16+
static const Counter DURATION_S =
17+
std::new_nothrow<definition::DurationCounterDefinition<std::ratio<1, 1>>>("duration_s");
18+
19+
// Perf Counters
20+
static const Counter CYCLES =
21+
std::new_nothrow<definition::PerfCounterDefinition>("cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
22+
23+
static const Counter INSTRUCTIONS =
24+
std::new_nothrow<definition::PerfCounterDefinition>("instructions", PERF_TYPE_HARDWARE,
25+
PERF_COUNT_HW_INSTRUCTIONS);
26+
27+
static const Counter L1D_MISSES =
28+
std::new_nothrow<definition::PerfCounterDefinition>("l1d-misses", PERF_TYPE_HW_CACHE,
29+
PERF_COUNT_HW_CACHE_L1D | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
30+
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16));
31+
32+
static const Counter L1I_MISSES =
33+
std::new_nothrow<definition::PerfCounterDefinition>("l1i-misses", PERF_TYPE_HW_CACHE,
34+
PERF_COUNT_HW_CACHE_L1I | (PERF_COUNT_HW_CACHE_OP_READ << 8) |
35+
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16));
36+
37+
static const Counter DTLB_MISSES = std::new_nothrow<definition::PerfCounterDefinition>(
38+
"dtlb-misses", PERF_TYPE_HW_CACHE,
39+
PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16));
40+
41+
static const Counter ITLB_MISSES = std::new_nothrow<definition::PerfCounterDefinition>(
42+
"itlb-misses", PERF_TYPE_HW_CACHE,
43+
PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16));
44+
45+
static const Counter LLC_MISSES =
46+
std::new_nothrow<definition::PerfCounterDefinition>("llc-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
47+
48+
static const Counter BRANCH_MISSES =
49+
std::new_nothrow<definition::PerfCounterDefinition>("branch-misses", PERF_TYPE_HARDWARE,
50+
PERF_COUNT_HW_BRANCH_MISSES);
51+
52+
static const Counter TASK_CLOCK =
53+
std::new_nothrow<definition::PerfCounterDefinition>("task-clock", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK);
54+
55+
// Extended Counters, dividing two existing counters;
56+
static const Counter IPC =
57+
std::new_nothrow<definition::DivCounterDefinition>("ipc", std::array<Counter, 2>{INSTRUCTIONS, CYCLES});
58+
static const Counter CPUS =
59+
std::new_nothrow<definition::DivCounterDefinition>("cpus", std::array<Counter, 2>{TASK_CLOCK, DURATION});
60+
static const Counter GHZ =
61+
std::new_nothrow<definition::DivCounterDefinition>("ghz", std::array<Counter, 2>{CYCLES, TASK_CLOCK});
62+
63+
} // namespace utility::perf

src/cpp-utility/perf/PerfCounter.hpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#pragma once
2+
3+
#include <cpp-utility/compiler/CompilerHints.hpp>
4+
#include <cstring>
5+
#include <linux/perf_event.h>
6+
#include <sys/ioctl.h>
7+
#include <syscall.h>
8+
#include <unistd.h>
9+
10+
namespace utility::perf {
11+
12+
// Identifies a perf event: `first` is written to perf_event_attr::type and `second` to
// perf_event_attr::config when a PerfCounter is constructed.
using PerfConfig = std::pair<uint64_t, uint64_t>;
13+
14+
class PerfCounter {
15+
private:
16+
struct alignas(32) read_format {
17+
uint64_t value{};
18+
uint64_t time_enabled{};
19+
uint64_t time_running{};
20+
};
21+
22+
perf_event_attr pe{};
23+
int fd;
24+
read_format prev;
25+
read_format data;
26+
27+
public:
28+
explicit PerfCounter(PerfConfig config) : fd(0) {
29+
memset(&pe, 0, sizeof(perf_event_attr));
30+
pe.type = config.first;
31+
pe.size = sizeof(perf_event_attr);
32+
pe.config = config.second;
33+
pe.disabled = true;
34+
pe.inherit = 1;
35+
pe.inherit_stat = 0;
36+
pe.exclude_kernel = false;
37+
pe.exclude_hv = false;
38+
pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
39+
40+
fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); // NOLINT
41+
}
42+
43+
forceinline void start() {
44+
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
45+
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
46+
read(fd, &prev, sizeof(uint64_t) * 3);
47+
}
48+
49+
forceinline void stop() {
50+
read(fd, &data, sizeof(uint64_t) * 3);
51+
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
52+
}
53+
54+
forceinline double get() const {
55+
return static_cast<double>(data.value - prev.value) *
56+
(static_cast<double>(data.time_enabled - prev.time_enabled) /
57+
static_cast<double>(data.time_running - prev.time_running));
58+
}
59+
};
60+
61+
} // namespace utility::perf

src/cpp-utility/perf/PerfEvent.hpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#pragma once
2+
3+
#include "PerfCounter.hpp"
4+
#include <cpp-utility/perf/definition/CounterDefinition.hpp>
5+
#include <cpp-utility/std/Hash.hpp>
6+
7+
namespace utility::perf {
8+
9+
// Re-exports definition::Counter so that users can write utility::perf::Counter.
using Counter = definition::Counter;
10+
11+
// Forward declarations of the definition classes that PerfEvent befriends below.
namespace definition {
12+
class PerfCounterDefinition;
13+
template<typename>
14+
class DurationCounterDefinition;
15+
} // namespace definition
16+
17+
class PerfEvent {
18+
friend class definition::PerfCounterDefinition;
19+
template<typename>
20+
friend class definition::DurationCounterDefinition;
21+
22+
private:
23+
std::vector<PerfCounter> perfCounters;
24+
std::chrono::time_point<std::chrono::steady_clock> startTime;
25+
std::chrono::time_point<std::chrono::steady_clock> stopTime;
26+
std::unordered_map<PerfConfig, size_t> perfCounterMap;
27+
28+
double readPerfCounter(PerfConfig perfConfig) {
29+
if (perfCounterMap.contains(perfConfig)) {
30+
return perfCounters[perfCounterMap[perfConfig]].get();
31+
} else {
32+
return NAN;
33+
};
34+
}
35+
36+
void addCounter(Counter counter) {
37+
// if creating the counter definition failed, the pointer will be null
38+
if (not counter) {
39+
throw std::runtime_error{"cannot add an invalid counter!"};
40+
}
41+
42+
// we only have to take of perf counters, duration is always measured
43+
if (counter->isPerfCounter() and not perfCounterMap.contains(counter->getPerfConfig())) {
44+
perfCounterMap[counter->getPerfConfig()] = perfCounters.size();
45+
perfCounters.emplace_back(counter->getPerfConfig());
46+
}
47+
48+
// add additional counters needed to compute extended counters
49+
for (auto* dependency : counter->dependsOn()) {
50+
addCounter(dependency);
51+
}
52+
}
53+
54+
public:
55+
explicit PerfEvent(const std::vector<Counter>& counters) {
56+
for (auto* counter : counters) {
57+
addCounter(counter);
58+
}
59+
}
60+
61+
forceinline void start() {
62+
for (auto& counters : perfCounters) {
63+
counters.start();
64+
}
65+
startTime = std::chrono::steady_clock::now();
66+
}
67+
68+
forceinline void stop() {
69+
stopTime = std::chrono::steady_clock::now();
70+
for (auto& counters : perfCounters) {
71+
counters.stop();
72+
}
73+
}
74+
75+
double get(Counter counter) {
76+
return counter->compute(*this);
77+
}
78+
};
79+
80+
} // namespace utility::perf
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include <utility>
2+
3+
#pragma once
4+
5+
namespace utility::perf {
6+
7+
class PerfEvent;
8+
9+
namespace definition {
10+
11+
class CounterDefinition {
12+
friend PerfEvent;
13+
14+
private:
15+
const std::string_view name;
16+
17+
virtual bool isPerfCounter() const = 0;
18+
19+
virtual PerfConfig getPerfConfig() const = 0;
20+
21+
virtual std::vector<CounterDefinition*> dependsOn() const = 0;
22+
23+
virtual double compute(PerfEvent&) const = 0;
24+
25+
protected:
26+
explicit CounterDefinition(const std::string_view& name) noexcept : name(name) {}
27+
28+
public:
29+
std::string getName() const {
30+
return std::string(name);
31+
}
32+
};
33+
34+
// Counters are handled as raw pointers to their definition; PerfEvent rejects a null pointer
// as an invalid counter.
using Counter = CounterDefinition*;
35+
36+
} // namespace definition
37+
} // namespace utility::perf
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#pragma once
2+
3+
namespace utility::perf::definition {
4+
5+
template<typename Ratio = std::nano>
6+
class DurationCounterDefinition : public CounterDefinition {
7+
public:
8+
explicit DurationCounterDefinition(const std::string_view& name) noexcept : CounterDefinition(name){};
9+
10+
bool isPerfCounter() const override {
11+
return false;
12+
}
13+
14+
PerfConfig getPerfConfig() const override {
15+
throw std::runtime_error{"no perf config available!"};
16+
}
17+
18+
std::vector<Counter> dependsOn() const override {
19+
return {};
20+
}
21+
22+
double compute(PerfEvent& event) const override {
23+
return std::chrono::duration<double, Ratio>(event.stopTime - event.startTime).count();
24+
}
25+
};
26+
27+
} // namespace utility::perf::definition
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#pragma once
2+
3+
namespace utility::perf::definition {
4+
5+
template<size_t n_dependencies>
6+
class ExtendedCounterDefinition : public CounterDefinition {
7+
protected:
8+
const std::array<Counter, n_dependencies> dependencies;
9+
10+
public:
11+
ExtendedCounterDefinition(const std::string_view& name,
12+
const std::array<Counter, n_dependencies>& dependencies) noexcept
13+
: CounterDefinition(name), dependencies(std::move(dependencies)) {}
14+
15+
bool isPerfCounter() const override {
16+
return false;
17+
}
18+
19+
PerfConfig getPerfConfig() const override {
20+
throw std::runtime_error{"no perf config available!"};
21+
}
22+
23+
std::vector<Counter> dependsOn() const override {
24+
return std::vector(std::begin(dependencies), std::end(dependencies));
25+
}
26+
};
27+
28+
// Extended counter whose value is the quotient of its two dependencies.
class DivCounterDefinition : public ExtendedCounterDefinition<2> {
public:
    DivCounterDefinition(const std::string_view& name, const std::array<Counter, 2>& dependencies) noexcept
        : ExtendedCounterDefinition<2>(name, dependencies) {}

    // Returns the value of the first dependency divided by the value of the second.
    double compute(PerfEvent& event) const override {
        const double dividend = event.get(dependencies[0]);
        const double divisor = event.get(dependencies[1]);
        return dividend / divisor;
    }
};
37+
38+
} // namespace utility::perf::definition
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#pragma once
2+
3+
#include <utility>
4+
5+
namespace utility::perf::definition {
6+
7+
class PerfCounterDefinition : public CounterDefinition {
8+
private:
9+
PerfConfig perfConfig;
10+
11+
public:
12+
PerfCounterDefinition(const std::string_view& name, uint64_t perfTypeId, uint64_t perfEventId) noexcept
13+
: CounterDefinition(name), perfConfig(std::make_pair(perfTypeId, perfEventId)) {}
14+
15+
bool isPerfCounter() const override {
16+
return true;
17+
}
18+
19+
PerfConfig getPerfConfig() const override {
20+
return perfConfig;
21+
}
22+
23+
std::vector<Counter> dependsOn() const override {
24+
return {};
25+
}
26+
27+
double compute(PerfEvent& event) const override {
28+
return event.readPerfCounter(perfConfig);
29+
}
30+
};
31+
32+
} // namespace utility::perf::definition

0 commit comments

Comments
 (0)