Support multi-GPU monitoring (#15)
Inspired by PR #12 from @herrnils

* Add support for monitoring multiple GPUs (usage and memory)
* Update `show-graph` tool to visualize all GPU metrics in a single view
* Enable GPU monitoring in `uprof-sample` if GPU_MONITOR_NVIDIA is set
* Fix invalid visualization in `show-graph` tool when multiple event
files are passed time-unordered
cedric-chedaleux authored Oct 4, 2024
2 parents 04f7ecd + 4fa5dd3 commit 3f145e4
Showing 8 changed files with 139 additions and 64 deletions.
10 changes: 6 additions & 4 deletions README.md
@@ -10,9 +10,9 @@

This project provides a tiny C++ profiling library for monitoring:
* execution time
* CPU usage
* CPU(s) usage
* memory usage
* GPU usage and memory
* GPU(s) usage and memory

This library aims at collecting metrics on embedded devices to monitor device
performance while running heavy tasks or booting, for example. Those metrics can
@@ -69,11 +69,13 @@ To monitor a specific GPU, you must subclass `IGPUMonitor`:
class MyGPUMonitor: public uprofile::IGPUMonitor {
public:
float getUsage() override;
void getMemory(int& usedMem, int& totalMem) override;
const std::vector<float>& getUsage() const override;
void getMemory(std::vector<int>& usedMem, std::vector<int>& totalMem) const override;
};
```

As you can see from the interface methods, `cppuprofile` **supports multi-GPU monitoring**.
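
For illustration, a filled-in sketch of such a subclass might look like the following. Everything here is hypothetical: the hard-coded values are invented, the include path is assumed, and `start()`, `stop()` and `watching()` are assumed to be the interface's remaining virtual methods (as the `NvidiaMonitor` overrides further down in this commit suggest).

```cpp
#include <vector>
#include <igpumonitor.h> // include path assumed

class MyGPUMonitor : public uprofile::IGPUMonitor {
public:
    void start(int /*period*/) override { m_watching = true; }
    void stop() override { m_watching = false; }
    bool watching() const override { return m_watching; }

    // One entry per GPU, in percent
    const std::vector<float>& getUsage() const override { return m_usages; }

    // One entry per GPU, in KiB
    void getMemory(std::vector<int>& usedMem, std::vector<int>& totalMem) const override
    {
        usedMem = {512 * 1024, 256 * 1024};            // 512 MiB and 256 MiB used
        totalMem = {8 * 1024 * 1024, 8 * 1024 * 1024}; // 8 GiB per GPU
    }

private:
    bool m_watching = false;
    std::vector<float> m_usages{42.f, 17.f}; // fake readings for two GPUs
};
```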

And then inject it at runtime to the `uprofile` monitoring system:

```cpp
12 changes: 7 additions & 5 deletions lib/igpumonitor.h
@@ -10,11 +10,13 @@
#ifndef IGPUMONITOR_H_
#define IGPUMONITOR_H_

#include <vector>

namespace uprofile
{

/**
* Interface to implement for monitoring GPU usage and memory
* Interface to implement for monitoring GPU(s) usage and memory
*
* No generic abstraction of GPU metrics exists
* on Linux or Windows. So a specific IGPUMonitor class should
@@ -33,10 +35,10 @@ class IGPUMonitor
// Return if monitor is currently watching data
virtual bool watching() const = 0;

// Usage should be in percentage
virtual float getUsage() const = 0;
// usedMem and totalMem should be returned as KiB
virtual void getMemory(int& usedMem, int& totalMem) const = 0;
// Usage values should be in percentage (one entry per GPU)
virtual const std::vector<float>& getUsage() const = 0;
// usedMem and totalMem should be returned as KiB (one entry per GPU)
virtual void getMemory(std::vector<int>& usedMem, std::vector<int>& totalMem) const = 0;
};

}
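
Since `getUsage()` and `getMemory()` now report one entry per GPU, a caller could consume the interface roughly as sketched below. The function name and printing are illustrative only, the include path is an assumption, and units follow the comments above (percent and KiB).

```cpp
#include <cstdio>
#include <vector>
#include <igpumonitor.h> // include path assumed

// Print usage (%) and memory (KiB) for every GPU reported by a monitor
void printGpuStats(const uprofile::IGPUMonitor& monitor)
{
    std::vector<int> usedMem, totalMem;
    monitor.getMemory(usedMem, totalMem); // filled with one entry per GPU, in KiB

    const std::vector<float>& usages = monitor.getUsage(); // one entry per GPU, in percent
    for (size_t i = 0; i < usages.size(); ++i) {
        int used = i < usedMem.size() ? usedMem[i] : 0;
        int total = i < totalMem.size() ? totalMem[i] : 0;
        std::printf("GPU %zu: %.1f %% | %d / %d KiB\n", i, usages[i], used, total);
    }
}
```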
104 changes: 75 additions & 29 deletions lib/monitors/nvidiamonitor.cpp
@@ -20,39 +20,81 @@
#include <sys/wait.h>
#include <unistd.h>
#endif
#include <memory>

using namespace std;

const string nvidiaSmiCmdName = "/usr/bin/nvidia-smi";
const string errorMsg = "Failed to monitor nvidia-smi process";

struct RawMetric {
string index;
string usage;
string usedMem;
string totalMem;
};

#if defined(__linux__)
int read_nvidia_smi_stdout(int fd, string& gpuUsage, string& usedMem, string& totalMem)
int read_nvidia_smi_stdout(int fd, vector<RawMetric>& metrics)
{
string line;
while (line.find('\n') == string::npos) { // full line read
char buffer[4096];
ssize_t count = read(fd, buffer, sizeof(buffer)); // if child process crashes, we gonna be blocked here forever
if (count == -1) {
return errno;
} else if (count > 0) { // there is something to read
line += string(buffer, count);
size_t nbCollected = 0;
while (nbCollected < metrics.size()) {
string line;

// nvidia-smi dumps metrics for each GPU line by line
// so read the stdout line by line and fill the input metrics buffer
while (line.find('\n') == string::npos) { // full line read
char buffer[4096];
ssize_t count = read(fd, buffer, sizeof(buffer)); // if the child process crashes, this read blocks forever
if (count == -1) {
return errno;
} else if (count > 0) { // there is something to read
line += string(buffer, count);
}
}
}

// Remove colon to have only spaces and use istringstream
auto noSpaceEnd = remove(line.begin(), line.end(), ',');
if (noSpaceEnd == line.end()) { // output trace does not have comma so something went wrong with the command
return ENODATA;
// Remove commas to keep only spaces and use istringstream
auto noSpaceEnd = remove(line.begin(), line.end(), ',');
if (noSpaceEnd == line.end()) { // output line has no comma, so something went wrong with the command
return ENODATA;
}
line.erase(noSpaceEnd, line.end());
istringstream ss(line);
RawMetric metric;
ss >> metric.index >> metric.usage >> metric.usedMem >> metric.totalMem;
metrics[nbCollected] = metric;
nbCollected++;
}

line.erase(noSpaceEnd, line.end());
std::istringstream ss(line);
ss >> gpuUsage >> usedMem >> totalMem;

return 0;
}
#endif

uprofile::NvidiaMonitor::NvidiaMonitor()
{
// Run nvidia-smi once to retrieve the number of GPUs
// and initialize the usage and memory vectors
try {
char buffer[128];
string result = "";
string cmd = nvidiaSmiCmdName;
cmd += " --query-gpu=count --format=csv,noheader,nounits";
FILE* pipe = popen(cmd.c_str(), "r");
if (!pipe) {
throw runtime_error("popen() failed!");
}
while (!feof(pipe)) {
if (fgets(buffer, 128, pipe) != NULL)
result += buffer;
}
pclose(pipe);
m_nbGPUs = static_cast<size_t>(std::stoull(result));
m_totalMems = vector<int>(m_nbGPUs, 0);
m_usedMems = vector<int>(m_nbGPUs, 0);
m_gpuUsages = vector<float>(m_nbGPUs, 0.0);
} catch (const exception& err) {
cerr << errorMsg << endl;
}
}

uprofile::NvidiaMonitor::~NvidiaMonitor()
Expand All @@ -70,17 +112,17 @@ void uprofile::NvidiaMonitor::stop()
abortWatchGPU();
}

float uprofile::NvidiaMonitor::getUsage() const
const std::vector<float>& uprofile::NvidiaMonitor::getUsage() const
{
std::lock_guard<std::mutex> lock(m_mutex);
return m_gpuUsage;
return m_gpuUsages;
}

void uprofile::NvidiaMonitor::getMemory(int& usedMem, int& totalMem) const
void uprofile::NvidiaMonitor::getMemory(std::vector<int>& usedMem, std::vector<int>& totalMem) const
{
std::lock_guard<std::mutex> lock(m_mutex);
usedMem = m_usedMem;
totalMem = m_totalMem;
usedMem = m_usedMems;
totalMem = m_totalMems;
}

void uprofile::NvidiaMonitor::watchGPU(int period)
@@ -94,7 +136,7 @@ void uprofile::NvidiaMonitor::watchGPU(int period)
args[0] = (char*)"/usr/bin/nvidia-smi";
string period_arg = "-lms=" + to_string(period); // -lms repeats the query every <period> milliseconds
args[1] = (char*)period_arg.c_str();
args[2] = (char*)"--query-gpu=utilization.gpu,memory.used,memory.total";
args[2] = (char*)"--query-gpu=index,utilization.gpu,memory.used,memory.total";
args[3] = (char*)"--format=csv,noheader,nounits";
args[4] = NULL;
string output;
@@ -128,9 +170,9 @@ void uprofile::NvidiaMonitor::watchGPU(int period)
m_watching = true;
m_watcherThread = unique_ptr<std::thread>(new thread([stdout_fd, pid, this]() {
while (watching()) {
string gpuUsage, usedMem, totalMem;
vector<RawMetric> metrics(m_nbGPUs);
// if the child process crashes, an error is raised here and the thread ends
int err = read_nvidia_smi_stdout(stdout_fd, gpuUsage, usedMem, totalMem);
int err = read_nvidia_smi_stdout(stdout_fd, metrics);
if (err != 0) {
cerr << errorMsg << ": read_error = " << strerror(err) << endl;
unique_lock<mutex> lk(m_mutex);
@@ -140,9 +182,13 @@ void uprofile::NvidiaMonitor::watchGPU(int period)
}

unique_lock<mutex> lk(m_mutex);
m_gpuUsage = !gpuUsage.empty() ? stof(gpuUsage) : 0.f;
m_usedMem = !usedMem.empty() ? stoi(usedMem) * 1024 : 0; // MiB to KiB
m_totalMem = !totalMem.empty() ? stoi(totalMem) * 1024 : 0; // MiB to KiB
for (size_t i = 0; i < metrics.size(); ++i) {
const auto& m = metrics[i];
size_t idx = stoull(m.index);
m_gpuUsages[idx] = !m.usage.empty() ? stof(m.usage) : 0.f;
m_usedMems[idx] = !m.usedMem.empty() ? stoi(m.usedMem) * 1024 : 0; // MiB to KiB
m_totalMems[idx] = !m.totalMem.empty() ? stoi(m.totalMem) * 1024 : 0; // MiB to KiB
}
lk.unlock();
}
}));
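
Putting this file's pieces together: for a 200 ms period the watcher spawns `/usr/bin/nvidia-smi -lms=200 --query-gpu=index,utilization.gpu,memory.used,memory.total --format=csv,noheader,nounits`, which prints one comma-separated line per GPU on every refresh. The standalone sketch below is not part of the commit and the sample line is invented, but it shows how such a line splits into the `RawMetric` fields with the same comma-strip plus `istringstream` approach used in `read_nvidia_smi_stdout()`.

```cpp
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>

int main()
{
    // Hypothetical nvidia-smi output line: index, usage %, used MiB, total MiB
    std::string line = "1, 42, 1534, 8192";

    // Same technique as read_nvidia_smi_stdout(): drop commas, then stream-split on whitespace
    line.erase(std::remove(line.begin(), line.end(), ','), line.end());
    std::istringstream ss(line);

    std::string index, usage, usedMem, totalMem;
    ss >> index >> usage >> usedMem >> totalMem;

    std::cout << "GPU " << index << ": " << usage << " %, "
              << usedMem << " / " << totalMem << " MiB" << std::endl;
}
```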
13 changes: 6 additions & 7 deletions lib/monitors/nvidiamonitor.h
@@ -17,8 +17,6 @@
#include <thread>
#include <vector>

using namespace std;

namespace uprofile
{
class NvidiaMonitor : public IGPUMonitor
@@ -30,8 +28,8 @@ class NvidiaMonitor : public IGPUMonitor
UPROFAPI void start(int period) override;
UPROFAPI void stop() override;
UPROFAPI bool watching() const override;
UPROFAPI float getUsage() const override;
UPROFAPI void getMemory(int& usedMem, int& totalMem) const override;
UPROFAPI const std::vector<float>& getUsage() const override;
UPROFAPI void getMemory(std::vector<int>& usedMem, std::vector<int>& totalMem) const override;

private:
void watchGPU(int period);
@@ -40,9 +38,10 @@
mutable std::mutex m_mutex;
std::unique_ptr<std::thread> m_watcherThread;
bool m_watching = false;
int m_totalMem = 0;
int m_usedMem = 0;
float m_gpuUsage = 0.f;
std::vector<int> m_totalMems;
std::vector<int> m_usedMems;
std::vector<float> m_gpuUsages;
size_t m_nbGPUs = 0;
};

}
14 changes: 9 additions & 5 deletions lib/uprofileimpl.cpp
@@ -190,8 +190,10 @@ void UProfileImpl::dumpGpuUsage()
return;
}

float usage = m_gpuMonitor->getUsage();
write(ProfilingType::GPU_USAGE, {std::to_string(usage)});
auto const& usage = m_gpuMonitor->getUsage();
for (size_t i = 0; i < usage.size(); ++i) {
write(ProfilingType::GPU_USAGE, {std::to_string(i), std::to_string(usage[i])});
}
}

void UProfileImpl::dumpGpuMemory()
@@ -200,9 +202,11 @@
return;
}

int usedMem, totalMem;
m_gpuMonitor->getMemory(usedMem, totalMem);
write(ProfilingType::GPU_MEMORY, {std::to_string(usedMem), std::to_string(totalMem)});
vector<int> usedMems, totalMems;
m_gpuMonitor->getMemory(usedMems, totalMems);
for (size_t i = 0; i < usedMems.size(); ++i) {
write(ProfilingType::GPU_MEMORY, {std::to_string(i), std::to_string(usedMems[i]), std::to_string(totalMems[i])});
}
}

vector<float> UProfileImpl::getInstantCpuUsage()
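
With this change every GPU gets its own record per sampling tick, keyed by its index. Assuming `write()` joins the metric tag, the timestamp and the extra fields with colons, as the parsing comments in `tools/show-graph` describe, the event file could contain lines like the following (timestamps and values invented for illustration):

```
gpu:123456:0:42.000000
gpu:123456:1:17.000000
gpu_mem:123456:0:1572864:8388608
gpu_mem:123456:1:786432:8388608
```

The GPU index in the third field is what lets `show-graph` split the data into one trace per GPU.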
4 changes: 4 additions & 0 deletions sample/CMakeLists.txt
@@ -21,6 +21,10 @@ ELSE()
add_compile_options(-Wall -Werror)
ENDIF()

IF (GPU_MONITOR_NVIDIA)
ADD_DEFINITIONS(-DGPU_MONITOR_NVIDIA)
ENDIF()

SET(Sample_SRCS
main.cpp
)
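
`GPU_MONITOR_NVIDIA` is presumably a CMake option defined outside this diff (in the top-level `CMakeLists.txt`); when it is enabled at configure time, for example with something like `cmake -DGPU_MONITOR_NVIDIA=ON`, the compile definition above activates the GPU-monitoring code paths in the sample shown below.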
8 changes: 8 additions & 0 deletions sample/main.cpp
@@ -11,6 +11,9 @@
#include <stdlib.h>
#include <thread>
#include <uprofile.h>
#if defined(GPU_MONITOR_NVIDIA)
#include <monitors/nvidiamonitor.h>
#endif

void printSystemMemory()
{
@@ -32,6 +35,11 @@ int main(int argc, char* argv[])
printf(")\n");

// --- START MONITORING ---
#if defined(GPU_MONITOR_NVIDIA)
uprofile::addGPUMonitor(new uprofile::NvidiaMonitor);
uprofile::startGPUMemoryMonitoring(200);
uprofile::startGPUUsageMonitoring(200);
#endif
uprofile::startCPUUsageMonitoring(200);
uprofile::startSystemMemoryMonitoring(200);
uprofile::startProcessMemoryMonitoring(200);
38 changes: 24 additions & 14 deletions tools/show-graph
@@ -95,7 +95,7 @@ def create_cpu_graphs(df):


def create_sys_mem_graphs(df):
# 'sys_mem' metrics (format is 'mem:<timestamp>:<total>:<available>:<free>')
# 'sys_mem' metrics (format is 'sys_mem:<timestamp>:<total>:<available>:<free>')
if df.empty:
return None

@@ -108,7 +108,7 @@


def create_proc_mem_graphs(df):
# 'proc_mem' metrics (format is 'mem:<timestamp>:<rss>:<shared>')
# 'proc_mem' metrics (format is 'proc_mem:<timestamp>:<rss>:<shared>')
if df.empty:
return None

@@ -121,27 +121,34 @@


def create_gpu_mem_graphs(df):
# 'gpu_mem' metrics (format is 'gpu_mem:<timestamp>:<total>:<used>')
# 'gpu_mem' metrics (format is 'gpu_mem:<timestamp>:<gpu_number>:<used>:<total>')
if df.empty:
return None

names = ["Used", "Total"]
for index in range(2):
yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
y=(pd.to_numeric(df["extra_{}".format(index + 1)], downcast="integer") / 1024),
name=names[index],
showlegend=True)

gpus = pd.unique(df['extra_1'])
for gpu in gpus:
gpu_df = df[df['extra_1'] == gpu]
for index in range(2):
yield go.Scatter(x=pd.to_datetime(gpu_df['timestamp'], unit='ms'),
y=(pd.to_numeric(gpu_df["extra_{}".format(index + 2)], downcast="integer") / 1024),
name="GPU {} {}".format(gpu, names[index]),
showlegend=True)


def create_gpu_usage_graphs(df):
# 'gpu' metrics (format is 'gpu:<timestamp>:<percentage_usage>')
# 'gpu' metrics (format is 'gpu:<timestamp>:<gpu_number>:<percentage_usage>')
if df.empty:
return None

yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
y=pd.to_numeric(df['extra_1']),
name="GPU usage",
showlegend=True)
gpus = pd.unique(df['extra_1'])
for gpu in gpus:
gpu_df = df[df['extra_1'] == gpu]
yield go.Scatter(x=pd.to_datetime(gpu_df['timestamp'], unit='ms'),
y=pd.to_numeric(gpu_df['extra_2']),
name="GPU {} usage".format(gpu),
showlegend=True)


def build_graphs(input_files, metrics):
@@ -165,7 +172,10 @@ def build_graphs(input_files, metrics):
global_df = pd.DataFrame()
for input in input_files:
with open(input) as f:
global_df = pd.concat([global_df, read(f)])
global_df = pd.concat([global_df, read(f)], sort=True)

# Make sure data are sorted by ascending timestamp
global_df = global_df.sort_values('timestamp')

for index, metric in enumerate(metrics):
row_index = index + 1
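
Ordering matters here because Plotly connects points in row order: if event files are concatenated out of chronological order, each trace zig-zags back and forth along the time axis, which is the broken visualization the commit message refers to. Note that `sort_values` returns a new DataFrame rather than sorting in place, hence the reassignment above.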
