Skip to content

Commit a325bee

Browse files
committed
Add functions to get gpu/cpu/version info, add to benchmark output
1 parent 3925aa6 commit a325bee

File tree

3 files changed

+304
-1
lines changed

3 files changed

+304
-1
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ __pycache__/
1515

1616
# Default benchmark output directory
1717
benchmark_output
18+
19+
# Generated Config
20+
presto/docker/config/generated

presto/testing/performance_benchmarks/conftest.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pathlib import Path
1919
from .benchmark_keys import BenchmarkKeys
2020
from ..common.conftest import *
21+
from .system_info import get_system_specs, get_version_info
2122

2223

2324
def pytest_addoption(parser):
@@ -56,11 +57,19 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
5657
terminalreporter.write_line(line)
5758
terminalreporter.write_line("")
5859

59-
6060
def pytest_sessionfinish(session, exitstatus):
6161
bench_output_dir = session.config.getoption("--output-dir")
6262
tag = session.config.getoption("--tag")
6363
json_result = {}
64+
try:
65+
specs = get_system_specs()
66+
versions = get_version_info()
67+
json_result = {
68+
'hardware': specs,
69+
'versions': versions
70+
}
71+
except Exception as e:
72+
print(f"Error getting system specs: {e}")
6473

6574
if tag:
6675
bench_output_dir = f"{bench_output_dir}/{tag}"
Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import platform
16+
import subprocess
17+
import re
18+
import os
19+
20+
21+
def get_system_specs():
    """
    Get GPU and CPU specifications of the current machine.

    Returns:
        dict: A dictionary containing system specifications with keys
        (in insertion order):
            - arch: CPU architecture as reported by platform.machine()
              (e.g., "x86_64", "aarch64")
            - cpu_count: Number of logical CPUs as reported by
              os.cpu_count() (None if it cannot be determined)
            - cpu_name: Short CPU identifier from _get_cpu_name()
            - cpu_memory_gb: Total system memory in GB
            - gpu_count: Number of GPUs available
            - gpu_name: GPU model name
            - gpu_memory_gb: GPU memory in GB (for a single GPU)
    """
    # CPU-side information. Note that os.cpu_count() counts logical CPUs
    # (hardware threads), not physical cores.
    specs = {
        "arch": platform.machine(),
        "cpu_count": os.cpu_count(),
        "cpu_name": _get_cpu_name(),
        "cpu_memory_gb": _get_cpu_memory_gb(),
    }

    # GPU-side information, flattened into the same dictionary.
    gpu_info = _get_gpu_info()
    specs["gpu_count"] = gpu_info["count"]
    specs["gpu_name"] = gpu_info["name"]
    specs["gpu_memory_gb"] = gpu_info["memory_gb"]

    return specs
56+
57+
58+
def _get_cpu_name():
    """
    Return a short, lower-case identifier for the CPU.

    The model name is read from /proc/cpuinfo (Linux). If that file is
    unavailable or has no usable "model name" entry, platform.processor()
    is used instead.

    Returns:
        str: "intel" or "amd" when the vendor appears in the name
        (matched case-insensitively), otherwise the lower-cased name, or
        "unknown" if no name could be determined.
    """

    def _to_brand(raw_name):
        # Collapse the full model string to the vendor when it is recognized.
        lowered = raw_name.lower()
        for vendor in ("intel", "amd"):
            if vendor in lowered:
                return vendor
        return lowered

    try:
        with open("/proc/cpuinfo", "r") as f:
            for line in f:
                if "model name" in line:
                    # partition() keeps any further colons inside the value
                    # and does not raise when the separator is missing.
                    _, sep, value = line.partition(":")
                    value = value.strip()
                    if sep and value:
                        return _to_brand(value)
    except OSError:
        # /proc/cpuinfo is missing or unreadable (e.g. non-Linux host);
        # fall through to the platform-based lookup.
        pass

    # Fallback to platform processor
    processor = platform.processor()
    if processor:
        return _to_brand(processor)

    return "unknown"
87+
88+
89+
def _get_cpu_memory_gb():
    """
    Get total system memory in GB.

    Reads the "MemTotal:" entry of /proc/meminfo (Linux), whose value is
    parsed as kilobytes, and converts it to GB using a factor of
    1024 * 1024.

    Returns:
        int: Total memory rounded to the nearest whole GB, or 0 if the
        value could not be read or parsed.
    """
    try:
        with open("/proc/meminfo", "r") as f:
            for line in f:
                if line.startswith("MemTotal:"):
                    mem_kb = int(line.split()[1])
                    return round(mem_kb / (1024 * 1024))
    except (OSError, ValueError, IndexError):
        # File missing/unreadable, or the MemTotal line is malformed.
        # This is best-effort metadata, so fall back to 0 below.
        pass

    return 0
104+
105+
106+
def _get_gpu_info():
    """
    Get GPU information using nvidia-smi.

    Runs ``nvidia-smi --query-gpu=name,memory.total`` and parses one CSV
    row per GPU. Only the first row is used for the name and memory,
    assuming all GPUs in the machine are the same model.

    Returns:
        dict: Dictionary with keys 'count', 'name', and 'memory_gb'.
        If nvidia-smi is missing, fails, or prints no rows, the defaults
        ``{"count": 0, "name": "none", "memory_gb": 0}`` are returned.
    """
    gpu_info = {
        "count": 0,
        "name": "none",
        "memory_gb": 0,
    }

    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # nvidia-smi not available, not executable, or exited with an error.
        return gpu_info

    # Drop blank lines so that empty output counts as zero GPUs, not one.
    rows = [row.strip() for row in result.stdout.splitlines() if row.strip()]
    if not rows:
        return gpu_info
    gpu_info["count"] = len(rows)

    # Split on the LAST comma so a comma inside the GPU name cannot shift
    # the memory column.
    raw_name, sep, raw_memory = rows[0].rpartition(",")
    if not sep:
        return gpu_info

    # Simplify GPU name (e.g., "NVIDIA A100-SXM4-80GB" -> "a100")
    gpu_info["name"] = _simplify_gpu_name(raw_name.strip())

    try:
        # With "nounits" the memory column is a bare number; divide by 1024
        # to convert it to GB.
        gpu_info["memory_gb"] = round(float(raw_memory.strip()) / 1024)
    except ValueError:
        # Non-numeric memory field (e.g. "[N/A]"): keep the default of 0
        # instead of discarding the count and name collected above.
        pass

    return gpu_info
148+
149+
150+
def _simplify_gpu_name(full_name):
    """
    Simplify GPU name to a short identifier.

    Known data-center models are matched as whole tokens, so that e.g.
    "RTX A4000" is not mistaken for "a40" and "T400" is not mistaken for
    "t4". RTX/GTX cards are reduced to the family plus model number.
    Anything else has the vendor prefixes removed and everything after the
    first dash/underscore dropped.

    Examples:
        "NVIDIA A100-SXM4-80GB" -> "a100"
        "Tesla V100-PCIE-32GB" -> "v100"
        "NVIDIA GeForce RTX 3090" -> "rtx3090"

    Args:
        full_name: GPU name as reported by the driver tooling.

    Returns:
        str: Lower-case short identifier; falls back to the lower-cased
        full name if cleaning leaves nothing.
    """
    name_lower = full_name.lower()

    # Fixed model names, checked in priority order. The lookarounds require
    # the model to stand alone (not be part of a longer alphanumeric token).
    for model in ("a100", "a40", "a30", "v100", "t4", "h100"):
        if re.search(rf"(?<![a-z0-9]){model}(?![a-z0-9])", name_lower):
            return model

    # Consumer families: return only "<family><digits>", not the whole
    # string with the match substituted in place.
    for family in ("rtx", "gtx"):
        match = re.search(rf"{family}\s*(\d+)", name_lower)
        if match:
            return f"{family}{match.group(1)}"

    # If no pattern matches, return cleaned name
    # Remove common prefixes and extra info
    clean_name = re.sub(r"(nvidia|tesla|geforce)\s*", "", name_lower)
    clean_name = re.sub(r"[-_].*", "", clean_name)  # Remove everything after dash/underscore
    clean_name = clean_name.strip()

    return clean_name if clean_name else full_name.lower()
188+
189+
190+
def get_version_info(presto_repo=None, velox_repo=None):
    """
    Get version information for Presto, Velox, CUDA, and CUDA driver.

    Repository locations are resolved in this order: the explicit
    argument, then the PRESTO_REPO_PATH / VELOX_REPO_PATH environment
    variable, then the original built-in default path. Calling the
    function with no arguments and no environment variables set behaves
    exactly as before.

    Args:
        presto_repo: Optional path to the presto git checkout.
        velox_repo: Optional path to the velox git checkout.

    Returns:
        dict: A dictionary containing version information with keys:
            - version_presto: Git hash of the presto repository
            - version_velox: Git hash of the velox repository
            - version_cuda: CUDA version as returned by
              _get_cuda_versions() under "runtime"
            - version_cuda_driver: CUDA driver version
    """
    # NOTE(review): the built-in defaults are machine-specific paths; set
    # the environment variables or pass arguments on other hosts,
    # otherwise the hashes are reported as "unknown".
    if presto_repo is None:
        presto_repo = os.environ.get(
            "PRESTO_REPO_PATH", "/raid/johallaron/projects/presto"
        )
    if velox_repo is None:
        velox_repo = os.environ.get(
            "VELOX_REPO_PATH", "/raid/johallaron/projects/velox"
        )

    versions = {
        "version_presto": _get_git_hash(presto_repo),
        "version_velox": _get_git_hash(velox_repo),
    }

    # Get CUDA versions
    cuda_info = _get_cuda_versions()
    versions["version_cuda"] = cuda_info["runtime"]
    versions["version_cuda_driver"] = cuda_info["driver"]

    return versions
215+
216+
217+
def _get_git_hash(repo_path):
    """
    Look up the commit hash that HEAD points to in a git checkout.

    Args:
        repo_path: Path to the git repository

    Returns:
        str: Output of ``git rev-parse HEAD`` with surrounding whitespace
        removed, or "unknown" if git is not installed or the command
        exits with an error.
    """
    command = ["git", "-C", repo_path, "rev-parse", "HEAD"]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # git failed (e.g. not a repository) or the git binary is missing.
        return "unknown"

    return completed.stdout.strip()
237+
238+
239+
def _get_cuda_versions():
    """
    Get CUDA and driver versions by querying nvidia-smi.

    Returns:
        dict: Dictionary with keys 'runtime' and 'driver'. Each value is
        a version string, or "unknown" if it could not be determined
        (nvidia-smi missing, failing, or printing unexpected output).

    NOTE(review): 'runtime' is taken from the "CUDA Version:" field of the
    nvidia-smi header. That field is believed to reflect the CUDA version
    supported by the installed driver rather than the toolkit/runtime a
    binary was built against -- confirm before relying on it as the
    runtime version.
    """
    cuda_info = {
        "runtime": "unknown",
        "driver": "unknown",
    }

    try:
        # Driver version: one line per GPU; the first one is used.
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            check=True,
        )
        driver_lines = result.stdout.strip().splitlines()
        driver_version = driver_lines[0].strip() if driver_lines else ""
        if driver_version:
            # Keep "unknown" rather than storing an empty string when
            # nvidia-smi prints nothing.
            cuda_info["driver"] = driver_version

        # CUDA version: parsed from the plain nvidia-smi banner.
        result = subprocess.run(
            ["nvidia-smi"],
            capture_output=True,
            text=True,
            check=True,
        )
        # Example: "CUDA Version: 12.2"
        match = re.search(r"CUDA Version:\s*(\d+\.\d+)", result.stdout)
        if match:
            cuda_info["runtime"] = match.group(1)

    except (subprocess.CalledProcessError, OSError):
        # nvidia-smi not available, not executable, or exited with an
        # error; whatever was collected so far is returned.
        pass

    return cuda_info
279+
280+
281+
if __name__ == "__main__":
    # Manual smoke test: print both reports as pretty-printed JSON when the
    # module is executed directly.
    import json

    print("System Specs:")
    specs = get_system_specs()
    specs_json = json.dumps(specs, indent=2)
    print(specs_json)

    print("\nVersion Info:")
    versions = get_version_info()
    versions_json = json.dumps(versions, indent=2)
    print(versions_json)
291+

0 commit comments

Comments
 (0)