|
| 1 | +# Copyright (c) 2025, NVIDIA CORPORATION. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import platform |
| 16 | +import subprocess |
| 17 | +import re |
| 18 | +import os |
| 19 | + |
| 20 | + |
def get_system_specs():
    """
    Get GPU and CPU specifications of the current machine.

    Returns:
        dict: A dictionary containing system specifications with keys:
            - arch: CPU architecture (e.g., "x86_64", "aarch64")
            - cpu_count: Number of CPU cores (logical, per os.cpu_count)
            - cpu_name: CPU vendor/model identifier ("intel", "amd", or a
              lowercased model string; "unknown" if undetectable)
            - cpu_memory_gb: Total system memory in GB (0 if undetectable)
            - gpu_count: Number of GPUs visible to nvidia-smi (0 if none)
            - gpu_name: Simplified GPU model name (e.g. "a100"; "none" if no GPU)
            - gpu_memory_gb: Memory of a single GPU in GB (0 if no GPU)
    """
    specs = {}

    # CPU Architecture
    specs["arch"] = platform.machine()

    # CPU Count — NOTE(review): os.cpu_count() reports logical cores, not
    # physical ones; confirm which the consumers of this dict expect.
    specs["cpu_count"] = os.cpu_count()

    # CPU Name (vendor-simplified, see _get_cpu_name)
    specs["cpu_name"] = _get_cpu_name()

    # CPU Memory (total system RAM in GB)
    specs["cpu_memory_gb"] = _get_cpu_memory_gb()

    # GPU Information (count, simplified name, per-GPU memory)
    gpu_info = _get_gpu_info()
    specs["gpu_count"] = gpu_info["count"]
    specs["gpu_name"] = gpu_info["name"]
    specs["gpu_memory_gb"] = gpu_info["memory_gb"]

    return specs
| 56 | + |
| 57 | + |
| 58 | +def _get_cpu_name(): |
| 59 | + """Extract CPU model name from /proc/cpuinfo or platform info.""" |
| 60 | + try: |
| 61 | + # Try to get from /proc/cpuinfo (Linux) |
| 62 | + with open("/proc/cpuinfo", "r") as f: |
| 63 | + for line in f: |
| 64 | + if "model name" in line: |
| 65 | + # Extract the CPU name after the colon |
| 66 | + name = line.split(":")[1].strip() |
| 67 | + # Simplify to just the brand (Intel/AMD) |
| 68 | + if "Intel" in name: |
| 69 | + return "intel" |
| 70 | + elif "AMD" in name: |
| 71 | + return "amd" |
| 72 | + else: |
| 73 | + return name.lower() |
| 74 | + except Exception: |
| 75 | + pass |
| 76 | + |
| 77 | + # Fallback to platform processor |
| 78 | + processor = platform.processor() |
| 79 | + if processor: |
| 80 | + if "Intel" in processor or "intel" in processor: |
| 81 | + return "intel" |
| 82 | + elif "AMD" in processor or "amd" in processor: |
| 83 | + return "amd" |
| 84 | + return processor.lower() |
| 85 | + |
| 86 | + return "unknown" |
| 87 | + |
| 88 | + |
| 89 | +def _get_cpu_memory_gb(): |
| 90 | + """Get total system memory in GB.""" |
| 91 | + try: |
| 92 | + # Read from /proc/meminfo (Linux) |
| 93 | + with open("/proc/meminfo", "r") as f: |
| 94 | + for line in f: |
| 95 | + if line.startswith("MemTotal:"): |
| 96 | + # Extract memory in KB and convert to GB |
| 97 | + mem_kb = int(line.split()[1]) |
| 98 | + mem_gb = round(mem_kb / (1024 * 1024)) |
| 99 | + return mem_gb |
| 100 | + except Exception: |
| 101 | + pass |
| 102 | + |
| 103 | + return 0 |
| 104 | + |
| 105 | + |
| 106 | +def _get_gpu_info(): |
| 107 | + """ |
| 108 | + Get GPU information using nvidia-smi. |
| 109 | + |
| 110 | + Returns: |
| 111 | + dict: Dictionary with keys 'count', 'name', and 'memory_gb' |
| 112 | + """ |
| 113 | + gpu_info = { |
| 114 | + "count": 0, |
| 115 | + "name": "none", |
| 116 | + "memory_gb": 0 |
| 117 | + } |
| 118 | + |
| 119 | + try: |
| 120 | + # Run nvidia-smi to get GPU information |
| 121 | + result = subprocess.run( |
| 122 | + ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], |
| 123 | + capture_output=True, |
| 124 | + text=True, |
| 125 | + check=True |
| 126 | + ) |
| 127 | + |
| 128 | + lines = result.stdout.strip().split("\n") |
| 129 | + gpu_info["count"] = len(lines) |
| 130 | + |
| 131 | + if lines and lines[0]: |
| 132 | + # Parse first GPU info (assuming all GPUs are the same) |
| 133 | + parts = lines[0].split(",") |
| 134 | + if len(parts) >= 2: |
| 135 | + gpu_name = parts[0].strip() |
| 136 | + # Simplify GPU name (e.g., "NVIDIA A100-SXM4-80GB" -> "a100") |
| 137 | + gpu_info["name"] = _simplify_gpu_name(gpu_name) |
| 138 | + |
| 139 | + # Memory in MB, convert to GB |
| 140 | + memory_mb = float(parts[1].strip()) |
| 141 | + gpu_info["memory_gb"] = round(memory_mb / 1024) |
| 142 | + |
| 143 | + except (subprocess.CalledProcessError, FileNotFoundError): |
| 144 | + # nvidia-smi not available or failed |
| 145 | + pass |
| 146 | + |
| 147 | + return gpu_info |
| 148 | + |
| 149 | + |
| 150 | +def _simplify_gpu_name(full_name): |
| 151 | + """ |
| 152 | + Simplify GPU name to a short identifier. |
| 153 | + |
| 154 | + Examples: |
| 155 | + "NVIDIA A100-SXM4-80GB" -> "a100" |
| 156 | + "Tesla V100-PCIE-32GB" -> "v100" |
| 157 | + "NVIDIA GeForce RTX 3090" -> "rtx3090" |
| 158 | + """ |
| 159 | + name_lower = full_name.lower() |
| 160 | + |
| 161 | + # Common GPU patterns |
| 162 | + patterns = [ |
| 163 | + (r"a100", "a100"), |
| 164 | + (r"a40", "a40"), |
| 165 | + (r"a30", "a30"), |
| 166 | + (r"v100", "v100"), |
| 167 | + (r"t4", "t4"), |
| 168 | + (r"h100", "h100"), |
| 169 | + (r"rtx\s*(\d+)", r"rtx\1"), |
| 170 | + (r"gtx\s*(\d+)", r"gtx\1"), |
| 171 | + ] |
| 172 | + |
| 173 | + for pattern, replacement in patterns: |
| 174 | + match = re.search(pattern, name_lower) |
| 175 | + if match: |
| 176 | + if "\\" in replacement: # Has capture group |
| 177 | + return re.sub(pattern, replacement, name_lower) |
| 178 | + else: |
| 179 | + return replacement |
| 180 | + |
| 181 | + # If no pattern matches, return cleaned name |
| 182 | + # Remove common prefixes and extra info |
| 183 | + clean_name = re.sub(r"(nvidia|tesla|geforce)\s*", "", name_lower) |
| 184 | + clean_name = re.sub(r"[-_].*", "", clean_name) # Remove everything after dash/underscore |
| 185 | + clean_name = clean_name.strip() |
| 186 | + |
| 187 | + return clean_name if clean_name else full_name.lower() |
| 188 | + |
| 189 | + |
def get_version_info(presto_repo=None, velox_repo=None):
    """
    Get version information for Presto, Velox, CUDA, and CUDA driver.

    Args:
        presto_repo: Path to the presto git checkout. Defaults to the
            PRESTO_REPO_PATH environment variable, falling back to the
            historical hard-coded path for backward compatibility.
        velox_repo: Path to the velox git checkout. Defaults to the
            VELOX_REPO_PATH environment variable, with the same fallback.

    Returns:
        dict: A dictionary containing version information with keys:
            - version_presto: Git hash of the presto repository
            - version_velox: Git hash of the velox repository
            - version_cuda: CUDA runtime version
            - version_cuda_driver: CUDA driver version
        Each value is "unknown" when it cannot be determined.
    """
    # Allow the repo locations to be overridden instead of baking in a
    # machine-specific path; the old defaults remain for compatibility.
    if presto_repo is None:
        presto_repo = os.environ.get(
            "PRESTO_REPO_PATH", "/raid/johallaron/projects/presto")
    if velox_repo is None:
        velox_repo = os.environ.get(
            "VELOX_REPO_PATH", "/raid/johallaron/projects/velox")

    versions = {}

    # Git hashes of the two source trees.
    versions["version_presto"] = _get_git_hash(presto_repo)
    versions["version_velox"] = _get_git_hash(velox_repo)

    # CUDA runtime and driver versions (via nvidia-smi).
    cuda_info = _get_cuda_versions()
    versions["version_cuda"] = cuda_info["runtime"]
    versions["version_cuda_driver"] = cuda_info["driver"]

    return versions
| 215 | + |
| 216 | + |
| 217 | +def _get_git_hash(repo_path): |
| 218 | + """ |
| 219 | + Get the git commit hash for a repository. |
| 220 | + |
| 221 | + Args: |
| 222 | + repo_path: Path to the git repository |
| 223 | + |
| 224 | + Returns: |
| 225 | + str: Git commit hash, or "unknown" if not available |
| 226 | + """ |
| 227 | + try: |
| 228 | + result = subprocess.run( |
| 229 | + ["git", "-C", repo_path, "rev-parse", "HEAD"], |
| 230 | + capture_output=True, |
| 231 | + text=True, |
| 232 | + check=True |
| 233 | + ) |
| 234 | + return result.stdout.strip() |
| 235 | + except (subprocess.CalledProcessError, FileNotFoundError): |
| 236 | + return "unknown" |
| 237 | + |
| 238 | + |
| 239 | +def _get_cuda_versions(): |
| 240 | + """ |
| 241 | + Get CUDA runtime and driver versions. |
| 242 | + |
| 243 | + Returns: |
| 244 | + dict: Dictionary with keys 'runtime' and 'driver' |
| 245 | + """ |
| 246 | + cuda_info = { |
| 247 | + "runtime": "unknown", |
| 248 | + "driver": "unknown" |
| 249 | + } |
| 250 | + |
| 251 | + try: |
| 252 | + # Get CUDA driver version from nvidia-smi |
| 253 | + result = subprocess.run( |
| 254 | + ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"], |
| 255 | + capture_output=True, |
| 256 | + text=True, |
| 257 | + check=True |
| 258 | + ) |
| 259 | + driver_version = result.stdout.strip().split("\n")[0].strip() |
| 260 | + cuda_info["driver"] = driver_version |
| 261 | + |
| 262 | + # Get CUDA runtime version from nvidia-smi |
| 263 | + result = subprocess.run( |
| 264 | + ["nvidia-smi"], |
| 265 | + capture_output=True, |
| 266 | + text=True, |
| 267 | + check=True |
| 268 | + ) |
| 269 | + # Parse CUDA version from output (typically in header) |
| 270 | + # Example: "CUDA Version: 12.2" |
| 271 | + match = re.search(r"CUDA Version:\s*(\d+\.\d+)", result.stdout) |
| 272 | + if match: |
| 273 | + cuda_info["runtime"] = match.group(1) |
| 274 | + |
| 275 | + except (subprocess.CalledProcessError, FileNotFoundError): |
| 276 | + pass |
| 277 | + |
| 278 | + return cuda_info |
| 279 | + |
| 280 | + |
if __name__ == "__main__":
    # Example usage: dump both info dicts as pretty-printed JSON.
    import json

    print("System Specs:")
    print(json.dumps(get_system_specs(), indent=2))

    print("\nVersion Info:")
    print(json.dumps(get_version_info(), indent=2))
| 291 | + |
0 commit comments