18 changes: 16 additions & 2 deletions src/vllm_cli/system/capabilities.py
@@ -227,6 +227,8 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
"""
Determine GPU architecture and supported features from compute capability.

Includes support for Jetson platforms (Xavier SM 7.2, Orin SM 8.7, Thor SM 10.x).

Args:
capability: CUDA compute capability tuple (major, minor)

@@ -236,14 +238,14 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
     major, minor = capability
     arch_info = {}

-    # Architecture mapping
+    # Architecture mapping (including Jetson devices)
     if major == 12:
         # Blackwell workstation/consumer series (sm_120)
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
     elif major == 10:
-        # Blackwell consumer/lower-tier (sm_100)
+        # Blackwell data-center B100/B200 series (sm_100) or Jetson Thor
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
@@ -255,6 +257,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
arch_info["architecture"] = "Ada Lovelace"
arch_info["generation"] = "Current"
arch_info["sm_version"] = f"sm_{major}{minor}"
elif major == 8 and minor == 7:
# Jetson Orin (AGX, NX, Nano) uses SM 8.7
arch_info["architecture"] = "Ampere (Jetson Orin)"
arch_info["generation"] = "Current"
arch_info["sm_version"] = f"sm_{major}{minor}"
arch_info["is_jetson"] = True
elif major == 8 and minor >= 6:
arch_info["architecture"] = "Ampere"
arch_info["generation"] = "Current"
@@ -267,6 +275,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
arch_info["architecture"] = "Turing"
arch_info["generation"] = "Previous"
arch_info["sm_version"] = f"sm_{major}{minor}"
elif major == 7 and minor == 2:
# Jetson Xavier (AGX, NX) uses SM 7.2
arch_info["architecture"] = "Volta (Jetson Xavier)"
arch_info["generation"] = "Previous"
arch_info["sm_version"] = f"sm_{major}{minor}"
arch_info["is_jetson"] = True
elif major == 7 and minor < 5:
arch_info["architecture"] = "Volta"
arch_info["generation"] = "Previous"
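For reference, a quick sanity check of the new Jetson branches. This is a minimal sketch, not part of the PR; it assumes _determine_gpu_architecture is importable from vllm_cli.system.capabilities as laid out above.

from vllm_cli.system.capabilities import _determine_gpu_architecture

# Jetson Orin reports compute capability 8.7.
orin = _determine_gpu_architecture((8, 7))
assert orin["architecture"] == "Ampere (Jetson Orin)"
assert orin["sm_version"] == "sm_87"
assert orin["is_jetson"] is True

# Jetson Xavier reports 7.2.
xavier = _determine_gpu_architecture((7, 2))
assert xavier["architecture"] == "Volta (Jetson Xavier)"
assert xavier["is_jetson"] is True

# Discrete Ampere (e.g. 8.6 for RTX 30xx) must not pick up the Jetson flag.
rtx = _determine_gpu_architecture((8, 6))
assert rtx["architecture"] == "Ampere"
assert "is_jetson" not in rtx

Note that the minor == 7 and minor == 2 branches sit before the broader minor >= 6 and minor < 5 checks, so the Jetson cases win the elif chain.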
4 changes: 3 additions & 1 deletion src/vllm_cli/system/dependencies.py
@@ -768,12 +768,14 @@ def get_vllm_platform_info() -> Dict[str, Any]:

     arch_map = {
         (7, 0): "Volta V100",
+        (7, 2): "Volta (Jetson Xavier)",
         (7, 5): "Turing (T4, RTX 20xx)",
         (8, 0): "Ampere A100",
         (8, 6): "Ampere (RTX 30xx, A40)",
+        (8, 7): "Ampere (Jetson Orin)",
         (8, 9): "Ada Lovelace (RTX 40xx, L40)",
         (9, 0): "Hopper (H100)",
-        (10, 0): "Blackwell (B100/B200)",
+        (10, 0): "Blackwell (B100/B200, Jetson Thor)",
         (12, 0): "Blackwell (RTX 6000)",
     }

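For context, a sketch of how a lookup against this map could work. The torch calls are standard PyTorch API, but the surrounding get_vllm_platform_info() plumbing is not shown in this diff, so the wiring here is an assumption.

import torch

arch_map = {
    (7, 2): "Volta (Jetson Xavier)",
    (8, 7): "Ampere (Jetson Orin)",
    (10, 0): "Blackwell (B100/B200, Jetson Thor)",
    # ... remaining entries as in the diff above
}

if torch.cuda.is_available():
    cap = torch.cuda.get_device_capability(0)  # e.g. (8, 7) on Jetson Orin
    label = arch_map.get(cap, f"Unknown (sm_{cap[0]}{cap[1]})")
    print(f"Compute capability {cap[0]}.{cap[1]}: {label}")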
71 changes: 57 additions & 14 deletions src/vllm_cli/system/gpu.py
@@ -4,6 +4,7 @@

 Provides functions for detecting and gathering information about
 available GPUs using multiple methods (nvidia-smi, PyTorch fallback).
+Includes support for NVIDIA Jetson platforms (Orin, Thor, Xavier).
 """
 import logging
 import subprocess
@@ -12,10 +13,39 @@
 logger = logging.getLogger(__name__)


+def _safe_int(value: str, default: int = 0) -> int:
+    """
+    Safely convert a string to int, handling N/A and empty values.
+
+    This is particularly important for Jetson devices, where nvidia-smi
+    returns [N/A] for memory and utilization fields.
+
+    Args:
+        value: String value to convert
+        default: Default value if conversion fails
+
+    Returns:
+        Integer value or default
+    """
+    if not value:
+        return default
+    value = value.strip()
+    if value in ("[N/A]", "N/A", "[Not Supported]", ""):
+        return default
+    try:
+        return int(value)
+    except (ValueError, TypeError):
+        return default
+
+
 def get_gpu_info() -> List[Dict[str, Any]]:
     """
     Get information about available GPUs.

+    Supports both discrete NVIDIA GPUs and Jetson integrated GPUs.
+    Falls back to PyTorch detection if nvidia-smi fails or returns
+    incomplete data.
+
     Returns:
         List of GPU information dictionaries
     """
@@ -32,35 +62,48 @@ def get_gpu_info() -> List[Dict[str, Any]]:
             capture_output=True,
             text=True,
             check=True,
+            timeout=10,  # Add timeout for Jetson devices
         )

         for line in result.stdout.strip().split("\n"):
             if line:
                 parts = line.split(", ")
-                if len(parts) >= 7:
+                if len(parts) >= 2:  # At least index and name are required
+                    # Safely parse each field, handling [N/A] values (common on Jetson)
+                    memory_total = _safe_int(parts[2] if len(parts) > 2 else "0")
+                    memory_used = _safe_int(parts[3] if len(parts) > 3 else "0")
+                    memory_free = _safe_int(parts[4] if len(parts) > 4 else "0")
+
+                    # Derive memory_free when it is missing but the total is available
+                    if memory_free == 0 and memory_total > 0 and memory_used >= 0:
+                        memory_free = memory_total - memory_used
+
                     gpus.append(
                         {
-                            "index": int(parts[0]),
-                            "name": parts[1],
-                            "memory_total": int(parts[2])
-                            * 1024
-                            * 1024,  # Convert to bytes
-                            "memory_used": int(parts[3]) * 1024 * 1024,
-                            "memory_free": int(parts[4]) * 1024 * 1024,
-                            "utilization": int(parts[5]) if parts[5] else 0,
-                            "temperature": int(parts[6]) if parts[6] else 0,
+                            "index": _safe_int(parts[0]),
+                            "name": parts[1].strip() if len(parts) > 1 else "Unknown GPU",
+                            "memory_total": memory_total * 1024 * 1024,  # MB to bytes
+                            "memory_used": memory_used * 1024 * 1024,
+                            "memory_free": memory_free * 1024 * 1024,
+                            "utilization": _safe_int(parts[5] if len(parts) > 5 else "0"),
+                            "temperature": _safe_int(parts[6] if len(parts) > 6 else "0"),
                         }
                     )

+    except subprocess.TimeoutExpired:
+        logger.warning("nvidia-smi timed out")
     except (subprocess.CalledProcessError, FileNotFoundError):
-        # Try to get info from torch as fallback
-        gpus = _try_pytorch_gpu_detection()
+        logger.debug("nvidia-smi not available or failed")
     except Exception as e:
-        logger.warning(f"Unexpected error getting GPU info: {e}")
+        logger.debug(f"nvidia-smi parsing error: {e}")
+
+    # Fall back to PyTorch if nvidia-smi found no GPUs or returned incomplete data.
+    # This is essential for Jetson devices, where nvidia-smi reports [N/A] for memory.
+    if not gpus or all(gpu.get("memory_total", 0) == 0 for gpu in gpus):
+        logger.debug("Falling back to PyTorch GPU detection")
+        pytorch_gpus = _try_pytorch_gpu_detection()
+        if pytorch_gpus:
+            gpus = pytorch_gpus

     return gpus

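The query string itself sits above this hunk and is not part of the diff. The sketch below reconstructs it from the parts[0] through parts[6] indexing, so treat the exact field list as an assumption rather than a copy of the project's command.

import subprocess

result = subprocess.run(
    [
        "nvidia-smi",
        "--query-gpu=index,name,memory.total,memory.used,"
        "memory.free,utilization.gpu,temperature.gpu",
        "--format=csv,noheader,nounits",
    ],
    capture_output=True,
    text=True,
    check=True,
    timeout=10,
)
print(result.stdout)
# Discrete GPU (illustrative): 0, NVIDIA RTX A6000, 49140, 1250, 47890, 3, 41
# Jetson Orin (illustrative):  0, Orin (nvgpu), [N/A], [N/A], [N/A], [N/A], [N/A]

On Jetson, every numeric field can come back as [N/A], which is why both _safe_int and the memory_total == 0 check guarding the PyTorch fallback exist.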
4 changes: 4 additions & 0 deletions src/vllm_cli/ui/gpu_utils.py
@@ -208,6 +208,10 @@ def create_no_gpu_warning() -> Text:
warning_text.append(" • NVIDIA GPU is installed\n", style="dim white")
warning_text.append(" • NVIDIA drivers are installed\n", style="dim white")
warning_text.append(" • CUDA toolkit is available\n", style="dim white")
warning_text.append("\n", style="white")
warning_text.append("For Jetson devices (Orin, Thor, Xavier):\n", style="dim cyan")
warning_text.append(" • Ensure JetPack SDK is properly installed\n", style="dim white")
warning_text.append(" • Verify PyTorch with CUDA support is available\n", style="dim white")

return warning_text

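To preview the new hint in a terminal, a minimal sketch; it assumes Rich is installed (the function already returns a Rich Text object) and that it is importable as shown in this diff.

from rich.console import Console

from vllm_cli.ui.gpu_utils import create_no_gpu_warning

Console().print(create_no_gpu_warning())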