diff --git a/src/vllm_cli/system/capabilities.py b/src/vllm_cli/system/capabilities.py
index 99567c0..30d4b5d 100644
--- a/src/vllm_cli/system/capabilities.py
+++ b/src/vllm_cli/system/capabilities.py
@@ -227,6 +227,8 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
     """
     Determine GPU architecture and supported features from compute capability.
 
+    Includes support for Jetson platforms (Xavier SM 7.2, Orin SM 8.7, Thor SM 10.x).
+
     Args:
         capability: CUDA compute capability tuple (major, minor)
 
@@ -236,14 +238,14 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
     major, minor = capability
     arch_info = {}
 
-    # Architecture mapping
+    # Architecture mapping (including Jetson devices)
     if major == 12:
         # Blackwell B100/B200 series (sm_120)
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
     elif major == 10:
-        # Blackwell consumer/lower-tier (sm_100)
+        # Blackwell consumer/lower-tier (sm_100) or Jetson Thor
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
@@ -255,6 +257,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
         arch_info["architecture"] = "Ada Lovelace"
         arch_info["generation"] = "Current"
         arch_info["sm_version"] = f"sm_{major}{minor}"
+    elif major == 8 and minor == 7:
+        # Jetson Orin (AGX, NX, Nano) uses SM 8.7
+        arch_info["architecture"] = "Ampere (Jetson Orin)"
+        arch_info["generation"] = "Current"
+        arch_info["sm_version"] = f"sm_{major}{minor}"
+        arch_info["is_jetson"] = True
     elif major == 8 and minor >= 6:
         arch_info["architecture"] = "Ampere"
         arch_info["generation"] = "Current"
@@ -267,6 +275,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
         arch_info["architecture"] = "Turing"
         arch_info["generation"] = "Previous"
         arch_info["sm_version"] = f"sm_{major}{minor}"
+    elif major == 7 and minor == 2:
+        # Jetson Xavier (AGX, NX) uses SM 7.2
+        arch_info["architecture"] = "Volta (Jetson Xavier)"
+        arch_info["generation"] = "Previous"
+        arch_info["sm_version"] = f"sm_{major}{minor}"
+        arch_info["is_jetson"] = True
     elif major == 7 and minor < 5:
         arch_info["architecture"] = "Volta"
         arch_info["generation"] = "Previous"
diff --git a/src/vllm_cli/system/dependencies.py b/src/vllm_cli/system/dependencies.py
index a210c20..d91e954 100644
--- a/src/vllm_cli/system/dependencies.py
+++ b/src/vllm_cli/system/dependencies.py
@@ -768,12 +768,14 @@ def get_vllm_platform_info() -> Dict[str, Any]:
 
     arch_map = {
         (7, 0): "Volta V100",
+        (7, 2): "Volta (Jetson Xavier)",
         (7, 5): "Turing (T4, RTX 20xx)",
         (8, 0): "Ampere A100",
         (8, 6): "Ampere (RTX 30xx, A40)",
+        (8, 7): "Ampere (Jetson Orin)",
         (8, 9): "Ada Lovelace (RTX 40xx, L40)",
         (9, 0): "Hopper (H100)",
-        (10, 0): "Blackwell (B100/B200)",
+        (10, 0): "Blackwell (B100/B200, Jetson Thor)",
         (12, 0): "Blackwell (RTX 6000)",
     }
 
diff --git a/src/vllm_cli/system/gpu.py b/src/vllm_cli/system/gpu.py
index 7f05b62..9604584 100644
--- a/src/vllm_cli/system/gpu.py
+++ b/src/vllm_cli/system/gpu.py
@@ -4,6 +4,7 @@
 
 Provides functions for detecting and gathering information about available
 GPUs using multiple methods (nvidia-smi, PyTorch fallback).
+Includes support for NVIDIA Jetson platforms (Orin, Thor, Xavier).
 """
 import logging
 import subprocess
@@ -12,10 +13,39 @@
 logger = logging.getLogger(__name__)
 
 
+def _safe_int(value: str, default: int = 0) -> int:
+    """
+    Safely convert a string to int, handling N/A and empty values.
+
+    This is particularly important for Jetson devices where nvidia-smi
+    returns [N/A] for memory and utilization fields.
+
+    Args:
+        value: String value to convert
+        default: Default value if conversion fails
+
+    Returns:
+        Integer value or default
+    """
+    if not value:
+        return default
+    value = value.strip()
+    if value in ("[N/A]", "N/A", "[Not Supported]", ""):
+        return default
+    try:
+        return int(value)
+    except (ValueError, TypeError):
+        return default
+
+
 def get_gpu_info() -> List[Dict[str, Any]]:
     """
     Get information about available GPUs.
 
+    Supports both discrete NVIDIA GPUs and Jetson integrated GPUs.
+    Falls back to PyTorch detection if nvidia-smi fails or returns
+    incomplete data.
+
     Returns:
         List of GPU information dictionaries
     """
@@ -32,23 +62,31 @@ def get_gpu_info() -> List[Dict[str, Any]]:
             capture_output=True,
             text=True,
             check=True,
+            timeout=10,  # Add timeout for Jetson devices
         )
 
         for line in result.stdout.strip().split("\n"):
             if line:
                 parts = line.split(", ")
-                if len(parts) >= 7:
+                if len(parts) >= 2:  # At least index and name required
+                    # Safely parse each field, handling [N/A] values (common on Jetson)
+                    memory_total = _safe_int(parts[2] if len(parts) > 2 else "0")
+                    memory_used = _safe_int(parts[3] if len(parts) > 3 else "0")
+                    memory_free = _safe_int(parts[4] if len(parts) > 4 else "0")
+
+                    # Calculate memory_free if not provided but total is available
+                    if memory_free == 0 and memory_total > 0 and memory_used >= 0:
+                        memory_free = memory_total - memory_used
+
                     gpus.append(
                         {
-                            "index": int(parts[0]),
-                            "name": parts[1],
-                            "memory_total": int(parts[2])
-                            * 1024
-                            * 1024,  # Convert to bytes
-                            "memory_used": int(parts[3]) * 1024 * 1024,
-                            "memory_free": int(parts[4]) * 1024 * 1024,
-                            "utilization": int(parts[5]) if parts[5] else 0,
-                            "temperature": int(parts[6]) if parts[6] else 0,
+                            "index": _safe_int(parts[0]),
+                            "name": parts[1].strip() if len(parts) > 1 else "Unknown GPU",
+                            "memory_total": memory_total * 1024 * 1024,  # MB to bytes
+                            "memory_used": memory_used * 1024 * 1024,
+                            "memory_free": memory_free * 1024 * 1024,
+                            "utilization": _safe_int(parts[5] if len(parts) > 5 else "0"),
+                            "temperature": _safe_int(parts[6] if len(parts) > 6 else "0"),
                         }
                     )
 
@@ -56,11 +94,16 @@ def get_gpu_info() -> List[Dict[str, Any]]:
     except subprocess.TimeoutExpired:
         logger.warning("nvidia-smi timed out")
     except (subprocess.CalledProcessError, FileNotFoundError):
         logger.debug("nvidia-smi not available or failed")
-
-        # Try to get info from torch as fallback
-        gpus = _try_pytorch_gpu_detection()
     except Exception as e:
-        logger.warning(f"Unexpected error getting GPU info: {e}")
+        logger.debug(f"nvidia-smi parsing error: {e}")
+
+    # Fallback to PyTorch if nvidia-smi didn't find GPUs or returned incomplete data
+    # This is essential for Jetson devices where nvidia-smi returns [N/A] for memory
+    if not gpus or all(gpu.get("memory_total", 0) == 0 for gpu in gpus):
+        logger.debug("Falling back to PyTorch GPU detection")
+        pytorch_gpus = _try_pytorch_gpu_detection()
+        if pytorch_gpus:
+            gpus = pytorch_gpus
 
     return gpus
diff --git a/src/vllm_cli/ui/gpu_utils.py b/src/vllm_cli/ui/gpu_utils.py
index ca617a2..aa6c405 100644
--- a/src/vllm_cli/ui/gpu_utils.py
+++ b/src/vllm_cli/ui/gpu_utils.py
@@ -208,6 +208,10 @@ def create_no_gpu_warning() -> Text:
     warning_text.append(" • NVIDIA GPU is installed\n", style="dim white")
     warning_text.append(" • NVIDIA drivers are installed\n", style="dim white")
     warning_text.append(" • CUDA toolkit is available\n", style="dim white")
+    warning_text.append("\n", style="white")
+    warning_text.append("For Jetson devices (Orin, Thor, Xavier):\n", style="dim cyan")
+    warning_text.append(" • Ensure JetPack SDK is properly installed\n", style="dim white")
+    warning_text.append(" • Verify PyTorch with CUDA support is available\n", style="dim white")
 
     return warning_text