18 changes: 16 additions & 2 deletions src/vllm_cli/system/capabilities.py
@@ -227,6 +227,8 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
"""
Determine GPU architecture and supported features from compute capability.

Includes support for Jetson platforms (Xavier SM 7.2, Orin SM 8.7, Thor SM 10.x).

Args:
capability: CUDA compute capability tuple (major, minor)

@@ -236,14 +238,14 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
     major, minor = capability
     arch_info = {}

-    # Architecture mapping
+    # Architecture mapping (including Jetson devices)
     if major == 12:
         # Blackwell workstation/consumer series (sm_120)
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
     elif major == 10:
-        # Blackwell consumer/lower-tier (sm_100)
+        # Blackwell data-center B100/B200 series (sm_100) or Jetson Thor
         arch_info["architecture"] = "Blackwell"
         arch_info["generation"] = "Latest"
         arch_info["sm_version"] = f"sm_{major}{minor}"
@@ -255,6 +257,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
arch_info["architecture"] = "Ada Lovelace"
arch_info["generation"] = "Current"
arch_info["sm_version"] = f"sm_{major}{minor}"
elif major == 8 and minor == 7:
# Jetson Orin (AGX, NX, Nano) uses SM 8.7
arch_info["architecture"] = "Ampere (Jetson Orin)"
arch_info["generation"] = "Current"
arch_info["sm_version"] = f"sm_{major}{minor}"
arch_info["is_jetson"] = True
elif major == 8 and minor >= 6:
arch_info["architecture"] = "Ampere"
arch_info["generation"] = "Current"
@@ -267,6 +275,12 @@ def _determine_gpu_architecture(capability: Tuple[int, int]) -> Dict[str, Any]:
arch_info["architecture"] = "Turing"
arch_info["generation"] = "Previous"
arch_info["sm_version"] = f"sm_{major}{minor}"
elif major == 7 and minor == 2:
# Jetson Xavier (AGX, NX) uses SM 7.2
arch_info["architecture"] = "Volta (Jetson Xavier)"
arch_info["generation"] = "Previous"
arch_info["sm_version"] = f"sm_{major}{minor}"
arch_info["is_jetson"] = True
elif major == 7 and minor < 5:
arch_info["architecture"] = "Volta"
arch_info["generation"] = "Previous"
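For reference, a quick sanity check of the new Jetson branches. This is a minimal sketch, not part of the PR; it assumes _determine_gpu_architecture is importable from vllm_cli.system.capabilities as laid out above.

from vllm_cli.system.capabilities import _determine_gpu_architecture

# Jetson Orin reports compute capability 8.7.
orin = _determine_gpu_architecture((8, 7))
assert orin["architecture"] == "Ampere (Jetson Orin)"
assert orin["sm_version"] == "sm_87"
assert orin["is_jetson"] is True

# Jetson Xavier reports 7.2.
xavier = _determine_gpu_architecture((7, 2))
assert xavier["architecture"] == "Volta (Jetson Xavier)"
assert xavier["is_jetson"] is True

# Discrete Ampere (e.g. 8.6 for RTX 30xx) must not pick up the Jetson flag.
rtx = _determine_gpu_architecture((8, 6))
assert rtx["architecture"] == "Ampere"
assert "is_jetson" not in rtx

Note that the minor == 7 and minor == 2 branches sit before the broader minor >= 6 and minor < 5 checks, so the Jetson cases win the elif chain.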
4 changes: 3 additions & 1 deletion src/vllm_cli/system/dependencies.py
@@ -768,12 +768,14 @@ def get_vllm_platform_info() -> Dict[str, Any]:

     arch_map = {
         (7, 0): "Volta V100",
+        (7, 2): "Volta (Jetson Xavier)",
         (7, 5): "Turing (T4, RTX 20xx)",
         (8, 0): "Ampere A100",
         (8, 6): "Ampere (RTX 30xx, A40)",
+        (8, 7): "Ampere (Jetson Orin)",
         (8, 9): "Ada Lovelace (RTX 40xx, L40)",
         (9, 0): "Hopper (H100)",
-        (10, 0): "Blackwell (B100/B200)",
+        (10, 0): "Blackwell (B100/B200, Jetson Thor)",
         (12, 0): "Blackwell (RTX 6000)",
     }

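For context, a sketch of how a lookup against this map could work. The torch calls are standard PyTorch API, but the surrounding get_vllm_platform_info() plumbing is not shown in this diff, so the wiring here is an assumption.

import torch

arch_map = {
    (7, 2): "Volta (Jetson Xavier)",
    (8, 7): "Ampere (Jetson Orin)",
    (10, 0): "Blackwell (B100/B200, Jetson Thor)",
    # ... remaining entries as in the diff above
}

if torch.cuda.is_available():
    cap = torch.cuda.get_device_capability(0)  # e.g. (8, 7) on Jetson Orin
    label = arch_map.get(cap, f"Unknown (sm_{cap[0]}{cap[1]})")
    print(f"Compute capability {cap[0]}.{cap[1]}: {label}")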
71 changes: 57 additions & 14 deletions src/vllm_cli/system/gpu.py
@@ -4,6 +4,7 @@

 Provides functions for detecting and gathering information about
 available GPUs using multiple methods (nvidia-smi, PyTorch fallback).
+Includes support for NVIDIA Jetson platforms (Orin, Thor, Xavier).
 """
 import logging
 import subprocess
@@ -12,10 +13,39 @@
 logger = logging.getLogger(__name__)


+def _safe_int(value: str, default: int = 0) -> int:
+    """
+    Safely convert a string to int, handling N/A and empty values.
+
+    This is particularly important for Jetson devices, where nvidia-smi
+    returns [N/A] for memory and utilization fields.
+
+    Args:
+        value: String value to convert
+        default: Default value if conversion fails
+
+    Returns:
+        Integer value or default
+    """
+    if not value:
+        return default
+    value = value.strip()
+    if value in ("[N/A]", "N/A", "[Not Supported]", ""):
+        return default
+    try:
+        return int(value)
+    except (ValueError, TypeError):
+        return default
+
+
 def get_gpu_info() -> List[Dict[str, Any]]:
     """
     Get information about available GPUs.

+    Supports both discrete NVIDIA GPUs and Jetson integrated GPUs.
+    Falls back to PyTorch detection if nvidia-smi fails or returns
+    incomplete data.
+
     Returns:
         List of GPU information dictionaries
     """
@@ -32,35 +62,48 @@ def get_gpu_info() -> List[Dict[str, Any]]:
             capture_output=True,
             text=True,
             check=True,
+            timeout=10,  # Add timeout for Jetson devices
         )

         for line in result.stdout.strip().split("\n"):
             if line:
                 parts = line.split(", ")
-                if len(parts) >= 7:
+                if len(parts) >= 2:  # At least index and name are required
+                    # Safely parse each field, handling [N/A] values (common on Jetson)
+                    memory_total = _safe_int(parts[2] if len(parts) > 2 else "0")
+                    memory_used = _safe_int(parts[3] if len(parts) > 3 else "0")
+                    memory_free = _safe_int(parts[4] if len(parts) > 4 else "0")
+
+                    # Derive memory_free when it is missing but the total is available
+                    if memory_free == 0 and memory_total > 0 and memory_used >= 0:
+                        memory_free = memory_total - memory_used
+
                     gpus.append(
                         {
-                            "index": int(parts[0]),
-                            "name": parts[1],
-                            "memory_total": int(parts[2])
-                            * 1024
-                            * 1024,  # Convert to bytes
-                            "memory_used": int(parts[3]) * 1024 * 1024,
-                            "memory_free": int(parts[4]) * 1024 * 1024,
-                            "utilization": int(parts[5]) if parts[5] else 0,
-                            "temperature": int(parts[6]) if parts[6] else 0,
+                            "index": _safe_int(parts[0]),
+                            "name": parts[1].strip() if len(parts) > 1 else "Unknown GPU",
+                            "memory_total": memory_total * 1024 * 1024,  # MB to bytes
+                            "memory_used": memory_used * 1024 * 1024,
+                            "memory_free": memory_free * 1024 * 1024,
+                            "utilization": _safe_int(parts[5] if len(parts) > 5 else "0"),
+                            "temperature": _safe_int(parts[6] if len(parts) > 6 else "0"),
                         }
                     )

+    except subprocess.TimeoutExpired:
+        logger.warning("nvidia-smi timed out")
     except (subprocess.CalledProcessError, FileNotFoundError):
-        # Try to get info from torch as fallback
-        gpus = _try_pytorch_gpu_detection()
+        logger.debug("nvidia-smi not available or failed")
     except Exception as e:
-        logger.warning(f"Unexpected error getting GPU info: {e}")
+        logger.debug(f"nvidia-smi parsing error: {e}")
+
+    # Fall back to PyTorch if nvidia-smi found no GPUs or returned incomplete data.
+    # This is essential for Jetson devices, where nvidia-smi reports [N/A] for memory.
+    if not gpus or all(gpu.get("memory_total", 0) == 0 for gpu in gpus):
+        logger.debug("Falling back to PyTorch GPU detection")
+        pytorch_gpus = _try_pytorch_gpu_detection()
+        if pytorch_gpus:
+            gpus = pytorch_gpus

     return gpus

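The query string itself sits above this hunk and is not part of the diff. The sketch below reconstructs it from the parts[0] through parts[6] indexing, so treat the exact field list as an assumption rather than a copy of the project's command.

import subprocess

result = subprocess.run(
    [
        "nvidia-smi",
        "--query-gpu=index,name,memory.total,memory.used,"
        "memory.free,utilization.gpu,temperature.gpu",
        "--format=csv,noheader,nounits",
    ],
    capture_output=True,
    text=True,
    check=True,
    timeout=10,
)
print(result.stdout)
# Discrete GPU (illustrative): 0, NVIDIA RTX A6000, 49140, 1250, 47890, 3, 41
# Jetson Orin (illustrative):  0, Orin (nvgpu), [N/A], [N/A], [N/A], [N/A], [N/A]

On Jetson, every numeric field can come back as [N/A], which is why both _safe_int and the memory_total == 0 check guarding the PyTorch fallback exist.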
4 changes: 4 additions & 0 deletions src/vllm_cli/ui/gpu_utils.py
@@ -208,6 +208,10 @@ def create_no_gpu_warning() -> Text:
warning_text.append(" • NVIDIA GPU is installed\n", style="dim white")
warning_text.append(" • NVIDIA drivers are installed\n", style="dim white")
warning_text.append(" • CUDA toolkit is available\n", style="dim white")
warning_text.append("\n", style="white")
warning_text.append("For Jetson devices (Orin, Thor, Xavier):\n", style="dim cyan")
warning_text.append(" • Ensure JetPack SDK is properly installed\n", style="dim white")
warning_text.append(" • Verify PyTorch with CUDA support is available\n", style="dim white")

return warning_text

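To preview the new hint in a terminal, a minimal sketch; it assumes Rich is installed (the function already returns a Rich Text object) and that it is importable as shown in this diff.

from rich.console import Console

from vllm_cli.ui.gpu_utils import create_no_gpu_warning

Console().print(create_no_gpu_warning())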