diff --git a/README.md b/README.md
index e9e3cb2b4..16e0365d7 100644
--- a/README.md
+++ b/README.md
@@ -228,6 +228,80 @@ The response will contain the model's reply:
 }
 ```
 
+## NVIDIA NIM Support
+
+Docker Model Runner supports running NVIDIA NIM (NVIDIA Inference Microservices) containers directly, providing a simplified workflow for deploying NVIDIA's optimized inference containers.
+
+### Prerequisites
+
+- Docker with NVIDIA GPU support (nvidia-docker2 or Docker with the NVIDIA Container Runtime)
+- An NGC API key (required by some NIM models)
+- Docker login to the nvcr.io registry
+
+### Quick Start
+
+1. **Log in to the NVIDIA Container Registry:**
+
+```bash
+docker login nvcr.io
+Username: $oauthtoken
+Password: <your NGC API key>
+```
+
+2. **Set the NGC API key (if required by the model):**
+
+```bash
+export NGC_API_KEY=<your NGC API key>
+```
+
+3. **Run a NIM model:**
+
+```bash
+docker model run nvcr.io/nim/google/gemma-3-1b-it:latest
+```
+
+That's it! Docker Model Runner will:
+- Automatically detect that this is a NIM image
+- Pull the NIM container image
+- Configure it with GPU support, shared memory (16 GB), and NGC credentials
+- Start the container and wait for it to be ready
+- Provide an interactive chat interface
+
+### Features
+
+- **Automatic GPU Detection**: Configures NVIDIA GPU support automatically when available
+- **Persistent Caching**: Models are cached in `~/.cache/nim` (or `$LOCAL_NIM_CACHE` if set)
+- **Interactive Chat**: Supports both single-prompt and interactive chat modes
+- **Container Reuse**: Existing NIM containers are reused across runs
+
+### Example Usage
+
+**Single prompt:**
+```bash
+docker model run nvcr.io/nim/google/gemma-3-1b-it:latest "Explain quantum computing"
+```
+
+**Interactive chat:**
+```bash
+docker model run nvcr.io/nim/google/gemma-3-1b-it:latest
+> Tell me a joke
+...
+> /bye
+```
+
+### Configuration
+
+- **NGC_API_KEY**: Set this environment variable to authenticate with NVIDIA's services
+- **LOCAL_NIM_CACHE**: Override the default cache location (default: `~/.cache/nim`)
+
+### Technical Details
+
+NIM containers:
+- Run on port 8000 (localhost only)
+- Use 16 GB of shared memory by default
+- Mount `~/.cache/nim` for model caching
+- Support NVIDIA GPU acceleration when available
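+
+Because a running NIM exposes an OpenAI-compatible API on `127.0.0.1:8000`, you can also query it directly once it reports ready. A minimal example (the model name here is illustrative; list the names your NIM actually serves via `GET /v1/models`):
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+        "model": "google/gemma-3-1b-it",
+        "messages": [{"role": "user", "content": "Say hello in one sentence."}]
+      }'
+```
+
+To keep downloaded model weights on another disk, point `LOCAL_NIM_CACHE` at the desired directory before running (the path below is an example):
+
+```bash
+export LOCAL_NIM_CACHE=/mnt/storage/nim-cache
+docker model run nvcr.io/nim/google/gemma-3-1b-it:latest
+```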
+
 ## Metrics
 
 The Model Runner exposes [the metrics endpoint](https://github.com/ggml-org/llama.cpp/tree/master/tools/server#get-metrics-prometheus-compatible-metrics-exporter) of llama.cpp server at the `/metrics` endpoint. This allows you to monitor model performance, request statistics, and resource usage.
diff --git a/cmd/cli/commands/nim.go b/cmd/cli/commands/nim.go
new file mode 100644
index 000000000..2cb6b0a5d
--- /dev/null
+++ b/cmd/cli/commands/nim.go
@@ -0,0 +1,353 @@
+package commands
+
+import (
+    "bufio"
+    "bytes"
+    "context"
+    "encoding/json"
+    "fmt"
+    "io"
+    "net/http"
+    "os"
+    "strconv"
+    "strings"
+    "time"
+
+    "github.com/docker/docker/api/types/container"
+    "github.com/docker/docker/api/types/image"
+    "github.com/docker/docker/api/types/mount"
+    "github.com/docker/docker/client"
+    "github.com/docker/go-connections/nat"
+    gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
+    "github.com/spf13/cobra"
+)
+
+const (
+    // nimPrefix is the registry prefix for NVIDIA NIM images
+    nimPrefix = "nvcr.io/nim/"
+    // nimContainerPrefix is the prefix for NIM container names
+    nimContainerPrefix = "docker-model-nim-"
+    // nimDefaultPort is the default port for NIM containers
+    nimDefaultPort = 8000
+    // nimDefaultShmSize is the default shared memory size for NIM containers (16 GiB)
+    nimDefaultShmSize = 16 * 1024 * 1024 * 1024
+)
+
+// isNIMImage checks if the given model reference is an NVIDIA NIM image
+func isNIMImage(model string) bool {
+    return strings.HasPrefix(model, nimPrefix)
+}
+
+// nimContainerName generates a container name for a NIM image
+func nimContainerName(model string) string {
+    // Extract the model name from the reference
+    // (e.g., nvcr.io/nim/google/gemma-3-1b-it:latest -> google-gemma-3-1b-it)
+    parts := strings.Split(strings.TrimPrefix(model, nimPrefix), "/")
+    name := strings.Join(parts, "-")
+    // Remove the tag if present
+    if idx := strings.Index(name, ":"); idx != -1 {
+        name = name[:idx]
+    }
+    // Replace any remaining special characters
+    name = strings.ReplaceAll(name, ":", "-")
+    name = strings.ReplaceAll(name, "/", "-")
+    return nimContainerPrefix + name
+}
+
+// pullNIMImage pulls the NIM Docker image
+func pullNIMImage(ctx context.Context, dockerClient *client.Client, model string, cmd *cobra.Command) error {
+    cmd.Printf("Pulling NIM image %s...\n", model)
+
+    reader, err := dockerClient.ImagePull(ctx, model, image.PullOptions{})
+    if err != nil {
+        return fmt.Errorf("failed to pull NIM image: %w", err)
+    }
+    defer reader.Close()
+
+    // Stream the raw pull progress to the user
+    if _, err := io.Copy(cmd.OutOrStdout(), reader); err != nil {
+        return fmt.Errorf("failed to stream pull progress: %w", err)
+    }
+
+    return nil
+}
+
+// findNIMContainer finds an existing NIM container for the given model
+func findNIMContainer(ctx context.Context, dockerClient *client.Client, model string) (string, error) {
+    containerName := nimContainerName(model)
+
+    containers, err := dockerClient.ContainerList(ctx, container.ListOptions{
+        All: true,
+    })
+    if err != nil {
+        return "", fmt.Errorf("failed to list containers: %w", err)
+    }
+
+    for _, c := range containers {
+        for _, name := range c.Names {
+            if strings.TrimPrefix(name, "/") == containerName {
+                return c.ID, nil
+            }
+        }
+    }
+
+    return "", nil
+}
+
+// createNIMContainer creates and starts a NIM container
+func createNIMContainer(ctx context.Context, dockerClient *client.Client, model string, cmd *cobra.Command) (string, error) {
+    containerName := nimContainerName(model)
+
+    // Get the NGC API key from the environment
+    ngcAPIKey := os.Getenv("NGC_API_KEY")
+    if ngcAPIKey == "" {
+        cmd.Println("Warning: NGC_API_KEY environment variable is not set. NIM may require authentication.")
+    }
+
+    // Check for GPU support
+    gpu, err := gpupkg.ProbeGPUSupport(ctx, dockerClient)
+    if err != nil {
+        cmd.Printf("Warning: Failed to probe GPU support: %v\n", err)
+        gpu = gpupkg.GPUSupportNone
+    }
+
+    // Determine the cache directory
+    cacheDir := os.Getenv("LOCAL_NIM_CACHE")
+    if cacheDir == "" {
+        homeDir, err := os.UserHomeDir()
+        if err != nil {
+            return "", fmt.Errorf("failed to get home directory: %w", err)
+        }
+        cacheDir = homeDir + "/.cache/nim"
+    }
+
+    // Create the cache directory if it doesn't exist
+    if err := os.MkdirAll(cacheDir, 0755); err != nil {
+        return "", fmt.Errorf("failed to create NIM cache directory: %w", err)
+    }
+
+    // Container configuration
+    env := []string{}
+    if ngcAPIKey != "" {
+        env = append(env, "NGC_API_KEY="+ngcAPIKey)
+    }
+
+    portStr := strconv.Itoa(nimDefaultPort)
+    config := &container.Config{
+        Image: model,
+        Env:   env,
+        ExposedPorts: nat.PortSet{
+            nat.Port(portStr + "/tcp"): struct{}{},
+        },
+    }
+
+    hostConfig := &container.HostConfig{
+        ShmSize: nimDefaultShmSize,
+        Mounts: []mount.Mount{
+            {
+                Type:   mount.TypeBind,
+                Source: cacheDir,
+                Target: "/opt/nim/.cache",
+            },
+        },
+        PortBindings: nat.PortMap{
+            nat.Port(portStr + "/tcp"): []nat.PortBinding{
+                {
+                    HostIP:   "127.0.0.1",
+                    HostPort: portStr,
+                },
+            },
+        },
+    }
+
+    // Add GPU support if available
+    if gpu == gpupkg.GPUSupportCUDA {
+        if ok, err := gpupkg.HasNVIDIARuntime(ctx, dockerClient); err == nil && ok {
+            hostConfig.Runtime = "nvidia"
+        }
+        hostConfig.DeviceRequests = []container.DeviceRequest{{
+            Count:        -1,
+            Capabilities: [][]string{{"gpu"}},
+        }}
+    }
+
+    // Create the container
+    resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, containerName)
+    if err != nil {
+        return "", fmt.Errorf("failed to create NIM container: %w", err)
+    }
+
+    // Start the container
+    if err := dockerClient.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
+        return "", fmt.Errorf("failed to start NIM container: %w", err)
+    }
+
+    cmd.Printf("Started NIM container %s\n", containerName)
+    if gpu == gpupkg.GPUSupportCUDA {
+        cmd.Println("GPU support enabled")
+    } else {
+        cmd.Println("Warning: No GPU detected. NIM performance may be limited.")
+    }
+
+    return resp.ID, nil
+}
+
+// waitForNIMReady waits for the NIM container to be ready
+func waitForNIMReady(ctx context.Context, cmd *cobra.Command) error {
+    cmd.Println("Waiting for NIM to be ready (this may take several minutes)...")
+
+    client := &http.Client{
+        Timeout: 5 * time.Second,
+    }
+
+    maxRetries := 120 // 10 minutes at 5-second intervals
+    for i := 0; i < maxRetries; i++ {
+        resp, err := client.Get(fmt.Sprintf("http://127.0.0.1:%d/v1/models", nimDefaultPort))
+        if err == nil {
+            resp.Body.Close()
+            if resp.StatusCode == http.StatusOK {
+                cmd.Println("NIM is ready!")
+                return nil
+            }
+        }
+
+        if i%12 == 0 { // Print a status update every minute
+            elapsed := i * 5
+            cmd.Printf("Still waiting for NIM to initialize... (%d seconds elapsed)\n", elapsed)
+        }
+
+        select {
+        case <-ctx.Done():
+            return ctx.Err()
+        case <-time.After(5 * time.Second):
+            // Continue waiting
+        }
+    }
+
+    return fmt.Errorf("NIM failed to become ready within timeout. Check container logs with: docker logs $(docker ps -q --filter name=docker-model-nim-)")
+}
+
+// runNIMModel handles running an NVIDIA NIM image
+func runNIMModel(ctx context.Context, dockerClient *client.Client, model string, cmd *cobra.Command) error {
+    // Check whether a container for this model already exists
+    containerID, err := findNIMContainer(ctx, dockerClient, model)
+    if err != nil {
+        return err
+    }
+
+    if containerID != "" {
+        // The container exists; check whether it's running
+        inspect, err := dockerClient.ContainerInspect(ctx, containerID)
+        if err != nil {
+            return fmt.Errorf("failed to inspect NIM container: %w", err)
+        }
+
+        if !inspect.State.Running {
+            // The container exists but is not running, so start it
+            if err := dockerClient.ContainerStart(ctx, containerID, container.StartOptions{}); err != nil {
+                return fmt.Errorf("failed to start existing NIM container: %w", err)
+            }
+            cmd.Printf("Started existing NIM container %s\n", nimContainerName(model))
+        } else {
+            cmd.Printf("Using existing NIM container %s\n", nimContainerName(model))
+        }
+    } else {
+        // Pull the image
+        if err := pullNIMImage(ctx, dockerClient, model, cmd); err != nil {
+            return err
+        }
+
+        // Create and start the container
+        containerID, err = createNIMContainer(ctx, dockerClient, model, cmd)
+        if err != nil {
+            return err
+        }
+    }
+
+    // Wait for NIM to be ready
+    if err := waitForNIMReady(ctx, cmd); err != nil {
+        return err
+    }
+
+    return nil
+}
+
+// chatWithNIM sends a chat request to a running NIM container through its
+// OpenAI-compatible API on localhost:8000 and streams the reply to the terminal.
+func chatWithNIM(cmd *cobra.Command, model, prompt string) error {
+    client := &http.Client{
+        Timeout: 300 * time.Second,
+    }
+
+    // Build the request payload - use just the model base name without the
+    // registry prefix or tag
+    modelName := strings.TrimPrefix(model, nimPrefix)
+    if idx := strings.LastIndex(modelName, ":"); idx != -1 {
+        modelName = modelName[:idx]
+    }
+
+    payload := map[string]any{
+        "model": modelName,
+        "messages": []map[string]string{
+            {"role": "user", "content": prompt},
+        },
+        "stream": true,
+    }
+    reqBody, err := json.Marshal(payload)
+    if err != nil {
+        return fmt.Errorf("failed to encode request: %w", err)
+    }
+
+    req, err := http.NewRequestWithContext(cmd.Context(), http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/v1/chat/completions", nimDefaultPort), bytes.NewReader(reqBody))
+    if err != nil {
+        return fmt.Errorf("failed to create request: %w", err)
+    }
+
+    req.Header.Set("Content-Type", "application/json")
+
+    resp, err := client.Do(req)
+    if err != nil {
+        return fmt.Errorf("failed to send request to NIM: %w", err)
+    }
+    defer resp.Body.Close()
+
+    if resp.StatusCode != http.StatusOK {
+        body, _ := io.ReadAll(resp.Body)
+        return fmt.Errorf("NIM returned error status %d: %s", resp.StatusCode, string(body))
+    }
+
+    // Stream the response - each SSE event carries one JSON chunk
+    scanner := bufio.NewScanner(resp.Body)
+    for scanner.Scan() {
+        line := scanner.Text()
+
+        // SSE events start with "data: "
+        if !strings.HasPrefix(line, "data: ") {
+            continue
+        }
+        data := strings.TrimPrefix(line, "data: ")
+
+        // The stream is terminated by a literal [DONE] message
+        if data == "[DONE]" {
+            continue
+        }
+
+        // Decode the chunk and print any content deltas
+        var chunk struct {
+            Choices []struct {
+                Delta struct {
+                    Content string `json:"content"`
+                } `json:"delta"`
+            } `json:"choices"`
+        }
+        if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+            // Skip malformed events rather than aborting the stream
+            continue
+        }
+        for _, choice := range chunk.Choices {
+            cmd.Print(choice.Delta.Content)
+        }
+    }
+
+    if err := scanner.Err(); err != nil {
+        return fmt.Errorf("error reading response: %w", err)
+    }
+
+    return nil
+}
diff --git a/cmd/cli/commands/nim_test.go b/cmd/cli/commands/nim_test.go
new file mode 100644
index 000000000..c89e88cf4
--- /dev/null
+++ b/cmd/cli/commands/nim_test.go
@@ -0,0 +1,86 @@
+package commands
+
+import (
+    "testing"
+)
+
+func TestIsNIMImage(t *testing.T) {
+    tests := []struct {
+        name     string
+        model    string
+        expected bool
+    }{
+        {
+            name:     "NIM image with full path",
+            model:    "nvcr.io/nim/google/gemma-3-1b-it:latest",
+            expected: true,
+        },
+        {
+            name:     "NIM image without tag",
+            model:    "nvcr.io/nim/meta/llama-3.1-8b-instruct",
+            expected: true,
+        },
+        {
+            name:     "Regular Docker Hub image",
+            model:    "docker.io/library/ubuntu:latest",
+            expected: false,
+        },
+        {
+            name:     "Regular image without registry",
+            model:    "ubuntu:latest",
+            expected: false,
+        },
+        {
+            name:     "HuggingFace model",
+            model:    "hf.co/TheBloke/Llama-2-7B-Chat-GGUF",
+            expected: false,
+        },
+        {
+            name:     "Local model path",
+            model:    "./models/llama-2-7b.gguf",
+            expected: false,
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            result := isNIMImage(tt.model)
+            if result != tt.expected {
+                t.Errorf("isNIMImage(%q) = %v, want %v", tt.model, result, tt.expected)
+            }
+        })
+    }
+}
+
+func TestNIMContainerName(t *testing.T) {
+    tests := []struct {
+        name     string
+        model    string
+        expected string
+    }{
+        {
+            name:     "NIM image with tag",
+            model:    "nvcr.io/nim/google/gemma-3-1b-it:latest",
+            expected: "docker-model-nim-google-gemma-3-1b-it",
+        },
+        {
+            name:     "NIM image without tag",
+            model:    "nvcr.io/nim/meta/llama-3.1-8b-instruct",
+            expected: "docker-model-nim-meta-llama-3.1-8b-instruct",
+        },
+        {
+            name:     "NIM image with version tag",
+            model:    "nvcr.io/nim/nvidia/nemo:24.01",
+            expected: "docker-model-nim-nvidia-nemo",
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            result := nimContainerName(tt.model)
+            if result != tt.expected {
+                t.Errorf("nimContainerName(%q) = %q, want %q", tt.model, result, tt.expected)
+            }
+        })
+    }
+}
diff --git a/cmd/cli/commands/run.go b/cmd/cli/commands/run.go
index a3e672f7e..06a193dae 100644
--- a/cmd/cli/commands/run.go
+++ b/cmd/cli/commands/run.go
@@ -363,6 +363,63 @@ func newRunCmd() *cobra.Command {
 			}
 		}
 
+		// Check if this is an NVIDIA NIM image
+		if isNIMImage(model) {
+			// NIM images are handled differently - they run as Docker
+			// containers, so create a Docker client
+			dockerCLI := getDockerCLI()
+			dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
+			if err != nil {
+				return fmt.Errorf("failed to create Docker client: %w", err)
+			}
+
+			// Run the NIM model
+			if err := runNIMModel(cmd.Context(), dockerClient, model, cmd); err != nil {
+				return fmt.Errorf("failed to run NIM model: %w", err)
+			}
+
+			// If no prompt was provided, enter interactive mode
+			if prompt == "" {
+				scanner := bufio.NewScanner(os.Stdin)
+				cmd.Println("Interactive chat mode started. Type '/bye' to exit.")
Type '/bye' to exit.") + + for { + userInput, err := readMultilineInput(cmd, scanner) + if err != nil { + if err.Error() == "EOF" { + cmd.Println("\nChat session ended.") + break + } + return fmt.Errorf("Error reading input: %v", err) + } + + if strings.ToLower(strings.TrimSpace(userInput)) == "/bye" { + cmd.Println("Chat session ended.") + break + } + + if strings.TrimSpace(userInput) == "" { + continue + } + + if err := chatWithNIM(cmd, model, userInput); err != nil { + cmd.PrintErr(fmt.Errorf("failed to chat with NIM: %w", err)) + continue + } + + cmd.Println() + } + return nil + } + + // Single prompt mode + if err := chatWithNIM(cmd, model, prompt); err != nil { + return fmt.Errorf("failed to chat with NIM: %w", err) + } + cmd.Println() + return nil + } + if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), cmd); err != nil { return fmt.Errorf("unable to initialize standalone model runner: %w", err) }