docker · doringeman · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
diff --git a/llamacpp/Makefile b/llamacpp/Makefile
@@ -30,6 +30,9 @@ ifeq ($(DETECTED_OS),macOS)
 		-DGGML_NATIVE=OFF \
 		-DGGML_OPENMP=OFF \
 		-DLLAMA_CURL=OFF \
+		-DLLAMA_BUILD_COMMON=ON \
+		-DLLAMA_BUILD_SERVER=ON \
+		-DLLAMA_BUILD_TOOLS=ON \
 		-GNinja \
 		-S $(NATIVE_DIR)
 	@echo "Building..."
@@ -43,6 +46,9 @@ ifeq ($(DETECTED_OS),macOS)
 	rm -rf $(INSTALL_DIR)/lib/cmake
 	rm -rf $(INSTALL_DIR)/lib/pkgconfig
 	rm -rf $(INSTALL_DIR)/include
+	@echo "Fixing rpath..."
+	@# Swap the absolute build-tree rpath for one relative to the installed binary in a single rewrite, so a failure cannot leave the binary with no rpath.
+	install_name_tool -rpath "$(shell pwd)/$(BUILD_DIR)/bin" "@executable_path/../lib" $(INSTALL_DIR)/bin/com.docker.llama-server
 	@echo "Build complete! Binaries are in $(INSTALL_DIR)"
 else ifeq ($(DETECTED_OS),Linux)
 	@echo "Linux build not implemented yet"
@@ -87,9 +93,9 @@ install-dir:
 
 help:
 	@echo "Available targets:"
-	@echo "  build      	- Build llama.cpp (macOS only for now)"
-	@echo "  install-deps	- Install build dependencies"
-	@echo "  build-dir		- Print build directory path"
-	@echo "  install-dir	- Print install directory path"
-	@echo "  clean       	- Clean build artifacts"
-	@echo "  help        	- Show this help"
+	@echo "  build        - Build llama.cpp (macOS only for now)"
+	@echo "  install-deps - Install build dependencies"
+	@echo "  build-dir    - Print build directory path"
+	@echo "  install-dir  - Print install directory path"
+	@echo "  clean        - Clean build artifacts"
+	@echo "  help         - Show this help"
diff --git a/llamacpp/native/CMakeLists.txt b/llamacpp/native/CMakeLists.txt
@@ -41,10 +41,8 @@ if (DDLLAMA_BUILD_SERVER)
 
     add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}")
 
-    # Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
-    install(TARGETS llama-server
-        RUNTIME DESTINATION bin
-        RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
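+    # Install the renamed binary as a plain program file: it is tracked by a custom target (not an executable target), and install(TARGETS) has no RENAME option.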
+    install(PROGRAMS "${LLAMA_SERVER_DST}" DESTINATION bin)
 endif()
 
 if (WIN32 AND DDLLAMA_BUILD_UTILS)

diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go
@@ -222,8 +222,9 @@ func (r *runner) wait(ctx context.Context) error {
 			return r.err
 		default:
 		}
-		// Create and execute a request targeting a known-valid endpoint.
-		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/v1/models", http.NoBody)
+		// Create and execute a request targeting the health endpoint.
+		// Note: /health returns 503 during model loading, 200 when ready.
+		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)
 		if err != nil {
 			return fmt.Errorf("readiness request creation failed: %w", err)
 		}