ml-explore · zamderax · Jul 29, 2025 · Jul 30, 2025 · Jul 31, 2025 · Aug 3, 2025
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -14,8 +14,11 @@ parameters:
 jobs:
 
   mac_build_and_test:
+    parameters:
+      xcode-version:
+        type: string
     macos:
-      xcode: 15.3.0
+      xcode: << parameters.xcode-version >>
     resource_class: macos.m1.medium.gen1
     steps:
       - checkout
@@ -59,7 +62,10 @@ workflows:
         - not: << pipeline.parameters.nightly_build >>
         - not: << pipeline.parameters.weekly_build >>
     jobs:
-      - mac_build_and_test
+      - mac_build_and_test:
+          matrix:
+            parameters:
+              xcode-version: ["15.3.0", "16.3.0"]
 
   prb:
     when:
@@ -72,4 +78,7 @@ workflows:
       - apple/authenticate:
           context: pr-approval
       - mac_build_and_test:
+          matrix:
+            parameters:
+              xcode-version: ["15.3.0", "16.3.0"]
           requires: [ hold ]
diff --git a/Package.swift b/Package.swift
@@ -93,6 +93,9 @@ let package = Package(
                 // bnns instead of simd (accelerate)
                 "mlx/mlx/backend/cpu/gemms/simd_fp16.cpp",
                 "mlx/mlx/backend/cpu/gemms/simd_bf16.cpp",
+
+                // exclude CUDA backend files (not supported in this build)
+                "mlx/mlx/backend/cuda",
             ],
 
             cSettings: [

diff --git a/[email protected] b/[email protected]
@@ -0,0 +1,250 @@
+// swift-tools-version: 6.1
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+// Copyright © 2024 Apple Inc.
+
+import Foundation
+import PackageDescription
+
+// Function to get exclude list based on whether CUDA trait is enabled
+func getExcludeList(forCUDA: Bool) -> [String] {
+    var excludes = [
+        // vendor docs
+        "metal-cpp.patch",
+        "vendor-README.md",
+
+        // example code + mlx-c distributed
+        "mlx-c/examples",
+        "mlx-c/mlx/c/distributed.cpp",
+        "mlx-c/mlx/c/distributed_group.cpp",
+
+        // vendored library, include header only
+        "json",
+
+        // vendored library
+        "fmt/test",
+        "fmt/doc",
+        "fmt/support",
+        "fmt/src/os.cc",
+        "fmt/src/fmt.cc",
+
+        // mlx files that are not part of the build
+        "mlx/ACKNOWLEDGMENTS.md",
+        "mlx/CMakeLists.txt",
+        "mlx/CODE_OF_CONDUCT.md",
+        "mlx/CONTRIBUTING.md",
+        "mlx/LICENSE",
+        "mlx/MANIFEST.in",
+        "mlx/README.md",
+        "mlx/benchmarks",
+        "mlx/cmake",
+        "mlx/docs",
+        "mlx/examples",
+        "mlx/mlx.pc.in",
+        "mlx/pyproject.toml",
+        "mlx/python",
+        "mlx/setup.py",
+        "mlx/tests",
+
+        // opt-out of these backends (using metal)
+        "mlx/mlx/backend/no_metal",
+        "mlx/mlx/backend/no_gpu",
+
+        // build variants (we are opting _out_ of these)
+        "mlx/mlx/io/no_safetensors.cpp",
+        "mlx/mlx/io/gguf.cpp",
+        "mlx/mlx/io/gguf_quants.cpp",
+
+        // see PrepareMetalShaders -- don't build the kernels in place
+        "mlx/mlx/backend/metal/kernels",
+        "mlx/mlx/backend/metal/nojit_kernels.cpp",
+        "mlx/mlx/backend/metal/no_metal.cpp",
+
+        // do not build distributed support (yet)
+        "mlx/mlx/distributed/mpi/mpi.cpp",
+        "mlx/mlx/distributed/ring/ring.cpp",
+
+        // bnns instead of simd (accelerate)
+        "mlx/mlx/backend/cpu/gemms/simd_fp16.cpp",
+        "mlx/mlx/backend/cpu/gemms/simd_bf16.cpp",
+
+        // Always exclude the individual backend compiled files
+        // We use backend_compiled.cpp to conditionally include them
+        "mlx/mlx/backend/cpu/compiled.cpp",
+        "mlx/mlx/backend/cuda/compiled.cpp",
+        "mlx/mlx/backend/no_cpu/compiled.cpp",
+        "mlx-conditional/compiled_conditional.cpp",
+    ]
+
+    if forCUDA {
+        // When building with CUDA, exclude CPU backend
+        // (CUDA backend will be used)
+    } else {
+        // When building without CUDA, exclude CUDA backend directory
+        excludes.append("mlx/mlx/backend/cuda")
+        excludes.append("mlx/mlx/backend/no_cpu")
+    }
+
+    return excludes
+}
+
+let package = Package(
+    name: "mlx-swift",
+
+    platforms: [
+        .macOS("13.3"),
+        .iOS(.v16),
+        .tvOS(.v16),
+        .visionOS(.v1),
+    ],
+
+    products: [
+        // main targets
+        .library(name: "MLX", targets: ["MLX"]),
+        .library(name: "MLXRandom", targets: ["MLXRandom"]),
+        .library(name: "MLXNN", targets: ["MLXNN"]),
+        .library(name: "MLXOptimizers", targets: ["MLXOptimizers"]),
+        .library(name: "MLXFFT", targets: ["MLXFFT"]),
+        .library(name: "MLXLinalg", targets: ["MLXLinalg"]),
+        .library(name: "MLXFast", targets: ["MLXFast"]),
+    ],
+
+    traits: [
+        .trait(name: "CUDA")
+    ],
+
+    dependencies: [
+        // for Complex type
+        .package(url: "https://github.com/apple/swift-numerics", from: "1.0.0")
+    ],
+
+    targets: [
+        .target(
+            name: "Cmlx",
+            exclude: getExcludeList(forCUDA: false),  // Default to CPU backend
+
+            cSettings: [
+                .headerSearchPath("mlx"),
+                .headerSearchPath("mlx-c"),
+            ],
+
+            cxxSettings: [
+                .headerSearchPath("mlx"),
+                .headerSearchPath("mlx-c"),
+                .headerSearchPath("metal-cpp"),
+                .headerSearchPath("json/single_include/nlohmann"),
+                .headerSearchPath("fmt/include"),
+
+                .define("MLX_USE_ACCELERATE"),
+                .define("ACCELERATE_NEW_LAPACK"),
+                .define("_METAL_"),
+                .define("SWIFTPM_BUNDLE", to: "\"mlx-swift_Cmlx\""),
+                .define("METAL_PATH", to: "\"default.metallib\""),
+                .define("MLX_VERSION", to: "\"0.27.1\""),
+                .define("MLX_BUILD_CUDA", .when(traits: ["CUDA"])),
+            ],
+
+            linkerSettings: [
+                .linkedFramework("Foundation"),
+                .linkedFramework("Metal"),
+                .linkedFramework("Accelerate"),
+                .linkedLibrary("cudart", .when(traits: ["CUDA"])),
+                .linkedLibrary("cublas", .when(traits: ["CUDA"])),
+                .linkedLibrary("cufft", .when(traits: ["CUDA"])),
+                .linkedLibrary("cudnn", .when(traits: ["CUDA"])),
+            ]
+        ),
+
+        .testTarget(
+            name: "CmlxTests",
+            dependencies: ["Cmlx"]
+        ),
+
+        .target(
+            name: "MLX",
+            dependencies: [
+                "Cmlx",
+                .product(name: "Numerics", package: "swift-numerics"),
+            ],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency"),
+                .define("CUDA_AVAILABLE", .when(traits: ["CUDA"])),
+            ]
+        ),
+        .target(
+            name: "MLXRandom",
+            dependencies: ["MLX"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+        .target(
+            name: "MLXFast",
+            dependencies: ["MLX", "Cmlx"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+        .target(
+            name: "MLXNN",
+            dependencies: ["MLX", "MLXRandom", "MLXFast"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+        .target(
+            name: "MLXOptimizers",
+            dependencies: ["MLX", "MLXNN"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+        .target(
+            name: "MLXFFT",
+            dependencies: ["MLX"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+        .target(
+            name: "MLXLinalg",
+            dependencies: ["MLX"],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
+
+        .testTarget(
+            name: "MLXTests",
+            dependencies: [
+                "MLX", "MLXRandom", "MLXNN", "MLXOptimizers", "MLXFFT", "MLXLinalg", "MLXFast",
+            ]
+        ),
+
+        // ------
+        // Example programs
+
+        .executableTarget(
+            name: "Example1",
+            dependencies: ["MLX"],
+            path: "Source/Examples",
+            sources: ["Example1.swift"]
+        ),
+        .executableTarget(
+            name: "Tutorial",
+            dependencies: ["MLX"],
+            path: "Source/Examples",
+            sources: ["Tutorial.swift"]
+        ),
+
+    ],
+    cxxLanguageStandard: .gnucxx17
+)
+
+if Context.environment["MLX_SWIFT_BUILD_DOC"] == "1"
+    || Context.environment["SPI_GENERATE_DOCS"] == "1"
+{
+    // docc builder
+    package.dependencies.append(
+        .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.3.0")
+    )
+}
diff --git a/README.md b/README.md
@@ -69,6 +69,62 @@ dependencies: [.product(name: "MLX", package: "mlx-swift"),
 > SwiftPM (command line) cannot build the Metal shaders so the ultimate build has to be done
 > via Xcode.
 
+### CUDA Support (Swift 6.1+)
+
+MLX Swift now supports CUDA backend through Swift Package Traits (requires Swift 6.1 or later). This allows you to leverage NVIDIA GPUs for acceleration when available.
+
+#### Building with CUDA
+
+To build with CUDA support enabled:
+
+```bash
+swift build --traits CUDA
+```
+
+#### Using CUDA in Your Package
+
+When depending on mlx-swift with CUDA support in your `Package.swift`:
+
+```swift
+dependencies: [
+    .package(
+        url: "https://github.com/ml-explore/mlx-swift", 
+        from: "0.27.1",
+        traits: ["CUDA"]
+    )
+]
+```
+
+#### Requirements for CUDA
+
+- Swift 6.1 or later since this version Support Swift Package Traits 
+- CUDA Toolkit installed
+- Compatible NVIDIA GPU
+- cuDNN library
+
+#### How It Works
+
+The CUDA support uses Swift Package Manager's traits feature (SE-0450) to conditionally:
+- Compile CUDA backend code instead of CPU backend
+- Link CUDA libraries (cudart, cublas, cufft, cudnn)
+- Define appropriate compilation flags
+
+The implementation uses version-specific package manifests (SE-0135):
+- `Package.swift` - Standard manifest for Swift 5.10
+- `[email protected]` - Enhanced manifest with traits support
+
+#### Checking CUDA Availability
+
+In your Swift code, you can check if CUDA support is available:
+
+```swift
+#if CUDA_AVAILABLE
+print("CUDA backend is enabled")
+#else
+print("Using CPU backend")
+#endif
+```
+
 ### xcodebuild
 
 Although `SwiftPM` (command line) cannot build the Metal shaders, `xcodebuild` can and

diff --git a/Source/Cmlx/backend_compiled.cpp b/Source/Cmlx/backend_compiled.cpp
@@ -0,0 +1,13 @@
+// Backend compiled selector
+// Copyright © 2024 Apple Inc.
+// This file includes the appropriate backend based on build configuration
+
+#ifdef MLX_BUILD_CUDA
+    // Include CUDA backend
+    #include "mlx/mlx/backend/cuda/compiled.cpp"
+    #include "mlx/mlx/backend/no_cpu/compiled.cpp"
+#else
+    // Include CPU backend (default)
+    #include "mlx/mlx/backend/cpu/compiled.cpp"
+    #include "mlx/mlx/backend/cuda/no_cuda.cpp"
+#endif
diff --git a/Source/Cmlx/include/mlx/c/array.h b/Source/Cmlx/include/mlx/c/array.h
@@ -247,7 +247,7 @@ int mlx_array_item_float64(double* res, const mlx_array arr);
 /**
  * Access the value of a scalar array.
  */
-int mlx_array_item_complex64(float _Complex* res, const mlx_array arr);
+int mlx_array_item_complex64(void* res, const mlx_array arr);
 
 #ifdef HAS_FLOAT16
 /**
@@ -319,10 +319,10 @@ const float* mlx_array_data_float32(const mlx_array arr);
  */
 const double* mlx_array_data_float64(const mlx_array arr);
 /**
- * Returns a pointer to the array data, cast to `_Complex*`.
+ * Returns a pointer to the array data, cast to `void*`.
  * Array must be evaluated, otherwise returns NULL.
  */
-const float _Complex* mlx_array_data_complex64(const mlx_array arr);
+const void* mlx_array_data_complex64(const mlx_array arr);
 
 #ifdef HAS_FLOAT16
 /**

diff --git a/Source/Cmlx/mlx b/Source/Cmlx/mlx