WIP

tmattio · tmattio · commit 5202b164c9aa · 2025-06-04T11:35:03.000-04:00
diff --git a/nx/bench/bench_conv.ml b/nx/bench/bench_conv.ml
@@ -0,0 +1,42 @@
+open Nx
+
+(* Benchmark cases as (name, input shape, kernel shape) triples; only the medium case is currently enabled *)
+let test_specs =
+  [
+    (* ("tiny_4x4", [| 1; 1; 4; 4 |], [| 1; 1; 3; 3 |]); *)
+    (* ("small_8x8", [| 1; 1; 8; 8 |], [| 1; 1; 3; 3 |]); *)
+    ("medium_16x16", [| 1; 4; 16; 16 |], [| 8; 4; 3; 3 |]);
+    (* Skip large tests for now - they're too slow and might cause memory issues *)
+    (* ("channels_32x32", [| 1; 8; 32; 32 |], [| 16; 8; 3; 3 |]); *)
+    (* ("kernel_5x5", [| 1; 4; 16; 16 |], [| 8; 4; 5; 5 |]); *)
+    (* ("batch_16x16", [| 4; 4; 16; 16 |], [| 8; 4; 3; 3 |]); *)
+  ]
+
+(* Materialise the float32 input and kernel of every spec once, ahead of any timing *)
+let test_data =
+  let build (name, x_shape, k_shape) =
+    let x = ones float32 x_shape in
+    let k = ones float32 k_shape in
+    (name, x, k)
+  in
+  List.map build test_specs
+
+(* One Ubench case per test_data entry; each thunk runs Nx.convolve2d with `Valid padding on the prebuilt tensors *)
+let bench_original () =
+  List.map
+    (fun (name, x, k) ->
+      Ubench.create name (fun () ->
+          Nx.convolve2d ~padding_mode:`Valid x k |> ignore))
+    test_data
+
+let () =
+  print_string "Convolution Benchmarks\n";
+  print_string "=====================\n\n";
+
+  let tests = bench_original () in
+  let count = List.length tests in
+  Printf.printf "Running %d benchmarks...\n" count;
+  (* Push the banner out before the benchmark runs begin *)
+  flush stdout;
+
+  Ubench.run ~warmup:1 ~trials:3 ~min_time:0.01 tests |> Ubench.print_report
diff --git a/nx/bench/bench_nx.ml b/nx/bench/bench_nx.ml
@@ -1,117 +1,108 @@
-let string_of_dtype : type a b. (a, b) Nx.dtype -> string = function
-  | Float32 -> "float32"
-  | Float64 -> "float64"
-  | _ -> "other" (* Only float32 and float64 are used here *)
+open Nx
 
-(* Helper for binary operations: takes two arrays *)
-let binary_op_bench : type a b.
-    ((a, b) Nx.t -> (a, b) Nx.t -> (a, b) Nx.t) ->
-    int ->
-    (a, b) Nx.dtype ->
-    unit ->
-    unit =
- fun op size dtype ->
-  let shape = [| size; size |] in
+(* Build a tensor of ones with the given shape, then convert it to [dtype] *)
+let make_array dtype shape =
   (* TODO: Fix Nx.rand and use it instead of ones *)
-  let a = Nx.astype dtype (Nx.ones Nx.float32 shape) in
-  let b = Nx.astype dtype (Nx.ones Nx.float32 shape) in
-  fun () -> op a b |> ignore
+  Nx.ones float32 shape |> Nx.astype dtype
 
-(* Helper for unary operations: takes one array *)
-let unary_op_bench : type a b.
-    ((a, b) Nx.t -> (a, b) Nx.t) -> int -> (a, b) Nx.dtype -> unit -> unit =
- fun op size dtype ->
-  let shape = [| size; size |] in
-  (* TODO: Fix Nx.rand and use it instead of ones *)
-  let a = Nx.astype dtype (Nx.ones Nx.float32 shape) in
-  fun () -> op a |> ignore
+(* Benchmark functions. bench_add builds both operands once when size/dtype are applied; the returned thunk only runs Nx.add *)
+let bench_add : type a b. int -> (a, b) dtype -> unit -> unit =
+ fun size dtype ->
+  let a = make_array dtype [| size; size |] in
+  let b = make_array dtype [| size; size |] in
+  fun () -> Nx.add a b |> ignore
 
-(* Helper for reduction operations: reduces array to scalar/smaller array *)
-let reduction_op_bench : type a b c d.
-    ((a, b) Nx.t -> (c, d) Nx.t) -> int -> (a, b) Nx.dtype -> unit -> unit =
- fun op size dtype ->
-  let shape = [| size; size |] in
-  (* TODO: Fix Nx.rand and use it instead of ones *)
-  let a = Nx.astype dtype (Nx.ones Nx.float32 shape) in
-  fun () -> op a |> ignore
+let bench_mul : type a b. int -> (a, b) dtype -> unit -> unit =
+ fun size dtype -> (* operands are built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  let b = make_array dtype [| size; size |] in
+  fun () -> Nx.mul a b |> ignore
 
-(* Helper for matrix operations like matmul *)
-let matmul_bench : type a b. int -> (a, b) Nx.dtype -> unit -> unit =
- fun size dtype ->
-  (* TODO: Fix Nx.rand and use it instead of ones *)
-  let a = Nx.astype dtype (Nx.ones Nx.float32 [| size; size |]) in
-  let b = Nx.astype dtype (Nx.ones Nx.float32 [| size; size |]) in
-  fun () -> Nx.matmul a b |> ignore
+let bench_square : type a b. int -> (a, b) dtype -> unit -> unit =
+ fun size dtype -> (* operand is built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  fun () -> Nx.square a |> ignore
 
-(* List of operations to benchmark *)
-let operations : type a b.
-    int -> (a, b) Nx.dtype -> (string * (unit -> unit)) list =
- fun size dtype ->
-  List.concat
-    [
-      (* Binary operations *)
-      [
-        ("Addition", binary_op_bench Nx.add size dtype);
-        ("Multiplication", binary_op_bench Nx.mul size dtype);
-        (* Unary operations *)
-        ("Square", unary_op_bench Nx.square size dtype);
-      ];
-      (* Matrix operations - skip for large sizes *)
-      (if size <= 100 then [ ("MatMul", matmul_bench size dtype) ] else []);
-      (* Reductions *)
-      [ ("Sum", reduction_op_bench Nx.sum size dtype) ];
-    ]
+let bench_sqrt : type b. int -> (float, b) dtype -> unit -> unit =
+ fun size dtype -> (* operand is built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  fun () -> Nx.sqrt a |> ignore
 
-let float_operations : type b.
-    int -> (float, b) Nx.dtype -> (string * (unit -> unit)) list =
- fun size dtype ->
-  [
-    (* Float-specific unary operations *)
-    ("Sqrt", unary_op_bench Nx.sqrt size dtype);
-    ("Exp", unary_op_bench Nx.exp size dtype);
-  ]
+let bench_exp : type b. int -> (float, b) dtype -> unit -> unit =
+ fun size dtype -> (* operand is built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  fun () -> Nx.exp a |> ignore
 
-(* Generate benchmark tests for all combinations *)
-let tests ~sizes =
-  let tests_on_dtype (type a b) (dtype : (a, b) Nx.dtype) =
-    List.concat_map
-      (fun size ->
-        let ops = operations size dtype in
-        List.map
-          (fun (op_name, bench_fun) ->
-            let name =
-              Printf.sprintf "%s on %dx%d %s" op_name size size
-                (string_of_dtype dtype)
-            in
-            Ubench.create name bench_fun)
-          ops)
-      sizes
+let bench_sum : type a b. int -> (a, b) dtype -> unit -> unit =
+ fun size dtype -> (* operand is built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  fun () -> Nx.sum a |> ignore
+
+let bench_matmul : type a b. int -> (a, b) dtype -> unit -> unit =
+ fun size dtype -> (* operands are built here, once; only the thunk below is re-run *)
+  let a = make_array dtype [| size; size |] in
+  let b = make_array dtype [| size; size |] in
+  fun () -> Nx.matmul a b |> ignore
+
+let bench_conv2d : type a b. int -> int -> (a, b) dtype -> unit -> unit =
+ fun size kernel_size dtype -> (* input and kernel are built here, once; only the thunk below is re-run *)
+  let input = make_array dtype [| 1; 3; size; size |] in
+  let kernel = make_array dtype [| 16; 3; kernel_size; kernel_size |] in
+  fun () -> Nx.convolve2d ~padding_mode:`Same input kernel |> ignore
+
+(* Build the Ubench cases for Float32 and Float64 over every entry of [sizes]; case names are "<op> <size>x<size> <dtype>" *)
+let make_benchmarks () =
+  let sizes = [50; 100] in  (* Reduced from [50; 100; 500] for faster runs *)
+  let dtype_name : type a b. (a, b) dtype -> string = function
+    | Float32 -> "f32"
+    | Float64 -> "f64"
+    | _ -> "other"
   in
-  let tests_float_on_dtype (type b) (dtype : (float, b) Nx.dtype) =
-    List.concat_map
-      (fun size ->
-        let ops = float_operations size dtype in
-        List.map
-          (fun (op_name, bench_fun) ->
-            let name =
-              Printf.sprintf "%s on %dx%d %s" op_name size size
-                (string_of_dtype dtype)
-            in
-            Ubench.create name bench_fun)
-          ops)
-      sizes
+  
+  let bench_for_dtype : type a b. (a, b) dtype -> _ =
+    fun dtype ->
+      List.concat_map (fun size ->
+        let name s = Printf.sprintf "%s %dx%d %s" s size size (dtype_name dtype) in
+        List.concat [
+          (* Operations available for any dtype: add, mul, square, sum *)
+          [ Ubench.create (name "add") (bench_add size dtype);
+            Ubench.create (name "mul") (bench_mul size dtype);
+            Ubench.create (name "square") (bench_square size dtype);
+            Ubench.create (name "sum") (bench_sum size dtype);
+          ];
+          
+          (* sqrt/exp need a (float, _) dtype, so each branch passes the matched constructor explicitly; other dtypes get none *)
+          (match dtype with
+           | Float32 -> 
+               [ Ubench.create (name "sqrt") (bench_sqrt size Float32);
+                 Ubench.create (name "exp") (bench_exp size Float32); ]
+           | Float64 ->
+               [ Ubench.create (name "sqrt") (bench_sqrt size Float64);
+                 Ubench.create (name "exp") (bench_exp size Float64); ]
+           | _ -> []);
+          
+          (* matmul only when size < 100. NOTE(review): this skips size 100, which the previous `size <= 100` included - confirm intent *)
+          (if size < 100 then
+             [ Ubench.create (name "matmul") (bench_matmul size dtype) ]
+           else []);
+          
+          (* conv2d with 3x3 and 5x5 kernels, also only when size < 100 *)
+          (if size < 100 then
+             [ Ubench.create (name "conv2d-3x3") (bench_conv2d size 3 dtype);
+               Ubench.create (name "conv2d-5x5") (bench_conv2d size 5 dtype); ]
+           else []);
+        ]
+      ) sizes
   in
-  List.concat
-    [
-      tests_on_dtype Float32;
-      tests_on_dtype Float64;
-      tests_float_on_dtype Float32;
-      tests_float_on_dtype Float64;
-    ]
+  
+  List.concat [
+    bench_for_dtype Float32;
+    bench_for_dtype Float64;
+  ]
 
-(* Run the benchmarks *)
+(* Entry point: print the header, build every case, run them and print the report *)
 let () =
-  print_endline "# Nx Benchmarks";
-  let tests = tests ~sizes:[ 50; 100; 500 ] in
-  let results = Ubench.run ~warmup:1 ~trials:3 ~min_time:0.01 tests in
-  Ubench.print_report results
+  print_endline "# Nx Benchmarks\n";
+  let cases = make_benchmarks () in
+  Ubench.run ~warmup:1 ~trials:2 ~min_time:0.001 cases
+  |> Ubench.print_report
diff --git a/nx/bench/dune b/nx/bench/dune
@@ -2,3 +2,9 @@
  (name bench_nx)
  (modules bench_nx)
  (libraries nx ubench))
+
+
+(executable ; convolution-only benchmark, same libraries as bench_nx
+ (name bench_conv)
+ (modules bench_conv)
+ (libraries nx ubench))
diff --git a/nx/lib/native/internal.ml b/nx/lib/native/internal.ml
@@ -14,12 +14,12 @@ type ('a, 'b) t = {
 (* Helper to map logical indices through a chain of view transformations *)
 (* This is needed when views can't be composed into a single view *)
 let iterate_view_indices shape indices f =
-  (* Helper to iterate through all indices of a tensor *)
   let ndim = Array.length shape in
   if ndim = 0 then f indices
   else
     let rec iter_dim d =
-      if d = ndim then f (Array.copy indices)
+      if d = ndim then
+        f indices
       else
         for i = 0 to shape.(d) - 1 do
           indices.(d) <- i;
diff --git a/nx/lib/native/nx_native.ml b/nx/lib/native/nx_native.ml
@@ -77,17 +77,15 @@ let op_assign target_t source_t = Internal.blit source_t target_t
 
-(* Helper for binary operations that ensures inputs are materializable first *)
+(* Helper for binary operations: allocates an output shaped and typed like [a], then runs op_func on [a] and [b] as given (ensure_materializable is no longer called here) *)
 let binary_op op_func a b =
-  let a' = ensure_materializable a in
-  let b' = ensure_materializable b in
-  let ctx = a'.context in
-  let out_shape = Internal.shape a' in
-  let out_size = Internal.numel a' in
-  let out_dtype = a'.dtype in
+  let ctx = a.context in
+  let out_shape = Internal.shape a in
+  let out_size = Internal.numel a in
+  let out_dtype = a.dtype in
   let out_tensor =
     op_buffer ctx out_dtype out_size |> fun t ->
     with_view t (Lazy_view.create (Symbolic_shape.of_ints out_shape))
   in
-  op_func ctx a' b' out_tensor;
+  op_func ctx a b out_tensor;
   out_tensor
 
 (* Helper for binary comparison operations *)