@@ -337,6 +337,39 @@ module Arch = struct
337337 ;;
338338end
339339
340+ module Cpu_feature = struct
341+ type t =
342+ [ `SSE2
343+ | `SSE41
344+ | `AVX2
345+ | `AVX512
346+ | `NEON
347+ ]
348+
349+ let by_suffix fn =
350+ if String. ends_with fn ~suffix: " _sse2"
351+ then Some `SSE2
352+ else if String. ends_with fn ~suffix: " _sse41"
353+ then Some `SSE41
354+ else if String. ends_with fn ~suffix: " _avx2"
355+ then Some `AVX2
356+ else if String. ends_with fn ~suffix: " _avx512"
357+ then Some `AVX512
358+ else if String. ends_with fn ~suffix: " _neon"
359+ then Some `NEON
360+ else None
361+ ;;
362+
363+ let is_supported ~architecture (f : t option ) =
364+ match f, architecture with
365+ | Some `NEON , `arm64 -> true
366+ | Some `NEON , _ -> false
367+ | Some (`SSE2 | `SSE41 | `AVX2 | `AVX512 ), `amd64 -> true
368+ | Some (`SSE2 | `SSE41 | `AVX2 | `AVX512 ), _ -> false
369+ | None , _ -> true
370+ ;;
371+ end
372+
340373module Module : sig
341374 module Name : sig
342375 type t
@@ -1175,10 +1208,7 @@ module File_kind = struct
11751208 ; assembler : [ `C_comp | `Msvc_asm ]
11761209 }
11771210
1178- type c =
1179- { arch : [ `Arm64 | `X86 ] option
1180- ; flags : string list
1181- }
1211+ type c = { cpu_feature : Cpu_feature .t option }
11821212
11831213 type ml =
11841214 { kind : [ `Ml | `Mli | `Mll | `Mly ]
@@ -1223,27 +1253,7 @@ module File_kind = struct
12231253 else None , None , `C_comp
12241254 in
12251255 Some (Asm { syntax; arch; os; assembler })
1226- | ".c" ->
1227- let arch, flags =
1228- let fn = Filename. remove_extension fn in
1229- let check suffix = String. ends_with fn ~suffix in
1230- let x86 gnu _msvc =
1231- (* CR rgrinberg: select msvc flags on windows *)
1232- Some `X86 , gnu
1233- in
1234- if check " _sse2"
1235- then x86 [ " -msse2" ] [ " /arch:SSE2" ]
1236- else if check " _sse41"
1237- then x86 [ " -msse4.1" ] [ " /arch:AVX" ]
1238- else if check " _avx2"
1239- then x86 [ " -mavx2" ] [ " /arch:AVX2" ]
1240- else if check " _avx512"
1241- then x86 [ " -mavx512f" ; " -mavx512vl" ; " -mavx512bw" ] [ " /arch:AVX512" ]
1242- else if String. ends_with fn ~suffix: " _neon"
1243- then Some `Arm64 , []
1244- else None , []
1245- in
1246- Some (C { arch; flags })
1256+ | ".c" -> Some (C { cpu_feature = Cpu_feature. by_suffix fn })
12471257 | ".h" -> Some Header
12481258 | ".defaults.ml" ->
12491259 let fn' = fname ^ " .ml" in
@@ -1439,14 +1449,7 @@ module Library = struct
14391449 | Some `Amd64 , _ -> false
14401450 ;;
14411451
1442- let keep_c { File_kind. arch; flags = _ } ~architecture =
1443- match arch with
1444- | None -> true
1445- | Some `Arm64 -> architecture = `arm64
1446- | Some `X86 -> architecture = `amd64 || architecture = `x86_64
1447- ;;
1448-
1449- let make_c (c : File_kind.c ) ~fn ~os_type ~word_size =
1452+ let make_c (c : File_kind.c ) ~fn ~os_type ~word_size ~ccomp_type =
14501453 let extra_flags =
14511454 if
14521455 String. starts_with ~prefix: " blake3_" fn
@@ -1459,7 +1462,25 @@ module Library = struct
14591462 ]
14601463 else []
14611464 in
1462- { Source. flags = extra_flags @ c.flags; name = fn }
1465+ let cpu_feature_flags =
1466+ match ccomp_type, c.cpu_feature with
1467+ (* gcc / clang / mingw *)
1468+ | `Other , None -> []
1469+ | `Other , Some `SSE2 -> [ " -msse2" ]
1470+ | `Other , Some `SSE41 -> [ " -msse4.1" ]
1471+ | `Other , Some `AVX2 -> [ " -mavx2" ]
1472+ | `Other , Some `AVX512 -> [ " -mavx512f" ; " -mavx512vl" ; " -mavx512bw" ]
1473+ | `Other , Some `NEON -> []
1474+ (* MSVC *)
1475+ | `Msvc , None -> []
1476+ | `Msvc , Some `SSE2 -> [ " /arch:SSE2" ]
1477+ (* msvc doesn't have sse41 support so blake3 uses the avx flag instead. *)
1478+ | `Msvc , Some `SSE41 -> [ " /arch:AVX" ]
1479+ | `Msvc , Some `AVX2 -> [ " /arch:AVX2" ]
1480+ | `Msvc , Some `AVX512 -> [ " /arch:AVX512" ]
1481+ | `Msvc , Some `NEON -> []
1482+ in
1483+ { Source. flags = extra_flags @ cpu_feature_flags; name = fn }
14631484 ;;
14641485
14651486 let gen_build_info_module (ml : File_kind.ml ) =
@@ -1627,8 +1648,8 @@ module Library = struct
16271648 |> List. partition_map_skip ~f: (fun ((src : File_kind.t Source.t ), fn ) ->
16281649 match src.kind with
16291650 | C c ->
1630- if keep_c c ~architecture
1631- then `Left (make_c c ~fn ~os_type ~word_size )
1651+ if Cpu_feature. is_supported c.cpu_feature ~architecture
1652+ then `Left (make_c c ~fn ~os_type ~word_size ~ccomp_type )
16321653 else `Skip
16331654 | Ml _ -> `Middle fn
16341655 | Header -> `Skip
0 commit comments