From a46eea2b31c4c6a2f1e39834b0fa552ab6241288 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Mon, 6 Jan 2025 09:38:21 -0800 Subject: [PATCH] cpu: x64: brgemm: add f32:f16 support on avx512_core and avx2 --- src/cpu/x64/brgemm/brgemm_utils.cpp | 8 +++++++- src/cpu/x64/brgemm/jit_brgemm_kernel.cpp | 14 ++++++++++---- src/cpu/x64/cpu_isa_traits.hpp | 5 +++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/cpu/x64/brgemm/brgemm_utils.cpp b/src/cpu/x64/brgemm/brgemm_utils.cpp index 8f39f1182ac..f97e100d539 100644 --- a/src/cpu/x64/brgemm/brgemm_utils.cpp +++ b/src/cpu/x64/brgemm/brgemm_utils.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2022-2024 Intel Corporation +* Copyright 2022-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,6 +140,12 @@ void set_isa_impl(brgemm_desc_t *brg) { is_isa_ok(avx512_core_amx_fp16), avx512_core_amx_fp16, is_isa_ok(avx512_core_fp16), avx512_core_fp16, is_isa_ok(avx2_vnni_2), avx2_vnni_2); + } else if (brg->dt_a == data_type::f32 && brg->dt_b == data_type::f16) { + // Handle the f32:f16 case separately: f16 is up-converted to f32 on + // AVX512_CORE and AVX2. 
+ brg->isa_impl = utils::map(true, isa_undef, + is_isa_ok(avx512_core_fp16), avx512_core_fp16, + is_isa_ok(avx512_core), avx512_core, is_isa_ok(avx2), avx2); } else { brg->isa_impl = utils::map(true, isa_undef, is_isa_ok(avx512_core_fp16), avx512_core_fp16); diff --git a/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp b/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp index 33cfc48eddb..3ffbf3eb28c 100644 --- a/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp +++ b/src/cpu/x64/brgemm/jit_brgemm_kernel.cpp @@ -2158,10 +2158,12 @@ void jit_brgemm_kernel_t::gemm_microkernel(int bd_block2, bool is_bdb_tail, } else if (one_of(dt, data_type::s8, data_type::u8)) { uni_vpbroadcastd(v1, ptr[reg_aux_A + offset]); } else if (dt == data_type::f16) { - if (brg.isa_impl == avx2_vnni_2) + if (brg.isa_impl == avx2_vnni_2) { vbcstnesh2ps(v1, ptr[reg_aux_A + offset]); - else + } else if (is_superset(brg.isa_impl, avx512_core_fp16)) { + // Broadcast is not supported for legacy f16-conversions. vcvtph2psx(v1, ptr_b[reg_aux_A + offset]); + } } } @@ -2212,7 +2214,7 @@ void jit_brgemm_kernel_t::gemm_microkernel(int bd_block2, bool is_bdb_tail, vpermw(vmm_load, f16_perm_odd_vreg_, vmm_load); vcvtph2psx(vmm_load, Vmm_lower_t(vmm_load.getIdx())); } else { - vcvtph2psx(vmm_load, addr); + uni_vcvtph2psx(vmm_load, addr); } } else if (brg.dt_b == data_type::bf16 && brg.isa_impl == avx2_vnni_2) { @@ -2265,8 +2267,12 @@ void jit_brgemm_kernel_t::gemm_microkernel(int bd_block2, bool is_bdb_tail, else vpermw(vmm_load, f16_perm_odd_vreg_, vmm_load); vcvtph2psx(vmm_load, Vmm_lower_t(vmm_load.getIdx())); + } else if (is_ld_tail + && !is_superset(brg.isa_impl, avx512_core)) { + load_bytes(vmm_load, addr, ldb_B_offset(0, true)); + vcvtph2ps(vmm_load, Xmm(vmm_load.getIdx())); } else { - vcvtph2psx(vmm_load, addr); + uni_vcvtph2psx(vmm_load, addr); } } else if (brg.dt_b == data_type::bf16 && brg.isa_impl == avx2_vnni_2) { diff --git a/src/cpu/x64/cpu_isa_traits.hpp b/src/cpu/x64/cpu_isa_traits.hpp index 89233c48d4e..ffed0fed916 
100644 --- a/src/cpu/x64/cpu_isa_traits.hpp +++ b/src/cpu/x64/cpu_isa_traits.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2018-2024 Intel Corporation +* Copyright 2018-2025 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -497,7 +497,8 @@ inline data_type_t get_mac_emu_data_type(const data_type_t data_type, if (isa == avx2_vnni_2) return f32; break; case f16: - if (utils::one_of(isa, avx2_vnni_2, avx512_core_fp16)) + if (utils::one_of(isa, avx2, avx2_vnni_2, avx512_core, + avx512_core_fp16)) return f32; break; case f8_e5m2: