From a66743cdd41682950db137a4e01295a8e175cfbe Mon Sep 17 00:00:00 2001 From: sunxiaoxia2022 Date: Thu, 6 Nov 2025 14:40:38 +0800 Subject: [PATCH] [FORK][FIX] fix performance issues caused by using the thread pool --- src/cpu/matmul/cpu_matmul_list.cpp | 2 +- src/cpu/x64/jit_brgemm_conv_utils.cpp | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/cpu/matmul/cpu_matmul_list.cpp b/src/cpu/matmul/cpu_matmul_list.cpp index 6868128bcf0..2a2313a8f2b 100644 --- a/src/cpu/matmul/cpu_matmul_list.cpp +++ b/src/cpu/matmul/cpu_matmul_list.cpp @@ -95,11 +95,11 @@ const impl_list_item_t impl_list[] = REG_MATMUL_P({ CPU_INSTANCE_AVX512(brgemm_matmul_t,avx512_core) CPU_INSTANCE_AVX2(brgemm_matmul_t,avx2_vnni_2) CPU_INSTANCE_AVX2(brgemm_matmul_t,avx2_vnni) + CPU_INSTANCE_AVX2(brgemm_matmul_t, avx2) CPU_INSTANCE(gemm_f32_matmul_t) CPU_INSTANCE(gemm_bf16_matmul_t, f32) CPU_INSTANCE(gemm_bf16_matmul_t, bf16) CPU_INSTANCE(gemm_x8s8s32x_matmul_t) - CPU_INSTANCE_AVX2(brgemm_matmul_t, avx2) CPU_INSTANCE(ref_matmul_t) CPU_INSTANCE(ref_matmul_int8_t) // These implementations are enabled only when DNNL_EXPERIMENTAL_SPARSE diff --git a/src/cpu/x64/jit_brgemm_conv_utils.cpp b/src/cpu/x64/jit_brgemm_conv_utils.cpp index ec5f9da6ff2..d1d745d265d 100644 --- a/src/cpu/x64/jit_brgemm_conv_utils.cpp +++ b/src/cpu/x64/jit_brgemm_conv_utils.cpp @@ -2296,10 +2296,6 @@ status_t init_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, if (try_exec_type_res == false) return status::unimplemented; -#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL - adjust_nthr(jcp, src_d, dst_d); -#endif - // ============ end blocking =========================================== jcp.brg_type = (jcp.use_uker && one_of(jcp.exec_type, exec_base, exec_trans)) @@ -2530,10 +2526,6 @@ status_t init_1x1_conf(jit_brgemm_conv_conf_t &jcp, cpu_isa_t isa, } best_brgb.save_to_jcp(jcp); -#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL - adjust_nthr(jcp, src_d, dst_d); -#endif - // =============== end blocking ================================= jcp.brg_stride_a = jcp.ic_block * jcp.src_dsz;