From 4946ea8cda0a28d0e6ec3b05feb32e55356bbd44 Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Thu, 6 Feb 2025 13:51:41 -0800 Subject: [PATCH] xe: jit: gemm: move catalog generation to its own compilation unit GCC 11 can take upwards of 15 minutes to compile the gemm catalog. This patch moves kcatalog generation to its own compilation unit and disables variable tracking for that file (-fno-var-tracking) to limit compile times. --- src/gpu/intel/jit/gemm/CMakeLists.txt | 6 ++- src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp | 10 ++--- src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp | 38 +++++++++++++++++++ src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp | 36 ++++++++++++++++++ 4 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp create mode 100644 src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp diff --git a/src/gpu/intel/jit/gemm/CMakeLists.txt b/src/gpu/intel/jit/gemm/CMakeLists.txt index 17b401a0e4e..2036858ecb4 100644 --- a/src/gpu/intel/jit/gemm/CMakeLists.txt +++ b/src/gpu/intel/jit/gemm/CMakeLists.txt @@ -68,8 +68,12 @@ set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS include_directories_with_host_compiler_before(${OBJ_LIB} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/../ngen) -# Workaround for LTO bug in GCC 10, 11, 12 (possibly other versions) if(CMAKE_COMPILER_IS_GNUCC) + # Workaround for LTO bug in GCC 10, 11, 12 (possibly other versions) set_source_files_properties(generator/pieces/loop_sequencer.cpp PROPERTIES COMPILE_FLAGS -fno-lto) set_source_files_properties(generator/generator.cpp PROPERTIES COMPILE_FLAGS -fno-lto) + + + # Workaround for excessively long compile time in GCC 11, 12 (possibly other versions) + set_source_files_properties(gen_gemm_kernel_db.cpp PROPERTIES COMPILE_FLAGS -fno-var-tracking) endif() diff --git a/src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp b/src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp index 0170bf16803..e465d5375ab 100644 --- 
a/src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp +++ b/src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp @@ -17,6 +17,7 @@ #include "gpu/intel/jit/gemm/gen_gemm_kernel.hpp" #include "common/impl_registration.hpp" #include "gpu/intel/compute/device_info.hpp" +#include "gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp" #include "gpu/intel/jit/gemm/include/generator.hpp" #include "gpu/intel/jit/gemm/include/strategy_parser.hpp" #include "gpu/intel/jit/utils/ngen_type_bridge.hpp" @@ -28,11 +29,6 @@ namespace gpu { namespace intel { namespace jit { -#define _CATALOG_ gemm_catalog -#include "selector/db/kernel.db" -; -#undef _CATALOG_ - status_t gen_gemm_kernel_desc_t::create_generator( const compute::compute_engine_t &engine, compute::kernel_t &kernel) const { @@ -599,7 +595,7 @@ status_t gen_gemm_nocopy_kernel_desc_t::select_kernel(compute::gpu_arch_t arch, eval_params.batch = (batch_dims > 0); eval_params.deterministic = (mode & mode_deterministic); - entry_ = select(gemm_catalog, static_cast<int>(match_params.size()), + entry_ = select(catalog(), static_cast<int>(match_params.size()), match_params.data(), eval_params, aux_params_); if (!entry_) return status::unimplemented; @@ -743,7 +739,7 @@ status_t gen_gemm_xe_systolic_kernel_desc_t::select_kernel( eval_params.cConvert = (acc_type != c_type); eval_params.batch = (batch_dims > 0); - entry_ = select(gemm_catalog, match_params, eval_params, aux_params_); + entry_ = select(catalog(), match_params, eval_params, aux_params_); if (!entry_) return status::unimplemented; diff --git a/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp b/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp new file mode 100644 index 00000000000..0c2306412b6 --- /dev/null +++ b/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp @@ -0,0 +1,38 @@ +/******************************************************************************* +* Copyright 2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with 
the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp" + +namespace dnnl { +namespace impl { +namespace gpu { +namespace intel { +namespace jit { + +#define _CATALOG_ gemm_catalog +#include "selector/db/kernel.db" +#undef _CATALOG_ + +const kcatalog::Catalog &catalog() { + static const kcatalog::Catalog c = gemm_catalog; // function-local static: initialized once, on first call + return c; +} + +} // namespace jit +} // namespace intel +} // namespace gpu +} // namespace impl +} // namespace dnnl diff --git a/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp b/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp new file mode 100644 index 00000000000..ac4075f4590 --- /dev/null +++ b/src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp @@ -0,0 +1,36 @@ +/******************************************************************************* +* Copyright 2025 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#ifndef GPU_INTEL_JIT_GEMM_GEN_GEMM_KERNEL_DB_HPP +#define GPU_INTEL_JIT_GEMM_GEN_GEMM_KERNEL_DB_HPP + +#include "gpu/intel/jit/gemm/include/kernel_catalog.hpp" + +namespace dnnl { +namespace impl { +namespace gpu { +namespace intel { +namespace jit { + +const kcatalog::Catalog &catalog(); // GEMM kernel catalog accessor; defined in gen_gemm_kernel_db.cpp + +} // namespace jit +} // namespace intel +} // namespace gpu +} // namespace impl +} // namespace dnnl + +#endif