Skip to content

Commit f40b4b7

Browse files
committed
xe: jit: gemm: move catalog generation to its own compilation unit
GCC 11 can take upwards of 15 minutes to compile the gemm catalog. This patch moves kcatalog generation to its own compilation unit (gen_gemm_kernel_db.cpp) and compiles that unit with -fno-var-tracking under GCC to limit compile times.
1 parent 77859aa commit f40b4b7

File tree

4 files changed

+83
-9
lines changed

4 files changed

+83
-9
lines changed

Diff for: src/gpu/intel/jit/gemm/CMakeLists.txt

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#===============================================================================
2-
# Copyright 2024 Intel Corporation
2+
# Copyright 2024-2025 Intel Corporation
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -68,8 +68,12 @@ set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS
6868

6969
include_directories_with_host_compiler_before(${OBJ_LIB} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/../ngen)
7070

71-
# Workaround for LTO bug in GCC 10, 11, 12 (possibly other versions)
7271
if(CMAKE_COMPILER_IS_GNUCC)
72+
# Workaround for LTO bug in GCC 10, 11, 12 (possibly other versions)
7373
set_source_files_properties(generator/pieces/loop_sequencer.cpp PROPERTIES COMPILE_FLAGS -fno-lto)
7474
set_source_files_properties(generator/generator.cpp PROPERTIES COMPILE_FLAGS -fno-lto)
75+
76+
77+
# Workaround for excessively long compile time in GCC 11, 12 (possibly other versions)
78+
set_source_files_properties(gen_gemm_kernel_db.cpp PROPERTIES COMPILE_FLAGS -fno-var-tracking)
7579
endif()

Diff for: src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp

+3-7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "gpu/intel/jit/gemm/gen_gemm_kernel.hpp"
1818
#include "common/impl_registration.hpp"
1919
#include "gpu/intel/compute/device_info.hpp"
20+
#include "gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp"
2021
#include "gpu/intel/jit/gemm/include/generator.hpp"
2122
#include "gpu/intel/jit/gemm/include/strategy_parser.hpp"
2223
#include "gpu/intel/jit/utils/ngen_type_bridge.hpp"
@@ -28,11 +29,6 @@ namespace gpu {
2829
namespace intel {
2930
namespace jit {
3031

31-
#define _CATALOG_ gemm_catalog
32-
#include "selector/db/kernel.db"
33-
;
34-
#undef _CATALOG_
35-
3632
status_t gen_gemm_kernel_desc_t::create_generator(
3733
const compute::compute_engine_t &engine,
3834
compute::kernel_t &kernel) const {
@@ -599,7 +595,7 @@ status_t gen_gemm_nocopy_kernel_desc_t::select_kernel(compute::gpu_arch_t arch,
599595
eval_params.batch = (batch_dims > 0);
600596
eval_params.deterministic = (mode & mode_deterministic);
601597

602-
entry_ = select(gemm_catalog, static_cast<int>(match_params.size()),
598+
entry_ = select(catalog(), static_cast<int>(match_params.size()),
603599
match_params.data(), eval_params, aux_params_);
604600

605601
if (!entry_) return status::unimplemented;
@@ -743,7 +739,7 @@ status_t gen_gemm_xe_systolic_kernel_desc_t::select_kernel(
743739
eval_params.cConvert = (acc_type != c_type);
744740
eval_params.batch = (batch_dims > 0);
745741

746-
entry_ = select(gemm_catalog, match_params, eval_params, aux_params_);
742+
entry_ = select(catalog(), match_params, eval_params, aux_params_);
747743

748744
if (!entry_) return status::unimplemented;
749745

Diff for: src/gpu/intel/jit/gemm/gen_gemm_kernel_db.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*******************************************************************************
2+
* Copyright 2025 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#include "gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp"
18+
19+
namespace dnnl {
20+
namespace impl {
21+
namespace gpu {
22+
namespace intel {
23+
namespace jit {
24+
25+
#define _CATALOG_ gemm_catalog
26+
#include "selector/db/kernel.db"
27+
#undef _CATALOG_
28+
29+
// Accessor for the GEMM kernel catalog.
//
// The catalog object is a function-local static built from gemm_catalog (the
// symbol produced by including selector/db/kernel.db with _CATALOG_ defined
// above). It is constructed the first time this function is called and the
// same object is returned by reference on every call.
const kcatalog::Catalog &catalog() {
    static const kcatalog::Catalog c = gemm_catalog;
    return c;
}
33+
34+
} // namespace jit
35+
} // namespace intel
36+
} // namespace gpu
37+
} // namespace impl
38+
} // namespace dnnl

Diff for: src/gpu/intel/jit/gemm/gen_gemm_kernel_db.hpp

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*******************************************************************************
2+
* Copyright 2025 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#ifndef GPU_INTEL_JIT_GEMM_GEN_GEMM_KERNEL_DB_HPP
18+
#define GPU_INTEL_JIT_GEMM_GEN_GEMM_KERNEL_DB_HPP
19+
20+
#include "gpu/intel/jit/gemm/include/kernel_catalog.hpp"
21+
22+
namespace dnnl {
23+
namespace impl {
24+
namespace gpu {
25+
namespace intel {
26+
namespace jit {
27+
28+
// Returns a reference to the GEMM kernel catalog. The definition lives in
// gen_gemm_kernel_db.cpp, which is the only translation unit that includes
// selector/db/kernel.db.
const kcatalog::Catalog &catalog();
29+
30+
} // namespace jit
31+
} // namespace intel
32+
} // namespace gpu
33+
} // namespace impl
34+
} // namespace dnnl
35+
36+
#endif

0 commit comments

Comments
 (0)