From b6a6e82b8a478ff0f64e87c21d076bbdb5ea2116 Mon Sep 17 00:00:00 2001 From: Vin Huang Date: Wed, 24 Jun 2026 05:43:04 +0000 Subject: [PATCH] [hipSPARSELt] Remove legacy hip kernel launcher backend Remove the spmm/hip/ directory and its associated files that implemented a standalone HIP kernel launcher backend, which has been deprecated. Clean up all references to the removed code. --- .../src/include/hip_solution_adapter.hpp | 92 --- .../src/include/kernel_arguments.hpp | 294 ------- .../src/include/kernel_launcher.hpp | 531 ------------ .../src/include/rocsparselt_spmm_utils.hpp | 4 - .../rocsparselt/src/rocsparselt_auxiliary.cpp | 4 - .../src/spmm/hip/hip_solution_adapter.cpp | 377 --------- .../src/spmm/hip/kernel_arguments.cpp | 246 ------ .../src/spmm/hip/kernel_launcher.cpp | 781 ------------------ .../rocsparselt/src/spmm/rocsparselt_spmm.hpp | 4 - 9 files changed, 2333 deletions(-) delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/hip_solution_adapter.hpp delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_arguments.hpp delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_launcher.hpp delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/hip_solution_adapter.cpp delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_arguments.cpp delete mode 100644 projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_launcher.cpp diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/hip_solution_adapter.hpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/hip_solution_adapter.hpp deleted file mode 100644 index ea96b1f0698b..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/hip_solution_adapter.hpp +++ /dev/null @@ -1,92 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#pragma once - -#include "handle.h" -#include "kernel_arguments.hpp" -#include - -#include -#include - -class SolutionAdapter -{ -public: - SolutionAdapter(); - SolutionAdapter(std::string const& name); - ~SolutionAdapter(); - std::string name() const - { - return m_name; - } - hipError_t loadLibrary(std::string const& path); - hipError_t loadCodeObject(const _rocsparselt_handle* handle, std::string const& name); - hipError_t loadCodeObject(const _rocsparselt_handle* handle, - const void* image, - std::string const& name); - hipError_t loadCodeObjectBytes(const _rocsparselt_handle* handle, - std::vector const& bytes, - std::string const& name); - hipError_t launchKernel(const _rocsparselt_handle* handle, KernelInvocation const& kernel); - hipError_t launchKernel(const _rocsparselt_handle* handle, - KernelInvocation const& kernel, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent, - int iter = 1); - hipError_t launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels); - hipError_t launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent); - hipError_t launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels, - hipStream_t stream, - std::vector const& startEvents, - std::vector const& stopEvents); - hipError_t initKernel(std::string const& name); - size_t getKernelCounts(std::string const& category); - KernelParams* getKernelParams(std::string const& category); - -private: - using function_table = std::map; - - hipError_t getKernel(hipFunction_t& rv, std::string const& name); - std::mutex m_access; - std::unordered_map m_modules; - std::unordered_map m_kernels; - std::string m_name = "HipSolutionAdapter"; - std::vector m_loadedModuleNames; - std::vector m_lib_handles; - std::vector m_lib_functions; - std::vector m_loadedLibNames; - friend std::ostream& operator<<(std::ostream& stream, SolutionAdapter const& adapter); -}; -std::ostream& operator<<(std::ostream& stream, SolutionAdapter const& adapter); -std::ostream& operator<<(std::ostream& stream, std::shared_ptr const& ptr); diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_arguments.hpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_arguments.hpp deleted file mode 100644 index 0ea1081936d9..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_arguments.hpp +++ /dev/null @@ -1,294 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#pragma once - -#include "hipsparselt_ostream.hpp" -#include -#include -#include -#include -#include -#include -class KernelArguments -{ -public: - KernelArguments(bool log = true); - virtual ~KernelArguments(); - - void reserve(size_t bytes, size_t count); - - template - void append(std::string const& name, T value); - - template - void appendUnbound(std::string const& name); - - template - void bind(std::string const& name, T value); - - bool isFullyBound() const; - - void const* data() const; - size_t size() const; - - friend std::ostream& operator<<(std::ostream& stream, const KernelArguments& t); - friend class const_iterator; - - using ArgPair = std::pair; - class const_iterator : public std::iterator - { - public: - const_iterator(KernelArguments const& args); - const_iterator(KernelArguments const& args, std::string const& name); - const_iterator(const const_iterator& other) = default; - const_iterator& operator++(); - const_iterator operator++(int); - bool operator==(const const_iterator& rhs) const; - bool operator!=(const const_iterator& rhs) const; - ArgPair const& operator*() const; - ArgPair const* operator->() const; - void reset(); - template - operator T() const; - - private: - void assignCurrentArg(); - - std::vector::const_iterator m_currentArg; - KernelArguments const& m_args; - ArgPair m_value; - }; - - const_iterator begin() const; - const_iterator end() const; - -private: - enum - { - ArgOffset, - ArgSize, - ArgBound, - ArgString, - NumArgFields - }; - using Arg = std::tuple; - static_assert(std::tuple_size::value == NumArgFields, - "Enum for fields of Arg tuple doesn't match size of tuple."); - - void alignTo(size_t alignment); - - template - void append(std::string const& name, T value, bool bound); - - template - std::string stringForValue(T value, bool bound); - - void appendRecord(std::string const& name, Arg info); - - template - void writeValue(size_t offset, T value); - - std::vector m_data; - - std::vector m_names; - std::unordered_map m_argRecords; - - bool m_log; -}; - -KernelArguments::const_iterator begin(KernelArguments const&); -KernelArguments::const_iterator end(KernelArguments const&); - -template -inline void KernelArguments::append(std::string const& name, T value) -{ - append(name, value, true); -} - -template -inline void KernelArguments::appendUnbound(std::string const& name) -{ - append(name, static_cast(0), false); -} - -template -inline void KernelArguments::bind(std::string const& name, T value) -{ - if(!m_log) - { - throw std::runtime_error("Binding is not supported without logging."); - } - - auto it = m_argRecords.find(name); - if(it == m_argRecords.end()) - { - throw std::runtime_error("Attempt to bind unknown argument " + name); - } - - auto& record = it->second; - - if(std::get(record)) - { - throw std::runtime_error("Attempt to bind already bound argument " + name); - } - - if(sizeof(T) != std::get(record)) - { - throw std::runtime_error("Size mismatch in binding argument " + name); - } - - size_t offset = std::get(record); - - if(offset % alignof(T) != 0) - { - throw std::runtime_error("Alignment error in argument " + name + ": type mismatch?"); - } - - writeValue(offset, value); - - std::get(record) = stringForValue(value, true); - std::get(record) = true; -} - -template -inline std::string KernelArguments::stringForValue(T value, bool bound) -{ - if(!m_log) - return ""; - - if(!bound) - return ""; - - using castType = std::conditional_t::value, void const*, T>; - - hipsparselt_internal_ostream msg; - msg << static_cast(value); - return msg.str(); -} - -template -inline void KernelArguments::append(std::string const& name, T value, bool bound) -{ - alignTo(alignof(T)); - - size_t offset = m_data.size(); - size_t size = sizeof(T); - - if(m_log) - { - std::string valueString = stringForValue(value, bound); - appendRecord(name, Arg(offset, size, bound, valueString)); - } - - m_data.insert(m_data.end(), sizeof(value), 0); - writeValue(offset, value); -} - -template -inline void KernelArguments::writeValue(size_t offset, T value) -{ - if(offset + sizeof(T) > m_data.size()) - { - throw std::runtime_error("Value exceeds allocated bounds."); - } - - std::memcpy(&m_data[offset], &value, sizeof(T)); -} - -inline void KernelArguments::alignTo(size_t alignment) -{ - size_t extraElements = m_data.size() % alignment; - size_t padding = (alignment - extraElements) % alignment; - - m_data.insert(m_data.end(), padding, 0); -} - -inline void KernelArguments::appendRecord(std::string const& name, KernelArguments::Arg record) -{ - auto it = m_argRecords.find(name); - if(it != m_argRecords.end()) - { - throw std::runtime_error("Duplicate argument name: " + name); - } - - m_argRecords[name] = record; - m_names.push_back(name); -} - -template -KernelArguments::const_iterator::operator T() const -{ - if(sizeof(T) != m_value.second) - { - throw std::bad_cast(); - } - return *reinterpret_cast(const_cast(m_value.first)); -} - -/** - * \ingroup Launching - * Describes a single kernel invocation including kernel name, launch - * bounds, and arguments. - */ -struct KernelInvocation -{ -public: - std::string kernelName; - - dim3 workGroupSize; - dim3 numWorkGroups; - dim3 numWorkItems; - size_t sharedMemBytes = 0; - - KernelArguments args; -}; - -struct KernelParams -{ - char SolutionNameMin[256]; - int DataType; - int DestDataType; - int ComputeDataType; - bool TransposeA; - bool TransposeB; - unsigned int WorkGroup[3]; - unsigned int ThreadTile[3]; - unsigned int MacroTile[3]; - size_t StaggerU; - size_t DepthU; - size_t GlobalSplitU; - size_t StaggerStrideShift; - int WorkGroupMapping; - size_t PackBatchDims; - bool UseInitialStridesAB; - bool UseInitialStridesCD; - bool ActivationFused; - int GlobalAccumulation; - bool Activation; - bool ActivationHPA; - char ActivationType[32]; -}; diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_launcher.hpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_launcher.hpp deleted file mode 100644 index e86cbe790423..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/kernel_launcher.hpp +++ /dev/null @@ -1,531 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#pragma once - -#include "activation.hpp" -#include "handle.h" -#include "tuple_helper.hpp" -#include "utility.hpp" -#include -#include - -/** - * Zero-padding description - */ -struct ZeroPad -{ - ZeroPad(int32_t ai = -1, int32_t bi = -1, int64_t ps = 0, int64_t pe = 0) - : anchorIndex(ai) - , anchorPos(-1) - , boundIndex(bi) - , padStart(ps) - , padEnd(pe){}; - - int32_t anchorIndex; - int32_t anchorPos; //! position of anchorIndex in A or B tensor - int32_t boundIndex; - int32_t boundPos; //! position of anchroIndex in A or B tensor - int64_t padStart; - int64_t padEnd; - - bool valid() const - { - return anchorIndex != -1; - }; - std::string description() const; -}; -using ZeroPads = std::vector; - -/** - * Represents a pair of free indices in a tensor contraction. - */ -struct FreeIndex -{ - bool isA; //< True=index is in A; False=index is in B - size_t i; //< Dimension in A or B (depending on isA) - size_t c; //< Dimension of C which corresponds for this index - size_t d; //< Dimension of D which corresponds for this index -}; -using FreeIndices = std::vector; - -/** - * Represents a batched index in a tensor contraction. - */ -struct BatchIndex -{ - size_t a, b, c, d; -}; -using BatchIndices = std::vector; - -/* - * Represents a bound (or summed) index in a tensor contraction. - */ -struct BoundIndex -{ - BoundIndex(size_t xa = 0, size_t xb = 0, bool aMirror = false, bool bMirror = false) - : a(xa) - , b(xb) - , aMirror(aMirror) - , bMirror(bMirror){}; - size_t a, b; //! positions in a or b tensor - ZeroPad aZeroPad; - ZeroPad bZeroPad; - bool aMirror, bMirror; -}; -using BoundIndices = std::vector; - -template -T CeilDivide(T num, T den) -{ - return (num + (den - 1)) / den; -} - -/******************************************************************** - * RocsparseltContractionProblem captures the arguments for a GEMM-like * - * contraction problem, to be passed to runContractionProblem. * - ********************************************************************/ -template -struct RocsparseltContractionProblem -{ - const _rocsparselt_handle* handle; - rocsparselt_operation trans_a; - rocsparselt_operation trans_b; - - size_t m; - size_t n; - size_t k; - - const Tc* alpha; - - const Ti* A; - const Ti* const* batch_A; - size_t row_stride_a; - size_t col_stride_a; - size_t batch_stride_a; - size_t buffer_offset_a; - - const Ti* B; - const Ti* const* batch_B; - size_t row_stride_b; - size_t col_stride_b; - size_t batch_stride_b; - size_t buffer_offset_b; - - const Tc* beta; - - const To* C; - const To* const* batch_C; - size_t row_stride_c; - size_t col_stride_c; - size_t batch_stride_c; - size_t buffer_offset_c; - - To* D; - To* const* batch_D; - size_t row_stride_d; - size_t col_stride_d; - size_t batch_stride_d; - size_t buffer_offset_d; - - size_t batch_count; - bool strided_batch; - - bool sparseA; - const unsigned char* metadata; - - hipsparselt_activation_type act_type; - float act_arg0; - float act_arg1; - const void* bias_vector; - int64_t bias_stride; - - void *workspace; - size_t workspaceSize; - - hipStream_t* streams; - int32_t numStreams; - - // gemm - // gemm_strided_batched - RocsparseltContractionProblem(const _rocsparselt_handle* handle, - rocsparselt_operation trans_a, - rocsparselt_operation trans_b, - int64_t m, - int64_t n, - int64_t k, - const Tc* alpha, - const Ti* A, - const Ti* const* batch_A, - int64_t ld_a, - int64_t batch_stride_a, - int64_t offset_a, - const Ti* B, - const Ti* const* batch_B, - int64_t ld_b, - int64_t batch_stride_b, - int64_t offset_b, - const Tc* beta, - To* C, - To* const* batch_C, - int64_t ld_c, - int64_t batch_stride_c, - int64_t offset_c, - int64_t batch_count, - bool strided_batch, - bool sparseA, - const unsigned char* metadata, - hipsparselt_activation_type act_type, - float act_arg0, - float act_arg1, - const void* bias_vector, - int64_t bias_stride, - hipStream_t* streams, - int32_t numStreams) - : handle(handle) - , trans_a(trans_a) - , trans_b(trans_b) - , m(m) - , n(n) - , k(k) - , alpha(alpha) - , A(A) - , batch_A(batch_A) - , row_stride_a(1) - , col_stride_a(ld_a) - , batch_stride_a(batch_stride_a) - , buffer_offset_a(offset_a) - , B(B) - , batch_B(batch_B) - , row_stride_b(1) - , col_stride_b(ld_b) - , batch_stride_b(batch_stride_b) - , buffer_offset_b(offset_b) - , beta(beta) - , C(C) - , batch_C(batch_C) - , row_stride_c(1) - , col_stride_c(ld_c) - , batch_stride_c(batch_stride_c) - , buffer_offset_c(offset_c) - , D(C) - , batch_D(batch_C) - , row_stride_d(1) - , col_stride_d(ld_c) - , batch_stride_d(batch_stride_c) - , buffer_offset_d(offset_c) - , batch_count(batch_count) - , strided_batch(strided_batch) - , sparseA(sparseA) - , metadata(metadata) - , act_type(act_type) - , act_arg0(act_arg0) - , act_arg1(act_arg1) - , bias_vector(bias_vector) - , bias_stride(bias_stride) - , streams(streams) - , numStreams(numStreams) - { - } - - // gemm_ex - // gemm_strided_batched_ex - RocsparseltContractionProblem(const _rocsparselt_handle* handle, - rocsparselt_operation trans_a, - rocsparselt_operation trans_b, - int64_t m, - int64_t n, - int64_t k, - const Tc* alpha, - const Ti* A, - const Ti* const* batch_A, - int64_t ld_a, - int64_t batch_stride_a, - int64_t offset_a, - const Ti* B, - const Ti* const* batch_B, - int64_t ld_b, - int64_t batch_stride_b, - int64_t offset_b, - const Tc* beta, - const To* C, - const To* const* batch_C, - int64_t ld_c, - int64_t batch_stride_c, - int64_t offset_c, - To* D, - To* const* batch_D, - int64_t ld_d, - int64_t batch_stride_d, - int64_t offset_d, - int64_t batch_count, - bool strided_batch, - bool sparseA, - const unsigned char* metadata, - hipsparselt_activation_type act_type, - float act_arg0, - float act_arg1, - const void* bias_vector, - int64_t bias_stride, - void* workspace, - size_t workspaceSize, - hipStream_t* streams, - int32_t numStreams) - : handle(handle) - , trans_a(trans_a) - , trans_b(trans_b) - , m(m) - , n(n) - , k(k) - , alpha(alpha) - , A(A) - , batch_A(batch_A) - , row_stride_a(1) - , col_stride_a(ld_a) - , batch_stride_a(batch_stride_a) - , buffer_offset_a(offset_a) - , B(B) - , batch_B(batch_B) - , row_stride_b(1) - , col_stride_b(ld_b) - , batch_stride_b(batch_stride_b) - , buffer_offset_b(offset_b) - , beta(beta) - , C(C) - , batch_C(batch_C) - , row_stride_c(1) - , col_stride_c(ld_c) - , batch_stride_c(batch_stride_c) - , buffer_offset_c(offset_c) - , D(D) - , batch_D(batch_D) - , row_stride_d(1) - , col_stride_d(ld_d) - , batch_stride_d(batch_stride_d) - , buffer_offset_d(offset_d) - , batch_count(batch_count) - , strided_batch(strided_batch) - , sparseA(sparseA) - , metadata(metadata) - , act_type(act_type) - , act_arg0(act_arg0) - , act_arg1(act_arg1) - , bias_vector(bias_vector) - , bias_stride(bias_stride) - , workspace(workspace) - , workspaceSize(workspaceSize) - , streams(streams) - , numStreams(numStreams) - { - } - - // gemm_ext2 - // gemm_strided_batched_ext2 - RocsparseltContractionProblem(const _rocsparselt_handle* handle, - int64_t m, - int64_t n, - int64_t k, - const Tc* alpha, - const Ti* A, - const Ti* const* batch_A, - int64_t row_stride_a, - int64_t col_stride_a, - int64_t batch_stride_a, - int64_t offset_a, - const Ti* B, - const Ti* const* batch_B, - int64_t row_stride_b, - int64_t col_stride_b, - int64_t batch_stride_b, - int64_t offset_b, - const Tc* beta, - const To* C, - const To* const* batch_C, - int64_t row_stride_c, - int64_t col_stride_c, - int64_t batch_stride_c, - int64_t offset_c, - To* D, - To* const* batch_D, - int64_t row_stride_d, - int64_t col_stride_d, - int64_t batch_stride_d, - int64_t offset_d, - int64_t batch_count, - bool strided_batch, - bool sparseA, - const unsigned char* metadata, - hipsparselt_activation_type act_type, - float act_arg0, - float act_arg1, - const void* bias_vector, - int64_t bias_stride, - hipStream_t* streams, - int32_t numStreams) - : handle(handle) - , trans_a(rocsparselt_operation_none) - , trans_b(rocsparselt_operation_none) - , m(m) - , n(n) - , k(k) - , alpha(alpha) - , A(A) - , batch_A(batch_A) - , row_stride_a(row_stride_a) - , col_stride_a(col_stride_a) - , batch_stride_a(batch_stride_a) - , buffer_offset_a(offset_a) - , B(B) - , batch_B(batch_B) - , row_stride_b(row_stride_b) - , col_stride_b(col_stride_b) - , batch_stride_b(batch_stride_b) - , buffer_offset_b(offset_b) - , beta(beta) - , C(C) - , batch_C(batch_C) - , row_stride_c(row_stride_c) - , col_stride_c(col_stride_c) - , batch_stride_c(batch_stride_c) - , buffer_offset_c(offset_c) - , D(D) - , batch_D(batch_D) - , row_stride_d(row_stride_d) - , col_stride_d(col_stride_d) - , batch_stride_d(batch_stride_d) - , buffer_offset_d(offset_d) - , batch_count(batch_count) - , strided_batch(strided_batch) - , sparseA(sparseA) - , metadata(metadata) - , act_type(act_type) - , act_arg0(act_arg0) - , act_arg1(act_arg1) - , bias_vector(bias_vector) - , bias_stride(bias_stride) - , streams(streams) - , numStreams(numStreams) - { - } - - /*************************************************** - * Print a RocsparseltContractionProblem for debugging * - ***************************************************/ - friend hipsparselt_internal_ostream& operator<<(hipsparselt_internal_ostream& os, - const RocsparseltContractionProblem& prob) - { - return tuple_helper::print_tuple_pairs( - os, - std::make_tuple("a_type", - rocsparselt_precision_string, - "b_type", - rocsparselt_precision_string, - "c_type", - rocsparselt_precision_string, - "d_type", - rocsparselt_precision_string, - "compute_type", - rocsparselt_precision_string, - "transA", - rocsparselt_transpose_letter(prob.trans_a), - "transB", - rocsparselt_transpose_letter(prob.trans_b), - "M", - prob.m, - "N", - prob.n, - "K", - prob.k, - "alpha", - *prob.alpha, - "row_stride_a", - prob.row_stride_a, - "col_stride_a", - prob.col_stride_a, - "row_stride_b", - prob.row_stride_b, - "col_stride_b", - prob.col_stride_b, - "row_stride_c", - prob.row_stride_c, - "col_stride_c", - prob.col_stride_c, - "row_stride_d", - prob.row_stride_d, - "col_stride_d", - prob.col_stride_d, - "beta", - *prob.beta, - "batch_count", - prob.batch_count, - "strided_batch", - prob.strided_batch, - "stride_a", - prob.batch_stride_a, - "stride_b", - prob.batch_stride_b, - "stride_c", - prob.batch_stride_c, - "stride_d", - prob.batch_stride_d, - "activation", - hipsparselt_activation_type_to_string(prob.act_type), - "activation_argument_0", - prob.act_arg0, - "activation_argument_1", - prob.act_arg1, - "bias_stride", - prob.bias_stride)); - }; -}; - -template -rocsparselt_status runContractionProblem(RocsparseltContractionProblem const& problem, - int* config_id, - const int config_max_id, - const int search_iterations); -template -rocsparselt_status initSolutions(const _rocsparselt_handle* handle, - rocsparselt_operation opA, - rocsparselt_operation opB, - int* kernel_counts); - -template -std::string generate_kernel_category_str(rocsparselt_operation opA, rocsparselt_operation opB); - -/*********************************************************************************** - * Whether Kernel Launcher has been initialized for at least one device (used for testing) * - ***********************************************************************************/ -std::atomic_bool& rocsparselt_internal_kl_is_initialized(); - -/********************************************** - * Whether to suppress Kernel error messages * - **********************************************/ -inline bool& rocsparselt_suppress_kl_error_messages() -{ - thread_local bool t_suppress = false; - return t_suppress; -} diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/rocsparselt_spmm_utils.hpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/rocsparselt_spmm_utils.hpp index d1bcbcae4014..29889c2a910e 100644 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/rocsparselt_spmm_utils.hpp +++ b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/include/rocsparselt_spmm_utils.hpp @@ -29,11 +29,7 @@ #include "handle.h" #include "hipsparselt_ostream.hpp" #include "utility.hpp" -#if BUILD_WITH_TENSILE #include "tensile_host.hpp" -#else -#include "kernel_launcher.hpp" -#endif #include inline rocsparselt_status getOriginalSizes(rocsparselt_operation opA, diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/rocsparselt_auxiliary.cpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/rocsparselt_auxiliary.cpp index 3054635b0e01..2122031f754f 100644 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/rocsparselt_auxiliary.cpp +++ b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/rocsparselt_auxiliary.cpp @@ -26,11 +26,7 @@ #include "definitions.h" #include "handle.h" -#if BUILD_WITH_TENSILE #include "tensile_host.hpp" -#else -#include "kernel_launcher.hpp" -#endif #include "rocsparselt.h" #include "rocsparselt_spmm_utils.hpp" #include "status.h" diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/hip_solution_adapter.cpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/hip_solution_adapter.cpp deleted file mode 100644 index aa294faa6c52..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/hip_solution_adapter.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#include -#include - -#include -#include - -#include "definitions.h" -#include "hip_solution_adapter.hpp" -#include "hipsparselt_ostream.hpp" -#include "utility.hpp" - -#define HIP_CHECK_RETURN(expr) \ - do \ - { \ - hipError_t e = (expr); \ - if(e) \ - { \ - PRINT_IF_HIP_ERROR(handle, expr); \ - return e; \ - } \ - } while(0) - -SolutionAdapter::SolutionAdapter() {} - -SolutionAdapter::SolutionAdapter(std::string const& name) - : m_name(name) -{ -} - -SolutionAdapter::~SolutionAdapter() -{ - for(auto& module : m_modules) - PRINT_IF_HIP_ERROR_2(hipModuleUnload(module.second)); - for(auto handle : m_lib_handles) - dlclose(handle); -} - -inline hipError_t load_lib_functions(void* handle, const char* name, void** func) -{ - - *(void**)(func) = dlsym(handle, name); - - //std::map>* ktables = (std::map>*)dlsym(handle, "kernel_map"); - if((*func) == NULL) - { - printf("can not find get_kernel_byte.the get_kernel_byte is [%p]\n", *(void**)(func)); - } - - char* err = NULL; - if((err = dlerror()) != NULL) - { - dlclose(handle); - hipsparselt_cerr << "dlsym failed to load functon get_kernel_byte " << std::endl; - return hipErrorInvalidContext; - } - return hipSuccess; -} - -hipError_t SolutionAdapter::loadLibrary(std::string const& path) -{ - void* handle; - char* err; - - dlerror(); - - handle = dlopen(path.c_str(), RTLD_LOCAL | RTLD_LAZY); - - if(!handle || ((err = dlerror()) != NULL)) - { - hipsparselt_cerr << "dlopn failed to load " << path << std::endl; - return hipErrorInvalidContext; - } - - function_table funcs - = {{"get_kernel_byte", NULL}, {"get_kernel_params", NULL}, {"get_kernel_counts", NULL}}; - hipError_t status; - for(auto& func : funcs) - { - if((status = load_lib_functions(handle, func.first.c_str(), &func.second)) != hipSuccess) - return status; - } - - { - std::lock_guard guard(m_access); - m_lib_handles.push_back(handle); - m_lib_functions.push_back(funcs); - m_loadedLibNames.push_back(concatenate(path)); - } - return hipSuccess; -} - -hipError_t SolutionAdapter::loadCodeObjectBytes(const _rocsparselt_handle* handle, - std::vector const& bytes, - std::string const& name) -{ - return loadCodeObject(handle, bytes.data(), name); -} - -hipError_t SolutionAdapter::loadCodeObject(const _rocsparselt_handle* handle, - std::string const& name) -{ - //check if the module already exist. - if(m_modules.find(name) != m_modules.end()) - return hipSuccess; - - for(auto& fucs : m_lib_functions) - { - auto it = fucs.find("get_kernel_byte"); - if(it == fucs.end()) - continue; - - unsigned char* (*get_kernel_byte)(const char*); - *(void**)(&get_kernel_byte) = it->second; - auto k_bytes = get_kernel_byte(name.c_str()); - - if(k_bytes != NULL) - { - return loadCodeObject(handle, k_bytes, name); - } - } - return hipErrorNotFound; -} - -hipError_t SolutionAdapter::loadCodeObject(const _rocsparselt_handle* handle, - const void* image, - std::string const& name) -{ - std::lock_guard guard(m_access); - auto it = m_modules.find(name); - if(it == m_modules.end()) - { - hipModule_t module; - HIP_CHECK_RETURN(hipModuleLoadData(&module, image)); - //hipsparselt_cout << "load module " << name << " success" << std::endl; - m_modules[name] = module; - } - return hipSuccess; -} - -hipError_t SolutionAdapter::initKernel(std::string const& name) -{ - hipFunction_t function; - return getKernel(function, name); -} - -hipError_t SolutionAdapter::getKernel(hipFunction_t& rv, std::string const& name) -{ - std::unique_lock guard(m_access); - hipError_t err = hipSuccess; - - auto it_k = m_kernels.find(name); - if(it_k != m_kernels.end()) - { - rv = it_k->second; - //hipsparselt_cout << "load function " << name << " success" << std::endl; - return err; - } - - hipModule_t module; - auto it_m = m_modules.find(name); - if(it_m != m_modules.end()) - { - module = it_m->second; - err = hipModuleGetFunction(&rv, module, name.c_str()); - if(err == hipSuccess) - { - m_kernels[name] = rv; - //hipsparselt_cout << "load function " << name << " success" << std::endl; - return err; - } - else if(err != hipErrorNotFound) - { - return err; - } - } - return err; -} - -hipError_t SolutionAdapter::launchKernel(const _rocsparselt_handle* handle, - KernelInvocation const& kernel) -{ - return launchKernel(handle, kernel, nullptr, nullptr, nullptr); -} - -hipError_t SolutionAdapter::launchKernel(const _rocsparselt_handle* handle, - KernelInvocation const& kernel, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent, - int iter) -{ - if(handle->layer_mode & rocsparselt_layer_mode_log_trace) - { - std::ostringstream stream; - stream << "Kernel " << kernel.kernelName << "\n" - << " l" - << " (" << kernel.workGroupSize.x << ", " << kernel.workGroupSize.y << ". " - << kernel.workGroupSize.z << ")" - << " x g" - << " (" << kernel.numWorkGroups.x << ", " << kernel.numWorkGroups.y << ". " - << kernel.numWorkGroups.z << ")" - << " = " - << "(" << kernel.numWorkItems.x << ", " << kernel.numWorkItems.y << ". " - << kernel.numWorkItems.z << ") \n" - << kernel.args << std::endl; - log_trace(handle, __func__, stream.str()); - } - - HIP_CHECK_RETURN(loadCodeObject(handle, kernel.kernelName)); - - hipFunction_t function; - HIP_CHECK_RETURN(getKernel(function, kernel.kernelName)); - - void* kernelArgs = const_cast(kernel.args.data()); - size_t argsSize = kernel.args.size(); - - void* hipLaunchParams[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, - kernelArgs, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &argsSize, - HIP_LAUNCH_PARAM_END}; - - if(startEvent != nullptr) - HIP_CHECK_RETURN(hipEventRecord(startEvent, stream)); - for(int i = 0; i < iter; i++) - HIP_CHECK_RETURN(hipExtModuleLaunchKernel(function, - kernel.numWorkItems.x, - kernel.numWorkItems.y, - kernel.numWorkItems.z, - kernel.workGroupSize.x, - kernel.workGroupSize.y, - kernel.workGroupSize.z, - kernel.sharedMemBytes, // sharedMem - stream, // stream - nullptr, - (void**)&hipLaunchParams, - nullptr, // event - nullptr // event - )); - if(stopEvent != nullptr) - HIP_CHECK_RETURN(hipEventRecord(stopEvent, stream)); - return hipSuccess; -} - -hipError_t SolutionAdapter::launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels) -{ - for(auto const& k : kernels) - { - HIP_CHECK_RETURN(launchKernel(handle, k)); - } - return hipSuccess; -} - -hipError_t SolutionAdapter::launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent) -{ - auto first = kernels.begin(); - auto last = kernels.end() - 1; - - for(auto iter = kernels.begin(); iter != kernels.end(); iter++) - { - hipEvent_t kStart = nullptr; - hipEvent_t kStop = nullptr; - - if(iter == first) - kStart = startEvent; - if(iter == last) - kStop = stopEvent; - - HIP_CHECK_RETURN(launchKernel(handle, *iter, stream, kStart, kStop)); - } - return hipSuccess; -} - -hipError_t SolutionAdapter::launchKernels(const _rocsparselt_handle* handle, - std::vector const& kernels, - hipStream_t stream, - std::vector const& startEvents, - std::vector const& stopEvents) -{ - if(kernels.size() != startEvents.size() || kernels.size() != stopEvents.size()) - throw std::runtime_error(concatenate("Must have an equal number of kernels (", - kernels.size(), - "), start events (", - startEvents.size(), - "), and stop events. (", - stopEvents.size(), - ")")); - - for(size_t i = 0; i < kernels.size(); i++) - { - HIP_CHECK_RETURN(launchKernel(handle, kernels[i], stream, startEvents[i], stopEvents[i])); - } - return hipSuccess; -} - -size_t SolutionAdapter::getKernelCounts(std::string const& category) -{ - for(auto& fucs : m_lib_functions) - { - auto it = fucs.find("get_kernel_counts"); - if(it == fucs.end()) - continue; - - int (*get_kernel_counts)(const char*); - *(void**)(&get_kernel_counts) = it->second; - return get_kernel_counts(category.c_str()); - } - return 0; -} - -KernelParams* SolutionAdapter::getKernelParams(std::string const& category) -{ - for(auto& fucs : m_lib_functions) - { - auto it = fucs.find("get_kernel_params"); - if(it == fucs.end()) - continue; - - KernelParams* (*get_kernel_params)(const char*); - *(void**)(&get_kernel_params) = it->second; - return get_kernel_params(category.c_str()); - } - return nullptr; -} - -std::ostream& operator<<(std::ostream& stream, SolutionAdapter const& adapter) -{ - stream << "hip::SolutionAdapter"; - - stream << " (" << adapter.name() << ", " << adapter.m_modules.size() << " total modules)" - << std::endl; - - return stream; -} - -std::ostream& operator<<(std::ostream& stream, std::shared_ptr const& ptr) -{ - if(ptr) - { - return stream << "*" << *ptr; - } - else - { - return stream << "(nullptr)"; - } -} diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_arguments.cpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_arguments.cpp deleted file mode 100644 index d23ab0cdbef6..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_arguments.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#include - -#include -#include - -std::ostream& operator<<(std::ostream& stream, const KernelArguments& t) -{ - size_t prevOffset = 0; - for(auto const& name : t.m_names) - { - auto const& iter = t.m_argRecords.find(name); - - if(iter == t.m_argRecords.end()) - throw std::runtime_error("Argument " + name + " not found in record."); - - auto const& record = iter->second; - - size_t offset = std::get(record); - size_t size = std::get(record); - - if(prevOffset != offset) - { - stream << "[" << prevOffset << ".." << offset - 1 << "] " << std::endl; - } - - stream << "[" << offset << ".." << offset + size - 1 << "] " << name << ":"; - - if(std::get(record)) - { - auto oldFill = stream.fill(); - auto oldWidth = stream.width(); - stream << std::hex; - for(size_t i = offset; i < offset + size; i++) - stream << " " << std::setfill('0') << std::setw(2) - << static_cast(t.m_data[i]); - stream << std::dec; - stream.fill(oldFill); - stream.width(oldWidth); - - if(t.m_log) - { - stream << " (" << std::get(record) << ")"; - } - } - else - { - stream << " "; - } - - stream << std::endl; - - prevOffset = offset + size; - } - - return stream; -} - -KernelArguments::KernelArguments(bool log) - : m_log(log) -{ -} - -KernelArguments::~KernelArguments() {} - -void KernelArguments::reserve(size_t bytes, size_t count) -{ - m_data.reserve(bytes); - m_names.reserve(count); - if(m_log) - m_argRecords.reserve(count); -} - -bool KernelArguments::isFullyBound() const -{ - if(!m_log) - return true; - - for(auto const& record : m_argRecords) - { - if(!std::get(record.second)) - return false; - } - - return true; -} - -void const* KernelArguments::data() const -{ - if(!isFullyBound()) - throw std::runtime_error("Arguments not fully bound."); - - return reinterpret_cast(m_data.data()); -} - -size_t KernelArguments::size() const -{ - return m_data.size(); -} - -KernelArguments::const_iterator::const_iterator(KernelArguments const& args) - : m_currentArg(args.m_names.begin()) - , m_args(args) -{ - if(!args.m_log) - { - throw std::runtime_error("KernelArguments::const_iterator requires m_log=true"); - } - assignCurrentArg(); -} - -KernelArguments::const_iterator::const_iterator(KernelArguments const& args, - std::string const& name) - : m_currentArg(args.m_names.begin()) - , m_args(args) -{ - if(!args.m_log) - { - throw std::runtime_error("KernelArguments::const_iterator requires m_log=true"); - } - - if(name.empty()) - { - m_currentArg = m_args.m_names.end(); - } - else - { - while(m_currentArg != args.m_names.end() && *m_currentArg != name) - { - m_currentArg++; - } - } - assignCurrentArg(); -} - -auto KernelArguments::const_iterator::operator++() -> const_iterator& -{ - if(m_currentArg != m_args.m_names.end()) - { - ++m_currentArg; - assignCurrentArg(); - } - return *this; -} - -auto KernelArguments::const_iterator::operator++(int) -> const_iterator -{ - if(m_currentArg != m_args.m_names.end()) - { - auto ret = *this; - ++m_currentArg; - assignCurrentArg(); - return ret; - } - return *this; -} - -bool KernelArguments::const_iterator::operator==(const const_iterator& rhs) const -{ - return m_value == rhs.m_value; -} - -bool KernelArguments::const_iterator::operator!=(const const_iterator& rhs) const -{ - return m_value != rhs.m_value; -} - -auto KernelArguments::const_iterator::operator*() const -> decltype(const_iterator::m_value) const& -{ - return m_value; -} - -auto KernelArguments::const_iterator::operator->() const -> decltype(const_iterator::m_value) const* -{ - return &m_value; -} - -void KernelArguments::const_iterator::reset() -{ - m_currentArg = m_args.m_names.begin(); - assignCurrentArg(); -} - -void KernelArguments::const_iterator::assignCurrentArg() -{ - if(m_currentArg != m_args.m_names.end()) - { - auto const& iter = m_args.m_argRecords.find(*m_currentArg); - - if(iter == m_args.m_argRecords.end()) - { - throw std::runtime_error("Argument " + *m_currentArg + " not found in record."); - } - - auto const& record = iter->second; - - if(!m_args.isFullyBound()) - { - throw std::runtime_error("Arguments not fully bound."); - } - - m_value = std::make_pair( - static_cast(m_args.m_data.data() - + std::get(record)), - (size_t)std::get(record)); - } - else - { - m_value = std::make_pair((void const*)nullptr, (size_t)0); - } -} - -auto KernelArguments::begin() const -> const_iterator -{ - return const_iterator(*this); -} - -auto KernelArguments::end() const -> const_iterator -{ - return const_iterator(*this, ""); -} diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_launcher.cpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_launcher.cpp deleted file mode 100644 index 52b6db6ceb5b..000000000000 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/hip/kernel_launcher.cpp +++ /dev/null @@ -1,781 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ -#include "kernel_launcher.hpp" -#include "activation.hpp" -#include "definitions.h" -#include "handle.h" -#include "hip_solution_adapter.hpp" -#include "hipsparselt_ostream.hpp" -#include "rocsparselt-types.h" -#include "rocsparselt.h" -#include "status.h" -#include "utility.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define ROCSPARSELT_LIB_PATH "/opt/rocm/hipsparselt/lib" - -namespace -{ -#ifndef WIN32 - std::string rocsparselt_so_path; - - int rocsparselt_dl_iterate_phdr_callback(struct dl_phdr_info* hdr_info, size_t size, void* data) - { - // uncomment to see all dependent .so files - //fprintf(stderr, "rocsparselt so file: %s\n", hdr_info->dlpi_name); - if(hdr_info->dlpi_name && strstr(hdr_info->dlpi_name, "hipsparselt.")) - { - rocsparselt_so_path = hdr_info->dlpi_name; - } - return 0; - } -#endif - - size_t totalAllcoatedElement(std::vector& sizes, - std::vector& strides, - size_t offset) - { - - size_t totalAllocatedElements = 1; - for(int i = 0; i < sizes.size(); i++) - totalAllocatedElements += strides[i] * (sizes[i] - 1); - totalAllocatedElements += offset; - return totalAllocatedElements; - } - - size_t totalAllcoatedElementNonBatch(std::vector& sizes, - std::vector& strides, - BatchIndices& batchIndex) - { - size_t totalAllocatedElementsNonBatch = 1; - for(int idx = 0; idx < sizes.size(); idx++) - { - bool isBatch = batchIndex.end() - != std::find_if(batchIndex.begin(), - batchIndex.end(), - [idx](const BatchIndex& bi) { return bi.a == idx; }); - if(!isBatch) - totalAllocatedElementsNonBatch += strides[idx] * (sizes[idx] - 1); - } - return totalAllocatedElementsNonBatch; - }; - - template - auto ConstructKernelInvoke(const RocsparseltContractionProblem& prob, - const KernelParams& kernel) - { - KernelInvocation ki; - - ki.args = KernelArguments(); - - ki.args.reserve(1024, 128); - - ki.kernelName = kernel.SolutionNameMin; - - ki.workGroupSize.x = kernel.WorkGroup[0] * kernel.WorkGroup[1] * kernel.WorkGroup[2]; - ki.workGroupSize.y = 1; - ki.workGroupSize.z = 1; - - ki.numWorkGroups.x = 1; - ki.numWorkGroups.y = 1; - - // Indices for contraction problem - FreeIndices freeIndex(2); - BoundIndices boundIndex(1); - BatchIndices batchIndex{{2, 2, 2, 2}}; - - // Set up GEMM indices - freeIndex[0].isA = true; - freeIndex[1].isA = false; - freeIndex[0].c = freeIndex[0].d = 0; - freeIndex[1].c = freeIndex[1].d = 1; - - // We set K=0 when alpha==0. - // This makes alpha==0 a change in the problem, and not just a change in the inputs. - // It optimizes all problems with alpha==0 into K=0 and alpha=(don't care) - auto k = prob.k && *prob.alpha ? prob.k : 0; - auto ck = prob.sparseA ? k / 2 : k; - - std::vector sizes_a(3), sizes_b(3), sizes_c(3), sizes_d(3); - std::vector strides_a = {prob.row_stride_a, prob.col_stride_a, prob.batch_stride_a}; - std::vector strides_b = {prob.row_stride_b, prob.col_stride_b, prob.batch_stride_b}; - std::vector strides_c = {prob.row_stride_c, prob.col_stride_c, prob.batch_stride_c}; - std::vector strides_d = {prob.row_stride_d, prob.col_stride_d, prob.batch_stride_d}; - - // If A is transposed, swap the free and bound dimensions and their ranks - if(prob.trans_a != rocsparselt_operation_none) - { - sizes_a[0] = ck; - sizes_a[1] = prob.m; - sizes_a[2] = prob.batch_count; - - freeIndex[0].i = 1; - boundIndex[0].a = 0; - } - else - { - sizes_a[0] = prob.m; - sizes_a[1] = ck; - sizes_a[2] = prob.batch_count; - - freeIndex[0].i = 0; - boundIndex[0].a = 1; - } - - // If B is transposed, swap the free and bound dimensions and their ranks - if(prob.trans_b != rocsparselt_operation_none) - { - sizes_b[0] = prob.n; - sizes_b[1] = k; - sizes_b[2] = prob.batch_count; - - freeIndex[1].i = 0; - boundIndex[0].b = 1; - } - else - { - sizes_b[0] = k; - sizes_b[1] = prob.n; - sizes_b[2] = prob.batch_count; - - freeIndex[1].i = 1; - boundIndex[0].b = 0; - } - - sizes_c[0] = prob.m; - sizes_c[1] = prob.n; - sizes_c[2] = prob.batch_count; - - sizes_d[0] = prob.m; - sizes_d[1] = prob.n; - sizes_d[2] = prob.batch_count; - - FreeIndices freeIndicesA; - FreeIndices freeIndicesB; - std::vector freeSizesA; - std::vector freeSizesB; - - freeIndicesA.reserve(freeIndex.size()); - freeIndicesB.reserve(freeIndex.size()); - freeSizesA.reserve(freeIndex.size()); - freeSizesB.reserve(freeIndex.size()); - for(int i = 0; i < freeIndex.size(); i++) - { - size_t mySize = sizes_d[freeIndex[i].d]; - if(freeIndex[i].isA) - { - freeIndicesA.push_back(freeIndex[i]); - freeSizesA.push_back(mySize); - } - else - { - freeIndicesB.push_back(freeIndex[i]); - freeSizesB.push_back(mySize); - } - } - - for(size_t i = 0; i < freeIndicesA.size(); i++) - { - ki.numWorkGroups.x *= freeSizesA[i]; - } - for(size_t i = 0; i < freeIndicesB.size(); i++) - { - ki.numWorkGroups.y *= freeSizesB[i]; - } - - ki.numWorkGroups.z = 1; - - std::vector batchSizes(batchIndex.size()); - std::vector boundSizes(boundIndex.size()); - for(int i = 0; i < batchIndex.size(); i++) - { - batchSizes[i] = std::max({sizes_a[batchIndex[i].a], - sizes_b[batchIndex[i].b], - sizes_c.empty() ? 0 : sizes_c[batchIndex[i].c], - sizes_d[batchIndex[i].d]}); - } - - for(int i = 0; i < boundIndex.size(); i++) - { - boundSizes[i] = std::max(sizes_a[boundIndex[i].a], sizes_b[boundIndex[i].b]); - } - - for(size_t i = 0; i < batchIndex.size(); i++) - { - if(kernel.PackBatchDims & 0x1) - ki.numWorkGroups.x *= batchSizes[i]; - if(kernel.PackBatchDims & 0x2) - ki.numWorkGroups.y *= batchSizes[i]; - if(!kernel.PackBatchDims) - ki.numWorkGroups.z *= batchSizes[i]; - } - - // CD always contain index0. if this is in the B free indices, then need to - // transposing the output tensor. - bool transposeC01 = freeIndicesB.end() - != std::find_if(freeIndicesB.begin(), - freeIndicesB.end(), - [](const FreeIndex& fi) { return fi.c == 0 /*idx0*/; }); - - if(transposeC01) - std::swap(ki.numWorkGroups.x, ki.numWorkGroups.y); - - ki.numWorkGroups.x = CeilDivide(ki.numWorkGroups.x, kernel.MacroTile[0]); - ki.numWorkGroups.y = CeilDivide(ki.numWorkGroups.y, kernel.MacroTile[1]); - - uint32_t problemNumGroupTiles0 = ki.numWorkGroups.x; - uint32_t problemNumGroupTiles1 = ki.numWorkGroups.y; - - ki.numWorkGroups.y *= kernel.GlobalSplitU; - - ki.numWorkItems.x = ki.workGroupSize.x * ki.numWorkGroups.x; - ki.numWorkItems.y = ki.workGroupSize.y * ki.numWorkGroups.y; - ki.numWorkItems.z = ki.workGroupSize.z * ki.numWorkGroups.z; - - ki.sharedMemBytes = 0; - - uint64_t tensor2dSizeC = totalAllcoatedElement(sizes_c, strides_c, (size_t)0); - uint64_t tensor2dSizeA - = (kernel.PackBatchDims & 0x1) - ? totalAllcoatedElement(sizes_a, strides_a, (size_t)0) - : totalAllcoatedElementNonBatch(sizes_a, strides_a, batchIndex); - uint64_t tensor2dSizeB - = (kernel.PackBatchDims & 0x2) - ? totalAllcoatedElement(sizes_b, strides_b, (size_t)0) - : totalAllcoatedElementNonBatch(sizes_b, strides_b, batchIndex); - - ki.args.append("tensor2dSizeC", tensor2dSizeC); - ki.args.append("tensor2dSizeA", tensor2dSizeA); - ki.args.append("tensor2dSizeB", tensor2dSizeB); - - ki.args.append("d", prob.D); - ki.args.append("c", prob.C); - ki.args.append("a", prob.A); - ki.args.append("b", prob.B); - - if(prob.sparseA) - ki.args.append("metadata", prob.metadata); - - ki.args.append("alpha", *prob.alpha); - ki.args.append("beta", *prob.beta); - - hipsparselt_activation_type act_type - = string_to_hipsparselt_activation_type(kernel.ActivationType); - if((act_type != hipsparselt_activation_type::none) && kernel.ActivationFused - && (!kernel.GlobalAccumulation)) - { - if(kernel.ActivationHPA) - { - //same as the alpha/beta type. - ki.args.append("activation_0", prob.act_arg0); - ki.args.append("activation_1", prob.act_arg1); - } - else - { - ki.args.append("activation_0", static_cast(prob.act_arg0)); - ki.args.append("activation_1", static_cast(prob.act_arg1)); - } - ki.args.append("activationType", static_cast(prob.act_type)); - } - - size_t startStrideCD = kernel.UseInitialStridesCD ? 0 : 1; - size_t startStrideAB = kernel.UseInitialStridesAB ? 0 : 1; - - for(size_t i = startStrideCD; i < sizes_d.size(); i++) - ki.args.append(concatenate_if("strideD", i), strides_d[i]); - - for(size_t i = startStrideCD; i < sizes_c.size(); i++) - ki.args.append(concatenate_if("strideC", i), strides_c[i]); - - for(size_t i = startStrideAB; i < sizes_a.size(); i++) - ki.args.append(concatenate_if("strideA", i), strides_a[i]); - - for(size_t i = startStrideAB; i < sizes_b.size(); i++) - ki.args.append(concatenate_if("strideB", i), strides_b[i]); - - std::vector problemSizes; - problemSizes.resize(0); - problemSizes.reserve(sizes_c.size() + boundSizes.size()); - problemSizes.insert(problemSizes.end(), sizes_c.begin(), sizes_c.end()); - problemSizes.insert(problemSizes.end(), boundSizes.begin(), boundSizes.end()); - - int idx = 0; - for(auto size : problemSizes) - { - ki.args.append(concatenate_if("size_", idx), size); - idx++; - } - - // Caculate staggerU - uint32_t sizeL = boundSizes[0]; - - // how many stride-sized clicks to stagger start offset - unsigned int staggerUIter = kernel.StaggerU; - - // /DepthU/GSU - int unrollLoopIters = sizeL / kernel.DepthU / kernel.GlobalSplitU; - - unsigned int shifted = 1 << kernel.StaggerStrideShift; - - while(staggerUIter > 1) - { - if(unrollLoopIters >= (staggerUIter * shifted)) - break; - - staggerUIter /= 2; // step down to smaller stagger - } - - if(staggerUIter >= 1) - staggerUIter -= 1; - - ki.args.append("staggerUIter", staggerUIter); - ki.args.append("problemNumGroupTiles0", problemNumGroupTiles0); - ki.args.append("problemNumGroupTiles1", problemNumGroupTiles1); - - uint32_t numFullBlocks = problemNumGroupTiles1; - uint32_t wgmRemainder1 = 0; - uint32_t magicNumberWgmRemainder1 = 0; - - if(kernel.WorkGroupMapping != 0) - { - numFullBlocks = problemNumGroupTiles1 / kernel.WorkGroupMapping; - wgmRemainder1 = problemNumGroupTiles1 % kernel.WorkGroupMapping; - if(wgmRemainder1 == 0) - wgmRemainder1 = kernel.WorkGroupMapping; - - uint64_t magicNum; - const int smallMagicShift = 31; - magicNum = (1L << smallMagicShift) / wgmRemainder1 + 1; - assert(magicNum >> 32 == 0); // ensure magic number fits - magicNumberWgmRemainder1 = static_cast(magicNum); - } - - ki.args.append("numFullBlocks", numFullBlocks); - ki.args.append("wgmRemainder1", wgmRemainder1); - ki.args.append("magicNumberWgmRemainder1", magicNumberWgmRemainder1); - - ki.args.append("offsetD", prob.buffer_offset_b); - ki.args.append("offsetC", prob.buffer_offset_c); - ki.args.append("offsetA", prob.buffer_offset_a); - ki.args.append("offsetB", prob.buffer_offset_b); - - ki.args.append("pad", 0); - return ki; - } - - /************************************************** - * The KernelLauncher struct interfaces * - **************************************************/ - class KernelLauncher - { - std::shared_ptr m_deviceProp; - - // The adapter object. mutable is used to allow adapters to be modified - // even when they are stored in a const vector which is immutable in size - struct adapter_s - { - mutable std::atomic adapter{nullptr}; - mutable std::mutex mutex; - }; - - // Each device contains an adapter - std::vector const m_adapters; - - public: - KernelLauncher() - : m_adapters(GetDeviceCount()) - { - // We mark KernelLauncher as initialized. This is so that CI tests can - // verify that the initialization occurs in the "multiheaded" tests - rocsparselt_internal_kl_is_initialized() = true; - } - - // KernelLauncher is not copyable or assignable - KernelLauncher(const KernelLauncher&) = delete; - KernelLauncher& operator=(const KernelLauncher&) = delete; - - // Get the number of devices - static int GetDeviceCount() - { - int count; - if(hipGetDeviceCount(&count) != hipSuccess) - { - hipsparselt_cerr << "\nrocsparselt error: Could not initialize Kernel Launcher " - "host: No devices found" - << std::endl; - hipsparselt_abort(); - } - return count; - } - - ~KernelLauncher() - { - for(auto& a : m_adapters) - delete a.adapter; - } - - auto& get_device_property() const - { - return m_deviceProp; - } - - auto& get_adapters() const - { - return m_adapters; - } - - /******************************************************* - * Testpath() tests that a path exists and is readable * - *******************************************************/ - static bool TestPath(const std::string& path) - { -#ifdef WIN32 - return ((_access(path.c_str(), 4) != -1) || (_access(path.c_str(), 6) != -1)); -#else - return access(path.c_str(), R_OK) == 0; -#endif - } - - /********************************************************************* - * Initialize adapter and library according to environment variables * - * and default paths based on librocsparselt.so location and GPU * - *********************************************************************/ - void initialize(SolutionAdapter& adapter, int32_t deviceId) - { - std::string path; -#ifndef WIN32 - path.reserve(PATH_MAX); -#endif - - // The name of the current GPU platform - std::string processor = rocsparselt_internal_get_arch_name(); - - const char* env = getenv("ROCSPARSELT_SPMM_LIBPATH"); - if(env) - { - path = env; - } - else - { - path = ROCSPARSELT_LIB_PATH; - - // Find the location of librocsparselt.so - // Fall back on hard-coded path if static library or not found - -#ifndef ROCSPARSELT_STATIC_LIB - dl_iterate_phdr(rocsparselt_dl_iterate_phdr_callback, NULL); - if(rocsparselt_so_path.size()) - path = std::string{dirname(&rocsparselt_so_path[0])}; -#endif // ifndef ROCSPARSELT_STATIC_LIB - - // Find the location of the libraries - if(TestPath(path + "/../SPMM_KERNELS/library")) - path += "/../SPMM_KERNELS/library"; - else if(TestPath(path + "/library")) - path += "/library"; - else - path += "/hipsparselt/library"; - } - - auto dir = path + "/libspmm_kernels_" + processor + ".so"; - bool no_match = false; - if(TestPath(dir)) - { - if(adapter.loadLibrary(dir) != hipSuccess) - no_match = true; - } - else - no_match = true; - - if(no_match) - { - static hipsparselt_internal_ostream& once - = hipsparselt_cerr - << "\nrocsparselt warning: No paths matched " << dir - << ". Make sure that ROCSPARSELT_TENSILE_LIBPATH is set correctly." - << std::endl; - } - - hipDeviceProp_t prop; - - THROW_IF_HIP_ERROR(hipGetDeviceProperties(&prop, deviceId)); - - m_deviceProp = std::make_shared(prop); - } - }; - - // Return the library and adapter for the current HIP device - auto& get_adapter(std::shared_ptr* deviceProp = nullptr, int device = -1) - { - try - { - // KernelLauncher is initialized on the first call - static KernelLauncher host; - - if(device == -1) - THROW_IF_HIP_ERROR(hipGetDevice(&device)); - - // Adapter entry for the current HIP device ID - auto& a = host.get_adapters().at(device); - auto* adapter = a.adapter.load(std::memory_order_acquire); - - // Once set, a.adapter contains the adapter for the current HIP device ID - if(!adapter) - { - // Lock so that only one thread performs initialization of the adapter - std::lock_guard lock(a.mutex); - - adapter = a.adapter.load(std::memory_order_relaxed); - if(!adapter) - { - // Allocate a new adapter using the current HIP device - adapter = new SolutionAdapter(); - - // Initialize the adapter and possibly the library - host.initialize(*adapter, device); - - // Atomically change the adapter stored for this device ID - a.adapter.store(adapter, std::memory_order_release); - } - } - - if(deviceProp) - *deviceProp = host.get_device_property(); - - return *adapter; - } - catch(const std::exception& e) - { - hipsparselt_cerr << "\nrocsparselt error: Could not initialize Kernel Launcher host:\n" - << e.what() << std::endl; - hipsparselt_abort(); - } - catch(...) - { - hipsparselt_cerr - << "\nrocsparselt error: Could not initialize Kernel Launcher host:\nUnknown " - "exception thrown" - << std::endl; - hipsparselt_abort(); - } - } - - /************************************************************************** - * We normally print error messages only once, to avoid excessive logging * - **************************************************************************/ - void print_once(const hipsparselt_internal_ostream& msg) - { - if(rocsparselt_suppress_kl_error_messages()) - return; - static constexpr char varname[] = "ROCSPARSELT_VERBOSE_KL_ERROR"; - static const char* verbose = getenv(varname); - if(!verbose) - { - static auto& once = hipsparselt_cerr - << msg - << "\nThis message will be only be displayed once, unless the " - << varname << " environment variable is set." << std::endl; - } - else - hipsparselt_cerr << msg << std::endl; - } - -} // namespace - -/****************************************************************************** - * runContractionProblem used to run a contraction problem described * - * by RocsparseltContractionProblem * - ******************************************************************************/ -template -rocsparselt_status runContractionProblem(const RocsparseltContractionProblem& prob, - int* config_id, - const int config_max_id, - const int search_iterations) -{ - rocsparselt_status status = rocsparselt_status_internal_error; - size_t max_cid = 0; - try - { - std::shared_ptr deviceProp; - - auto& adapter = get_adapter(&deviceProp, prob.handle->device); - std::string str = generate_kernel_category_str(prob.trans_a, prob.trans_b); - max_cid = adapter.getKernelCounts(str); - KernelParams* solution = adapter.getKernelParams(str); - - if(config_max_id != max_cid) - { - hipsparselt_cerr << "config_max_id (" << config_max_id << ") is out of range (" - << max_cid << ") used this value to instead." << std::endl; - } - - if(!max_cid) - { - hipsparselt_internal_ostream msg; - print_once(msg << "\nrocsparselt error: No solution found for " << prob); - status = rocsparselt_status_not_implemented; - } - else - { - if(!search_iterations) - { - RETURN_IF_HIP_ERROR(adapter.launchKernel( - prob.handle, - ConstructKernelInvoke(prob, solution[*config_id]), - prob.streams[0], - nullptr, - nullptr)); - } - else - { - float min_ms = std::numeric_limits::max(); - hipEvent_t startEvent, stopEvent; - float ms; - RETURN_IF_HIP_ERROR(hipEventCreate(&startEvent)); - RETURN_IF_HIP_ERROR(hipEventCreate(&stopEvent)); - for(int id = 0; id < max_cid; id++) - { - auto ki = ConstructKernelInvoke(prob, solution[id]); - //warm up - RETURN_IF_HIP_ERROR( - adapter.launchKernel(prob.handle, ki, prob.streams[0], nullptr, nullptr)); - - RETURN_IF_HIP_ERROR(adapter.launchKernel(prob.handle, - ki, - prob.streams[0], - startEvent, - stopEvent, - search_iterations)); - RETURN_IF_HIP_ERROR(hipEventSynchronize(stopEvent)); - RETURN_IF_HIP_ERROR(hipEventElapsedTime(&ms, startEvent, stopEvent)); - if(ms < min_ms) - { - *config_id = id; - min_ms = ms; - } - - } - RETURN_IF_HIP_ERROR(hipEventDestroy(startEvent)); - RETURN_IF_HIP_ERROR(hipEventDestroy(stopEvent)); - } - status = rocsparselt_status_success; - } - } - catch(const std::exception& e) - { - hipsparselt_internal_ostream msg; - print_once(msg << "\nrocsparselt error: " << (max_cid ? "" : "No ") - << "Solution found, but exception thrown for " << prob << e.what()); - } - catch(...) - { - hipsparselt_internal_ostream msg; - print_once(msg << "\nrocsparselt error: " << (max_cid ? "" : "No ") - << "Solution found, but unknown exception thrown for " << prob); - } - - return status; -} - -/****************************************************************************** - * initSolutions used to initialize specific type's solutions at the early stage. * - * ****************************************************************************/ -template -rocsparselt_status initSolutions(const _rocsparselt_handle* handle, - rocsparselt_operation opA, - rocsparselt_operation opB, - int* kernel_counts) -{ - std::shared_ptr deviceProp; - auto& adapter = get_adapter(&deviceProp, handle->device); - std::string str = generate_kernel_category_str(opA, opB); - - *kernel_counts = adapter.getKernelCounts(str); - if(*kernel_counts <= 0) - return rocsparselt_status_not_implemented; - - KernelParams* solution = adapter.getKernelParams(str); - for(int i = 0; i < *kernel_counts; i++) - PRINT_IF_HIP_ERROR(handle, adapter.loadCodeObject(handle, solution[i].SolutionNameMin)); - return rocsparselt_status_success; -} - -/*************************************************************** - * ! \brief Initialize rocsparselt for the current HIP device, to * - * avoid costly startup time at the first call on that device. * - ***************************************************************/ -extern "C" void rocsparselt_initialize() -{ - get_adapter(); -} - -/******************************************************************************************* - * Whether Kernel Launcher has been initialized for at least one device (used for testing) * - *******************************************************************************************/ -std::atomic_bool& rocsparselt_internal_kl_is_initialized() -{ - static std::atomic_bool init; - return init; -} - -/****************************************************************************** - * Intantiate the cases of runContractionProblem / initSolutions which are * - * needed to satisfy rocsparselt dependencies. * - ******************************************************************************/ -#define GENERATE_DEFINITIONS(Ti, To, Tc, Ca) \ - template <> \ - std::string generate_kernel_category_str(rocsparselt_operation opA, \ - rocsparselt_operation opB) \ - { \ - std::string str = Ca; \ - str += "_"; \ - str += (opA == rocsparselt_operation_none ? "N" : "T"); \ - str += "_"; \ - str += (opB == rocsparselt_operation_none ? "N" : "T"); \ - return str; \ - } \ - template rocsparselt_status runContractionProblem( \ - const RocsparseltContractionProblem&, int*, const int, const int); \ - template rocsparselt_status initSolutions( \ - const _rocsparselt_handle*, rocsparselt_operation, rocsparselt_operation, int*); - -GENERATE_DEFINITIONS(__half, __half, float, "4_4_0") -GENERATE_DEFINITIONS(hip_bfloat16, hip_bfloat16, float, "7_7_0") -GENERATE_DEFINITIONS(int8_t, int8_t, float, "8_8_0") diff --git a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/rocsparselt_spmm.hpp b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/rocsparselt_spmm.hpp index f57787e2ebfe..cf39133ca8e1 100644 --- a/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/rocsparselt_spmm.hpp +++ b/projects/hipsparselt/library/src/hcc_detail/rocsparselt/src/spmm/rocsparselt_spmm.hpp @@ -33,11 +33,7 @@ #include "handle.h" #include "hipsparselt_ostream.hpp" #include "utility.hpp" -#if BUILD_WITH_TENSILE #include "tensile_host.hpp" -#else -#include "kernel_launcher.hpp" -#endif template rocsparselt_status spmm_typecasting(const char* caller,