diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
index 48758fad0..70216a43a 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(StablehloToExecutable)
+add_subdirectory(TensorRTToExecutable)
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
index 25cf0bbc6..cb35ba816 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
@@ -78,8 +78,7 @@ optionsCreateFromArgs(mlir::MLIRContext *context,
       [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

   if (errMsg)
-    return getInternalErrorStatus("failed to initialize options: %s",
-                                  errMsg->c_str());
+    return getInternalErrorStatus("failed to initialize options: {0}", *errMsg);

   return result;
 }
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
new file mode 100644
index 000000000..e549a6d5c
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(_TABLEGEN_ARGS )
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name TensorRTToExecutable ${_TABLEGEN_ARGS})
+add_public_tablegen_target(MLIRTensorRTTensorRTToExecutableIncGen)
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
new file mode 100644
index 000000000..0b0a31fa5
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
@@ -0,0 +1,48 @@
+//===- Passes.h -------------------------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Declarations for passes and pipelines related to 'tensorrt-to-executable'.
+///
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+
+#include <memory>
+#include <mlir/Pass/Pass.h>
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// Add Tablegen'd pass declarations and registration methods.
+//===----------------------------------------------------------------------===//
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+/// Register the TensorRT clustering and compilation pipelines.
+void registerTensorRTToExecutablePipelines();
+
+} // namespace mlirtrt::compiler
+
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
new file mode 100644
index 000000000..dd89a4d96
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
@@ -0,0 +1,38 @@
+//===- Passes.td ----------------------------------------------------------===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+
+def OutlineTensorRTOpPass : Pass<"outline-tensorrt-op",
+    "::mlir::ModuleOp"> {
+  let summary = "Outline all tensorrt ops into a tensorrt module";
+
+  let dependentDialects = [
+    "::mlir::plan::PlanDialect"
+  ];
+}
+
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
new file mode 100644
index 000000000..ac9e691c6
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
@@ -0,0 +1,97 @@
+//===- TensorRTToExecutable.h -----------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+
+// TODO (pranavm): MLIR_TRT_TARGET_TENSORRT is only needed because we pull in
+// the TranslateToTensorRT.h header. If we move the translation options, we
+// won't need it.
+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+
+#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
+#include "mlir-tensorrt/Compiler/Client.h"
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask;
+
+// TODO (pranavm): Figure out a better way to reuse TRT translation options -
+// maybe move to options providers?
+struct TensorRTOptions : public OptionsProvider<TensorRTOptions> {
+public:
+  using OptionsProvider::OptionsProvider;
+  mlir::tensorrt::TensorRTTranslationOptions options;
+
+  TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {
+    options.addToOptions(ctx);
+  }
+};
+
+struct TensorRTToExecutableOptions
+    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions,
+                                 TensorRTOptions> {
+  // Default initialization does not require any extensions.
+  TensorRTToExecutableOptions() = default;
+
+  TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
+
+  Option<std::string> entrypoint{this, "entrypoint", llvm::cl::init("main"),
+                                 llvm::cl::desc("entrypoint function name")};
+};
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask
+    : public CompilationTask<TensorRTToExecutableTask,
+                             TensorRTToExecutableOptions> {
+public:
+  TensorRTToExecutableTask(mlir::MLIRContext *ctx,
+                           const TensorRTToExecutableOptions &options);
+
+  /// Build the clustering pipeline that runs on TensorRT ops.
+  static void
+  buildTensorRTClusteringPipeline(mlir::OpPassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+
+  /// Build the compilation pipeline that runs after clustering.
+  static void
+  buildPostClusteringPipeline(mlir::OpPassManager &pm,
+                              const TensorRTToExecutableOptions &options);
+
+  static void populatePassManager(mlir::PassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+};
+
+/// Register the task/options with the client's registry.
+void registerTensorRTToExecutableTask();
+
+} // namespace mlirtrt::compiler
+
+MLIR_DECLARE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif // MLIR_TRT_TARGET_TENSORRT
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
index 3356077c0..fe2c4edfb 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
@@ -38,6 +38,10 @@
 #include "stablehlo/transforms/Passes.h"
 #endif // MLIR_TRT_ENABLE_HLO

+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#endif // MLIR_TRT_TARGET_TENSORRT
+
 #ifdef MLIR_TRT_ENABLE_EXECUTOR
 #include "mlir-executor/InitAllPasses.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
@@ -65,6 +69,10 @@ inline void registerAllMlirTensorRtPasses() {
   plan::registerPlanDialectPipelines();
 #endif // MLIR_TRT_ENABLE_HLO

+#ifdef MLIR_TRT_TARGET_TENSORRT
+  mlirtrt::compiler::registerTensorRTToExecutablePipelines();
+#endif // MLIR_TRT_TARGET_TENSORRT
+
 #ifdef MLIR_TRT_ENABLE_EXECUTOR
   registerConvertCUDAToExecutorPass();
   bufferization::registerBufferizationPasses();
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
index 4b065acea..f40cdedbc 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_tensorrt_public_c_api_library(MLIRTensorRTCAPIRegisterAllDialects
   MLIRFuncInlinerExtension
   MLIRTensorInferTypeOpInterfaceImpl
   MLIRTensorRTCompilerStableHloToExecutable
+  MLIRTensorRTCompilerTensorRTToExecutable
   MLIRTensorRTCUDAToLLVM
   MLIRTensorRTCUDATransforms
   MLIRTensorRTHostToLLVM
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
index 1fec30727..320d6ea63 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
@@ -24,6 +24,7 @@
 #include "mlir-tensorrt-c/Compiler/Registration/RegisterAllDialects.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h"
 #include "mlir/CAPI/IR.h"
@@ -38,4 +39,5 @@ void mtrtCompilerRegisterPasses() {

 void mtrtCompilerRegisterTasks() {
   mlirtrt::compiler::registerStableHloToExecutableTask();
+  mlirtrt::compiler::registerTensorRTToExecutableTask();
 }
diff --git a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
index 92006c6d9..12940886b 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient
   MLIRTensorRTSupportDeviceInfo
 )

-add_subdirectory(StablehloToExecutable)
\ No newline at end of file
+add_subdirectory(StablehloToExecutable)
+add_subdirectory(TensorRTToExecutable)
diff
--git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
new file mode 100644
index 000000000..ef4f9948a
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -0,0 +1,31 @@
+add_mlir_tensorrt_library(MLIRTensorRTCompilerTensorRTToExecutable
+  TensorRTToExecutable.cpp
+  Passes.cpp
+
+  PARTIAL_SOURCES_INTENDED
+
+  DEPENDS
+  MLIRTensorRTTensorRTToExecutableIncGen
+
+  LINK_LIBS PUBLIC
+  MLIREmitCTransforms
+  MLIRIR
+  MLIRLLVMDialect
+  MLIRLLVMIRTransforms
+  MLIRTensorRTCompilerClient
+  MLIRTensorRTCUDAToExecutor
+  MLIRTensorRTDropNestedModules
+  MLIRTensorRTExecutorTransforms
+  MLIRTensorRTHostToLLVM
+  MLIRTensorRTMemRefToCUDA
+  MLIRTensorRTOptionUtils
+  MLIRTensorRTPipelines
+  MLIRTensorRTPlanToExecutor
+  MLIRTensorRTPlanTransforms
+  MLIRTensorRTRuntimeToExecutor
+  MLIRTensorRTSCFDetensorizeLoops
+  MLIRTensorRTTargetLua
+  MLIRTensorRTTargetTensorRT
+  MLIRTensorRTTensorRTToTensorRTRuntime
+  MLIRTensorRTTransforms
+  )
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
new file mode 100644
index 000000000..c7db65351
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -0,0 +1,210 @@
+//===- Passes.cpp --------------------------------------------------------===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassOptions.h"
+
+namespace mlirtrt::compiler {
+#define GEN_PASS_DEF_OUTLINETENSORRTOPPASS
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
+} // namespace mlirtrt::compiler
+
+using namespace mlirtrt;
+using namespace mlirtrt::compiler;
+using namespace mlir;
+
+/// ClusteringOpts that identify groups of TensorRT operations to be clustered
+/// into one TensorRT function (which is eventually translated to an engine).
+static FailureOr<ClusteringOpts> getTensorRTClusteringOptions(Operation *op) {
+  ClusteringOpts opts;
+  opts.mergeIndependentClusters = [](Operation *, ClusterRange, Operation *,
+                                     ClusterRange) { return true; };
+  opts.clusterTarget = Attribute{};
+  opts.isClusterableOp = [](Operation *op) {
+    return llvm::isa_and_present<tensorrt::TensorRTDialect>(op->getDialect());
+  };
+
+  return opts;
+}
+
+/// Create a `func.func` operation that represents the outlined cluster region
+/// and insert it into the `module` SymbolTable.
The function is given a name starting with
+/// `nameBase` but may have numbers appended in order to unique the name. The
+/// created function has argument/result types as indicated by the parameters.
+static FailureOr<func::FuncOp>
+createOutlinedFunc(RewriterBase &rewriter, Location loc, Operation *module,
+                   StringRef nameBase, TypeRange funcArgTypes,
+                   TypeRange funcResultTypes) {
+  OpBuilder::InsertionGuard g(rewriter);
+
+  // Create the func for outlining the region body.
+  FunctionType type =
+      FunctionType::get(rewriter.getContext(), funcArgTypes, funcResultTypes);
+  auto outlinedFunc = func::FuncOp::create(loc, nameBase, type, {});
+  Block *funcBody = outlinedFunc.addEntryBlock();
+
+  // Add an empty terminator.
+  rewriter.setInsertionPointToEnd(funcBody);
+  rewriter.create<func::ReturnOp>(loc);
+
+  // Insert into the module.
+  SymbolTable(module).insert(outlinedFunc,
+                             module->getRegions().front().front().end());
+
+  // Tag the function with a UnitAttr for identifying the different kinds of
+  // functions based on the cluster type.
+  return cast<func::FuncOp>(outlinedFunc.getOperation());
+}
+
+/// Check whether the given ModuleOp already contains a `tensorrt.module`
+/// operation. If it does, return the existing `tensorrt.module` operation.
+/// Otherwise, create and insert a new `tensorrt.module`.
+static tensorrt::TensorRTModuleOp
+getOrCreateTensorRTModuleOp(ModuleOp moduleOp) {
+  SymbolTable symbolTable(moduleOp);
+  tensorrt::TensorRTModuleOp result = nullptr;
+  for (auto trtModuleOp :
+       moduleOp.getBody()->getOps<tensorrt::TensorRTModuleOp>()) {
+    result = trtModuleOp;
+    break;
+  }
+  if (result)
+    return result;
+
+  // Create the op. Symbol name de-duplication occurs automatically when
+  // inserting into the symbol table.
+  result = tensorrt::TensorRTModuleOp::create(moduleOp.getLoc(), "trt_engines");
+  symbolTable.insert(result);
+  return result;
+}
+
+static FailureOr<tensorrt::CallAllocOp>
+outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
+          const Cluster &cluster) {
+  auto inlineGroupOp =
+      cast<plan::InlineGroupOp>(mlir::createRegionOpFromCluster(
+          cluster, rewriter,
+          [](OpBuilder &b, Location loc, TypeRange types, Attribute target) {
+            auto regionOp = b.create<plan::InlineGroupOp>(loc, types, target);
+            b.setInsertionPointToStart(&regionOp.getRegion().emplaceBlock());
+            b.create<plan::YieldOp>(loc);
+            return regionOp;
+          }));
+
+  // Make the region isolated from above. This captures the input operands.
+  SmallVector<Value> inputs =
+      makeRegionIsolatedFromAbove(rewriter, inlineGroupOp.getRegion());
+
+  // Create the outlined function.
+  FailureOr<func::FuncOp> func = createOutlinedFunc(
+      rewriter, inlineGroupOp.getLoc(), trtModule, "tensorrt_cluster",
+      TypeRange(inputs), inlineGroupOp->getResultTypes());
+  if (failed(func))
+    return failure();
+
+  rewriter.setInsertionPoint(inlineGroupOp);
+  auto callOp = rewriter.create<tensorrt::CallAllocOp>(
+      inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
+      SymbolRefAttr::get(trtModule.getNameAttr(),
+                         {FlatSymbolRefAttr::get(*func)}));
+
+  // Populate the function entry block.
+  rewriter.eraseBlock(&func->getFunctionBody().front());
+
+  // Move region op operations to the func body.
+  Operation *regionYieldOp = inlineGroupOp.getYield();
+  rewriter.inlineRegionBefore(inlineGroupOp.getRegion(),
+                              func->getFunctionBody(),
+                              func->getFunctionBody().end());
+  rewriter.setInsertionPoint(regionYieldOp);
+  rewriter.replaceOpWithNewOp<func::ReturnOp>(regionYieldOp,
+                                              regionYieldOp->getOperands());
+  // Replace the original region results.
+  rewriter.replaceOp(inlineGroupOp, callOp);
+
+  return callOp;
+}
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+class OutlineTensorRTOpPass
+    : public compiler::impl::OutlineTensorRTOpPassBase<OutlineTensorRTOpPass> {
+public:
+  using Base::Base;
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+    IRRewriter rewriter(&getContext());
+
+    FailureOr<ClusteringOpts> opts = getTensorRTClusteringOptions(module);
+    if (failed(opts)) {
+      emitError(module.getLoc()) << "failed to create clustering options";
+      return signalPassFailure();
+    }
+
+    FailureOr<SmallVector<Cluster>> clusters =
+        mlir::analyzeAndClusterOperations(module, *opts);
+    if (failed(clusters)) {
+      emitError(module.getLoc()) << "failed to cluster operations";
+      return signalPassFailure();
+    }
+
+    tensorrt::TensorRTModuleOp trtModule = getOrCreateTensorRTModuleOp(module);
+
+    for (const auto &cluster : *clusters) {
+      if (failed(outlineOp(rewriter, trtModule, cluster)))
+        return signalPassFailure();
+    }
+  }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorRTToExecutablePassPipelineOptions
+    : public PassPipelineOptionsAdaptor<TensorRTToExecutablePassPipelineOptions,
+                                        TensorRTToExecutableOptions> {};
+} // namespace
+
+void mlirtrt::compiler::registerTensorRTToExecutablePipelines() {
+  PassPipelineRegistration<TensorRTToExecutablePassPipelineOptions>(
+      "tensorrt-clustering-pipeline", "Apply clustering to TensorRT IR",
+      [](OpPassManager &pm,
+         const TensorRTToExecutablePassPipelineOptions &opts) {
+        TensorRTToExecutableTask::buildTensorRTClusteringPipeline(pm, opts);
+      });
+
+  PassPipelineRegistration<TensorRTToExecutablePassPipelineOptions>(
+      "tensorrt-compilation-pipeline", "Apply compilation steps post-clustering",
+      [](OpPassManager &pm,
+         const TensorRTToExecutablePassPipelineOptions &opts) {
+        TensorRTToExecutableTask::buildPostClusteringPipeline(pm, opts);
+      });
+}
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
new file mode 100644
index 000000000..0fb7a5268
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
@@ -0,0 +1,145 @@
+//===- TensorRTToExecutable.cpp ---------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifdef MLIR_TRT_TARGET_TENSORRT
+
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
+#include "mlir-executor/Conversion/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir-tensorrt/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlirtrt::compiler;
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+TensorRTToExecutableOptions::TensorRTToExecutableOptions(
+    TaskExtensionRegistry extensions) {
+  // TODO (pranavm): We don't need extensions - remove from constructor and add
+  // `setExtensions` to base class.
+  assert(extensions.extensions.size() == 0);
+}
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+TensorRTToExecutableTask::TensorRTToExecutableTask(
+    MLIRContext *ctx, const TensorRTToExecutableOptions &options)
+    : CompilationTask(ctx, options) {
+  options.get<DebugOptions>().applyToPassManager(*this);
+}
+
+void TensorRTToExecutableTask::buildTensorRTClusteringPipeline(
+    OpPassManager &pm, const TensorRTToExecutableOptions &opts) {
+  pm.addPass(createOutlineTensorRTOpPass());
+}
+
+void TensorRTToExecutableTask::buildPostClusteringPipeline(
+    OpPassManager &pm, const TensorRTToExecutableOptions &options) {
+  // Post-clustering
+  pm.addPass(createConvertTensorRTToTensorRTRuntimePass());
+
+  pm.addNestedPass<func::FuncOp>(plan::createPostClusteringValidationPass());
+
+  pm.addPass(createCanonicalizerPass());
+
+  pm.addPass(createInlinerPass());
+  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // We then perform some final simplification on the top-level func.func ops
+  // (e.g. public entrypoint functions).
+  pm.addNestedPass<func::FuncOp>(createSCFDetensorizeLoopsPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // Pre-bufferization
+  // Simplify and translate functions nested in `tensorrt.module` ops.
+  auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
+  tensorrt::buildTensorRTModuleTransformationPipeline(
+      trtPM, options.get<TensorRTOptions>().options.enableStronglyTyped);
+  trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+      nullptr, options.get<TensorRTOptions>().options));
+
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(plan::createPlanAllocTensorsPass());
+  pm.addPass(plan::createPlanBufferizePass());
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(bufferization::createDropEquivalentBufferResultsPass());
+  plan::buildPlanBufferOptimizationPipeline(pm);
+  plan::buildPlanBufferDeallocationPipeline(
+      pm, bufferization::DeallocationOptions{
+              /*privateFuncDynamicOwnership=*/false});
+
+  // Post-bufferization
+  pm.addPass(createConvertMemRefToCUDAPass());
+  pm.addPass(createConvertPlanToExecutorPass());
+  pm.addPass(executor::createExecutorAllocsToGlobalsPass());
+  pm.addNestedPass<func::FuncOp>(
+      executor::createExecutorPopulateFunctionMetadataPass());
+
+  // Executor lowering
+  ConvertTensorRTRuntimeToExecutorPassOptions toExecutorOpts;
+  toExecutorOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  toExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertTensorRTRuntimeToExecutorPass(toExecutorOpts));
+
+  ConvertCUDAToExecutorPassOptions cudaToExecutorOpts;
+  cudaToExecutorOpts.indexBitwidth =
+      options.get<ExecutorOptions>().indexBitwidth;
+  cudaToExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertCUDAToExecutorPass(cudaToExecutorOpts));
+
+  pm.addPass(createDropNestedModulesPass());
+}
+
+void TensorRTToExecutableTask::populatePassManager(
+    mlir::PassManager &pm, const TensorRTToExecutableOptions &options) {
+  buildTensorRTClusteringPipeline(pm, options);
+
+  buildPostClusteringPipeline(pm, options);
+
+  mlir::executor::ConvertStdToExecutorPassOptions stdToExecOpts;
+  stdToExecOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  stdToExecOpts.usePackedMemRefCConv = true;
+  mlir::executor::buildExecutorLoweringPipeline(pm, stdToExecOpts);
+}
+
+void mlirtrt::compiler::registerTensorRTToExecutableTask() {
+  registerCompilationTaskWithNoExtensions<TensorRTToExecutableTask,
+                                          TensorRTToExecutableOptions>(
+      "tensorrt-to-executable");
+}
+
+MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif
diff --git a/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir b/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir
new file mode 100644
index 000000000..1f319a0ec
--- /dev/null
+++ b/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-tensorrt-opt %s -tensorrt-clustering-pipeline -split-input-file | FileCheck %s
+
+func.func @trt_relu(%arg0: tensor<2x10xf16>) -> (tensor<2x10xf16>) {
+  %0 = tensorrt.activation {
+    activationType = #tensorrt.activation_type<kRELU>
+  } %arg0 : tensor<2x10xf16>
+  return %0: tensor<2x10xf16>
+}
+
+// CHECK-LABEL: @trt_relu
+// CHECK-DAG: %[[v0:.+]] = tensorrt.call_alloc @trt_engines::@tensorrt_cluster
+// CHECK-DAG: return %[[v0]]
+// CHECK-DAG: @tensorrt_cluster
+// CHECK-DAG: %[[v1:.+]] = tensorrt.activation {activationType = #tensorrt.activation_type<kRELU>}
+// CHECK-DAG: return %[[v1]]
diff --git a/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py b/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py
new file mode 100644
index 000000000..dabb3cd6b
--- /dev/null
+++ b/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py
@@ -0,0 +1,99 @@
+# RUN: %PYTHON %s | FileCheck %s
+import time
+
+import mlir_tensorrt.compiler.api as compiler
+import mlir_tensorrt.compiler.ir as ir
+import mlir_tensorrt.runtime.api as runtime
+import numpy as np
+
+ASM = """
+func.func @main(%arg0: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
+  %1 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
+  func.return %1 : tensor<2x3x4xf32>
+}
+"""
+
+
+def compile(client, op):
+    task = client.get_compilation_task(
+        "tensorrt-to-executable",
+        [
+            "--tensorrt-builder-opt-level=0",
+            "--tensorrt-strongly-typed=true",
+            "--tensorrt-workspace-memory-pool-limit=1024kB",
+        ],
+    )
+    task.run(op)
+    return compiler.translate_mlir_to_executable(op)
+
+
+def tensorrt_add():
+    # Build/parse the main function.
+    with ir.Context() as context:
+        m = ir.Module.parse(ASM)
+
+        # Use the compiler API to compile to executable.
+        client = compiler.CompilerClient(context)
+        exe = compile(client, m.operation)
+
+    # The RuntimeClient can and should persist across multiple Executables, RuntimeSessions, etc.
+    # It is primarily an interface for creating and manipulating buffers.
+    client = runtime.RuntimeClient()
+    stream = client.create_stream()
+    devices = client.get_devices()
+
+    if len(devices) == 0:
+        return
+
+    session_options = runtime.RuntimeSessionOptions(num_devices=1, device_id=0)
+    session = runtime.RuntimeSession(session_options, exe)
+
+    arg0 = client.create_memref(
+        np.arange(0.0, 24.0, dtype=np.float32).reshape(2, 3, 4).data,
+        device=devices[0],
+        stream=stream,
+    )
+    arg1 = client.create_memref(
+        np.zeros(shape=(2, 3, 4), dtype=np.float32).data,
+        device=devices[0],
+        stream=stream,
+    )
+    session.execute_function("main", in_args=[arg0], out_args=[arg1], stream=stream)
+
+    data = np.asarray(client.copy_to_host(arg1, stream=stream))
+    stream.sync()
+
+    print(data)
+
+    # Run execution a bunch more times asynchronously so that it calculates
+    # `x * 2**num_iter`.
+    num_iter = 5
+    start_time = time.time()
+    for _ in range(num_iter):
+        session.execute_function("main", in_args=[arg0], out_args=[arg0], stream=stream)
+    data = np.asarray(client.copy_to_host(arg1, stream=stream))
+    stream.sync()
+    end_time = time.time()
+    elapsed = end_time - start_time
+
+    print(np.asarray(client.copy_to_host(arg0)))
+    print(f"{num_iter} iterations avg {(elapsed / num_iter) * 1000.0} msec per iteration")
+
+
+if __name__ == "__main__":
+    tensorrt_add()
+
+# CHECK: [ 0.  2.  4.  6.]
+# CHECK-NEXT: [ 8. 10. 12. 14.]
+# CHECK-NEXT: [16. 18. 20. 22.]]
+# CHECK-NEXT:
+# CHECK-NEXT: [24. 26. 28. 30.]
+# CHECK-NEXT: [32. 34. 36. 38.]
+# CHECK-NEXT: [40. 42. 44. 46.]]]
+# CHECK-NEXT: [ 0. 32. 64. 96.]
+# CHECK-NEXT: [128. 160. 192. 224.]
+# CHECK-NEXT: [256. 288. 320. 352.]]
+# CHECK-NEXT:
+# CHECK-NEXT: [384. 416. 448. 480.]
+# CHECK-NEXT: [512. 544. 576. 608.]
+# CHECK-NEXT: [640. 672. 704. 736.]
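
Usage note: a minimal Python driver for the new "tensorrt-to-executable" task, distilled from test_tensorrt_add.py above. This is a sketch rather than part of the patch: it assumes Python wheels built with this change, uses only the compiler APIs the test already exercises, and the module string and option value are illustrative.

import mlir_tensorrt.compiler.api as compiler
import mlir_tensorrt.compiler.ir as ir

ASM = """
func.func @main(%arg0: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
  %1 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
  func.return %1 : tensor<2x3x4xf32>
}
"""

with ir.Context() as context:
    module = ir.Module.parse(ASM)
    client = compiler.CompilerClient(context)
    # Look up the task registered under "tensorrt-to-executable" by
    # registerTensorRTToExecutableTask() and run it on the module in place.
    task = client.get_compilation_task(
        "tensorrt-to-executable", ["--tensorrt-builder-opt-level=0"]
    )
    task.run(module.operation)
    # The fully lowered module can then be translated to a runtime executable.
    exe = compiler.translate_mlir_to_executable(module.operation)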