From 9fc6f3101c015973647f46e1704eab4a0b55e8ae Mon Sep 17 00:00:00 2001
From: pranavm <pranavm@nvidia.com>
Date: Mon, 9 Dec 2024 15:22:04 -0800
Subject: [PATCH 01/10] Add TensorRTToExecutable task skeleton

---
 .../mlir-tensorrt/Compiler/OptionsProviders.h | 12 +++
 .../mlir-tensorrt/Compiler/PassManagerUtils.h | 32 +++++++
 .../Compiler/StableHloToExecutable.h          |  6 +-
 .../Compiler/TensorRTToExecutable.h           | 85 +++++++++++++++++++
 .../Registration/RegisterMlirTensorRtPasses.h |  7 ++
 .../compiler/lib/Compiler/CMakeLists.txt      |  3 +
 .../lib/Compiler/PassManagerUtils.cpp         | 43 ++++++++++
 .../lib/Compiler/StableHloToExecutable.cpp    | 28 +-----
 .../lib/Compiler/TensorRTToExecutable.cpp     | 62 ++++++++++++++
 9 files changed, 249 insertions(+), 29 deletions(-)
 create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
 create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
 create mode 100644 mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp
 create mode 100644 mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
index 8e86f8bd0..6543452be 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
@@ -128,6 +128,18 @@ struct DeviceOptions : public OptionsProvider<DeviceOptions> {
   llvm::Error finalizeImpl();
 };
 
+struct EntrypointOptions : public OptionsProvider<EntrypointOptions> {
+public:
+  /// Entrypoint function name.
+  std::string entrypoint = "main";
+
+public:
+  void addToOptions(mlir::OptionsContext &context) {
+    context.addOption("entrypoint", entrypoint, llvm::cl::init("main"),
+                      llvm::cl::desc("entrypoint function name"));
+  }
+};
+
 } // namespace mlirtrt::compiler
 
 #endif // MLIR_TENSORRT_COMPILER_OPTIONS
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
new file mode 100644
index 000000000..11dc66a3c
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
@@ -0,0 +1,32 @@
+//===- PassManagerUtils.h ---------------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir/Pass/PassManager.h"
+
+using namespace mlirtrt::compiler;
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Common helpers
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
+                                     const DebugOptions &options);
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
index e67b07bf2..5b8fc4d66 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
@@ -52,7 +52,8 @@ namespace mlirtrt::compiler {
 class StablehloToExecutableTask;
 
 struct StablehloToExecutableOptions
-    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions> {
+    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions,
+                                 EntrypointOptions> {
   /// Initializes the options. The extensions in the provided registry
   /// must be extensions for the StableHloToExecutable task.
   StablehloToExecutableOptions(TaskExtensionRegistry extensions);
@@ -64,9 +65,6 @@ struct StablehloToExecutableOptions
   /// and backend types that support allocating results.
   bool enableNonDPSReturns = false;
 
-  /// Entrypoint function name.
-  std::string entrypoint = "main";
-
   /// Base class for extensions associated with StableHloToExecutableTask.
   class ExtensionBase : public TaskExtensionBase {
   public:
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
new file mode 100644
index 000000000..b1de1c8b4
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
@@ -0,0 +1,85 @@
+//===- TensorRTToExecutable.h -----------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+
+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+#include "mlir-tensorrt-dialect/Utils/Options.h"
+#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
+#include "mlir-tensorrt/Compiler/Client.h"
+#include "mlir-tensorrt/Compiler/Extension.h"
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir/Support/TypeID.h"
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+// TODO (pranavm): Figure out a better way to reuse TRT translation options -
+// maybe move to options providers?
+struct TensorRTOptions
+    : public mlirtrt::compiler::OptionsProvider<TensorRTOptions> {
+  mlir::tensorrt::TensorRTTranslationOptions options;
+
+  void addToOptions(mlir::OptionsContext &context) {
+    options.addToOptions(context);
+  }
+};
+
+struct TensorRTToExecutableOptions
+    : public mlir::OptionsBundle<DeviceOptions, DebugOptions, ExecutorOptions,
+                                 EntrypointOptions, TensorRTOptions> {
+
+  TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
+};
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask
+    : public CompilationTask<TensorRTToExecutableTask,
+                             TensorRTToExecutableOptions> {
+public:
+  using Base::Base;
+
+  static void populatePassManager(mlir::PassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+};
+
+/// Register the task/options with the client's registry.
+void registerTensorRTToExecutableTask();
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+// TODO (pranavm): How to do pipeline registration?
+// void registerTensorRTPipelines();
+
+} // namespace mlirtrt::compiler
+
+MLIR_DECLARE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif // MLIR_TRT_TARGET_TENSORRT
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
index b63b83d7f..8f4eb6c0b 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
@@ -23,6 +23,7 @@
 #define REGISTRATION_REGISTERMLIRTENSORRTPASSES_H
 
 #include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Transforms/Passes.h"
 #include "mlir/Conversion/Passes.h"
@@ -52,6 +53,12 @@ inline void registerAllMlirTensorRtPasses() {
   mlir::registerTransformsPasses();
   mlir::registerConvertPDLToPDLInterp();
 
+  // TODO (pranavm): Check if this needs to be conditional - the TRT passes
+  // above are not.
+#ifdef MLIR_TRT_TARGET_TENSORRT
+  mlirtrt::compiler::registerTensorRTToExecutableTask();
+#endif
+
 #ifdef MLIR_TRT_ENABLE_HLO
   mlirtrt::compiler::registerStablehloClusteringPipelines();
   registerStableHloInputPipelines();
diff --git a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
index 746bd2e81..e95381528 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient
     Extension.cpp
     OptionsRegistry.cpp
     OptionsProviders.cpp
+    PassManagerUtils.cpp
     PARTIAL_SOURCES_INTENDED
 
     LINK_LIBS PUBLIC
@@ -19,6 +20,8 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerStableHloToExecutable
     StableHloToExecutable.cpp
     # TODO: TensorRTExtension should be an independent library.
     TensorRTExtension/TensorRTExtension.cpp
+    # TODO (pranavm): TensorRTToExecutable should probably be a separate library
+    TensorRTToExecutable.cpp
 
     PARTIAL_SOURCES_INTENDED
 
diff --git a/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp b/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp
new file mode 100644
index 000000000..4546f4b31
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/PassManagerUtils.cpp
@@ -0,0 +1,43 @@
+//===- PassManagerUtils.cpp -------------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+
+using namespace mlirtrt::compiler;
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Common helpers
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
+                                     const DebugOptions &options) {
+  pm.enableVerifier(true);
+  mlir::applyDefaultTimingPassManagerCLOptions(pm);
+  if (failed(mlir::applyPassManagerCLOptions(pm)))
+    return mlir::failure(); // CL options could not be applied to `pm`.
+  if (!options.dumpIRPath.empty()) { // IR dumps are opt-in via dumpIRPath.
+    pm.enableIRPrintingToFileTree( // NOTE(review): annotate bool args.
+        [](Pass *, Operation *) { return false; },
+        [](Pass *, Operation *) { return true; }, true, false, false,
+        options.dumpIRPath, OpPrintingFlags().elideLargeElementsAttrs(32));
+  }
+  return mlir::success();
+}
diff --git a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
index 3d609a93e..a6cfa9c14 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
@@ -28,10 +28,10 @@
 #include "mlir-executor/Support/Status.h"
 #include "mlir-executor/Target/Lua/TranslateToRuntimeExecutable.h"
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
-#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Compiler/OptionsProviders.h"
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
 #include "mlir-tensorrt/Compiler/TensorRTExtension/TensorRTExtension.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
@@ -60,25 +60,6 @@ using namespace mlir;
 
 #ifdef MLIR_TRT_ENABLE_HLO
 
-//===----------------------------------------------------------------------===//
-// Common helpers
-//===----------------------------------------------------------------------===//
-
-static mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
-                                            const DebugOptions &options) {
-  pm.enableVerifier(true);
-  mlir::applyDefaultTimingPassManagerCLOptions(pm);
-  if (failed(mlir::applyPassManagerCLOptions(pm)))
-    return mlir::failure();
-  if (!options.dumpIRPath.empty()) {
-    pm.enableIRPrintingToFileTree(
-        [](Pass *, Operation *) { return false; },
-        [](Pass *, Operation *) { return true; }, true, false, false,
-        options.dumpIRPath, OpPrintingFlags().elideLargeElementsAttrs(32));
-  }
-  return mlir::success();
-}
-
 //===----------------------------------------------------------------------===//
 // Adhoc Passes
 //===----------------------------------------------------------------------===//
@@ -162,9 +143,6 @@ StablehloToExecutableOptions::StablehloToExecutableOptions(
       disallowHostTensorsInTensorRTClusters, llvm::cl::init(false),
       llvm::cl::desc("Don't allow TensorRt clusters to contain host tensor "
                      "calculations (but they can still be inputs)"));
-
-  addOption("entrypoint", entrypoint, llvm::cl::init("main"),
-            llvm::cl::desc("entrypoint function name"));
 }
 
 //===----------------------------------------------------------------------===//
@@ -189,7 +167,7 @@ void StablehloToExecutableTask::buildStablehloClusteringPipeline(
   populateExtensionPasses(pm, opts, Phase::PreClustering);
 
   plan::StablehloClusteringPassOptions clusteringOpts{};
-  clusteringOpts.entrypoint = opts.entrypoint;
+  clusteringOpts.entrypoint = opts.get<EntrypointOptions>().entrypoint;
   plan::buildPlanSegmentationPipeline(pm, clusteringOpts);
 
   // Compile outlined funcs marked with `cluster.host`. The HLO in these
@@ -465,7 +443,7 @@ static StablehloToExecutableOptions populateStablehloClusteringPipelineOpts(
       cliOpts.deviceMaxSharedMemoryPerBlockKb;
   opts.get<DeviceOptions>().shouldInferFromHost =
       cliOpts.inferDeviceOptionsFromHost;
-  opts.entrypoint = cliOpts.entrypoint;
+  opts.get<EntrypointOptions>().entrypoint = cliOpts.entrypoint;
   return opts;
 }
 
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
new file mode 100644
index 000000000..de33d1908
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
@@ -0,0 +1,62 @@
+//===- TensorRTToExecutable.cpp ---------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifdef MLIR_TRT_TARGET_TENSORRT
+
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h"
+#include "mlir-executor/Conversion/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+
+using namespace mlirtrt::compiler;
+
+TensorRTToExecutableOptions::TensorRTToExecutableOptions(
+    TaskExtensionRegistry extensions) {
+  // TODO (pranavm): Do we need to support extensions?
+}
+
+void TensorRTToExecutableTask::populatePassManager(
+    mlir::PassManager &pm, const TensorRTToExecutableOptions &options) {
+  if (failed(setupPassManager(pm, options.get<DebugOptions>()))) {
+    /// TODO: Ignored. This can fail if pass manager static CL options were not
+    /// registered/initialized. This happens through invocation of e.g. this
+    /// function in e.g. Python bindings or standalone calls to C++ or C API
+    /// without doing all the typical static CL setup. We should instead be
+    /// accepting a PassManager here that has already been setup to the caller's
+    /// specifications.
+  }
+
+  // TODO (pranavm): Which passes go here?
+
+  mlir::executor::ConvertStdToExecutorPassOptions stdToExecOpts;
+  stdToExecOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  stdToExecOpts.usePackedMemRefCConv = true; // TODO: take from ExecutorOptions?
+  mlir::executor::buildExecutorLoweringPipeline(pm, stdToExecOpts);
+}
+
+void mlirtrt::compiler::registerTensorRTToExecutableTask() {
+  registerOption("tensorrt-to-executable",
+                 optionsCreateFromArgs<TensorRTToExecutableOptions,
+                                       TensorRTToExecutableTask>);
+}
+
+MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif // MLIR_TRT_TARGET_TENSORRT

From e50393b33010c6a066ba99ed23ec99055f33c008 Mon Sep 17 00:00:00 2001
From: pranavm <pranavm@nvidia.com>
Date: Tue, 10 Dec 2024 10:13:42 -0800
Subject: [PATCH 02/10] Address review: rename EntrypointOptions to CommonCompilationOptions

---
 .../include/mlir-tensorrt/Compiler/OptionsProviders.h         | 3 ++-
 .../include/mlir-tensorrt/Compiler/PassManagerUtils.h         | 3 ---
 .../include/mlir-tensorrt/Compiler/StableHloToExecutable.h    | 2 +-
 .../include/mlir-tensorrt/Compiler/TensorRTToExecutable.h     | 2 +-
 mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp | 4 ++--
 5 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
index 6543452be..4070778fe 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsProviders.h
@@ -128,7 +128,8 @@ struct DeviceOptions : public OptionsProvider<DeviceOptions> {
   llvm::Error finalizeImpl();
 };
 
-struct EntrypointOptions : public OptionsProvider<EntrypointOptions> {
+struct CommonCompilationOptions
+    : public OptionsProvider<CommonCompilationOptions> {
 public:
   /// Entrypoint function name.
   std::string entrypoint = "main";
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
index 11dc66a3c..0b05d7594 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
@@ -21,9 +21,6 @@
 #include "mlir-tensorrt/Compiler/OptionsProviders.h"
 #include "mlir/Pass/PassManager.h"
 
-using namespace mlirtrt::compiler;
-using namespace mlir;
-
 //===----------------------------------------------------------------------===//
 // Common helpers
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
index 5b8fc4d66..7ae579193 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/StableHloToExecutable.h
@@ -53,7 +53,7 @@ class StablehloToExecutableTask;
 
 struct StablehloToExecutableOptions
     : public mlir::OptionsBundle<DebugOptions, ExecutorOptions, DeviceOptions,
-                                 EntrypointOptions> {
+                                 CommonCompilationOptions> {
   /// Initializes the options. The extensions in the provided registry
   /// must be extensions for the StableHloToExecutable task.
   StablehloToExecutableOptions(TaskExtensionRegistry extensions);
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
index b1de1c8b4..60cac34d0 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
@@ -48,7 +48,7 @@ struct TensorRTOptions
 
 struct TensorRTToExecutableOptions
     : public mlir::OptionsBundle<DeviceOptions, DebugOptions, ExecutorOptions,
-                                 EntrypointOptions, TensorRTOptions> {
+                                 CommonCompilationOptions, TensorRTOptions> {
 
   TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
 };
diff --git a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
index a6cfa9c14..9edf0f23a 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/StableHloToExecutable.cpp
@@ -167,7 +167,7 @@ void StablehloToExecutableTask::buildStablehloClusteringPipeline(
   populateExtensionPasses(pm, opts, Phase::PreClustering);
 
   plan::StablehloClusteringPassOptions clusteringOpts{};
-  clusteringOpts.entrypoint = opts.get<EntrypointOptions>().entrypoint;
+  clusteringOpts.entrypoint = opts.get<CommonCompilationOptions>().entrypoint;
   plan::buildPlanSegmentationPipeline(pm, clusteringOpts);
 
   // Compile outlined funcs marked with `cluster.host`. The HLO in these
@@ -443,7 +443,7 @@ static StablehloToExecutableOptions populateStablehloClusteringPipelineOpts(
       cliOpts.deviceMaxSharedMemoryPerBlockKb;
   opts.get<DeviceOptions>().shouldInferFromHost =
       cliOpts.inferDeviceOptionsFromHost;
-  opts.get<EntrypointOptions>().entrypoint = cliOpts.entrypoint;
+  opts.get<CommonCompilationOptions>().entrypoint = cliOpts.entrypoint;
   return opts;
 }
 

From b95a52afddcc88c5024ca9bdad16424c8bdda9a6 Mon Sep 17 00:00:00 2001
From: pranavm <pranavm@nvidia.com>
Date: Tue, 10 Dec 2024 10:59:29 -0800
Subject: [PATCH 03/10] Add TODO on MLIR_TRT_TARGET_TENSORRT guard in TensorRTToExecutable.h

---
 .../include/mlir-tensorrt/Compiler/TensorRTToExecutable.h      | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
index 60cac34d0..23525cdd8 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable.h
@@ -20,6 +20,9 @@
 #ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
 #define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
 
+// TODO (pranavm): MLIR_TRT_TARGET_TENSORRT is only needed because we pull in
+// the TranslateToTensorRT.h header. If we move the translation options, we
+// won't need it.
 #ifdef MLIR_TRT_TARGET_TENSORRT
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
 #include "mlir-tensorrt-dialect/Utils/Options.h"

From 04bfde222072dc471ca8de902aef4efcb4660ec1 Mon Sep 17 00:00:00 2001
From: pranavm <pranavm@nvidia.com>
Date: Wed, 11 Dec 2024 11:01:42 -0800
Subject: [PATCH 04/10] Add lowering passes to TensorRTToExecutable pipeline

---
 .../mlir-tensorrt/Compiler/PassManagerUtils.h |  5 +-
 .../lib/Compiler/TensorRTToExecutable.cpp     | 69 ++++++++++++++++++-
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
index 0b05d7594..3a70598e7 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/PassManagerUtils.h
@@ -25,5 +25,6 @@
 // Common helpers
 //===----------------------------------------------------------------------===//
 
-mlir::LogicalResult setupPassManager(mlir::PassManager &pm,
-                                     const DebugOptions &options);
+mlir::LogicalResult
+setupPassManager(mlir::PassManager &pm,
+                 const mlirtrt::compiler::DebugOptions &options);
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
index de33d1908..a50ec7599 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
@@ -22,14 +22,23 @@
 #include "mlir-tensorrt/Compiler/TensorRTToExecutable.h"
 #include "mlir-executor/Conversion/Passes.h"
 #include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
 #include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir-tensorrt/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Transforms/Passes.h"
 
+using namespace mlir;
 using namespace mlirtrt::compiler;
 
 TensorRTToExecutableOptions::TensorRTToExecutableOptions(
     TaskExtensionRegistry extensions) {
-  // TODO (pranavm): Do we need to support extensions?
+  // TODO (pranavm): We don't need extensions - remove from constructor and add
+  // `setExtensions` to base class.
+  assert(extensions.extensions.size() == 0 && "extensions are unsupported");
 }
 
 void TensorRTToExecutableTask::populatePassManager(
@@ -43,7 +52,63 @@ void TensorRTToExecutableTask::populatePassManager(
     /// specifications.
   }
 
-  // TODO (pranavm): Which passes go here?
+  // Post-clustering
+  pm.addPass(createConvertTensorRTToTensorRTRuntimePass());
+
+  pm.addNestedPass<func::FuncOp>(plan::createPostClusteringValidationPass());
+
+  pm.addPass(createCanonicalizerPass());
+
+  pm.addPass(createInlinerPass());
+  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // We then perform some final simplification on the top-level func.func ops
+  // (e.g. public entrypoint functions).
+  pm.addNestedPass<func::FuncOp>(createSCFDetensorizeLoopsPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // Pre-bufferization
+  // Simplify and translate functions nested in `tensorrt.module` ops.
+  auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
+  tensorrt::buildTensorRTModuleTransformationPipeline(
+      trtPM, options.get<TensorRTOptions>().options.enableStronglyTyped);
+  trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+      nullptr, nullptr, options.get<TensorRTOptions>().options));
+
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(plan::createPlanAllocTensorsPass());
+  pm.addPass(plan::createPlanBufferizePass());
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(bufferization::createDropEquivalentBufferResultsPass());
+  plan::buildPlanBufferOptimizationPipeline(pm);
+  plan::buildPlanBufferDeallocationPipeline(
+      pm, bufferization::DeallocationOptions{
+              /*privateFuncDynamicOwnership=*/false});
+
+  // Post-bufferization
+  pm.addPass(createConvertMemRefToCUDAPass());
+  pm.addPass(createConvertPlanToExecutorPass());
+  pm.addPass(executor::createExecutorAllocsToGlobalsPass());
+  pm.addNestedPass<func::FuncOp>(
+      executor::createExecutorPopulateFunctionMetadataPass());
+
+  // Executor lowering
+  ConvertTensorRTRuntimeToExecutorPassOptions toExecutorOpts;
+  toExecutorOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  toExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertTensorRTRuntimeToExecutorPass(toExecutorOpts));
+
+  ConvertCUDAToExecutorPassOptions cudaToExecutorOpts;
+  cudaToExecutorOpts.indexBitwidth =
+      options.get<ExecutorOptions>().indexBitwidth;
+  cudaToExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertCUDAToExecutorPass(cudaToExecutorOpts));
+
+  pm.addPass(createDropNestedModulesPass());
 
   mlir::executor::ConvertStdToExecutorPassOptions stdToExecOpts;
   stdToExecOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;

From 88d48b99163136c714a9ee486e762f3d334fe07e Mon Sep 17 00:00:00 2001
From: pranavm <pranavm@nvidia.com>
Date: Wed, 18 Dec 2024 13:55:17 -0800
Subject: [PATCH 05/10] Fix compile: drop extra nullptr arg to createTranslateTensorRTPass

---
 mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
index a50ec7599..796644552 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable.cpp
@@ -74,7 +74,7 @@ void TensorRTToExecutableTask::populatePassManager(
   tensorrt::buildTensorRTModuleTransformationPipeline(
       trtPM, options.get<TensorRTOptions>().options.enableStronglyTyped);
   trtPM.addPass(tensorrt::createTranslateTensorRTPass(
-      nullptr, nullptr, options.get<TensorRTOptions>().options));
+      nullptr, options.get<TensorRTOptions>().options));
 
   pm.addPass(createMemRefCastEliminationPass());
   pm.addPass(plan::createPlanAllocTensorsPass());

From 918361934b8295bb02ad276d2cd12b7563d1ec08 Mon Sep 17 00:00:00 2001
From: Yizhuo Zhang <yizhuoz@nvidia.com>
Date: Thu, 9 Jan 2025 15:29:33 -0800
Subject: [PATCH 06/10] Add OutlineTensorRTOpPass

---
 .../TensorRTToExecutable/CMakeLists.txt       |   4 +
 .../Compiler/TensorRTToExecutable/Passes.h    |   8 +-
 .../Compiler/TensorRTToExecutable/Passes.td   |  34 ++++
 .../TensorRTToExecutable.h                    |  29 ++--
 .../TensorRTToExecutable/CMakeLists.txt       |   3 +-
 .../Compiler/TensorRTToExecutable/Passes.cpp  | 164 ++++++++++++++++++
 .../TensorRTToExecutable.cpp                  |  77 ++++++--
 7 files changed, 289 insertions(+), 30 deletions(-)
 create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
 create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
new file mode 100644
index 000000000..e549a6d5c
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(_TABLEGEN_ARGS )
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name TensorRTToExecutable ${_TABLEGEN_ARGS})
+add_public_tablegen_target(MLIRTensorRTTensorRTToExecutableIncGen)
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
index 68a0ea7ca..53d6eb705 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
@@ -30,14 +30,18 @@
 
 namespace mlirtrt::compiler {
 
-// TODO: Does this also need Tablegen'd pass?
+//===----------------------------------------------------------------------===//
+// Add Tablegen'd pass declarations and registration methods.
+//===----------------------------------------------------------------------===//
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
 
 //===----------------------------------------------------------------------===//
 // Pipeline Registrations
 //===----------------------------------------------------------------------===//
 
 /// Register the TensorRT clustering and compilation pipelines.
-// TODO (pranavm): How to do pipeline registration?
 void registerTensorRTToExecutablePipelines();
 
 } // namespace mlirtrt::compiler
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
new file mode 100644
index 000000000..28c5edc65
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
@@ -0,0 +1,34 @@
+//===- Passes.td ----------------------------------------------------------===//
+//
+// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+
+def OutlineTensorRTOpPass : Pass<"outline-tensorrt-op",
+      "::mlir::ModuleOp"> {
+  let summary = "Outline all tensorrt ops into a tensorrt module";
+}
+
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
index d6e97d0e3..c0d204940 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
@@ -25,11 +25,16 @@
 // won't need it.
 #ifdef MLIR_TRT_TARGET_TENSORRT
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+
+#include "mlir-executor/Runtime/API/API.h"
+#include "mlir-executor/Support/Status.h"
 #include "mlir-tensorrt-dialect/Utils/Options.h"
 #include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
 #include "mlir-tensorrt/Compiler/Client.h"
 #include "mlir-tensorrt/Compiler/Extension.h"
 #include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/PassManager.h"
 #include "mlir/Support/TypeID.h"
 
 namespace mlirtrt::compiler {
@@ -38,12 +43,17 @@ namespace mlirtrt::compiler {
 // TensorRTToExecutableOptions
 //===----------------------------------------------------------------------===//
 
+class TensorRTToExecutableTask;
+
 // TODO (pranavm): Figure out a better way to reuse TRT translation options -
 // maybe move to options providers?
-struct TensorRTOptions
-    : public mlirtrt::compiler::OptionsProvider<TensorRTOptions> {
+struct TensorRTOptions : public OptionsProvider<TensorRTOptions> {
+public:
+  using OptionsProvider::OptionsProvider;
   mlir::tensorrt::TensorRTTranslationOptions options;
 
+  TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {}
+
   void addToOptions(mlir::OptionsContext &context) {
     options.addToOptions(context);
   }
@@ -52,12 +62,10 @@ struct TensorRTOptions
 struct TensorRTToExecutableOptions
     : public mlir::OptionsBundle<DeviceOptions, DebugOptions, ExecutorOptions,
                                  TensorRTOptions> {
+  // Default initialization does not require any extensions.
+  TensorRTToExecutableOptions() = default;
 
   TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
-
-  /// Initializes the options using a default extension set (TensorRT
-  /// extension).
-  StablehloToExecutableOptions();
   
   Option<std::string> entrypoint{this, "entrypoint", llvm::cl::init("main"),
                                  llvm::cl::desc("entrypoint function name")};
@@ -71,6 +79,8 @@ class TensorRTToExecutableTask
     : public CompilationTask<TensorRTToExecutableTask,
                              TensorRTToExecutableOptions> {
 public:
+  TensorRTToExecutableTask(mlir::MLIRContext *ctx,
+                            const TensorRTToExecutableOptions &options);
 
   /// Build the clustering pipeline that occurs on TensorRT Ops.
   static void
@@ -84,13 +94,6 @@ class TensorRTToExecutableTask
 
   static void populatePassManager(mlir::PassManager &pm,
                                   const TensorRTToExecutableOptions &options);
-
-  /// Compile a TensorRT module into a MLIR-TensorRT Runtime executable.
-  /// This is the "functional" entrypoint that will allocate a new PassManager
-  /// for a single run.
-  // static mlirtrt::StatusOr<std::unique_ptr<runtime::Executable>>
-  // compileTensorRTToExecutable(CompilerClient &client, mlir::ModuleOp module,
-  //                              const TensorRTToExecutableOptions &options);
 };
 
 /// Register the task/options with the client's registry.
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
index a32b549b5..63ab3d933 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -1,12 +1,11 @@
 add_mlir_tensorrt_library(MLIRTensorRTCompilerTensorRTToExecutable
     TensorRTToExecutable.cpp
-    TensorRTExtension.cpp
     Passes.cpp
 
     PARTIAL_SOURCES_INTENDED
 
     DEPENDS
-    MLIRTensorRTStablehloToExecutableIncGen
+    MLIRTensorRTTensorRTToExecutableIncGen
 
     LINK_LIBS PUBLIC
     MLIRIR
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
index 0eab83331..fe81518b6 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -22,14 +22,178 @@
 #include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/PassOptions.h"
 
 #ifdef MLIR_TRT_ENABLE_HLO
 
+namespace mlirtrt::compiler {
+#define GEN_PASS_DEF_OUTLINETENSORRTOPPASS
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
+} // namespace mlirtrt::compiler
+
 using namespace mlirtrt;
 using namespace mlirtrt::compiler;
 using namespace mlir;
 
+namespace {
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+
+/// ClusteringOpts that identifies groups of TensorRT operations and will be
+/// clustered into one TensorRT function (which is eventually translated to a
+/// engine).
+static FailureOr<ClusteringOpts>
+getTensorRTClusteringOptions(Operation *op) {
+  ClusteringOpts opts;
+  opts.mergeIndependentClusters = [](Operation *, ClusterRange, Operation *,
+                                     ClusterRange) { return true; };
+  opts.clusterTarget = Attribute{};
+  opts.isClusterableOp = [](Operation *op) {
+    if (llvm::isa<tensorrt::TensorRTDialect>(op->getDialect()))
+      return true;
+    return false;
+  };
+
+  return opts;
+}
+
+/// Create a `func.func` operation that represents `regionOp` and inserts into
+/// the `module` SymbolTable. The function is given a name starting with
+/// `nameBase` but may have numbers appended in order to unique the name. The
+/// created function has argument/result types as indicated by the parameters.
+static FailureOr<FunctionOpInterface>
+createOutlinedFunc(RewriterBase &rewriter, Location loc, Operation *module,
+                   StringRef nameBase, TypeRange funcArgTypes,
+                   TypeRange funcResultTypes) {
+  OpBuilder::InsertionGuard g(rewriter);
+
+  // Create the func for outlining the region body.
+  FunctionType type =
+      FunctionType::get(rewriter.getContext(), funcArgTypes, funcResultTypes);
+  auto outlinedFunc = func::FuncOp::create(loc, nameBase, type, {});
+  Block *funcBody = outlinedFunc.addEntryBlock();
+
+  // Add an empty terminator.
+  rewriter.setInsertionPointToEnd(funcBody);
+  rewriter.create<func::ReturnOp>(loc);
+
+  // Insert into the module.
+  SymbolTable(module).insert(outlinedFunc,
+                             module->getRegions().front().front().end());
+
+  // Tag the function with a UnitAttr for identifying the different kinds of
+  // functions based on the cluster type.
+  return cast<FunctionOpInterface>(outlinedFunc.getOperation());
+}
+
+/// Given the `op`, find the closest ModuleOp and check if the module has a
+/// `tensorrt.module` operation in it. If it does, then return the existing
+/// `tensorrt.module` operation. Otherwise, create a new `tensorrt.module`.
+static tensorrt::TensorRTModuleOp getOrCreateTensorRTModuleOp(Operation *op) {
+  auto moduleOp = op->getParentOfType<ModuleOp>();
+  if (!moduleOp)
+    return nullptr;
+  SymbolTable symbolTable(moduleOp);
+  tensorrt::TensorRTModuleOp result = nullptr;
+  for (auto trtModuleOp :
+       moduleOp.getBody()->getOps<tensorrt::TensorRTModuleOp>()) {
+    result = trtModuleOp;
+    break;
+  }
+  if (result)
+    return result;
+
+  // Create the function. Symbol name de-duplication occurs with insert into the
+  // symbol table.
+  result = tensorrt::TensorRTModuleOp::create(moduleOp.getLoc(), "trt_engines");
+  symbolTable.insert(result, op->getParentOp() == moduleOp ? Block::iterator(op)
+                                                           : Block::iterator{});
+  return result;
+}
+
+static FailureOr<tensorrt::CallAllocOp>
+outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule, plan::InlineGroupOp op) {
+
+  // Make the region isolated from above. This captures the input operands.
+  SmallVector<Value> inputs =
+      makeRegionIsolatedFromAbove(rewriter, op.getRegion());
+
+  // Create the outlined function
+  FailureOr<FunctionOpInterface> func =
+      createOutlinedFunc(rewriter, op.getLoc(), trtModule,
+                         "tensorrt_cluster", TypeRange(inputs), op->getResultTypes());
+  if (failed(func))
+    return failure();
+
+  rewriter.setInsertionPoint(op);
+  auto callOp = rewriter.create<tensorrt::CallAllocOp>(
+      op.getLoc(), op.getResultTypes(), inputs,
+      SymbolRefAttr::get(trtModule.getNameAttr(),
+                         {FlatSymbolRefAttr::get(*func)}));
+
+  // Populate the function entry block.
+  rewriter.eraseBlock(&func->getFunctionBody().front());
+
+  // Move region op operations to the func body.
+  Operation *regionYieldOp = op.getYield();
+  rewriter.inlineRegionBefore(op.getRegion(), func->getFunctionBody(),
+                              func->getFunctionBody().end());
+  rewriter.setInsertionPoint(regionYieldOp);
+  rewriter.replaceOpWithNewOp<func::ReturnOp>(regionYieldOp,
+                                              regionYieldOp->getOperands());
+  // replace the original region results.
+  rewriter.replaceOp(op, callOp);
+
+  return callOp;
+}
+
+
+class OutlineTensorRTOpPass
+    : public compiler::impl::OutlineTensorRTOpPassBase<
+          OutlineTensorRTOpPass> {
+public:
+  using Base::Base;
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+
+    SymbolTableCollection symbolTable;
+    IRRewriter rewriter(&getContext());
+
+    FailureOr<ClusteringOpts> opts = getTensorRTClusteringOptions(module);
+    if (failed(opts)) {
+      emitError(module.getLoc()) << "failed to create clustering options";
+      return signalPassFailure();
+    }
+    // What do they do here?
+    // patterns.add(*opts, createInlineGroupOp, isOpInClusterRegion,
+    //             target.getClusterFilter(),
+    //             PatternBenefit(target.getClusterBenefit()));
+
+    // FailureOr<SmallVector<Operation *>> regionOps =
+    //     rewrite->findClusterAndCreateRegionOp(module, rewriter);
+    // if (failed(regionOps)) {
+    //   emitError(module.getLoc())
+    //       << "clustering rewrite " << rewrite->getTarget() << " failed ";
+    //   return signalPassFailure();
+    // }
+
+    tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(module);
+
+    SmallVector<plan::InlineGroupOp> clusters;
+    module.walk(
+        [&](plan::InlineGroupOp cluster) { clusters.push_back(cluster); });
+
+    for (plan::InlineGroupOp cluster : clusters) {
+      if (failed(outlineOp(rewriter, trtModuleOp, cluster)))
+        return signalPassFailure();
+    }
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // Pipeline Registrations
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
index 88dd9dea0..af39858d7 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
@@ -19,12 +19,13 @@
 //===----------------------------------------------------------------------===//
 #ifdef MLIR_TRT_TARGET_TENSORRT
 
-#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-executor/Conversion/Passes.h"
 #include "mlir-executor/Executor/Transforms/Passes.h"
 #include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
-#include "mlir-tensorrt/Compiler/PassManagerUtils.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
 #include "mlir-tensorrt/Transforms/Passes.h"
@@ -34,6 +35,10 @@
 using namespace mlir;
 using namespace mlirtrt::compiler;
 
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
 TensorRTToExecutableOptions::TensorRTToExecutableOptions(
     TaskExtensionRegistry extensions) {
   // TODO (pranavm): We don't need extensions - remove from constructor and add
@@ -41,19 +46,23 @@ TensorRTToExecutableOptions::TensorRTToExecutableOptions(
   assert(extensions.extensions.size() == 0);
 }
 
-
 //===----------------------------------------------------------------------===//
 // TensorRTToExecutableTask
 //===----------------------------------------------------------------------===//
 
+TensorRTToExecutableTask::TensorRTToExecutableTask(
+    MLIRContext *ctx, const TensorRTToExecutableOptions &options)
+    : CompilationTask(ctx, options) {
+  options.get<DebugOptions>().applyToPassManager(*this);
+}
+
 void TensorRTToExecutableTask::buildTensorRTClusteringPipeline(
     OpPassManager &pm, const TensorRTToExecutableOptions &opts) {
-  // TODO: add TRT clustering passes.
-  return;
+  pm.addPass(createOutlineTensorRTOpPass());
 }
 
 void TensorRTToExecutableTask::buildPostClusteringPipeline(
-    OpPassManager &pm, const TensorRTToExecutableOptions &opts) {
+    OpPassManager &pm, const TensorRTToExecutableOptions &options) {
   // Post-clustering
   pm.addPass(createConvertTensorRTToTensorRTRuntimePass());
 
@@ -115,10 +124,6 @@ void TensorRTToExecutableTask::buildPostClusteringPipeline(
 
 void TensorRTToExecutableTask::populatePassManager(
     mlir::PassManager &pm, const TensorRTToExecutableOptions &options) {
-  pm.addPass(createPopulateDefaultBackendMetadataPass(
-      PopulateDefaultBackendMetadataPassOptions{
-          options.disallowHostTensorsInTensorRTClusters, NV_TENSORRT_MAJOR}));
-
   buildTensorRTClusteringPipeline(pm, options);
 
   buildPostClusteringPipeline(pm, options);
@@ -130,9 +135,55 @@ void TensorRTToExecutableTask::populatePassManager(
 }
 
 void mlirtrt::compiler::registerTensorRTToExecutableTask() {
-  registerOption("tensorrt-to-executable",
-                 optionsCreateFromArgs<TensorRTToExecutableOptions,
-                                       TensorRTToExecutableTask>);
+  registerOption(
+      "tensorrt-to-executable",
+      [](MLIRContext *ctx, ArrayRef<StringRef> opts)
+          -> StatusOr<std::unique_ptr<OptionsContext>> {
+        auto task = optionsCreateFromArgs<TensorRTToExecutableOptions,
+                                          TensorRTToExecutableTask>(ctx, opts);
+        if (!task.isOk())
+          return task.getStatus();
+        return std::unique_ptr<OptionsContext>(std::move(*task));
+      });
+
+  registerCompilationTask<TensorRTToExecutableTask>(
+      "tensorrt-to-executable",
+      [](CompilerClient &client, llvm::ArrayRef<llvm::StringRef> options)
+          -> StatusOr<CompilationTaskBase *> {
+        TensorRTToExecutableOptions result;
+        std::string err;
+        if (failed(result.parse(options, err)))
+          return getInvalidArgStatus(
+              "failed to parse options string \"{0:$[ ]}\" due to error {1}",
+              llvm::iterator_range(options), err);
+
+        llvm::Error finalizeStatus = result.finalize();
+        std::optional<std::string> errMsg{};
+        llvm::handleAllErrors(std::move(finalizeStatus),
+                              [&errMsg](const llvm::StringError &err) {
+                                errMsg = err.getMessage();
+                              });
+
+        if (errMsg)
+          return getInvalidArgStatus("failed to parse options due to error {0}",
+                                     errMsg);
+
+        std::optional<llvm::hash_code> hashCode = result.getHash();
+        if (!hashCode)
+          return getInvalidArgStatus("failed to hash options");
+
+        CompilationTaskBase *cached = client.lookupCachedCompilationTask(
+            mlir::TypeID::get<TensorRTToExecutableTask>(), *hashCode);
+        if (cached)
+          return cached;
+
+        auto newPM = std::make_unique<TensorRTToExecutableTask>(
+            client.getContext(), result);
+        auto ptr = newPM.get();
+        client.updateCachedCompilationTask<TensorRTToExecutableTask>(
+            *hashCode, std::move(newPM));
+        return ptr;
+      });
 }
 
 MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)

From 6e552caca279e6e104e07021adc32bdf1349d625 Mon Sep 17 00:00:00 2001
From: Yizhuo Zhang <yizhuoz@nvidia.com>
Date: Wed, 22 Jan 2025 17:59:54 -0800
Subject: [PATCH 07/10] Add pybindings for TensorRTToExecutableOptions; fix
 TensorRTOptions registration

---
 .../mlir-tensorrt-c/Compiler/Compiler.h       | 34 ++++++++
 .../TensorRTToExecutable.h                    |  6 +-
 .../compiler/lib/CAPI/Compiler/Compiler.cpp   | 86 +++++++++++++++++++
 .../bindings/Compiler/CompilerPyBind.cpp      | 50 +++++++++++
 4 files changed, 172 insertions(+), 4 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h b/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
index 309bd7af7..8315cc950 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
@@ -113,6 +113,40 @@ static inline bool mtrtStableHloToExecutableOptionsIsNull(
   return !options.ptr;
 }
 
+//===----------------------------------------------------------------------===//
+// MTRT_TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+/// Options for compiling StableHLO MLIR to an Executable.
+typedef struct MTRT_TensorRTToExecutableOptions {
+  void *ptr;
+} MTRT_TensorRTToExecutableOptions;
+
+MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsCreate(
+    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
+    int32_t tensorRTBuilderOptLevel, bool tensorRTStronglyTyped);
+
+MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsCreateFromArgs(
+    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
+    const MlirStringRef *argv, unsigned argc);
+
+/// Specifies whether to enable the global LLVM debug flag for the duration of
+/// the compilation process. If the flag is enabled then the debug types
+/// specified in the array of literals are used as the global LLVM debug types
+/// (equivalent to `-debug-only=[list]`).
+MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsSetDebugOptions(
+    MTRT_TensorRTToExecutableOptions options, bool enableDebugging,
+    const char **debugTypes, size_t debugTypeSizes,
+    const char *dumpIrTreeDir = nullptr, const char *dumpTensorRTDir = nullptr);
+
+MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsDestroy(
+    MTRT_TensorRTToExecutableOptions options);
+
+static inline bool mtrtTensorRTToExecutableOptionsIsNull(
+    MTRT_TensorRTToExecutableOptions options) {
+  return !options.ptr;
+}
+
 //===----------------------------------------------------------------------===//
 // PassManagerReference APIs
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
index c0d204940..df56e2852 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
@@ -52,10 +52,8 @@ struct TensorRTOptions : public OptionsProvider<TensorRTOptions> {
   using OptionsProvider::OptionsProvider;
   mlir::tensorrt::TensorRTTranslationOptions options;
 
-  TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {}
-
-  void addToOptions(mlir::OptionsContext &context) {
-    options.addToOptions(context);
+  TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {
+    options.addToOptions(ctx);
   }
 };
 
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
index 86e03d2a2..fe968b07f 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
@@ -33,6 +33,7 @@
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/TensorRTExtension.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Dialect/Plan/IR/Plan.h"
 #include "mlir/CAPI/IR.h"
 #include "mlir/CAPI/Utils.h"
@@ -50,6 +51,8 @@ using namespace mlir;
 DEFINE_C_API_PTR_METHODS(MTRT_CompilerClient, CompilerClient)
 DEFINE_C_API_PTR_METHODS(MTRT_StableHLOToExecutableOptions,
                          StablehloToExecutableOptions)
+DEFINE_C_API_PTR_METHODS(MTRT_TensorRTToExecutableOptions,
+                         TensorRTToExecutableOptions)
 DEFINE_C_API_PTR_METHODS(MTRT_OptionsContext, OptionsContext)
 #if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
@@ -271,6 +274,89 @@ MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
   return mtrtStatusGetOk();
 }
 
+
+//===----------------------------------------------------------------------===//
+// MTRT_TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+MTRT_Status mtrtTensorRTToExecutableOptionsCreate(
+    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
+    int32_t tensorRTBuilderOptLevel, bool tensorRTStronglyTyped) {
+  auto result =
+      std::make_unique<TensorRTToExecutableOptions>();
+  tensorrt::TensorRTTranslationOptions translationOpts = result->get<TensorRTOptions>().options;
+  translationOpts.tensorrtBuilderOptLevel = tensorRTBuilderOptLevel;
+  translationOpts.enableStronglyTyped = tensorRTStronglyTyped;
+
+  llvm::Error finalizeStatus = result->finalize();
+
+  std::optional<std::string> errMsg{};
+  llvm::handleAllErrors(
+      std::move(finalizeStatus),
+      [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
+
+  if (errMsg)
+    return wrap(getInternalErrorStatus(errMsg->c_str()));
+
+  *options = wrap(result.release());
+  return mtrtStatusGetOk();
+}
+
+MTRT_Status mtrtTensorRTToExecutableOptionsCreateFromArgs(
+    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
+    const MlirStringRef *argv, unsigned argc) {
+
+  auto result =
+      std::make_unique<TensorRTToExecutableOptions>();
+  std::vector<llvm::StringRef> argvStrRef(argc);
+  for (unsigned i = 0; i < argc; i++)
+    argvStrRef[i] = llvm::StringRef(argv[i].data, argv[i].length);
+
+  std::string err;
+  if (failed(result->parse(argvStrRef, err))) {
+    std::string line = llvm::join(argvStrRef, " ");
+    return wrap(getInternalErrorStatus(
+        "failed to parse options string {0} due to error: {1}", line, err));
+  }
+
+  llvm::Error finalizeStatus = result->finalize();
+
+  std::optional<std::string> errMsg{};
+  llvm::handleAllErrors(
+      std::move(finalizeStatus),
+      [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
+
+  if (errMsg)
+    return wrap(getInternalErrorStatus(errMsg->c_str()));
+
+  *options = wrap(result.release());
+  return mtrtStatusGetOk();
+}
+
+MTRT_Status mtrtTensorRTToExecutableOptionsSetDebugOptions(
+    MTRT_TensorRTToExecutableOptions options, bool enableDebugging,
+    const char **debugTypes, size_t debugTypeSizes, const char *dumpIrTreeDir,
+    const char *dumpTensorRTDir) {
+
+  TensorRTToExecutableOptions *cppOpts = unwrap(options);
+  cppOpts->get<DebugOptions>().enableLLVMDebugFlag = enableDebugging;
+  for (unsigned i = 0; i < debugTypeSizes; i++)
+    cppOpts->get<DebugOptions>().llvmDebugTypes.emplace_back(debugTypes[i]);
+
+  if (dumpIrTreeDir) {
+    cppOpts->get<DebugOptions>().printTreeDir = std::string(dumpIrTreeDir);
+    cppOpts->get<DebugOptions>().printAfterAll = true;
+  }
+
+  return mtrtStatusGetOk();
+}
+
+MTRT_Status mtrtTensorRTToExecutableOptionsDestroy(
+    MTRT_TensorRTToExecutableOptions options) {
+  delete reinterpret_cast<TensorRTToExecutableOptions *>(options.ptr);
+  return mtrtStatusGetOk();
+}
+
 //===----------------------------------------------------------------------===//
 // Main StableHLO Compiler API Functions
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp b/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
index 480eae834..c45a4ca08 100644
--- a/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
+++ b/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
@@ -72,6 +72,19 @@ class PyStableHLOToExecutableOptions
           mtrtStableHloToExecutableOptionsDestroy};
 };
 
+/// Python object type wrapper for `MTRT_TensorRTToExecutableOptions`.
+class PyTensorRTToExecutableOptions
+    : public PyMTRTWrapper<PyTensorRTToExecutableOptions,
+                           MTRT_TensorRTToExecutableOptions> {
+public:
+  using PyMTRTWrapper::PyMTRTWrapper;
+  DECLARE_WRAPPER_CONSTRUCTORS(PyTensorRTToExecutableOptions);
+  static constexpr auto kMethodTable =
+      CAPITable<MTRT_TensorRTToExecutableOptions>{
+          mtrtTensorRTToExecutableOptionsIsNull,
+          mtrtTensorRTToExecutableOptionsDestroy};
+};
+
 /// Python object type wrapper for `MlirPassManager`.
 class PyPassManagerReference
     : public PyMTRTWrapper<PyPassManagerReference, MlirPassManager> {
@@ -339,6 +352,43 @@ PYBIND11_MODULE(_api, m) {
           py::arg("dump_ir_tree_dir") = py::none(),
           py::arg("dump_tensorrt_dir") = py::none());
 
+  py::class_<PyTensorRTToExecutableOptions>(m, "TensorRTToExecutableOptions",
+                                             py::module_local())
+      .def(py::init<>([](PyCompilerClient &client,
+                         const std::vector<std::string> &args)
+                          -> PyTensorRTToExecutableOptions * {
+             std::vector<MlirStringRef> refs(args.size());
+             for (unsigned i = 0; i < args.size(); i++)
+               refs[i] = mlirStringRefCreate(args[i].data(), args[i].size());
+
+             MTRT_TensorRTToExecutableOptions options;
+             MTRT_Status s = mtrtTensorRTToExecutableOptionsCreateFromArgs(
+                 client, &options, refs.data(), refs.size());
+             THROW_IF_MTRT_ERROR(s);
+             return new PyTensorRTToExecutableOptions(options);
+           }),
+           py::arg("client"), py::arg("args"))
+      .def(
+          "set_debug_options",
+          [](PyTensorRTToExecutableOptions &self, bool enabled,
+             std::vector<std::string> debugTypes,
+             std::optional<std::string> dumpIrTreeDir,
+             std::optional<std::string> dumpTensorRTDir) {
+            // The strings are copied by the CAPI call, so we just need to
+            // refence the C-strings temporarily.
+            std::vector<const char *> literals;
+            for (const std::string &str : debugTypes)
+              literals.push_back(str.c_str());
+            THROW_IF_MTRT_ERROR(mtrtTensorRTToExecutableOptionsSetDebugOptions(
+                self, enabled, literals.data(), literals.size(),
+                dumpIrTreeDir ? dumpIrTreeDir->c_str() : nullptr,
+                dumpTensorRTDir ? dumpTensorRTDir->c_str() : nullptr));
+          },
+          py::arg("enabled"),
+          py::arg("debug_types") = std::vector<std::string>{},
+          py::arg("dump_ir_tree_dir") = py::none(),
+          py::arg("dump_tensorrt_dir") = py::none());
+
   py::class_<PyPassManagerReference>(m, "PassManagerReference",
                                      py::module_local())
       .def("run", [](PyPassManagerReference &self, MlirOperation op) {

From bd34da5cec5abf2228f0495a3120255b31c4ae82 Mon Sep 17 00:00:00 2001
From: Yizhuo Zhang <yizhuoz@nvidia.com>
Date: Thu, 23 Jan 2025 12:59:13 -0800
Subject: [PATCH 08/10] Remove pybindings

---
 .../mlir-tensorrt-c/Compiler/Compiler.h       | 34 --------
 .../mlir-tensorrt/Compiler/OptionsRegistry.h  |  2 +-
 .../compiler/lib/CAPI/Compiler/Compiler.cpp   | 86 -------------------
 .../bindings/Compiler/CompilerPyBind.cpp      | 50 -----------
 4 files changed, 1 insertion(+), 171 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h b/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
index 8315cc950..309bd7af7 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt-c/Compiler/Compiler.h
@@ -113,40 +113,6 @@ static inline bool mtrtStableHloToExecutableOptionsIsNull(
   return !options.ptr;
 }
 
-//===----------------------------------------------------------------------===//
-// MTRT_TensorRTToExecutableOptions
-//===----------------------------------------------------------------------===//
-
-/// Options for compiling StableHLO MLIR to an Executable.
-typedef struct MTRT_TensorRTToExecutableOptions {
-  void *ptr;
-} MTRT_TensorRTToExecutableOptions;
-
-MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsCreate(
-    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
-    int32_t tensorRTBuilderOptLevel, bool tensorRTStronglyTyped);
-
-MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsCreateFromArgs(
-    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
-    const MlirStringRef *argv, unsigned argc);
-
-/// Specifies whether to enable the global LLVM debug flag for the duration of
-/// the compilation process. If the flag is enabled then the debug types
-/// specified in the array of literals are used as the global LLVM debug types
-/// (equivalent to `-debug-only=[list]`).
-MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsSetDebugOptions(
-    MTRT_TensorRTToExecutableOptions options, bool enableDebugging,
-    const char **debugTypes, size_t debugTypeSizes,
-    const char *dumpIrTreeDir = nullptr, const char *dumpTensorRTDir = nullptr);
-
-MLIR_CAPI_EXPORTED MTRT_Status mtrtTensorRTToExecutableOptionsDestroy(
-    MTRT_TensorRTToExecutableOptions options);
-
-static inline bool mtrtTensorRTToExecutableOptionsIsNull(
-    MTRT_TensorRTToExecutableOptions options) {
-  return !options.ptr;
-}
-
 //===----------------------------------------------------------------------===//
 // PassManagerReference APIs
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
index 25cf0bbc6..af6140099 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
@@ -78,7 +78,7 @@ optionsCreateFromArgs(mlir::MLIRContext *context,
       [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
 
   if (errMsg)
-    return getInternalErrorStatus("failed to initialize options: %s",
+    return getInternalErrorStatus("failed to initialize options: {0}",
                                   errMsg->c_str());
 
   return result;
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
index fe968b07f..86e03d2a2 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Compiler.cpp
@@ -33,7 +33,6 @@
 #include "mlir-tensorrt/Compiler/OptionsRegistry.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/TensorRTExtension.h"
-#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Dialect/Plan/IR/Plan.h"
 #include "mlir/CAPI/IR.h"
 #include "mlir/CAPI/Utils.h"
@@ -51,8 +50,6 @@ using namespace mlir;
 DEFINE_C_API_PTR_METHODS(MTRT_CompilerClient, CompilerClient)
 DEFINE_C_API_PTR_METHODS(MTRT_StableHLOToExecutableOptions,
                          StablehloToExecutableOptions)
-DEFINE_C_API_PTR_METHODS(MTRT_TensorRTToExecutableOptions,
-                         TensorRTToExecutableOptions)
 DEFINE_C_API_PTR_METHODS(MTRT_OptionsContext, OptionsContext)
 #if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
@@ -274,89 +271,6 @@ MTRT_Status mtrtStableHloToExecutableOptionsDestroy(
   return mtrtStatusGetOk();
 }
 
-
-//===----------------------------------------------------------------------===//
-// MTRT_TensorRTToExecutableOptions
-//===----------------------------------------------------------------------===//
-
-MTRT_Status mtrtTensorRTToExecutableOptionsCreate(
-    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
-    int32_t tensorRTBuilderOptLevel, bool tensorRTStronglyTyped) {
-  auto result =
-      std::make_unique<TensorRTToExecutableOptions>();
-  tensorrt::TensorRTTranslationOptions translationOpts = result->get<TensorRTOptions>().options;
-  translationOpts.tensorrtBuilderOptLevel = tensorRTBuilderOptLevel;
-  translationOpts.enableStronglyTyped = tensorRTStronglyTyped;
-
-  llvm::Error finalizeStatus = result->finalize();
-
-  std::optional<std::string> errMsg{};
-  llvm::handleAllErrors(
-      std::move(finalizeStatus),
-      [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
-
-  if (errMsg)
-    return wrap(getInternalErrorStatus(errMsg->c_str()));
-
-  *options = wrap(result.release());
-  return mtrtStatusGetOk();
-}
-
-MTRT_Status mtrtTensorRTToExecutableOptionsCreateFromArgs(
-    MTRT_CompilerClient client, MTRT_TensorRTToExecutableOptions *options,
-    const MlirStringRef *argv, unsigned argc) {
-
-  auto result =
-      std::make_unique<TensorRTToExecutableOptions>();
-  std::vector<llvm::StringRef> argvStrRef(argc);
-  for (unsigned i = 0; i < argc; i++)
-    argvStrRef[i] = llvm::StringRef(argv[i].data, argv[i].length);
-
-  std::string err;
-  if (failed(result->parse(argvStrRef, err))) {
-    std::string line = llvm::join(argvStrRef, " ");
-    return wrap(getInternalErrorStatus(
-        "failed to parse options string {0} due to error: {1}", line, err));
-  }
-
-  llvm::Error finalizeStatus = result->finalize();
-
-  std::optional<std::string> errMsg{};
-  llvm::handleAllErrors(
-      std::move(finalizeStatus),
-      [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
-
-  if (errMsg)
-    return wrap(getInternalErrorStatus(errMsg->c_str()));
-
-  *options = wrap(result.release());
-  return mtrtStatusGetOk();
-}
-
-MTRT_Status mtrtTensorRTToExecutableOptionsSetDebugOptions(
-    MTRT_TensorRTToExecutableOptions options, bool enableDebugging,
-    const char **debugTypes, size_t debugTypeSizes, const char *dumpIrTreeDir,
-    const char *dumpTensorRTDir) {
-
-  TensorRTToExecutableOptions *cppOpts = unwrap(options);
-  cppOpts->get<DebugOptions>().enableLLVMDebugFlag = enableDebugging;
-  for (unsigned i = 0; i < debugTypeSizes; i++)
-    cppOpts->get<DebugOptions>().llvmDebugTypes.emplace_back(debugTypes[i]);
-
-  if (dumpIrTreeDir) {
-    cppOpts->get<DebugOptions>().printTreeDir = std::string(dumpIrTreeDir);
-    cppOpts->get<DebugOptions>().printAfterAll = true;
-  }
-
-  return mtrtStatusGetOk();
-}
-
-MTRT_Status mtrtTensorRTToExecutableOptionsDestroy(
-    MTRT_TensorRTToExecutableOptions options) {
-  delete reinterpret_cast<TensorRTToExecutableOptions *>(options.ptr);
-  return mtrtStatusGetOk();
-}
-
 //===----------------------------------------------------------------------===//
 // Main StableHLO Compiler API Functions
 //===----------------------------------------------------------------------===//
diff --git a/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp b/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
index c45a4ca08..480eae834 100644
--- a/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
+++ b/mlir-tensorrt/python/bindings/Compiler/CompilerPyBind.cpp
@@ -72,19 +72,6 @@ class PyStableHLOToExecutableOptions
           mtrtStableHloToExecutableOptionsDestroy};
 };
 
-/// Python object type wrapper for `MTRT_TensorRTToExecutableOptions`.
-class PyTensorRTToExecutableOptions
-    : public PyMTRTWrapper<PyTensorRTToExecutableOptions,
-                           MTRT_TensorRTToExecutableOptions> {
-public:
-  using PyMTRTWrapper::PyMTRTWrapper;
-  DECLARE_WRAPPER_CONSTRUCTORS(PyTensorRTToExecutableOptions);
-  static constexpr auto kMethodTable =
-      CAPITable<MTRT_TensorRTToExecutableOptions>{
-          mtrtTensorRTToExecutableOptionsIsNull,
-          mtrtTensorRTToExecutableOptionsDestroy};
-};
-
 /// Python object type wrapper for `MlirPassManager`.
 class PyPassManagerReference
     : public PyMTRTWrapper<PyPassManagerReference, MlirPassManager> {
@@ -352,43 +339,6 @@ PYBIND11_MODULE(_api, m) {
           py::arg("dump_ir_tree_dir") = py::none(),
           py::arg("dump_tensorrt_dir") = py::none());
 
-  py::class_<PyTensorRTToExecutableOptions>(m, "TensorRTToExecutableOptions",
-                                             py::module_local())
-      .def(py::init<>([](PyCompilerClient &client,
-                         const std::vector<std::string> &args)
-                          -> PyTensorRTToExecutableOptions * {
-             std::vector<MlirStringRef> refs(args.size());
-             for (unsigned i = 0; i < args.size(); i++)
-               refs[i] = mlirStringRefCreate(args[i].data(), args[i].size());
-
-             MTRT_TensorRTToExecutableOptions options;
-             MTRT_Status s = mtrtTensorRTToExecutableOptionsCreateFromArgs(
-                 client, &options, refs.data(), refs.size());
-             THROW_IF_MTRT_ERROR(s);
-             return new PyTensorRTToExecutableOptions(options);
-           }),
-           py::arg("client"), py::arg("args"))
-      .def(
-          "set_debug_options",
-          [](PyTensorRTToExecutableOptions &self, bool enabled,
-             std::vector<std::string> debugTypes,
-             std::optional<std::string> dumpIrTreeDir,
-             std::optional<std::string> dumpTensorRTDir) {
-            // The strings are copied by the CAPI call, so we just need to
-            // refence the C-strings temporarily.
-            std::vector<const char *> literals;
-            for (const std::string &str : debugTypes)
-              literals.push_back(str.c_str());
-            THROW_IF_MTRT_ERROR(mtrtTensorRTToExecutableOptionsSetDebugOptions(
-                self, enabled, literals.data(), literals.size(),
-                dumpIrTreeDir ? dumpIrTreeDir->c_str() : nullptr,
-                dumpTensorRTDir ? dumpTensorRTDir->c_str() : nullptr));
-          },
-          py::arg("enabled"),
-          py::arg("debug_types") = std::vector<std::string>{},
-          py::arg("dump_ir_tree_dir") = py::none(),
-          py::arg("dump_tensorrt_dir") = py::none());
-
   py::class_<PyPassManagerReference>(m, "PassManagerReference",
                                      py::module_local())
       .def("run", [](PyPassManagerReference &self, MlirOperation op) {

From 065d7e305cad767eb960cc6f5f47972174928df1 Mon Sep 17 00:00:00 2001
From: Yizhuo Zhang <yizhuoz@nvidia.com>
Date: Thu, 23 Jan 2025 15:59:24 -0800
Subject: [PATCH 09/10] Address comments

---
 .../mlir-tensorrt/Compiler/OptionsRegistry.h  |  3 +-
 .../Compiler/TensorRTToExecutable/Passes.h    |  2 +-
 .../Compiler/TensorRTToExecutable/Passes.td   |  2 +-
 .../TensorRTToExecutable.h                    | 10 +---
 .../Registration/RegisterMlirTensorRtPasses.h |  7 ---
 .../Registration/RegisterAllDialects.cpp      |  2 +
 .../Compiler/TensorRTToExecutable/Passes.cpp  | 60 ++++++++-----------
 .../TensorRTToExecutable.cpp                  | 54 ++---------------
 8 files changed, 36 insertions(+), 104 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
index af6140099..cb35ba816 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
@@ -78,8 +78,7 @@ optionsCreateFromArgs(mlir::MLIRContext *context,
       [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });
 
   if (errMsg)
-    return getInternalErrorStatus("failed to initialize options: {0}",
-                                  errMsg->c_str());
+    return getInternalErrorStatus("failed to initialize options: {0}", *errMsg);
 
   return result;
 }
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
index 53d6eb705..be0d1a730 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
@@ -1,6 +1,6 @@
 //===- Passes.h ----------------------------------------------===//
 //
-// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
 // All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
index 28c5edc65..05547532d 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
@@ -1,6 +1,6 @@
 //===- Passes.td ----------------------------------------------------------===//
 //
-// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
 // All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
index df56e2852..c7eb3a1a9 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
@@ -1,6 +1,6 @@
 //===- TensorRTToExecutable.h -----------------------------------*- C++ -*-===//
 //
-// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
 // All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -26,16 +26,8 @@
 #ifdef MLIR_TRT_TARGET_TENSORRT
 #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
 
-#include "mlir-executor/Runtime/API/API.h"
-#include "mlir-executor/Support/Status.h"
-#include "mlir-tensorrt-dialect/Utils/Options.h"
 #include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
 #include "mlir-tensorrt/Compiler/Client.h"
-#include "mlir-tensorrt/Compiler/Extension.h"
-#include "mlir-tensorrt/Compiler/OptionsProviders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Support/TypeID.h"
 
 namespace mlirtrt::compiler {
 
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
index ac87e2a26..7ac779ec8 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
@@ -23,7 +23,6 @@
 #define REGISTRATION_REGISTERMLIRTENSORRTPASSES_H
 
 #include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
-#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Conversion/Passes.h"
 #include "mlir-tensorrt/Transforms/Passes.h"
 #include "mlir/Conversion/Passes.h"
@@ -54,12 +53,6 @@ inline void registerAllMlirTensorRtPasses() {
   mlir::registerTransformsPasses();
   mlir::registerConvertPDLToPDLInterp();
 
-  // TODO (pranavm): Check if this needs to be conditional - the TRT passes
-  // above are not.
-#ifdef MLIR_TRT_TARGET_TENSORRT
-  mlirtrt::compiler::registerTensorRTToExecutableTask();
-#endif
-
 #ifdef MLIR_TRT_ENABLE_HLO
   mlirtrt::compiler::registerStablehloToExecutablePasses();
   mlirtrt::compiler::registerStablehloToExecutablePipelines();
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
index 1fec30727..320d6ea63 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
@@ -24,6 +24,7 @@
 
 #include "mlir-tensorrt-c/Compiler/Registration/RegisterAllDialects.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h"
 #include "mlir/CAPI/IR.h"
@@ -38,4 +39,7 @@ void mtrtCompilerRegisterPasses() {
 
 void mtrtCompilerRegisterTasks() {
   mlirtrt::compiler::registerStableHloToExecutableTask();
+#ifdef MLIR_TRT_TARGET_TENSORRT
+  mlirtrt::compiler::registerTensorRTToExecutableTask();
+#endif // MLIR_TRT_TARGET_TENSORRT
 }
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
index fe81518b6..85ab6df9e 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -1,6 +1,6 @@
 //===- Passes.cpp --------------------------------------------------------===//
 //
-// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
 // All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -25,8 +25,6 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/PassOptions.h"
 
-#ifdef MLIR_TRT_ENABLE_HLO
-
 namespace mlirtrt::compiler {
 #define GEN_PASS_DEF_OUTLINETENSORRTOPPASS
 #include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
@@ -52,9 +50,7 @@ getTensorRTClusteringOptions(Operation *op) {
                                      ClusterRange) { return true; };
   opts.clusterTarget = Attribute{};
   opts.isClusterableOp = [](Operation *op) {
-    if (llvm::isa<tensorrt::TensorRTDialect>(op->getDialect()))
-      return true;
-    return false;
+    return llvm::isa_and_present<tensorrt::TensorRTDialect>(op->getDialect());
   };
 
   return opts;
@@ -115,22 +111,30 @@ static tensorrt::TensorRTModuleOp getOrCreateTensorRTModuleOp(Operation *op) {
 }
 
 static FailureOr<tensorrt::CallAllocOp>
-outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule, plan::InlineGroupOp op) {
+outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule, const Cluster& cluster) {
+  auto inlineGroupOp = cast<plan::InlineGroupOp>(mlir::createRegionOpFromCluster(
+      cluster, rewriter,
+      [](OpBuilder &b, Location loc, TypeRange types, Attribute target) {
+        auto regionOp = b.create<plan::InlineGroupOp>(loc, types, target);
+        b.setInsertionPointToStart(&regionOp.getRegion().emplaceBlock());
+        b.create<plan::YieldOp>(loc);
+        return regionOp;
+      }));
 
   // Make the region isolated from above. This captures the input operands.
   SmallVector<Value> inputs =
-      makeRegionIsolatedFromAbove(rewriter, op.getRegion());
+      makeRegionIsolatedFromAbove(rewriter, inlineGroupOp.getRegion());
 
   // Create the outlined function
   FailureOr<FunctionOpInterface> func =
-      createOutlinedFunc(rewriter, op.getLoc(), trtModule,
-                         "tensorrt_cluster", TypeRange(inputs), op->getResultTypes());
+      createOutlinedFunc(rewriter, inlineGroupOp.getLoc(), trtModule,
+                         "tensorrt_cluster", TypeRange(inputs), inlineGroupOp->getResultTypes());
   if (failed(func))
     return failure();
 
-  rewriter.setInsertionPoint(op);
+  rewriter.setInsertionPoint(inlineGroupOp);
   auto callOp = rewriter.create<tensorrt::CallAllocOp>(
-      op.getLoc(), op.getResultTypes(), inputs,
+      inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
       SymbolRefAttr::get(trtModule.getNameAttr(),
                          {FlatSymbolRefAttr::get(*func)}));
 
@@ -138,14 +142,14 @@ outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule, plan::In
   rewriter.eraseBlock(&func->getFunctionBody().front());
 
   // Move region op operations to the func body.
-  Operation *regionYieldOp = op.getYield();
-  rewriter.inlineRegionBefore(op.getRegion(), func->getFunctionBody(),
+  Operation *regionYieldOp = inlineGroupOp.getYield();
+  rewriter.inlineRegionBefore(inlineGroupOp.getRegion(), func->getFunctionBody(),
                               func->getFunctionBody().end());
   rewriter.setInsertionPoint(regionYieldOp);
   rewriter.replaceOpWithNewOp<func::ReturnOp>(regionYieldOp,
                                               regionYieldOp->getOperands());
   // replace the original region results.
-  rewriter.replaceOp(op, callOp);
+  rewriter.replaceOp(inlineGroupOp, callOp);
 
   return callOp;
 }
@@ -167,26 +171,16 @@ class OutlineTensorRTOpPass
       emitError(module.getLoc()) << "failed to create clustering options";
       return signalPassFailure();
     }
-    // What do they do here?
-    // patterns.add(*opts, createInlineGroupOp, isOpInClusterRegion,
-    //             target.getClusterFilter(),
-    //             PatternBenefit(target.getClusterBenefit()));
-
-    // FailureOr<SmallVector<Operation *>> regionOps =
-    //     rewrite->findClusterAndCreateRegionOp(module, rewriter);
-    // if (failed(regionOps)) {
-    //   emitError(module.getLoc())
-    //       << "clustering rewrite " << rewrite->getTarget() << " failed ";
-    //   return signalPassFailure();
-    // }
 
-    tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(module);
+    FailureOr<SmallVector<Cluster>> clusters = mlir::analyzeAndClusterOperations(module, *opts);
+    if (failed(clusters)) {
+      emitError(module.getLoc()) << "failed to cluster operations";
+      return signalPassFailure();
+    }
 
-    SmallVector<plan::InlineGroupOp> clusters;
-    module.walk(
-        [&](plan::InlineGroupOp cluster) { clusters.push_back(cluster); });
+    tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(module);
 
-    for (plan::InlineGroupOp cluster : clusters) {
+    for (const auto& cluster : *clusters) {
       if (failed(outlineOp(rewriter, trtModuleOp, cluster)))
         return signalPassFailure();
     }
@@ -221,5 +215,3 @@ void mlirtrt::compiler::registerTensorRTToExecutablePipelines() {
         TensorRTToExecutableTask::buildPostClusteringPipeline(pm, opts);
       });
 }
-
-#endif // MLIR_TRT_ENABLE_HLO
\ No newline at end of file
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
index af39858d7..0fb7a5268 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
@@ -1,6 +1,6 @@
 //===- TensorRTToExecutable.cpp ---------------------------------*- C++ -*-===//
 //
-// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES.
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
 // All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -135,55 +135,9 @@ void TensorRTToExecutableTask::populatePassManager(
 }
 
 void mlirtrt::compiler::registerTensorRTToExecutableTask() {
-  registerOption(
-      "tensorrt-to-executable",
-      [](MLIRContext *ctx, ArrayRef<StringRef> opts)
-          -> StatusOr<std::unique_ptr<OptionsContext>> {
-        auto task = optionsCreateFromArgs<TensorRTToExecutableOptions,
-                                          TensorRTToExecutableTask>(ctx, opts);
-        if (!task.isOk())
-          return task.getStatus();
-        return std::unique_ptr<OptionsContext>(std::move(*task));
-      });
-
-  registerCompilationTask<TensorRTToExecutableTask>(
-      "tensorrt-to-executable",
-      [](CompilerClient &client, llvm::ArrayRef<llvm::StringRef> options)
-          -> StatusOr<CompilationTaskBase *> {
-        TensorRTToExecutableOptions result;
-        std::string err;
-        if (failed(result.parse(options, err)))
-          return getInvalidArgStatus(
-              "failed to parse options string \"{0:$[ ]}\" due to error {1}",
-              llvm::iterator_range(options), err);
-
-        llvm::Error finalizeStatus = result.finalize();
-        std::optional<std::string> errMsg{};
-        llvm::handleAllErrors(std::move(finalizeStatus),
-                              [&errMsg](const llvm::StringError &err) {
-                                errMsg = err.getMessage();
-                              });
-
-        if (errMsg)
-          return getInvalidArgStatus("failed to parse options due to error {0}",
-                                     errMsg);
-
-        std::optional<llvm::hash_code> hashCode = result.getHash();
-        if (!hashCode)
-          return getInvalidArgStatus("failed to hash options");
-
-        CompilationTaskBase *cached = client.lookupCachedCompilationTask(
-            mlir::TypeID::get<TensorRTToExecutableTask>(), *hashCode);
-        if (cached)
-          return cached;
-
-        auto newPM = std::make_unique<TensorRTToExecutableTask>(
-            client.getContext(), result);
-        auto ptr = newPM.get();
-        client.updateCachedCompilationTask<TensorRTToExecutableTask>(
-            *hashCode, std::move(newPM));
-        return ptr;
-      });
+  registerCompilationTaskWithNoExtensions<TensorRTToExecutableTask,
+                                          TensorRTToExecutableOptions>(
+      "tensorrt-to-executable");
 }
 
 MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)

From a0c1876168183458b51da4284704e12dda1857e3 Mon Sep 17 00:00:00 2001
From: Yizhuo Zhang <yizhuoz@nvidia.com>
Date: Fri, 24 Jan 2025 16:01:40 -0800
Subject: [PATCH 10/10] Add dependent dialect, fix outlineOp pass

---
 .../Compiler/TensorRTToExecutable/Passes.td     |  4 ++++
 .../Registration/RegisterMlirTensorRtPasses.h   |  5 +++++
 .../Compiler/TensorRTToExecutable/Passes.cpp    | 17 ++++++-----------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
index 05547532d..dd89a4d96 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
@@ -29,6 +29,10 @@ include "mlir/Pass/PassBase.td"
 def OutlineTensorRTOpPass : Pass<"outline-tensorrt-op",
       "::mlir::ModuleOp"> {
   let summary = "Outline all tensorrt ops into a tensorrt module";
+
+  let dependentDialects = [
+    "::mlir::plan::PlanDialect"
+  ];
 }
 
 #endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
index 7ac779ec8..3ba7d3119 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
@@ -31,6 +31,7 @@
 #ifdef MLIR_TRT_ENABLE_HLO
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/Passes.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
 #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
 #include "mlir-tensorrt/Dialect/StableHloExt/Transforms/Passes.h"
 #include "mlir-tensorrt/Pipelines/StableHloInputPipelines.h"
@@ -63,6 +64,10 @@ inline void registerAllMlirTensorRtPasses() {
   plan::registerPlanDialectPipelines();
 #endif // MLIR_TRT_ENABLE_HLO
 
+#if defined(MLIR_TRT_ENABLE_HLO) && defined(MLIR_TRT_TARGET_TENSORRT)
+  mlirtrt::compiler::registerTensorRTToExecutablePipelines();
+#endif // MLIR_TRT_ENABLE_HLO && MLIR_TRT_TARGET_TENSORRT
+
 #ifdef MLIR_TRT_ENABLE_EXECUTOR
   registerConvertCUDAToExecutorPass();
   bufferization::registerBufferizationPasses();
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
index 85ab6df9e..1ade607e7 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -88,10 +88,8 @@ createOutlinedFunc(RewriterBase &rewriter, Location loc, Operation *module,
 /// Given the `op`, find the closest ModuleOp and check if the module has a
 /// `tensorrt.module` operation in it. If it does, then return the existing
 /// `tensorrt.module` operation. Otherwise, create a new `tensorrt.module`.
-static tensorrt::TensorRTModuleOp getOrCreateTensorRTModuleOp(Operation *op) {
-  auto moduleOp = op->getParentOfType<ModuleOp>();
-  if (!moduleOp)
-    return nullptr;
+static tensorrt::TensorRTModuleOp
+getOrCreateTensorRTModuleOp(ModuleOp moduleOp) {
   SymbolTable symbolTable(moduleOp);
   tensorrt::TensorRTModuleOp result = nullptr;
   for (auto trtModuleOp :
@@ -105,8 +103,7 @@ static tensorrt::TensorRTModuleOp getOrCreateTensorRTModuleOp(Operation *op) {
   // Create the function. Symbol name de-duplication occurs with insert into the
   // symbol table.
   result = tensorrt::TensorRTModuleOp::create(moduleOp.getLoc(), "trt_engines");
-  symbolTable.insert(result, op->getParentOp() == moduleOp ? Block::iterator(op)
-                                                           : Block::iterator{});
+  symbolTable.insert(result);
   return result;
 }
 
@@ -162,8 +159,6 @@ class OutlineTensorRTOpPass
   using Base::Base;
   void runOnOperation() override {
     ModuleOp module = getOperation();
-
-    SymbolTableCollection symbolTable;
     IRRewriter rewriter(&getContext());
 
     FailureOr<ClusteringOpts> opts = getTensorRTClusteringOptions(module);
@@ -178,10 +173,10 @@ class OutlineTensorRTOpPass
       return signalPassFailure();
     }
 
-    tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(module);
+    tensorrt::TensorRTModuleOp trtModule = getOrCreateTensorRTModuleOp(module);
 
-    for (const auto& cluster : *clusters) {
-      if (failed(outlineOp(rewriter, trtModuleOp, cluster)))
+    for (const auto &cluster : *clusters) {
+      if (failed(outlineOp(rewriter, trtModule, cluster)))
         return signalPassFailure();
     }
   }