diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
index 48758fad0..70216a43a 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(StablehloToExecutable)
+add_subdirectory(TensorRTToExecutable)
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
index 25cf0bbc6..cb35ba816 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/OptionsRegistry.h
@@ -78,8 +78,7 @@ optionsCreateFromArgs(mlir::MLIRContext *context,
       [&errMsg](const llvm::StringError &err) { errMsg = err.getMessage(); });

   if (errMsg)
-    return getInternalErrorStatus("failed to initialize options: %s",
-                                  errMsg->c_str());
+    return getInternalErrorStatus("failed to initialize options: {0}", *errMsg);

   return result;
 }
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
new file mode 100644
index 000000000..e549a6d5c
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(_TABLEGEN_ARGS )
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name TensorRTToExecutable ${_TABLEGEN_ARGS})
+add_public_tablegen_target(MLIRTensorRTTensorRTToExecutableIncGen)
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
new file mode 100644
index 000000000..0b0a31fa5
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h
@@ -0,0 +1,48 @@
+//===- Passes.h -------------------------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Declarations for passes and pipelines related to 'tensorrt-to-executable'.
+///
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+
+#include <memory>
+#include <mlir/Pass/Pass.h>
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// Add Tablegen'd pass declarations and registration methods.
+//===----------------------------------------------------------------------===//
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+/// Register the TensorRT clustering and compilation pipelines.
+void registerTensorRTToExecutablePipelines();
+
+} // namespace mlirtrt::compiler
+
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
new file mode 100644
index 000000000..dd89a4d96
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td
@@ -0,0 +1,38 @@
+//===- Passes.td ----------------------------------------------------------===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+
+def OutlineTensorRTOpPass : Pass<"outline-tensorrt-op",
+    "::mlir::ModuleOp"> {
+  let summary = "Outline all tensorrt ops into a tensorrt module";
+
+  let dependentDialects = [
+    "::mlir::plan::PlanDialect"
+  ];
+}
+
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
new file mode 100644
index 000000000..ac9e691c6
--- /dev/null
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h
@@ -0,0 +1,97 @@
+//===- TensorRTToExecutable.h -----------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
+
+// TODO (pranavm): MLIR_TRT_TARGET_TENSORRT is only needed because we pull in
+// the TranslateToTensorRT.h header. If we move the translation options, we
+// won't need it.
+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h"
+
+#include "mlir-tensorrt-dialect/Utils/OptionsBundle.h"
+#include "mlir-tensorrt/Compiler/Client.h"
+
+namespace mlirtrt::compiler {
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask;
+
+// TODO (pranavm): Figure out a better way to reuse TRT translation options -
+// maybe move to options providers?
+struct TensorRTOptions : public OptionsProvider<TensorRTOptions> {
+public:
+  using OptionsProvider::OptionsProvider;
+  mlir::tensorrt::TensorRTTranslationOptions options;
+
+  TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {
+    options.addToOptions(ctx);
+  }
+};
+
+struct TensorRTToExecutableOptions
+    : public mlir::OptionsBundle<DebugOptions, ExecutorOptions,
+                                 TensorRTOptions> {
+  // Default initialization does not require any extensions.
+  TensorRTToExecutableOptions() = default;
+
+  TensorRTToExecutableOptions(TaskExtensionRegistry extensions);
+
+  Option<std::string> entrypoint{this, "entrypoint", llvm::cl::init("main"),
+                                 llvm::cl::desc("entrypoint function name")};
+};
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+class TensorRTToExecutableTask
+    : public CompilationTask<TensorRTToExecutableTask,
+                             TensorRTToExecutableOptions> {
+public:
+  TensorRTToExecutableTask(mlir::MLIRContext *ctx,
+                           const TensorRTToExecutableOptions &options);
+
+  /// Build the clustering pipeline that runs on TensorRT ops.
+  static void
+  buildTensorRTClusteringPipeline(mlir::OpPassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+
+  /// Build the compilation pipeline that runs after clustering.
+  static void
+  buildPostClusteringPipeline(mlir::OpPassManager &pm,
+                              const TensorRTToExecutableOptions &options);
+
+  static void populatePassManager(mlir::PassManager &pm,
+                                  const TensorRTToExecutableOptions &options);
+};
+
+/// Register the task/options with the client's registry.
+void registerTensorRTToExecutableTask();
+
+} // namespace mlirtrt::compiler
+
+MLIR_DECLARE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif // MLIR_TRT_TARGET_TENSORRT
+#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE
diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
index 3356077c0..fe2c4edfb 100644
--- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
+++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h
@@ -38,6 +38,10 @@
 #include "stablehlo/transforms/Passes.h"
 #endif // MLIR_TRT_ENABLE_HLO

+#ifdef MLIR_TRT_TARGET_TENSORRT
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#endif // MLIR_TRT_TARGET_TENSORRT
+
 #ifdef MLIR_TRT_ENABLE_EXECUTOR
 #include "mlir-executor/InitAllPasses.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
@@ -65,6 +69,10 @@ inline void registerAllMlirTensorRtPasses() {
   plan::registerPlanDialectPipelines();
 #endif // MLIR_TRT_ENABLE_HLO

+#ifdef MLIR_TRT_TARGET_TENSORRT
+  mlirtrt::compiler::registerTensorRTToExecutablePipelines();
+#endif // MLIR_TRT_TARGET_TENSORRT
+
 #ifdef MLIR_TRT_ENABLE_EXECUTOR
   registerConvertCUDAToExecutorPass();
   bufferization::registerBufferizationPasses();
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
index 4b065acea..f40cdedbc 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_tensorrt_public_c_api_library(MLIRTensorRTCAPIRegisterAllDialects
   MLIRFuncInlinerExtension
   MLIRTensorInferTypeOpInterfaceImpl
   MLIRTensorRTCompilerStableHloToExecutable
+  MLIRTensorRTCompilerTensorRTToExecutable
   MLIRTensorRTCUDAToLLVM
   MLIRTensorRTCUDATransforms
   MLIRTensorRTHostToLLVM
diff --git a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
index 1fec30727..320d6ea63 100644
--- a/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
+++ b/mlir-tensorrt/compiler/lib/CAPI/Compiler/Registration/RegisterAllDialects.cpp
@@ -24,6 +24,7 @@
 #include "mlir-tensorrt-c/Compiler/Registration/RegisterAllDialects.h"
 #include "mlir-tensorrt/Compiler/StablehloToExecutable/StablehloToExecutable.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h"
 #include "mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h"
 #include "mlir/CAPI/IR.h"
@@ -38,4 +39,5 @@ void mtrtCompilerRegisterPasses() {

 void mtrtCompilerRegisterTasks() {
   mlirtrt::compiler::registerStableHloToExecutableTask();
+  mlirtrt::compiler::registerTensorRTToExecutableTask();
 }
diff --git a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
index 92006c6d9..12940886b 100644
--- a/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/Compiler/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerClient
   MLIRTensorRTSupportDeviceInfo
 )

-add_subdirectory(StablehloToExecutable)
\ No newline at end of file
+add_subdirectory(StablehloToExecutable)
+add_subdirectory(TensorRTToExecutable)
diff
--git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
new file mode 100644
index 000000000..ef4f9948a
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt
@@ -0,0 +1,31 @@
+add_mlir_tensorrt_library(MLIRTensorRTCompilerTensorRTToExecutable
+  TensorRTToExecutable.cpp
+  Passes.cpp
+
+  PARTIAL_SOURCES_INTENDED
+
+  DEPENDS
+  MLIRTensorRTTensorRTToExecutableIncGen
+
+  LINK_LIBS PUBLIC
+  MLIREmitCTransforms
+  MLIRIR
+  MLIRLLVMDialect
+  MLIRLLVMIRTransforms
+  MLIRTensorRTCompilerClient
+  MLIRTensorRTCUDAToExecutor
+  MLIRTensorRTDropNestedModules
+  MLIRTensorRTExecutorTransforms
+  MLIRTensorRTHostToLLVM
+  MLIRTensorRTMemRefToCUDA
+  MLIRTensorRTOptionUtils
+  MLIRTensorRTPipelines
+  MLIRTensorRTPlanToExecutor
+  MLIRTensorRTPlanTransforms
+  MLIRTensorRTRuntimeToExecutor
+  MLIRTensorRTSCFDetensorizeLoops
+  MLIRTensorRTTargetLua
+  MLIRTensorRTTargetTensorRT
+  MLIRTensorRTTensorRTToTensorRTRuntime
+  MLIRTensorRTTransforms
+  )
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
new file mode 100644
index 000000000..c7db65351
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp
@@ -0,0 +1,210 @@
+//===- Passes.cpp --------------------------------------------------------===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassOptions.h"
+
+namespace mlirtrt::compiler {
+#define GEN_PASS_DEF_OUTLINETENSORRTOPPASS
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc"
+} // namespace mlirtrt::compiler
+
+using namespace mlirtrt;
+using namespace mlirtrt::compiler;
+using namespace mlir;
+
+/// ClusteringOpts that identify groups of TensorRT operations to be clustered
+/// into one TensorRT function (which is eventually translated to an engine).
+static FailureOr<ClusteringOpts> getTensorRTClusteringOptions(Operation *op) {
+  ClusteringOpts opts;
+  opts.mergeIndependentClusters = [](Operation *, ClusterRange, Operation *,
+                                     ClusterRange) { return true; };
+  opts.clusterTarget = Attribute{};
+  opts.isClusterableOp = [](Operation *op) {
+    return llvm::isa_and_present<tensorrt::TensorRTDialect>(op->getDialect());
+  };
+
+  return opts;
+}
+
+/// Create a `func.func` operation that represents the outlined cluster region
+/// and insert it into the `module` SymbolTable.
The function is given a name starting with
+/// `nameBase` but may have numbers appended in order to unique the name. The
+/// created function has argument/result types as indicated by the parameters.
+static FailureOr<func::FuncOp>
+createOutlinedFunc(RewriterBase &rewriter, Location loc, Operation *module,
+                   StringRef nameBase, TypeRange funcArgTypes,
+                   TypeRange funcResultTypes) {
+  OpBuilder::InsertionGuard g(rewriter);
+
+  // Create the func for outlining the region body.
+  FunctionType type =
+      FunctionType::get(rewriter.getContext(), funcArgTypes, funcResultTypes);
+  auto outlinedFunc = func::FuncOp::create(loc, nameBase, type, {});
+  Block *funcBody = outlinedFunc.addEntryBlock();
+
+  // Add an empty terminator.
+  rewriter.setInsertionPointToEnd(funcBody);
+  rewriter.create<func::ReturnOp>(loc);
+
+  // Insert into the module.
+  SymbolTable(module).insert(outlinedFunc,
+                             module->getRegions().front().front().end());
+
+  // Tag the function with a UnitAttr for identifying the different kinds of
+  // functions based on the cluster type.
+  return cast<func::FuncOp>(outlinedFunc.getOperation());
+}
+
+/// Check whether the given ModuleOp already contains a `tensorrt.module`
+/// operation. If it does, return the existing `tensorrt.module` operation.
+/// Otherwise, create and insert a new `tensorrt.module`.
+static tensorrt::TensorRTModuleOp
+getOrCreateTensorRTModuleOp(ModuleOp moduleOp) {
+  SymbolTable symbolTable(moduleOp);
+  tensorrt::TensorRTModuleOp result = nullptr;
+  for (auto trtModuleOp :
+       moduleOp.getBody()->getOps<tensorrt::TensorRTModuleOp>()) {
+    result = trtModuleOp;
+    break;
+  }
+  if (result)
+    return result;
+
+  // Create the op. Symbol name de-duplication occurs automatically when
+  // inserting into the symbol table.
+  result = tensorrt::TensorRTModuleOp::create(moduleOp.getLoc(), "trt_engines");
+  symbolTable.insert(result);
+  return result;
+}
+
+static FailureOr<tensorrt::CallAllocOp>
+outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule,
+          const Cluster &cluster) {
+  auto inlineGroupOp =
+      cast<plan::InlineGroupOp>(mlir::createRegionOpFromCluster(
+          cluster, rewriter,
+          [](OpBuilder &b, Location loc, TypeRange types, Attribute target) {
+            auto regionOp = b.create<plan::InlineGroupOp>(loc, types, target);
+            b.setInsertionPointToStart(&regionOp.getRegion().emplaceBlock());
+            b.create<plan::YieldOp>(loc);
+            return regionOp;
+          }));
+
+  // Make the region isolated from above. This captures the input operands.
+  SmallVector<Value> inputs =
+      makeRegionIsolatedFromAbove(rewriter, inlineGroupOp.getRegion());
+
+  // Create the outlined function.
+  FailureOr<func::FuncOp> func = createOutlinedFunc(
+      rewriter, inlineGroupOp.getLoc(), trtModule, "tensorrt_cluster",
+      TypeRange(inputs), inlineGroupOp->getResultTypes());
+  if (failed(func))
+    return failure();
+
+  rewriter.setInsertionPoint(inlineGroupOp);
+  auto callOp = rewriter.create<tensorrt::CallAllocOp>(
+      inlineGroupOp.getLoc(), inlineGroupOp.getResultTypes(), inputs,
+      SymbolRefAttr::get(trtModule.getNameAttr(),
+                         {FlatSymbolRefAttr::get(*func)}));
+
+  // Populate the function entry block.
+  rewriter.eraseBlock(&func->getFunctionBody().front());
+
+  // Move region op operations to the func body.
+  Operation *regionYieldOp = inlineGroupOp.getYield();
+  rewriter.inlineRegionBefore(inlineGroupOp.getRegion(),
+                              func->getFunctionBody(),
+                              func->getFunctionBody().end());
+  rewriter.setInsertionPoint(regionYieldOp);
+  rewriter.replaceOpWithNewOp<func::ReturnOp>(regionYieldOp,
+                                              regionYieldOp->getOperands());
+  // Replace the original region results.
+  rewriter.replaceOp(inlineGroupOp, callOp);
+
+  return callOp;
+}
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// OutlineTensorRTOpPass
+//===----------------------------------------------------------------------===//
+class OutlineTensorRTOpPass
+    : public compiler::impl::OutlineTensorRTOpPassBase<OutlineTensorRTOpPass> {
+public:
+  using Base::Base;
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+    IRRewriter rewriter(&getContext());
+
+    FailureOr<ClusteringOpts> opts = getTensorRTClusteringOptions(module);
+    if (failed(opts)) {
+      emitError(module.getLoc()) << "failed to create clustering options";
+      return signalPassFailure();
+    }
+
+    FailureOr<SmallVector<Cluster>> clusters =
+        mlir::analyzeAndClusterOperations(module, *opts);
+    if (failed(clusters)) {
+      emitError(module.getLoc()) << "failed to cluster operations";
+      return signalPassFailure();
+    }
+
+    tensorrt::TensorRTModuleOp trtModule = getOrCreateTensorRTModuleOp(module);
+
+    for (const auto &cluster : *clusters) {
+      if (failed(outlineOp(rewriter, trtModule, cluster)))
+        return signalPassFailure();
+    }
+  }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Pipeline Registrations
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorRTToExecutablePassPipelineOptions
+    : public PassPipelineOptionsAdaptor<TensorRTToExecutablePassPipelineOptions,
+                                        TensorRTToExecutableOptions> {};
+} // namespace
+
+void mlirtrt::compiler::registerTensorRTToExecutablePipelines() {
+  PassPipelineRegistration<TensorRTToExecutablePassPipelineOptions>(
+      "tensorrt-clustering-pipeline", "Apply clustering to TensorRT IR",
+      [](OpPassManager &pm,
+         const TensorRTToExecutablePassPipelineOptions &opts) {
+        TensorRTToExecutableTask::buildTensorRTClusteringPipeline(pm, opts);
+      });
+
+  PassPipelineRegistration<TensorRTToExecutablePassPipelineOptions>(
+      "tensorrt-compilation-pipeline", "Apply compilation steps post-clustering",
+      [](OpPassManager &pm,
+         const TensorRTToExecutablePassPipelineOptions &opts) {
+        TensorRTToExecutableTask::buildPostClusteringPipeline(pm, opts);
+      });
+}
diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
new file mode 100644
index 000000000..0fb7a5268
--- /dev/null
+++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp
@@ -0,0 +1,145 @@
+//===- TensorRTToExecutable.cpp ---------------------------------*- C++ -*-===//
+//
+// SPDX-FileCopyrightText: Copyright 2025 NVIDIA CORPORATION & AFFILIATES.
+// All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+#ifdef MLIR_TRT_TARGET_TENSORRT
+
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h"
+#include "mlir-executor/Conversion/Passes.h"
+#include "mlir-executor/Executor/Transforms/Passes.h"
+#include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h"
+#include "mlir-tensorrt/Compiler/OptionsProviders.h"
+#include "mlir-tensorrt/Compiler/OptionsRegistry.h"
+#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h"
+#include "mlir-tensorrt/Conversion/Passes.h"
+#include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h"
+#include "mlir-tensorrt/Transforms/Passes.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace mlirtrt::compiler;
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableOptions
+//===----------------------------------------------------------------------===//
+
+TensorRTToExecutableOptions::TensorRTToExecutableOptions(
+    TaskExtensionRegistry extensions) {
+  // TODO (pranavm): We don't need extensions - remove from constructor and add
+  // `setExtensions` to base class.
+  assert(extensions.extensions.size() == 0);
+}
+
+//===----------------------------------------------------------------------===//
+// TensorRTToExecutableTask
+//===----------------------------------------------------------------------===//
+
+TensorRTToExecutableTask::TensorRTToExecutableTask(
+    MLIRContext *ctx, const TensorRTToExecutableOptions &options)
+    : CompilationTask(ctx, options) {
+  options.get<DebugOptions>().applyToPassManager(*this);
+}
+
+void TensorRTToExecutableTask::buildTensorRTClusteringPipeline(
+    OpPassManager &pm, const TensorRTToExecutableOptions &opts) {
+  pm.addPass(createOutlineTensorRTOpPass());
+}
+
+void TensorRTToExecutableTask::buildPostClusteringPipeline(
+    OpPassManager &pm, const TensorRTToExecutableOptions &options) {
+  // Post-clustering
+  pm.addPass(createConvertTensorRTToTensorRTRuntimePass());
+
+  pm.addNestedPass<func::FuncOp>(plan::createPostClusteringValidationPass());
+
+  pm.addPass(createCanonicalizerPass());
+
+  pm.addPass(createInlinerPass());
+  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // We then perform some final simplification on the top-level func.func ops
+  // (e.g. public entrypoint functions).
+  pm.addNestedPass<func::FuncOp>(createSCFDetensorizeLoopsPass());
+  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+  // Pre-bufferization
+  // Simplify and translate functions nested in `tensorrt.module` ops.
+  auto &trtPM = pm.nest<tensorrt::TensorRTModuleOp>();
+  tensorrt::buildTensorRTModuleTransformationPipeline(
+      trtPM, options.get<TensorRTOptions>().options.enableStronglyTyped);
+  trtPM.addPass(tensorrt::createTranslateTensorRTPass(
+      nullptr, options.get<TensorRTOptions>().options));
+
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(plan::createPlanAllocTensorsPass());
+  pm.addPass(plan::createPlanBufferizePass());
+  pm.addPass(createMemRefCastEliminationPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(bufferization::createDropEquivalentBufferResultsPass());
+  plan::buildPlanBufferOptimizationPipeline(pm);
+  plan::buildPlanBufferDeallocationPipeline(
+      pm, bufferization::DeallocationOptions{
+              /*privateFuncDynamicOwnership=*/false});
+
+  // Post-bufferization
+  pm.addPass(createConvertMemRefToCUDAPass());
+  pm.addPass(createConvertPlanToExecutorPass());
+  pm.addPass(executor::createExecutorAllocsToGlobalsPass());
+  pm.addNestedPass<func::FuncOp>(
+      executor::createExecutorPopulateFunctionMetadataPass());
+
+  // Executor lowering
+  ConvertTensorRTRuntimeToExecutorPassOptions toExecutorOpts;
+  toExecutorOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  toExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertTensorRTRuntimeToExecutorPass(toExecutorOpts));
+
+  ConvertCUDAToExecutorPassOptions cudaToExecutorOpts;
+  cudaToExecutorOpts.indexBitwidth =
+      options.get<ExecutorOptions>().indexBitwidth;
+  cudaToExecutorOpts.usePackedMemRefCConv =
+      options.get<ExecutorOptions>().usePackedMemRefCConv;
+  pm.addPass(createConvertCUDAToExecutorPass(cudaToExecutorOpts));
+
+  pm.addPass(createDropNestedModulesPass());
+}
+
+void TensorRTToExecutableTask::populatePassManager(
+    mlir::PassManager &pm, const TensorRTToExecutableOptions &options) {
+  buildTensorRTClusteringPipeline(pm, options);
+
+  buildPostClusteringPipeline(pm, options);
+
+  mlir::executor::ConvertStdToExecutorPassOptions stdToExecOpts;
+  stdToExecOpts.indexBitwidth = options.get<ExecutorOptions>().indexBitwidth;
+  stdToExecOpts.usePackedMemRefCConv = true;
+  mlir::executor::buildExecutorLoweringPipeline(pm, stdToExecOpts);
+}
+
+void mlirtrt::compiler::registerTensorRTToExecutableTask() {
+  registerCompilationTaskWithNoExtensions<TensorRTToExecutableTask,
+                                          TensorRTToExecutableOptions>(
+      "tensorrt-to-executable");
+}
+
+MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)
+
+#endif
diff --git a/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir b/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir
new file mode 100644
index 000000000..1f319a0ec
--- /dev/null
+++ b/mlir-tensorrt/compiler/test/Pipelines/TensorRTClustering/tensorrt-clustering.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-tensorrt-opt %s -tensorrt-clustering-pipeline -split-input-file | FileCheck %s
+
+func.func @trt_relu(%arg0: tensor<2x10xf16>) -> (tensor<2x10xf16>) {
+  %0 = tensorrt.activation {
+    activationType = #tensorrt.activation_type<kRELU>
+  } %arg0 : tensor<2x10xf16>
+  return %0: tensor<2x10xf16>
+}
+
+// CHECK-LABEL: @trt_relu
+// CHECK-DAG: %[[v0:.+]] = tensorrt.call_alloc @trt_engines::@tensorrt_cluster
+// CHECK-DAG: return %[[v0]]
+// CHECK-DAG: @tensorrt_cluster
+// CHECK-DAG: %[[v1:.+]] = tensorrt.activation {activationType = #tensorrt.activation_type<kRELU>}
+// CHECK-DAG: return %[[v1]]
diff --git a/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py b/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py
new file mode 100644
index 000000000..dabb3cd6b
--- /dev/null
+++ b/mlir-tensorrt/compiler/test/python/IntegrationTests/test_tensorrt_add.py
@@ -0,0 +1,99 @@
+# RUN: %PYTHON %s | FileCheck %s
+import time
+
+import mlir_tensorrt.compiler.api as compiler
+import mlir_tensorrt.compiler.ir as ir
+import mlir_tensorrt.runtime.api as runtime
+import numpy as np
+
+ASM = """
+func.func @main(%arg0: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
+  %1 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
+  func.return %1 : tensor<2x3x4xf32>
+}
+"""
+
+
+def compile(client, op):
+    task = client.get_compilation_task(
+        "tensorrt-to-executable",
+        [
+            "--tensorrt-builder-opt-level=0",
+            "--tensorrt-strongly-typed=true",
+            "--tensorrt-workspace-memory-pool-limit=1024kB",
+        ],
+    )
+    task.run(op)
+    return compiler.translate_mlir_to_executable(op)
+
+
+def tensorrt_add():
+    # Build/parse the main function.
+    with ir.Context() as context:
+        m = ir.Module.parse(ASM)
+
+        # Use the compiler API to compile to executable.
+        client = compiler.CompilerClient(context)
+        exe = compile(client, m.operation)
+
+    # The RuntimeClient can and should persist across multiple Executables, RuntimeSessions, etc.
+    # It is primarily an interface for creating and manipulating buffers.
+    client = runtime.RuntimeClient()
+    stream = client.create_stream()
+    devices = client.get_devices()
+
+    if len(devices) == 0:
+        return
+
+    session_options = runtime.RuntimeSessionOptions(num_devices=1, device_id=0)
+    session = runtime.RuntimeSession(session_options, exe)
+
+    arg0 = client.create_memref(
+        np.arange(0.0, 24.0, dtype=np.float32).reshape(2, 3, 4).data,
+        device=devices[0],
+        stream=stream,
+    )
+    arg1 = client.create_memref(
+        np.zeros(shape=(2, 3, 4), dtype=np.float32).data,
+        device=devices[0],
+        stream=stream,
+    )
+    session.execute_function("main", in_args=[arg0], out_args=[arg1], stream=stream)
+
+    data = np.asarray(client.copy_to_host(arg1, stream=stream))
+    stream.sync()
+
+    print(data)
+
+    # Run execution a bunch more times asynchronously so that it calculates
+    # `x * 2**num_iter`.
+    num_iter = 5
+    start_time = time.time()
+    for _ in range(num_iter):
+        session.execute_function("main", in_args=[arg0], out_args=[arg0], stream=stream)
+    data = np.asarray(client.copy_to_host(arg1, stream=stream))
+    stream.sync()
+    end_time = time.time()
+    elapsed = end_time - start_time
+
+    print(np.asarray(client.copy_to_host(arg0)))
+    print(f"{num_iter} iterations avg {(elapsed / num_iter) * 1000.0} msec per iteration")
+
+
+if __name__ == "__main__":
+    tensorrt_add()
+
+# CHECK: [ 0.  2.  4.  6.]
+# CHECK-NEXT: [ 8. 10. 12. 14.]
+# CHECK-NEXT: [16. 18. 20. 22.]]
+# CHECK-NEXT:
+# CHECK-NEXT: [24. 26. 28. 30.]
+# CHECK-NEXT: [32. 34. 36. 38.]
+# CHECK-NEXT: [40. 42. 44. 46.]]]
+# CHECK-NEXT: [ 0. 32. 64. 96.]
+# CHECK-NEXT: [128. 160. 192. 224.]
+# CHECK-NEXT: [256. 288. 320. 352.]]
+# CHECK-NEXT:
+# CHECK-NEXT: [384. 416. 448. 480.]
+# CHECK-NEXT: [512. 544. 576. 608.]
+# CHECK-NEXT: [640. 672. 704. 736.]
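
Usage note: a minimal Python driver for the new "tensorrt-to-executable" task, distilled from test_tensorrt_add.py above. This is a sketch rather than part of the patch: it assumes Python wheels built with this change, uses only the compiler APIs the test already exercises, and the module string and option value are illustrative.

import mlir_tensorrt.compiler.api as compiler
import mlir_tensorrt.compiler.ir as ir

ASM = """
func.func @main(%arg0: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
  %1 = tensorrt.element_wise <kSUM>(%arg0, %arg0 : tensor<2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
  func.return %1 : tensor<2x3x4xf32>
}
"""

with ir.Context() as context:
    module = ir.Module.parse(ASM)
    client = compiler.CompilerClient(context)
    # Look up the task registered under "tensorrt-to-executable" by
    # registerTensorRTToExecutableTask() and run it on the module in place.
    task = client.get_compilation_task(
        "tensorrt-to-executable", ["--tensorrt-builder-opt-level=0"]
    )
    task.run(module.operation)
    # The fully lowered module can then be translated to a runtime executable.
    exe = compiler.translate_mlir_to_executable(module.operation)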