Skip to content

Commit e9bd138

Browse files
[Autotuner] Introduce FissionBackends factory.
- We are not yet using backend factories in the pass; we will switch once all the autotuner passes are ported to the new infra. PiperOrigin-RevId: 826034090
1 parent 9a77a88 commit e9bd138

20 files changed

+590
-23
lines changed

xla/backends/gpu/autotuner/BUILD

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,10 +683,15 @@ cc_library(
683683
":cublaslt",
684684
":cudnn",
685685
":factory",
686+
":fission_backend",
686687
":triton",
687688
"//xla/backends/autotuner:codegen_backend",
688689
"//xla/hlo/analysis:symbolic_expr",
690+
"//xla/hlo/pass:hlo_pass_pipeline",
689691
"//xla/service:compiler",
692+
"//xla/service/gpu/transforms:dot_algorithm_rewriter",
693+
"//xla/service/gpu/transforms:gemm_rewriter",
694+
"//xla/stream_executor:device_description",
690695
"//xla/stream_executor:stream_executor_h",
691696
"//xla/stream_executor/cuda:cuda_platform_id",
692697
"//xla/stream_executor/platform:platform_object_registry",
@@ -787,6 +792,60 @@ cc_library(
787792
],
788793
)
789794

795+
cc_library(
796+
name = "fission_backend",
797+
srcs = ["fission_backend.cc"],
798+
hdrs = ["fission_backend.h"],
799+
deps = [
800+
":gpu_codegen_backend",
801+
"//xla/backends/autotuner:codegen_backend",
802+
"//xla/hlo/analysis:symbolic_expr",
803+
"//xla/hlo/ir:hlo",
804+
"//xla/hlo/pass:hlo_pass_pipeline",
805+
"//xla/service:compiler",
806+
"//xla/service:hlo_cost_analysis",
807+
"//xla/service/gpu/transforms:priority_fusion",
808+
"//xla/stream_executor:stream_executor_h",
809+
"//xla/tools:hlo_decomposer_lib",
810+
"//xla/tsl/platform:errors",
811+
"//xla/tsl/platform:statusor",
812+
"@com_google_absl//absl/container:flat_hash_map",
813+
"@com_google_absl//absl/log",
814+
"@com_google_absl//absl/status",
815+
"@com_google_absl//absl/status:statusor",
816+
"@com_google_absl//absl/strings",
817+
],
818+
)
819+
820+
xla_test(
821+
name = "fission_backend_test",
822+
srcs = ["fission_backend_test.cc"],
823+
backends = ["h100"],
824+
tags = ["cuda-only"],
825+
deps = [
826+
":cublas",
827+
":fission_backend",
828+
":gpu_codegen_backend",
829+
"//xla/backends/autotuner:codegen_backend",
830+
"//xla/hlo/analysis:symbolic_expr",
831+
"//xla/hlo/ir:hlo",
832+
"//xla/hlo/pass:hlo_pass_pipeline",
833+
"//xla/hlo/testlib:hlo_hardware_independent_test_base",
834+
"//xla/service:compiler",
835+
"//xla/service:executable",
836+
"//xla/service:platform_util",
837+
"//xla/service/gpu:nvptx_compiler_impl",
838+
"//xla/service/gpu/transforms:dot_algorithm_rewriter",
839+
"//xla/service/gpu/transforms:gemm_rewriter",
840+
"//xla/stream_executor:device_description",
841+
"//xla/stream_executor:stream_executor_h",
842+
"//xla/tsl/platform:statusor",
843+
"@com_google_absl//absl/status:statusor",
844+
"@com_google_googletest//:gtest_main",
845+
"@llvm-project//mlir:IR",
846+
],
847+
)
848+
790849
xla_cc_test(
791850
name = "legacy_cache_test",
792851
srcs = ["legacy_cache_test.cc"],

xla/backends/gpu/autotuner/cublas.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,5 +153,9 @@ absl::Status CublasBackend::ApplyConfig(HloInstruction& instr,
153153
return absl::OkStatus();
154154
}
155155

156+
bool CublasBackend::IsSupported(const HloInstruction& instr) {
157+
return IsLegacyCublasMatmul(instr);
158+
}
159+
156160
} // namespace gpu
157161
} // namespace xla

xla/backends/gpu/autotuner/cublas.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ class CublasBackend : public GpuCodegenBackend {
6060

6161
absl::Status ApplyConfig(HloInstruction& instr,
6262
const BackendConfig& config) override;
63+
64+
private:
65+
bool IsSupported(const HloInstruction& instr) override;
6366
};
6467

6568
} // namespace gpu

xla/backends/gpu/autotuner/cublaslt.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,12 @@ absl::StatusOr<BlasLt::Epilogue> AsBlasLtEpilogue(
7474
}
7575
}
7676

77-
bool IsSupported(const HloInstruction& instr) {
77+
} // namespace
78+
79+
bool CublasLtBackend::IsSupported(const HloInstruction& instr) {
7880
return IsCublasLtMatmul(instr) || IsCublasLtMatmulF8(instr);
7981
}
8082

81-
} // namespace
82-
8383
absl::StatusOr<std::vector<std::unique_ptr<BackendConfig>>>
8484
CublasLtBackend::GetSupportedConfigs(const HloInstruction& instr) {
8585
if (!IsSupported(instr)) {

xla/backends/gpu/autotuner/cublaslt.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ class CublasLtBackend : public GpuCodegenBackend {
5959

6060
absl::Status ApplyConfig(HloInstruction& instr,
6161
const BackendConfig& config) override;
62+
63+
private:
64+
bool IsSupported(const HloInstruction& instr) override;
6265
};
6366

6467
} // namespace gpu

xla/backends/gpu/autotuner/cudnn.cc

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -155,20 +155,6 @@ bool IsSupportedCudnnFusion(const HloInstruction& instr,
155155
return false;
156156
}
157157

158-
bool IsSupportedByCudnn(const HloInstruction& instr,
159-
se::StreamExecutor* stream_executor,
160-
const DebugOptions& debug_options) {
161-
if (instr.opcode() == HloOpcode::kFusion) {
162-
return IsSupportedCudnnFusion(instr, stream_executor, debug_options);
163-
}
164-
165-
if (instr.opcode() == HloOpcode::kCustomCall) {
166-
return IsCustomCallToDnnConvolution(instr);
167-
}
168-
169-
return false;
170-
}
171-
172158
absl::StatusOr<std::vector<CudnnBackendConfig>> GetAlgorithms(
173159
se::dnn::DnnSupport* dnn, se::dnn::ConvolutionKind conv_kind,
174160
se::dnn::DataType input_type, se::dnn::DataType output_type,
@@ -338,6 +324,18 @@ absl::Status ApplyConfigToCudnnCustomCall(HloInstruction& instr,
338324

339325
} // namespace
340326

327+
bool CudnnBackend::IsSupported(const HloInstruction& instr) {
328+
if (instr.opcode() == HloOpcode::kFusion) {
329+
return IsSupportedCudnnFusion(instr, stream_executor(), debug_options());
330+
}
331+
332+
if (instr.opcode() == HloOpcode::kCustomCall) {
333+
return IsCustomCallToDnnConvolution(instr);
334+
}
335+
336+
return false;
337+
}
338+
341339
absl::StatusOr<std::unique_ptr<BackendConfig>> CudnnBackend::GetDefaultConfig(
342340
const HloInstruction& instr) {
343341
if (IsCustomCallToDnnConvolution(instr)) {
@@ -358,7 +356,7 @@ absl::StatusOr<std::unique_ptr<BackendConfig>> CudnnBackend::GetDefaultConfig(
358356

359357
absl::StatusOr<std::vector<std::unique_ptr<BackendConfig>>>
360358
CudnnBackend::GetSupportedConfigs(const HloInstruction& instr) {
361-
if (!IsSupportedByCudnn(instr, stream_executor(), debug_options())) {
359+
if (!IsSupported(instr)) {
362360
return std::vector<std::unique_ptr<BackendConfig>>();
363361
}
364362
if (instr.opcode() == HloOpcode::kFusion) {

xla/backends/gpu/autotuner/cudnn.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ class CudnnBackend : public GpuCodegenBackend {
7272
// apply the configs with non-zero workspace size.
7373
absl::Status ApplyConfig(HloInstruction& instr,
7474
const BackendConfig& config) override;
75+
76+
private:
77+
bool IsSupported(const HloInstruction& instr) override;
7578
};
7679

7780
} // namespace gpu

xla/backends/gpu/autotuner/custom_kernel.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ namespace se = ::stream_executor;
4444

4545
using CustomKernelBackendConfig = AutotuneResult::CustomKernelFusionKey;
4646

47-
namespace {
48-
bool IsSupported(const HloInstruction& instr) {
47+
bool CustomKernelBackend::IsSupported(const HloInstruction& instr) {
4948
if (instr.opcode() != HloOpcode::kFusion) {
5049
LOG(ERROR)
5150
<< "CustomKernelBackend doesn't support non-fusion instructions.";
@@ -61,7 +60,6 @@ bool IsSupported(const HloInstruction& instr) {
6160

6261
return true;
6362
}
64-
} // namespace
6563

6664
absl::StatusOr<std::vector<CustomKernel>> LoadKernels(
6765
const HloInstruction* fusion_instruction,

xla/backends/gpu/autotuner/custom_kernel.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class CustomKernelBackend : public GpuCodegenBackend {
4848

4949
absl::Status ApplyConfig(HloInstruction& instr,
5050
const BackendConfig& config) override;
51+
52+
private:
53+
bool IsSupported(const HloInstruction& instr) override;
5154
};
5255

5356
} // namespace gpu

xla/backends/gpu/autotuner/factory.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ struct GetCodegenBackends {
3636
SymbolicExprContext* symbolic_expr_context)>;
3737
};
3838

39+
struct GetFissionBackends {
40+
using Type = std::function<std::vector<std::unique_ptr<CodegenBackend>>(
41+
stream_executor::StreamExecutor*, const DebugOptions*, Compiler*,
42+
const Compiler::TargetConfig*,
43+
SymbolicExprContext* symbolic_expr_context)>;
44+
};
45+
3946
} // namespace gpu
4047
} // namespace xla
4148

0 commit comments

Comments
 (0)