diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index afad75707d..67f359d324 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -64,6 +64,18 @@ jobs:
     steps:
     - uses: actions/checkout@v5
 
+    - name: free up disk space on Ubuntu runner
+      if: ${{ matrix.os == 'ubuntu-24.04' }}
+      uses: jlumbroso/free-disk-space@main
+
+    - name: Setup Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.x'
+
+    - name: Install Python packages
+      run: pip install numpy torch
+
     - name: Create Build Environment
       # Some projects don't allow in-source building, so create a separate build directory
       # We'll use this as our working directory for all subsequent commands
diff --git a/.gitignore b/.gitignore
index 84940d475d..1aac3de827 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,6 +65,8 @@ _deps
 # End of https://www.gitignore.io/api/cmake
 # build directory
 build
+_codeql_build_dir
+_codeql_detected_source_root
 
 .clangd
 .vscode
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2a5b9b4570..3c681dc1e6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -350,6 +350,7 @@ set(SeQuant_src
         SeQuant/core/utility/macros.hpp
         SeQuant/core/utility/nodiscard.hpp
         SeQuant/core/utility/permutation.hpp
+        SeQuant/core/utility/scope.hpp
         SeQuant/core/utility/singleton.hpp
         SeQuant/core/utility/string.cpp
         SeQuant/core/utility/string.hpp
diff --git a/SeQuant/core/export/julia_tensor_operations.hpp b/SeQuant/core/export/julia_tensor_operations.hpp
index 2bc3624690..0938478d4a 100644
--- a/SeQuant/core/export/julia_tensor_operations.hpp
+++ b/SeQuant/core/export/julia_tensor_operations.hpp
@@ -219,49 +219,21 @@ class JuliaTensorOperationsGenerator : public Generator<Context> {
                    " += " + to_julia_expr(expression, ctx) + "\n";
   }
 
-  void declare(const Index &idx, const Context &ctx) override {
-    (void)idx;
-    (void)ctx;
-  }
+  void declare(const Index &, const Context &) override {}
 
-  void declare(const Variable &variable, UsageSet usage,
-               const Context &ctx) override {
-    (void)variable;
-    (void)usage;
-    (void)ctx;
-  }
+  void declare(const Variable &, UsageSet, const Context &) override {}
 
-  void declare(const Tensor &tensor, UsageSet usage,
-               const Context &ctx) override {
-    (void)tensor;
-    (void)usage;
-    (void)ctx;
-  }
+  void declare(const Tensor &, UsageSet, const Context &) override {}
 
-  void all_indices_declared(std::size_t amount, const Context &ctx) override {
-    (void)amount;
-    (void)ctx;
-  }
+  void all_indices_declared(std::size_t, const Context &) override {}
 
-  void all_variables_declared(std::size_t amount, const Context &ctx) override {
-    (void)amount;
-    (void)ctx;
-  }
+  void all_variables_declared(std::size_t, const Context &) override {}
 
-  void all_tensors_declared(std::size_t amount, const Context &ctx) override {
-    (void)amount;
-    (void)ctx;
-  }
+  void all_tensors_declared(std::size_t, const Context &) override {}
 
-  void begin_declarations(DeclarationScope scope, const Context &ctx) override {
-    (void)scope;
-    (void)ctx;
-  }
+  void begin_declarations(DeclarationScope, const Context &) override {}
 
-  void end_declarations(DeclarationScope scope, const Context &ctx) override {
-    (void)scope;
-    (void)ctx;
-  }
+  void end_declarations(DeclarationScope, const Context &) override {}
 
   void insert_comment(const std::string &comment, const Context &) override {
     m_generated += "# " + comment + "\n";
diff --git a/SeQuant/core/export/python_einsum.hpp b/SeQuant/core/export/python_einsum.hpp
new file mode 100644
index 0000000000..3bf08e1246
--- /dev/null
+++ b/SeQuant/core/export/python_einsum.hpp
@@ -0,0 +1,817 @@
+#ifndef SEQUANT_CORE_EXPORT_PYTHON_EINSUM_HPP
+#define SEQUANT_CORE_EXPORT_PYTHON_EINSUM_HPP
+
+#include <SeQuant/core/export/context.hpp>
+#include <SeQuant/core/export/generator.hpp>
+#include <SeQuant/core/export/reordering_context.hpp>
+#include <SeQuant/core/expr.hpp>
+#include <SeQuant/core/index.hpp>
+#include <SeQuant/core/space.hpp>
+#include <SeQuant/core/utility/macros.hpp>
+#include <SeQuant/core/utility/string.hpp>
+
+#include <range/v3/view/join.hpp>
+#include <range/v3/view/transform.hpp>
+
+#include <boost/unordered/unordered_map.hpp>
+#include <boost/unordered/unordered_set.hpp>
+
+#include <cctype>
+#include <cstdlib>
+#include <sstream>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace sequant {
+
+/// Base context class for Python einsum generators
+class PythonEinsumGeneratorContext : public ExportContext {
+ public:
+  using ShapeMap = std::map<IndexSpace, std::string>;
+  using TagMap = std::map<IndexSpace, std::string>;
+
+  PythonEinsumGeneratorContext() = default;
+  ~PythonEinsumGeneratorContext() = default;
+  PythonEinsumGeneratorContext(ShapeMap index_shapes)
+      : m_index_shapes(std::move(index_shapes)) {}
+
+  /// Get whether to generate import statements
+  bool generate_imports() const { return m_generate_imports; }
+
+  /// Set whether to generate import statements
+  void set_generate_imports(bool value) { m_generate_imports = value; }
+
+  /// Get the memory layout for tensors
+  MemoryLayout memory_layout() const { return m_memory_layout; }
+
+  /// Set the memory layout for tensors
+  void set_memory_layout(MemoryLayout layout) { m_memory_layout = layout; }
+
+  /// Get the dimension/shape for a given index space
+  std::string get_shape(const IndexSpace &space) const {
+    auto it = m_index_shapes.find(space);
+    if (it != m_index_shapes.end()) {
+      return it->second;
+    }
+    // Default fallback - use a simple naming scheme
+    return "dim_" + toUtf8(space.base_key());
+  }
+
+  /// Get the shape tuple for a tensor
+  std::string get_shape_tuple(const Tensor &tensor) const {
+    std::string shape = "(";
+    bool first = true;
+    for (const Index &idx : tensor.const_indices()) {
+      if (!first) shape += ", ";
+      shape += get_shape(idx.space());
+      first = false;
+    }
+    shape += ")";
+    return shape;
+  }
+
+  /// Set the dimension/shape for a given index space
+  void set_shape(const IndexSpace &space, std::string shape) {
+    m_index_shapes[space] = std::move(shape);
+  }
+
+  /// Get the tag for a given index space
+  /// Tags are appended to tensor names to distinguish different blocks
+  std::string get_tag(const IndexSpace &space) const {
+    auto it = m_tags.find(space);
+    if (it != m_tags.end()) {
+      return it->second;
+    }
+    // Default: use first character of base key
+    return toUtf8(space.base_key()).substr(0, 1);
+  }
+
+  /// Set the tag for a given index space
+  void set_tag(const IndexSpace &space, std::string tag) {
+    m_tags[space] = std::move(tag);
+  }
+
+ protected:
+  ShapeMap m_index_shapes;
+  TagMap m_tags;
+  bool m_generate_imports = true;  // Generate imports by default
+  MemoryLayout m_memory_layout =
+      MemoryLayout::ColumnMajor;  // Fortran order to match Eigen::Tensor
+};
+
+/// Context for NumPy einsum generator
+class NumPyEinsumGeneratorContext : public PythonEinsumGeneratorContext {
+ public:
+  using PythonEinsumGeneratorContext::PythonEinsumGeneratorContext;
+};
+
+/// Context for PyTorch einsum generator
+class PyTorchEinsumGeneratorContext : public PythonEinsumGeneratorContext {
+ public:
+  using PythonEinsumGeneratorContext::PythonEinsumGeneratorContext;
+};
+
+/// Base generator for producing Python code using einsum
+/// Provides common functionality shared by NumPy and PyTorch backends
+template <typename Context>
+class PythonEinsumGeneratorBase : public Generator<Context> {
+ public:
+  PythonEinsumGeneratorBase() = default;
+  virtual ~PythonEinsumGeneratorBase() = default;
+
+  bool supports_named_sections() const override { return true; }
+
+  bool requires_named_sections() const override { return false; }
+
+  DeclarationScope index_declaration_scope() const override {
+    return DeclarationScope::Global;
+  }
+
+  DeclarationScope variable_declaration_scope() const override {
+    return DeclarationScope::Global;
+  }
+
+  DeclarationScope tensor_declaration_scope() const override {
+    return DeclarationScope::Global;
+  }
+
+  PrunableScalars prunable_scalars() const override {
+    return PrunableScalars::All;
+  }
+
+  std::string represent(const Index &idx, const Context &) const override {
+    if (idx.has_proto_indices()) {
+      throw std::runtime_error("Proto Indices are not (yet) supported!");
+    }
+
+    // Convert index label to a single character for einsum notation
+    // For einsum, we need single characters
+    std::string label = toUtf8(idx.full_label());
+
+    // Try to extract a single character representation
+    // If the label is longer, use its first character (this may need
+    // customization)
+    if (label.empty()) {
+      throw std::runtime_error("Empty index label");
+    }
+
+    // Return first character preserving case to avoid conflicts
+    // between index spaces that differ only in case (e.g., 'I' vs 'i')
+    char c = label[0];
+
+    return std::string(1, c);
+  }
+
+  std::string represent(const Tensor &tensor,
+                        const Context &ctx) const override {
+    return tensor_name(tensor, ctx);
+  }
+
+  std::string represent(const Variable &variable,
+                        const Context &) const override {
+    return sanitize_python_name(variable.label());
+  }
+
+  std::string represent(const Constant &constant,
+                        const Context &) const override {
+    std::stringstream sstream;
+
+    if (constant.value().imag() != 0) {
+      sstream << "(";
+      sstream << constant.value().real();
+      if (constant.value().imag() < 0) {
+        sstream << constant.value().imag() << "j";
+      } else {
+        sstream << "+" << constant.value().imag() << "j";
+      }
+      sstream << ")";
+    } else {
+      sstream << constant.value().real();
+    }
+
+    return sstream.str();
+  }
+
+  void unload(const Tensor &tensor, const Context &ctx) override {
+    m_generated += m_indent + "del " + represent(tensor, ctx) + "\n";
+  }
+
+  void destroy(const Tensor &tensor, const Context &ctx) override {
+    unload(tensor, ctx);
+    // Remove the associated storage file from disk
+    m_generated += m_indent + "os.remove('" + represent(tensor, ctx) +
+                   file_extension() + "')\n";
+  }
+
+  void create(const Variable &variable, bool zero_init,
+              const Context &ctx) override {
+    if (!zero_init) {
+      throw std::runtime_error(
+          "Python variables must be initialized when created");
+    }
+
+    m_generated += m_indent + represent(variable, ctx) + " = 0.0\n";
+  }
+
+  void set_to_zero(const Variable &variable, const Context &ctx) override {
+    m_generated += m_indent + represent(variable, ctx) + " = 0.0\n";
+  }
+
+  void unload(const Variable &variable, const Context &ctx) override {
+    m_generated += m_indent + "del " + represent(variable, ctx) + "\n";
+  }
+
+  void destroy(const Variable &variable, const Context &ctx) override {
+    unload(variable, ctx);
+    // Remove the associated storage file from disk
+    m_generated += m_indent + "os.remove('" + represent(variable, ctx) +
+                   file_extension() + "')\n";
+  }
+
+  void compute(const Expr &expression, const Tensor &result,
+               const Context &ctx) override {
+    m_generated += m_indent + represent(result, ctx) +
+                   " += " + to_einsum_expr(expression, result, ctx) + "\n";
+  }
+
+  void compute(const Expr &expression, const Variable &result,
+               const Context &ctx) override {
+    m_generated += m_indent + represent(result, ctx) +
+                   " += " + to_python_scalar_expr(expression, ctx) + "\n";
+  }
+
+  void declare(const Index &, const Context &) override {
+    // Indices don't need explicit declaration in Python
+  }
+
+  void declare(const Variable &, UsageSet, const Context &) override {
+    // Variables are dynamically typed in Python, no declaration needed
+  }
+
+  void declare(const Tensor &, UsageSet, const Context &) override {
+    // Tensors are dynamically typed in Python, no declaration needed
+  }
+
+  void all_indices_declared(std::size_t, const Context &) override {}
+
+  void all_variables_declared(std::size_t, const Context &) override {}
+
+  void all_tensors_declared(std::size_t, const Context &) override {}
+
+  void begin_declarations(DeclarationScope, const Context &) override {}
+
+  void end_declarations(DeclarationScope, const Context &) override {}
+
+  void insert_comment(const std::string &comment, const Context &) override {
+    m_generated += m_indent + "# " + comment + "\n";
+  }
+
+  void begin_named_section(std::string_view name, const Context &) override {
+    m_generated += m_indent + "def " + std::string(name) + "():\n";
+    m_indent += "    ";
+  }
+
+  void end_named_section(std::string_view /*name*/, const Context &) override {
+    SEQUANT_ASSERT(m_indent.size() >= 4);
+    m_indent = m_indent.substr(0, m_indent.size() - 4);
+    m_generated += "\n";
+  }
+
+  void begin_expression(const Context &ctx) override {
+    (void)ctx;
+    if (!m_generated.empty() && !m_generated.ends_with("\n\n")) {
+      // Add a blank line (no trailing whitespace in Python)
+      m_generated += "\n";
+    }
+  }
+
+  void end_expression(const Context &ctx) override { (void)ctx; }
+
+  void begin_export(const Context &) override { m_generated.clear(); }
+
+  void end_export(const Context &ctx) override { (void)ctx; }
+
+  std::string get_generated_code() const override { return m_generated; }
+
+ protected:
+  std::string m_generated;
+  std::string m_indent;
+
+  /// Get the tensor name (without indices)
+  std::string tensor_name(const Tensor &tensor, const Context &ctx) const {
+    // For Python variable names, start with sanitized tensor label
+    std::string name = sanitize_python_name(tensor.label());
+
+    // Append index space tags to distinguish different tensor blocks
+    // e.g., I[i1,a1] becomes "I_ov", I[a2,a1] becomes "I_vv"
+    if (tensor.num_indices() > 0) {
+      std::string tags;
+      for (const Index &idx : tensor.const_indices()) {
+        tags += ctx.get_tag(idx.space());
+      }
+      if (!tags.empty()) {
+        name += "_" + tags;
+      }
+    }
+
+    return name;
+  }
+
+  /// Backend-specific methods to be overridden by derived classes
+
+  /// Get the module prefix (np., torch., etc.)
+  virtual std::string module_prefix() const = 0;
+
+  /// Get the file extension for persistence (.npy, .pt, etc.)
+  virtual std::string file_extension() const = 0;
+
+  /// Get the available characters for einsum indices
+  virtual const std::string &available_index_chars() const = 0;
+
+  /// Backend-specific flag for optimize parameter in einsum
+  virtual bool use_optimize_parameter() const = 0;
+
+  /// Get the Python order string from memory layout
+  std::string get_order_string(const Context &ctx) const {
+    switch (ctx.memory_layout()) {
+      case MemoryLayout::ColumnMajor:
+        return "F";
+      case MemoryLayout::RowMajor:
+        return "C";
+      case MemoryLayout::Unspecified:
+        // Default to Fortran order for compatibility with Eigen::Tensor
+        return "F";
+    }
+    return "F";  // Fallback
+  }
+
+  /// Sanitize a label to be a valid Python identifier
+  std::string sanitize_python_name(std::wstring_view label) const {
+    std::string name = toUtf8(label);
+
+    // Replace invalid characters with underscores
+    // Python 3 supports unicode identifiers, so we only replace:
+    // - ASCII non-alphanumeric characters (except underscore)
+    // - Keep non-ASCII bytes as they're part of unicode characters (like Greek
+    // letters)
+    for (char &c : name) {
+      unsigned char uc = static_cast<unsigned char>(c);
+      // Only check ASCII range (0-127)
+      if (uc < 128) {
+        if (!std::isalnum(uc) && c != '_') {
+          c = '_';
+        }
+      }
+      // For non-ASCII bytes (>= 128), keep them - they're part of valid UTF-8
+      // unicode chars
+    }
+
+    // Ensure it doesn't start with a number
+    if (!name.empty() && std::isdigit(static_cast<unsigned char>(name[0]))) {
+      name = "_" + name;
+    }
+
+    return name;
+  }
+
+  /// Get or create a single-character einsum index for a given Index
+  std::string get_einsum_index(
+      const Index &idx, const Context &,
+      boost::unordered::unordered_map<std::string, std::string> &index_map,
+      boost::unordered::unordered_set<std::string> &used_chars) const {
+    const std::string full_label = toUtf8(idx.full_label());
+
+    // Check if we've already assigned a character to this index
+    auto it = index_map.find(full_label);
+    if (it != index_map.end()) {
+      return it->second;
+    }
+
+    // Try to use the first character of the label, preserving case
+    char base_char = full_label.empty() ? 'i' : full_label[0];
+    std::string candidate(1, base_char);
+
+    // Check if this character is already used (O(1) lookup)
+    bool found = used_chars.find(candidate) != used_chars.end();
+
+    // If already used, find next available character
+    if (found) {
+      // Use backend-specific character set
+      const std::string chars = available_index_chars();
+      const auto num_chars = chars.size();
+
+      for (std::size_t i = 0; i < num_chars; ++i) {
+        candidate = std::string(1, chars[i]);
+        // O(1) lookup instead of O(n) iteration
+        if (used_chars.find(candidate) == used_chars.end()) {
+          found = false;
+          break;
+        }
+      }
+
+      if (found) {
+        // Fallback: use numbered indices (not standard einsum, but necessary)
+        throw std::runtime_error("Too many unique indices for einsum notation");
+      }
+    }
+
+    // Store in both maps for fast forward and reverse lookups
+    index_map[full_label] = candidate;
+    used_chars.insert(candidate);
+    return candidate;
+  }
+
+  /// Convert a tensor to its einsum subscript notation
+  std::string tensor_to_einsum_subscript(
+      const Tensor &tensor, const Context &ctx,
+      boost::unordered::unordered_map<std::string, std::string> &index_map,
+      boost::unordered::unordered_set<std::string> &used_chars) const {
+    std::string subscript;
+    for (const Index &idx : tensor.const_indices()) {
+      subscript += get_einsum_index(idx, ctx, index_map, used_chars);
+    }
+    return subscript;
+  }
+
+  /// Convert an expression to an einsum call
+  std::string to_einsum_expr(const Expr &expr, const Tensor &result,
+                             const Context &ctx) const {
+    // Create local index mapping for this einsum operation
+    // This ensures each operation uses optimal character assignments
+    boost::unordered::unordered_map<std::string, std::string> index_map;
+    boost::unordered::unordered_set<std::string> used_chars;
+
+    std::string einsum_spec;
+    std::vector<std::string> tensor_names;
+    std::string scalar_factor;
+
+    // Extract scalar prefactor and tensors from the expression
+    extract_einsum_components(expr, einsum_spec, tensor_names, scalar_factor,
+                              ctx, index_map, used_chars);
+
+    // Add output specification
+    einsum_spec +=
+        "->" + tensor_to_einsum_subscript(result, ctx, index_map, used_chars);
+
+    // Build einsum call
+    std::string einsum_call = module_prefix() + "einsum('" + einsum_spec + "'";
+
+    for (const std::string &name : tensor_names) {
+      einsum_call += ", " + name;
+    }
+
+    if (use_optimize_parameter()) {
+      einsum_call += ", optimize=True";
+    }
+
+    einsum_call += ")";
+
+    // Apply scalar factor if present
+    if (!scalar_factor.empty() && scalar_factor != "1" &&
+        scalar_factor != "1.0") {
+      einsum_call = scalar_factor + " * " + einsum_call;
+    }
+
+    return einsum_call;
+  }
+
+  /// Extract einsum components from an expression
+  void extract_einsum_components(
+      const Expr &expr, std::string &einsum_spec,
+      std::vector<std::string> &tensor_names, std::string &scalar_factor,
+      const Context &ctx,
+      boost::unordered::unordered_map<std::string, std::string> &index_map,
+      boost::unordered::unordered_set<std::string> &used_chars) const {
+    if (expr.is<Tensor>()) {
+      const Tensor &tensor = expr.as<Tensor>();
+      if (!einsum_spec.empty()) einsum_spec += ",";
+      einsum_spec +=
+          tensor_to_einsum_subscript(tensor, ctx, index_map, used_chars);
+      tensor_names.push_back(represent(tensor, ctx));
+    } else if (expr.is<Variable>()) {
+      // Treat variable as a scalar
+      if (scalar_factor.empty()) {
+        scalar_factor = represent(expr.as<Variable>(), ctx);
+      } else {
+        scalar_factor += " * " + represent(expr.as<Variable>(), ctx);
+      }
+    } else if (expr.is<Constant>()) {
+      std::string const_repr = represent(expr.as<Constant>(), ctx);
+      if (scalar_factor.empty()) {
+        scalar_factor = const_repr;
+      } else {
+        scalar_factor += " * " + const_repr;
+      }
+    } else if (expr.is<Product>()) {
+      const Product &product = expr.as<Product>();
+
+      // Handle scalar part
+      if (!product.scalar().is_identity()) {
+        std::string scalar_repr = represent(Constant(product.scalar()), ctx);
+        if (scalar_factor.empty()) {
+          scalar_factor = scalar_repr;
+        } else {
+          scalar_factor += " * " + scalar_repr;
+        }
+      }
+
+      // Handle tensor factors
+      for (std::size_t i = 0; i < product.size(); ++i) {
+        extract_einsum_components(*product.factor(i), einsum_spec, tensor_names,
+                                  scalar_factor, ctx, index_map, used_chars);
+      }
+    } else if (expr.is<Sum>()) {
+      // For sums, we can't use a single einsum call
+      // This should be handled at a higher level by generating multiple
+      // statements
+      throw std::runtime_error(
+          "Sum expressions should be handled by generating multiple einsum "
+          "calls");
+    }
+  }
+
+  /// Convert expression to Python scalar expression (for variable results)
+  std::string to_python_scalar_expr(const Expr &expr,
+                                    const Context &ctx) const {
+    if (expr.is<Variable>()) {
+      return represent(expr.as<Variable>(), ctx);
+    } else if (expr.is<Constant>()) {
+      return represent(expr.as<Constant>(), ctx);
+    } else if (expr.is<Product>()) {
+      // This should involve tensor contractions
+      // For a scalar result, we need to contract all indices
+      // This is an einsum with no output indices
+
+      // Create local index mapping for this einsum operation
+      boost::unordered::unordered_map<std::string, std::string> index_map;
+      boost::unordered::unordered_set<std::string> used_chars;
+
+      std::string einsum_spec;
+      std::vector<std::string> tensor_names;
+      std::string scalar_factor;
+
+      extract_einsum_components(expr, einsum_spec, tensor_names, scalar_factor,
+                                ctx, index_map, used_chars);
+
+      // For scalar result, use "->" or "->..."
+      einsum_spec += "->";
+
+      std::string einsum_call =
+          module_prefix() + "einsum('" + einsum_spec + "'";
+
+      for (const std::string &name : tensor_names) {
+        einsum_call += ", " + name;
+      }
+
+      if (use_optimize_parameter()) {
+        einsum_call += ", optimize=True";
+      }
+
+      einsum_call += ")";
+
+      if (!scalar_factor.empty() && scalar_factor != "1" &&
+          scalar_factor != "1.0") {
+        einsum_call = scalar_factor + " * " + einsum_call;
+      }
+
+      return einsum_call;
+    } else if (expr.is<Sum>()) {
+      const Sum &sum = expr.as<Sum>();
+      std::string result = "(";
+
+      for (std::size_t i = 0; i < sum.size(); ++i) {
+        if (i > 0) result += " + ";
+        result += to_python_scalar_expr(*sum.summand(i), ctx);
+      }
+
+      result += ")";
+      return result;
+    }
+
+    throw std::runtime_error(
+        "Unsupported expression type for Python scalar expression");
+  }
+};
+
+/// Generator for NumPy einsum
+class NumPyEinsumGenerator
+    : public PythonEinsumGeneratorBase<NumPyEinsumGeneratorContext> {
+ private:
+  using Base = PythonEinsumGeneratorBase<NumPyEinsumGeneratorContext>;
+
+ public:
+  NumPyEinsumGenerator() = default;
+  ~NumPyEinsumGenerator() = default;
+
+  std::string get_format_name() const override { return "Python (einsum)"; }
+
+  void begin_export(const Context &ctx) override {
+    Base::m_generated.clear();
+    if (ctx.generate_imports()) {
+      Base::m_generated += "import numpy as np\n";
+      Base::m_generated += "import os\n\n";
+    }
+  }
+
+  // Bring base class overloads into scope to avoid hiding
+  using Base::create;
+  using Base::load;
+  using Base::persist;
+  using Base::set_to_zero;
+
+  void create(const Tensor &tensor, bool zero_init,
+              const Context &ctx) override {
+    if (!zero_init) {
+      throw std::runtime_error(
+          "Python tensors must be initialized when created");
+    }
+
+    m_generated += m_indent + represent(tensor, ctx) + " = " + module_prefix() +
+                   "zeros(" + ctx.get_shape_tuple(tensor) + ", order='" +
+                   get_order_string(ctx) + "')\n";
+  }
+
+  void load(const Tensor &tensor, bool set_to_zero,
+            const Context &ctx) override {
+    Base::m_generated += Base::m_indent + Base::represent(tensor, ctx) + " = ";
+
+    if (set_to_zero) {
+      Base::m_generated += module_prefix() + "zeros(" +
+                           ctx.get_shape_tuple(tensor) + ", order='" +
+                           Base::get_order_string(ctx) + "')";
+    } else {
+      // Load from file
+      Base::m_generated += module_prefix() + "load('" +
+                           Base::represent(tensor, ctx) + file_extension() +
+                           "')";
+    }
+
+    Base::m_generated += "\n";
+  }
+
+  void set_to_zero(const Tensor &tensor, const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + Base::represent(tensor, ctx) + ".fill(0)\n";
+  }
+
+  void persist(const Tensor &tensor, const Context &ctx) override {
+    Base::m_generated += Base::m_indent + module_prefix() + "save('" +
+                         Base::represent(tensor, ctx) + file_extension() +
+                         "', " + Base::represent(tensor, ctx) + ")\n";
+  }
+
+  void load(const Variable &variable, bool set_to_zero,
+            const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + Base::represent(variable, ctx) + " = ";
+
+    if (set_to_zero) {
+      Base::m_generated += "0.0";
+    } else {
+      // Load from file
+      Base::m_generated += module_prefix() + "load('" +
+                           Base::represent(variable, ctx) + file_extension() +
+                           "')";
+    }
+
+    Base::m_generated += "\n";
+  }
+
+  void persist(const Variable &variable, const Context &ctx) override {
+    Base::m_generated += Base::m_indent + module_prefix() + "save('" +
+                         Base::represent(variable, ctx) + file_extension() +
+                         "', " + Base::represent(variable, ctx) + ")\n";
+  }
+
+ protected:
+  std::string module_prefix() const override { return "np."; }
+
+  std::string file_extension() const override { return ".npy"; }
+
+  // NumPy supports both lowercase and uppercase indices
+  const std::string &available_index_chars() const override {
+    static const std::string chars =
+        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    return chars;
+  }
+
+  bool use_optimize_parameter() const override { return true; }
+};
+
+/// Generator for PyTorch einsum
+class PyTorchEinsumGenerator
+    : public PythonEinsumGeneratorBase<PyTorchEinsumGeneratorContext> {
+ private:
+  using Base = PythonEinsumGeneratorBase<PyTorchEinsumGeneratorContext>;
+
+ public:
+  PyTorchEinsumGenerator() = default;
+  ~PyTorchEinsumGenerator() = default;
+
+  std::string get_format_name() const override { return "PyTorch (einsum)"; }
+
+  void begin_export(const Context &ctx) override {
+    Base::m_generated.clear();
+    if (ctx.generate_imports()) {
+      Base::m_generated += "import torch\n";
+      Base::m_generated += "import os\n\n";
+    }
+  }
+
+  // Bring base class overloads into scope to avoid hiding
+  using Base::create;
+  using Base::load;
+  using Base::persist;
+  using Base::set_to_zero;
+
+  // Override create to not use 'order' parameter (PyTorch doesn't support it)
+  void create(const Tensor &tensor, bool zero_init,
+              const Context &ctx) override {
+    if (!zero_init) {
+      throw std::runtime_error(
+          "PyTorch tensors must be initialized when created");
+    }
+
+    // PyTorch doesn't support the 'order' parameter - tensors are always
+    // row-major Use float64 (double precision) to match C++ double type
+    Base::m_generated += Base::m_indent + Base::represent(tensor, ctx) + " = " +
+                         module_prefix() + "zeros(" +
+                         ctx.get_shape_tuple(tensor) +
+                         ", dtype=torch.float64)\n";
+  }
+
+  void load(const Tensor &tensor, bool set_to_zero,
+            const Context &ctx) override {
+    Base::m_generated += Base::m_indent + Base::represent(tensor, ctx) + " = ";
+
+    if (set_to_zero) {
+      // PyTorch doesn't support the 'order' parameter - tensors are always
+      // row-major Use float64 (double precision) to match C++ double type
+      Base::m_generated += module_prefix() + "zeros(" +
+                           ctx.get_shape_tuple(tensor) +
+                           ", dtype=torch.float64)";
+    } else {
+      // Load from file
+      Base::m_generated += "torch.load('" + Base::represent(tensor, ctx) +
+                           file_extension() + "')";
+    }
+
+    Base::m_generated += "\n";
+  }
+
+  void set_to_zero(const Tensor &tensor, const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + Base::represent(tensor, ctx) + ".zero_()\n";
+  }
+
+  void persist(const Tensor &tensor, const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + "torch.save(" + Base::represent(tensor, ctx) + ", '" +
+        Base::represent(tensor, ctx) + file_extension() + "')\n";
+  }
+
+  void load(const Variable &variable, bool set_to_zero,
+            const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + Base::represent(variable, ctx) + " = ";
+
+    if (set_to_zero) {
+      Base::m_generated += "0.0";
+    } else {
+      // Load from file
+      Base::m_generated += "torch.load('" + Base::represent(variable, ctx) +
+                           file_extension() + "')";
+    }
+
+    Base::m_generated += "\n";
+  }
+
+  void persist(const Variable &variable, const Context &ctx) override {
+    Base::m_generated +=
+        Base::m_indent + "torch.save(" + Base::represent(variable, ctx) +
+        ", '" + Base::represent(variable, ctx) + file_extension() + "')\n";
+  }
+
+ protected:
+  std::string module_prefix() const override { return "torch."; }
+
+  std::string file_extension() const override { return ".pt"; }
+
+  // PyTorch only supports lowercase indices
+  // See:
+  // https://stackoverflow.com/questions/55894693/understanding-pytorch-einsum
+  const std::string &available_index_chars() const override {
+    static const std::string chars = "abcdefghijklmnopqrstuvwxyz";
+    return chars;
+  }
+
+  bool use_optimize_parameter() const override { return false; }
+};
+
+/// Backward compatibility alias - default to NumPy
+using PythonEinsumGenerator = NumPyEinsumGenerator;
+
+}  // namespace sequant
+
+#endif  // SEQUANT_CORE_EXPORT_PYTHON_EINSUM_HPP
diff --git a/SeQuant/core/expressions/constant.hpp b/SeQuant/core/expressions/constant.hpp
index 09d8453b88..b3c046cd3c 100644
--- a/SeQuant/core/expressions/constant.hpp
+++ b/SeQuant/core/expressions/constant.hpp
@@ -40,7 +40,10 @@ class Constant : public Expr {
   Constant(Constant &&) = default;
   Constant &operator=(const Constant &) = default;
   Constant &operator=(Constant &&) = default;
-  template <typename U, typename = std::enable_if_t<!is_constant_v<U>>>
+  template <typename U>
+    requires(!is_constant_v<U> && !is_an_expr_v<std::remove_reference_t<U>> &&
+             !Expr::is_shared_ptr_of_expr_or_derived<
+                 std::remove_reference_t<U>>::value)
   explicit Constant(U &&value) : value_(std::forward<U>(value)) {}
 
   /// @tparam T the result type; default to the type of value_
diff --git a/SeQuant/core/expressions/expr_operators.hpp b/SeQuant/core/expressions/expr_operators.hpp
index c25031926e..910c3dd5f7 100644
--- a/SeQuant/core/expressions/expr_operators.hpp
+++ b/SeQuant/core/expressions/expr_operators.hpp
@@ -65,6 +65,18 @@ inline ExprPtr operator^(const ExprPtr &left, const ExprPtr &right) {
   SEQUANT_UNREACHABLE;
 }
 
+template <typename T>
+  requires(std::constructible_from<Constant, T>)
+inline ExprPtr operator*(T left, const ExprPtr &right) {
+  return ex<Constant>(std::move(left)) * right;
+}
+
+template <typename T>
+  requires(std::constructible_from<Constant, T>)
+inline ExprPtr operator*(const ExprPtr &left, T right) {
+  return left * ex<Constant>(std::move(right));
+}
+
 inline ExprPtr operator+(const ExprPtr &left, const ExprPtr &right) {
   auto left_is_sum = left->is<Sum>();
   auto right_is_sum = right->is<Sum>();
diff --git a/SeQuant/core/utility/debug.hpp b/SeQuant/core/utility/debug.hpp
index 76d56f6b81..188a7a8910 100644
--- a/SeQuant/core/utility/debug.hpp
+++ b/SeQuant/core/utility/debug.hpp
@@ -10,13 +10,22 @@
 
 namespace sequant {
 
+/// uses std::ostringstream + std::printf to print to stdout even if std::cout
+/// is captured (e.g., by Catch2)
+template <typename... Args>
+void printf(Args&&... args) {
+  std::ostringstream oss;
+  (oss << ... << std::forward<Args>(args));
+  std::printf("%s", oss.str().c_str());
+}
+
 /// uses std::wostringstream + std::wprintf to print to stdout even if wcout is
 /// captured (e.g., by Catch2)
 template <typename... Args>
 void wprintf(Args&&... args) {
   std::wostringstream oss;
   (oss << ... << std::forward<Args>(args));
-  std::wprintf(oss.str().c_str());
+  std::wprintf(L"%ls", oss.str().c_str());
 }
 
 }  // namespace sequant
diff --git a/SeQuant/core/utility/scope.hpp b/SeQuant/core/utility/scope.hpp
new file mode 100644
index 0000000000..d0a136c1b5
--- /dev/null
+++ b/SeQuant/core/utility/scope.hpp
@@ -0,0 +1,76 @@
+//
+// Created by Eduard Valeyev on 2/15/23.
+//
+
+#ifndef SEQUANT_CORE_SCOPE_HPP
+#define SEQUANT_CORE_SCOPE_HPP
+
+#include <utility>
+
+#ifdef SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE
+#error \
+    "SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE already defined, please create an issue at github.com/ValeevGroup/SeQuant"
+#endif
+
+// availability of std::experimental::scope_exit requires experimental/scope to
+// be present ...
+#if __has_include(<experimental/scope>)
+
+// ... and , if using libstdc++ >= 13, C++20 or later
+#if defined(__GLIBCXX__) && __GLIBCXX__ >= 13 && __cplusplus >= 202002L
+#define SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE 1
+#endif
+
+#endif
+
+#if SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE
+
+#include <experimental/scope>
+
+namespace sequant::detail {
+using std::experimental::scope_exit;
+}
+
+#else
+
+namespace sequant::detail {
+
+template <typename EF>
+class scope_exit {
+ public:
+  template <typename Fn>
+  explicit scope_exit(Fn&& fn) noexcept
+      : fn_(std::forward<Fn>(fn)), released_(false) {}
+  scope_exit(scope_exit&& other) noexcept = default;
+  scope_exit(const scope_exit&) = delete;
+
+  ~scope_exit() noexcept {
+    if (!released_) {
+      fn_();
+      released_ = true;
+    }
+  }
+
+  scope_exit& operator=(const scope_exit&) = delete;
+  scope_exit& operator=(scope_exit&&) = delete;
+
+  void release() noexcept { released_ = true; }
+
+ private:
+  EF fn_;
+  bool released_ = false;
+};
+
+}  // namespace sequant::detail
+
+#endif  // SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE
+#undef SEQUANT_CORE_UTILITY_SCOPE_HPP_USE_STD_EXPERIMENTAL_SCOPE
+
+namespace sequant::detail {
+template <typename EF>
+[[nodiscard]] scope_exit<EF> make_scope_exit(EF&& ef) {
+  return scope_exit<EF>(std::forward<EF>(ef));
+}
+}  // namespace sequant::detail
+
+#endif  // SEQUANT_CORE_SCOPE_HPP
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 04fa3fb4a8..b0645ff22e 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -11,6 +11,8 @@ add_executable(unit_tests-sequant ${BUILD_BY_DEFAULT}
     "test_eval_expr.cpp"
     "test_eval_node.cpp"
     "test_export.cpp"
+    "test_export.hpp"
+    "test_export_python.cpp"
     "test_expr.cpp"
     "test_expr.cpp"
     "test_fusion.cpp"
@@ -39,6 +41,52 @@ add_executable(unit_tests-sequant ${BUILD_BY_DEFAULT}
 target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_UNIT_TESTS_SOURCE_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(unit_tests-sequant PRIVATE Boost::boost)
 
+# Optional: Find Python for PythonEinsumGenerator validation tests
+find_package(Python 3 COMPONENTS Interpreter QUIET)
+if(Python_Interpreter_FOUND)
+    message(STATUS "Found Python: ${Python_EXECUTABLE} (version ${Python_VERSION}) for export validation tests")
+
+    # Check for NumPy
+    execute_process(
+        COMMAND "${Python_EXECUTABLE}" "-c" "import numpy; print(numpy.__version__)"
+        RESULT_VARIABLE _numpy_status
+        OUTPUT_VARIABLE _numpy_version
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    if(_numpy_status EQUAL 0)
+        message(STATUS "NumPy ${_numpy_version} found for export validation tests")
+        target_compile_definitions(unit_tests-sequant PRIVATE
+            SEQUANT_HAS_NUMPY_FOR_VALIDATION=1)
+    else()
+        message(STATUS "NumPy not found - PythonEinsumGenerator NumPy validation tests will be disabled")
+    endif()
+
+    # Check for PyTorch
+    execute_process(
+        COMMAND "${Python_EXECUTABLE}" "-c" "import torch; print(torch.__version__)"
+        RESULT_VARIABLE _torch_status
+        OUTPUT_VARIABLE _torch_version
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    if(_torch_status EQUAL 0)
+        message(STATUS "PyTorch ${_torch_version} found for export validation tests")
+        target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_HAS_TORCH_FOR_VALIDATION=1)
+    else()
+        message(STATUS "PyTorch not found - PythonEinsumGenerator PyTorch validation tests will be disabled")
+    endif()
+
+    if (_numpy_status EQUAL 0 OR _torch_status EQUAL 0)
+        target_compile_definitions(unit_tests-sequant PRIVATE
+                SEQUANT_UNITTESTS_PYTHON_EXECUTABLE="${Python_EXECUTABLE}")
+    endif()
+else()
+    message(STATUS "Python not found - PythonEinsumGenerator validation tests will be disabled")
+endif()
+
 if (SEQUANT_SKIP_LONG_TESTS)
     target_compile_definitions(unit_tests-sequant PRIVATE SEQUANT_SKIP_LONG_TESTS=1)
 endif()
diff --git a/tests/unit/export_tests/binary_contract.export_test b/tests/unit/export_tests/binary_contract.export_test
index 68e60a4167..c636b27988 100644
--- a/tests/unit/export_tests/binary_contract.export_test
+++ b/tests/unit/export_tests/binary_contract.export_test
@@ -82,3 +82,17 @@ store I:cc[jk]
 ---- end
 
 =================================================
+Python (einsum)
+=================================================
+import numpy as np
+import os
+
+I_oo = np.zeros((nocc, nocc), order='F')
+A_vo = np.load('A_vo.npy')
+B_ov = np.load('B_ov.npy')
+I_oo += np.einsum('ai,ba->bi', A_vo, B_ov, optimize=True)
+del B_ov
+del A_vo
+np.save('I_oo.npy', I_oo)
+
+=================================================
diff --git a/tests/unit/export_tests/named_expression_group.export_test b/tests/unit/export_tests/named_expression_group.export_test
index 3e8158bf58..97d536a521 100644
--- a/tests/unit/export_tests/named_expression_group.export_test
+++ b/tests/unit/export_tests/named_expression_group.export_test
@@ -24,4 +24,25 @@ section test_section(tensor A[a_1, i_1], tensor R1[a_1, i_1], tensor R2[a_1, a_2
   Persist R2[a_1, a_2, i_1, i_2]
 end section
 
+=================================================
+Python (einsum)
+=================================================
+import numpy as np
+import os
+
+def test_section():
+
+    R1_vo = np.zeros((nvirt, nocc), order='F')
+    A_vo = np.load('A_vo.npy')
+    R1_vo += 42 * np.einsum('ai->ai', A_vo, optimize=True)
+    del A_vo
+    np.save('R1_vo.npy', R1_vo)
+
+    R2_vvoo = np.zeros((nvirt, nvirt, nocc, nocc), order='F')
+    A_vo = np.load('A_vo.npy')
+    R2_vvoo += np.einsum('ai,bc->abic', A_vo, A_vo, optimize=True)
+    del A_vo
+    np.save('R2_vvoo.npy', R2_vvoo)
+
+
 =================================================
diff --git a/tests/unit/export_tests/sum_unary_plus_binary.export_test b/tests/unit/export_tests/sum_unary_plus_binary.export_test
index ce76d1ba4b..ff0841873c 100644
--- a/tests/unit/export_tests/sum_unary_plus_binary.export_test
+++ b/tests/unit/export_tests/sum_unary_plus_binary.export_test
@@ -25,3 +25,20 @@ Unload f[a_1, i_1]
 Persist I[a_1, i_1]
 
 =================================================
+Python (einsum)
+=================================================
+import numpy as np
+import os
+
+I_vo = np.zeros((nvirt, nocc), order='F')
+f_oo = np.load('f_oo.npy')
+t_vo = np.load('t_vo.npy')
+I_vo += -1 * np.einsum('ia,bi->ba', f_oo, t_vo, optimize=True)
+del t_vo
+del f_oo
+f_vo = np.load('f_vo.npy')
+I_vo += np.einsum('ai->ai', f_vo, optimize=True)
+del f_vo
+np.save('I_vo.npy', I_vo)
+
+=================================================
diff --git a/tests/unit/export_tests/ternary.export_test b/tests/unit/export_tests/ternary.export_test
index 0760794559..d61245a136 100644
--- a/tests/unit/export_tests/ternary.export_test
+++ b/tests/unit/export_tests/ternary.export_test
@@ -114,3 +114,22 @@ store I:ce[jb]
 ---- end
 
 =================================================
+Python (einsum)
+=================================================
+import numpy as np
+import os
+
+I_ov = np.zeros((nocc, nvirt), order='F')
+I_vv = np.zeros((nvirt, nvirt), order='F')
+A_vo = np.load('A_vo.npy')
+B_ov = np.load('B_ov.npy')
+I_vv += np.einsum('ai,ib->ab', A_vo, B_ov, optimize=True)
+del B_ov
+del A_vo
+C_ov = np.load('C_ov.npy')
+I_ov += np.einsum('ab,ia->ib', I_vv, C_ov, optimize=True)
+del C_ov
+del I_vv
+np.save('I_ov.npy', I_ov)
+
+=================================================
diff --git a/tests/unit/test_export.cpp b/tests/unit/test_export.cpp
index c9ab67918c..a3d1731063 100644
--- a/tests/unit/test_export.cpp
+++ b/tests/unit/test_export.cpp
@@ -1,6 +1,8 @@
 #include <catch2/catch_all.hpp>
 #include <catch2/catch_test_macros.hpp>
 
+#include "test_export.hpp"
+
 #include <SeQuant/core/export/export.hpp>
 #include <SeQuant/core/export/export_expr.hpp>
 #include <SeQuant/core/export/export_node.hpp>
@@ -10,6 +12,7 @@
 #include <SeQuant/core/export/julia_itensor.hpp>
 #include <SeQuant/core/export/julia_tensor_kit.hpp>
 #include <SeQuant/core/export/julia_tensor_operations.hpp>
+#include <SeQuant/core/export/python_einsum.hpp>
 #include <SeQuant/core/export/reordering_context.hpp>
 #include <SeQuant/core/export/text_generator.hpp>
 #include <SeQuant/core/index_space_registry.hpp>
@@ -87,6 +90,8 @@ using KnownGenerators = std::tuple<
     JuliaITensorGenerator<JuliaITensorGeneratorContext>,
     JuliaTensorKitGenerator<JuliaTensorKitGeneratorContext>,
     JuliaTensorOperationsGenerator<JuliaTensorOperationsGeneratorContext>,
+    NumPyEinsumGenerator,
+    PyTorchEinsumGenerator,
     ItfGenerator<ItfContext>
 >;
 // clang-format on
@@ -133,12 +138,40 @@ void configure_context_defaults(JuliaTensorOperationsGeneratorContext &ctx) {
   ctx.set_tag(virt, "v");
 }
 
+void configure_context_defaults(NumPyEinsumGeneratorContext &ctx) {
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  ctx.set_shape(occ, "nocc");
+  ctx.set_shape(virt, "nvirt");
+
+  ctx.set_tag(occ, "o");
+  ctx.set_tag(virt, "v");
+}
+
+void configure_context_defaults(PyTorchEinsumGeneratorContext &ctx) {
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  ctx.set_shape(occ, "nocc");
+  ctx.set_shape(virt, "nvirt");
+
+  ctx.set_tag(occ, "o");
+  ctx.set_tag(virt, "v");
+}
+
 void add_to_context(TextGeneratorContext &, std::string_view,
                     std::string_view) {
   throw std::runtime_error(
       "TextGeneratorContext doesn't support specifications");
 }
 void add_to_context(ItfContext &, std::string_view, std::string_view) {}
+void add_to_context(PythonEinsumGeneratorContext &, std::string_view,
+                    std::string_view) {
+  // PythonEinsumGeneratorContext doesn't support context specifications
+}
 void add_to_context(JuliaTensorOperationsGeneratorContext &ctx,
                     std::string_view key, std::string_view value) {
   auto parse_space_map = [](std::string_view spec) {
@@ -233,18 +266,6 @@ std::vector<ExpressionGroup<>> parse_expression_spec(const std::string &spec) {
   return groups;
 }
 
-[[nodiscard]] auto to_export_context() {
-  auto reg = std::make_shared<IndexSpaceRegistry>();
-  reg->add(L"i", 0b001, is_particle, 10);
-  reg->add(L"a", 0b010, is_vacuum_occupied, is_reference_occupied, is_hole,
-           100);
-  reg->add(L"u", 0b100, is_vacuum_occupied, is_reference_occupied, is_hole,
-           is_particle, 5);
-
-  return set_scoped_default_context(
-      {.index_space_registry_shared_ptr = std::move(reg)});
-}
-
 TEMPLATE_LIST_TEST_CASE("export_tests", "[export]", KnownGenerators) {
   using CurrentGen = TestType;
   using CurrentCtx = CurrentGen::Context;
@@ -254,7 +275,7 @@ TEMPLATE_LIST_TEST_CASE("export_tests", "[export]", KnownGenerators) {
   REQUIRE(Index(L"i_1") < Index(L"a_1"));
 
   // Safe-guard that template magic works
-  const std::size_t n_generators = 5;
+  const std::size_t n_generators = 7;
 
   const std::set<std::string> known_formats =
       known_format_names(KnownGenerators{});
@@ -780,3 +801,130 @@ TEST_CASE("ExportExpr", "[export]") {
     REQUIRE(e1 == e3);
   }
 }
+
+TEST_CASE("PythonEinsumGenerator", "[export]") {
+  auto resetter = to_export_context();
+
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  SECTION("NumPy backend - simple contraction") {
+    // Create tensors: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    // Build the expression
+    ResultExpr result_expr(T, F * t);
+
+    // Convert to export tree
+    auto export_tree = to_export_tree(result_expr);
+
+    // Set up context
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    // Generate code
+    NumPyEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify generated code contains expected elements
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("np.zeros"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("np.load"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("np.einsum"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("optimize=True"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("T_vv +="));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("np.save"));
+  }
+
+  SECTION("PyTorch backend - simple contraction") {
+    // Create tensors: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    // Set up PyTorch context
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    PyTorchEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify PyTorch-specific code
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("torch.zeros"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("torch.load"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("torch.einsum"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("torch.save"));
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("optimize=True"));
+  }
+
+  SECTION("Scalar factor") {
+    // Test with scalar prefactor: R = 0.5 * t1 * t2
+    Index i1(L"i_1", occ);
+    Index i2(L"i_2", occ);
+    Index a1(L"a_1", virt);
+    Index a2(L"a_2", virt);
+
+    auto t1 = ex<Tensor>(L"t1", bra{L"i_1"}, ket{L"a_1"});
+    auto t2 = ex<Tensor>(L"t2", bra{L"i_2"}, ket{L"a_2"});
+    Tensor R(L"R", bra{L"i_1", L"i_2"}, ket{L"a_1", L"a_2"});
+
+    ResultExpr result_with_scalar(R, rational(1, 2) * t1 * t2);
+    auto export_tree_scalar = to_export_tree(result_with_scalar);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    PyTorchEinsumGenerator generator;
+    export_expression(export_tree_scalar, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify scalar factor is included
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("1/2"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("*"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring(".einsum('"));
+  }
+
+  SECTION("Scalar result - energy expression") {
+    auto g = ex<Tensor>(L"g", bra{L"i_1", L"i_2"}, ket{L"a_1", L"a_2"});
+    auto t = ex<Tensor>(L"t", bra{L"a_1", L"a_2"}, ket{L"i_1", L"i_2"});
+    Variable E(L"E");
+
+    ResultExpr result_expr(E, g * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    PyTorchEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify scalar result (einsum ending with "->")
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("E +="));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("->'"));
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring(".einsum('"));
+  }
+}
diff --git a/tests/unit/test_export.hpp b/tests/unit/test_export.hpp
new file mode 100644
index 0000000000..bc8404f409
--- /dev/null
+++ b/tests/unit/test_export.hpp
@@ -0,0 +1,23 @@
+#ifndef SEQUANT_UNIT_TESTS_TEST_EXPORT_HPP
+#define SEQUANT_UNIT_TESTS_TEST_EXPORT_HPP
+
+#include <SeQuant/core/index_space_registry.hpp>
+
+#include "SeQuant/core/context.hpp"
+
+namespace {
+[[nodiscard]] [[maybe_unused]] inline auto to_export_context() {
+  using namespace sequant;
+  auto reg = std::make_shared<IndexSpaceRegistry>();
+  reg->add(L"i", 0b001, is_particle, 10);
+  reg->add(L"a", 0b010, is_vacuum_occupied, is_reference_occupied, is_hole,
+           100);
+  reg->add(L"u", 0b100, is_reference_occupied, is_hole, is_particle, 5);
+
+  return set_scoped_default_context(
+      {.index_space_registry_shared_ptr = std::move(reg)});
+}
+
+}  // anonymous namespace
+
+#endif  // SEQUANT_UNIT_TESTS_TEST_EXPORT_HPP
diff --git a/tests/unit/test_export_python.cpp b/tests/unit/test_export_python.cpp
new file mode 100644
index 0000000000..d475fa5256
--- /dev/null
+++ b/tests/unit/test_export_python.cpp
@@ -0,0 +1,1469 @@
+#include <catch2/catch_all.hpp>
+#include <catch2/catch_test_macros.hpp>
+
+#include "test_export.hpp"
+
+#include <SeQuant/core/export/export.hpp>
+#include <SeQuant/core/export/python_einsum.hpp>
+#include <SeQuant/core/index_space_registry.hpp>
+#include <SeQuant/core/rational.hpp>
+#include <SeQuant/core/utility/scope.hpp>
+
+#include "catch2_sequant.hpp"
+
+#include <boost/algorithm/string.hpp>
+
+#include <unsupported/Eigen/CXX11/Tensor>
+
+#include <filesystem>
+#include <fstream>
+#include <random>
+#include <string>
+
+using namespace sequant;
+
+// ============================================================================
+// Eigen::Tensor utilities for numpy I/O and validation
+// ============================================================================
+namespace {
+
+// Write Eigen::Tensor to numpy .npy file (version 1.0 format)
+// Supports both column-major (default) and row-major layouts
+template <typename Scalar, int NumDims, int Options = 0,
+          typename IndexType = Eigen::Index>
+void write_eigen_tensor_to_numpy(
+    const std::string &filename,
+    const Eigen::Tensor<Scalar, NumDims, Options, IndexType> &tensor) {
+  std::ofstream file(filename, std::ios::binary);
+  if (!file) {
+    throw std::runtime_error("Cannot open file for writing: " + filename);
+  }
+
+  // Magic number
+  file.write("\x93NUMPY", 6);
+
+  // Version 1.0
+  file.put(0x01);
+  file.put(0x00);
+
+  // Build header dict
+  std::ostringstream header;
+  header << "{'descr': '";
+
+  // Data type descriptor
+  if (std::is_same<Scalar, float>::value) {
+    header << "<f4";
+  } else if (std::is_same<Scalar, double>::value) {
+    header << "<f8";
+  } else if (std::is_same<Scalar, std::complex<float>>::value) {
+    header << "<c8";
+  } else if (std::is_same<Scalar, std::complex<double>>::value) {
+    header << "<c16";
+  } else {
+    throw std::runtime_error("Unsupported data type");
+  }
+
+  // Determine layout from Options template parameter
+  // Eigen::RowMajor = 1, Eigen::ColMajor = 0 (default)
+  constexpr bool is_row_major = (Options & Eigen::RowMajor) != 0;
+  constexpr bool fortran_order = !is_row_major;
+
+  header << "', 'fortran_order': " << (fortran_order ? "True" : "False")
+         << ", 'shape': (";
+  auto dimensions = tensor.dimensions();
+  for (int i = 0; i < NumDims; ++i) {
+    if (i > 0) header << ", ";
+    header << dimensions[i];
+  }
+  if (NumDims == 1) header << ",";
+  header << "), }";
+
+  // Pad header so total (10 bytes preamble + header length) is divisible by 64
+  // The header dict should end with a space before the newline for numpy
+  // compatibility
+  std::string header_str = header.str();
+  // Calculate required total size (must be multiple of 64)
+  std::size_t preamble_size = 10;  // magic(6) + version(2) + header_len(2)
+  std::size_t current_size =
+      preamble_size + header_str.size() + 1;  // +1 for newline
+  std::size_t padding = (64 - (current_size % 64)) % 64;
+
+  // Add padding spaces, then newline
+  if (padding > 0) {
+    header_str.append(padding, ' ');
+  }
+  header_str += "\n";
+
+  // Write header length (2 bytes, little endian)
+  uint16_t hlen = static_cast<uint16_t>(header_str.size());
+  file.write(reinterpret_cast<const char *>(&hlen), 2);
+
+  // Write header
+  file.write(header_str.c_str(), header_str.size());
+
+  // Write data
+  file.write(reinterpret_cast<const char *>(tensor.data()),
+             tensor.size() * sizeof(Scalar));
+}
+
+// Read Eigen::Tensor from numpy .npy file
+// Supports both column-major (default) and row-major layouts
+template <typename Scalar, int NumDims, int Options = 0,
+          typename IndexType = Eigen::Index>
+Eigen::Tensor<Scalar, NumDims, Options, IndexType> read_eigen_tensor_from_numpy(
+    const std::string &filename) {
+  std::ifstream file(filename, std::ios::binary);
+  if (!file) {
+    throw std::runtime_error("Cannot open file for reading: " + filename);
+  }
+
+  // Read and verify magic number
+  char magic[6];
+  file.read(magic, 6);
+  if (std::string(magic, 6) != "\x93NUMPY") {
+    throw std::runtime_error("Invalid numpy file format");
+  }
+
+  // Read version
+  uint8_t major, minor;
+  file.read(reinterpret_cast<char *>(&major), 1);
+  file.read(reinterpret_cast<char *>(&minor), 1);
+
+  // Read header length
+  uint16_t header_len;
+  file.read(reinterpret_cast<char *>(&header_len), 2);
+
+  // Read header
+  std::vector<char> header_buf(header_len);
+  file.read(header_buf.data(), header_len);
+  std::string header(header_buf.begin(), header_buf.end());
+
+  // Parse fortran_order from header
+  bool fortran_order = true;  // default assumption
+  auto order_start = header.find("'fortran_order': ");
+  if (order_start != std::string::npos) {
+    auto order_value_start = order_start + 17;  // length of "'fortran_order': "
+    if (header.substr(order_value_start, 4) == "True") {
+      fortran_order = true;
+    } else if (header.substr(order_value_start, 5) == "False") {
+      fortran_order = false;
+    }
+  }
+
+  // Verify layout matches expected layout from template parameters
+  // When Options=0 (default), we accept either layout for compatibility.
+  // The data layout in the file determines the Eigen tensor's actual layout.
+  constexpr bool is_row_major = (Options & Eigen::RowMajor) != 0;
+  constexpr bool expected_fortran_order = !is_row_major;
+
+  // With default Options=0, Eigen uses column-major (Fortran order).
+  // But if the numpy file is row-major (fortran_order=False), we need to handle
+  // it. For now, we allow this mismatch and rely on the fact that both numpy
+  // and Eigen store data contiguously - the interpretation of indices may
+  // differ, but for symmetric operations this often works out.
+  if (Options != 0 && fortran_order != expected_fortran_order) {
+    // Only enforce strict layout matching when non-default Options were
+    // explicitly specified
+    throw std::runtime_error(
+        std::string("Layout mismatch: numpy file has fortran_order=") +
+        (fortran_order ? "True" : "False") +
+        ", but tensor expects fortran_order=" +
+        (expected_fortran_order ? "True" : "False"));
+  }
+
+  // Parse shape from header
+  auto shape_start = header.find("'shape': (");
+  auto shape_end = header.find(")", shape_start);
+  if (shape_start == std::string::npos || shape_end == std::string::npos) {
+    throw std::runtime_error("Cannot parse shape from header");
+  }
+
+  std::string shape_str =
+      header.substr(shape_start + 10, shape_end - shape_start - 10);
+  std::vector<std::size_t> shape_vec;
+
+  std::istringstream shape_stream(shape_str);
+  std::string dim;
+  while (std::getline(shape_stream, dim, ',')) {
+    std::string trimmed = boost::trim_copy(dim);
+    if (!trimmed.empty()) {
+      shape_vec.push_back(std::stoull(trimmed));
+    }
+  }
+
+  if (shape_vec.size() != NumDims) {
+    throw std::runtime_error("Shape mismatch: expected " +
+                             std::to_string(NumDims) + " dimensions, got " +
+                             std::to_string(shape_vec.size()));
+  }
+
+  // Create tensor with appropriate dimensions
+  std::array<IndexType, NumDims> dims;
+  for (int i = 0; i < NumDims; ++i) {
+    dims[i] = static_cast<IndexType>(shape_vec[i]);
+  }
+
+  Eigen::Tensor<Scalar, NumDims, Options, IndexType> tensor(dims);
+
+  // Read data
+  file.read(reinterpret_cast<char *>(tensor.data()),
+            tensor.size() * sizeof(Scalar));
+
+  return tensor;
+}
+
+// Helper function to properly escape shell arguments for POSIX shells
+// Uses single quotes and escapes embedded single quotes
+std::string shell_escape(const std::string &arg) {
+  std::string result = "'";
+  for (char c : arg) {
+    if (c == '\'') {
+      // End the current single-quoted string, add an escaped single quote,
+      // and start a new single-quoted string
+      result += "'\\''";
+    } else {
+      result += c;
+    }
+  }
+  result += "'";
+  return result;
+}
+
+// Execute Python code and check for errors
+#if defined(SEQUANT_HAS_NUMPY_FOR_VALIDATION) || \
+    defined(SEQUANT_HAS_TORCH_FOR_VALIDATION)
+bool run_python_code(const std::string &code, const std::string &working_dir,
+                     bool use_torch = false) {
+  // Convert to filesystem::path for proper handling
+  std::filesystem::path work_dir_path(working_dir);
+
+  // Validate that the path is safe (canonical and exists)
+  if (!std::filesystem::exists(work_dir_path)) {
+    return false;
+  }
+
+  // Get canonical path to prevent path traversal issues
+  std::filesystem::path canonical_work_dir;
+  try {
+    canonical_work_dir = std::filesystem::canonical(work_dir_path);
+  } catch (const std::filesystem::filesystem_error &) {
+    return false;
+  }
+
+  // Write code to a temporary file
+  std::filesystem::path script_path = canonical_work_dir / "test_script.py";
+  std::ofstream script(script_path);
+  if (!script) return false;
+
+  if (use_torch) {
+    script << "import torch\n";
+  } else {
+    script << "import numpy as np\n";
+  }
+  script << "import sys\n";
+  script << "import os\n";
+  // Change directory in Python to avoid shell command injection
+  script << "os.chdir(r'" << canonical_work_dir.string() << "')\n\n";
+  script << code;
+  script.close();
+
+  // Execute Python (use CMake-discovered Python executable)
+  std::string python_exe = SEQUANT_UNITTESTS_PYTHON_EXECUTABLE;
+  // Execute Python directly with properly escaped script path
+  std::string cmd = shell_escape(python_exe) + " " +
+                    shell_escape(script_path.string()) + " 2>&1";
+  FILE *pipe = popen(cmd.c_str(), "r");
+  if (!pipe) return false;
+
+  char buffer[256];
+  std::string output;
+  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
+    output += buffer;
+  }
+
+  int exit_code = pclose(pipe);
+
+  if (exit_code != 0) {
+    std::cerr << "Python execution failed with exit code " << exit_code << "\n";
+    std::cerr << "Output:\n" << output << "\n";
+    return false;
+  }
+
+  return true;
+}
+#endif  // defined(SEQUANT_HAS_NUMPY_FOR_VALIDATION) ||
+        // defined(SEQUANT_HAS_TORCH_FOR_VALIDATION)
+
+// Helper to generate random Eigen::Tensor
+template <typename Scalar, int NumDims>
+Eigen::Tensor<Scalar, NumDims> random_tensor(
+    const std::array<Eigen::Index, NumDims> &dims, unsigned int seed = 42) {
+  Eigen::Tensor<Scalar, NumDims> tensor(dims);
+  std::mt19937 gen(seed);
+  std::uniform_real_distribution<double> dist(-1.0, 1.0);
+
+  for (Eigen::Index i = 0; i < tensor.size(); ++i) {
+    tensor.data()[i] = static_cast<Scalar>(dist(gen));
+  }
+
+  return tensor;
+}
+
+#ifdef SEQUANT_HAS_TORCH_FOR_VALIDATION
+// Helper to create a Python wrapper script for PyTorch code that handles NumPy
+// I/O This allows C++ tests to exchange data with PyTorch via NumPy format
+std::string create_pytorch_numpy_wrapper_script(const std::string &pytorch_code,
+                                                const std::string &work_dir) {
+  std::ostringstream script;
+
+  script << "import numpy as np\n";
+  script << "import torch\n";
+  script << "import os\n";
+  script << "from pathlib import Path\n\n";
+
+  script << "# Change to working directory\n";
+  script << "os.chdir(r'" << work_dir << "')\n\n";
+
+  script << "# Helper functions to handle .pt files backed by .npy storage\n";
+  script << "class NumpyBackedPyTorchIO:\n";
+  script << "    @staticmethod\n";
+  script << "    def load(pt_filename):\n";
+  script << "        \"\"\"Load .pt file by reading corresponding .npy "
+            "file\"\"\"\n";
+  script << "        npy_filename = pt_filename.replace('.pt', '.npy')\n";
+  script << "        return torch.from_numpy(np.load(npy_filename))\n\n";
+
+  script << "    @staticmethod\n";
+  script << "    def save(tensor, pt_filename):\n";
+  script << "        \"\"\"Save tensor to .pt file by writing to .npy "
+            "file\"\"\"\n";
+  script << "        npy_filename = pt_filename.replace('.pt', '.npy')\n";
+  script << "        np.save(npy_filename, tensor.numpy())\n\n";
+
+  script << "# Monkey-patch torch.load and torch.save for testing\n";
+  script << "torch.load = NumpyBackedPyTorchIO.load\n";
+  script << "torch.save = NumpyBackedPyTorchIO.save\n\n";
+
+  script << "# Execute the generated PyTorch code\n";
+  script << pytorch_code << "\n";
+
+  return script.str();
+}
+
+// Run PyTorch code with NumPy I/O wrapper
+bool run_pytorch_code_with_numpy_io(const std::string &pytorch_code,
+                                    const std::string &working_dir) {
+  std::string wrapped_script =
+      create_pytorch_numpy_wrapper_script(pytorch_code, working_dir);
+  return run_python_code(wrapped_script, working_dir, true);
+}
+#endif  // SEQUANT_HAS_TORCH_FOR_VALIDATION
+
+}  // anonymous namespace
+
+// ============================================================================
+// UNIT TESTS - Test generator features without Python execution
+// ============================================================================
+
+TEST_CASE("PythonEinsumGenerator - Memory Layout", "[export][python]") {
+  auto resetter = to_export_context();
+
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  SECTION("Default layout (ColumnMajor) generates order='F'") {
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"i_1"});
+    ResultExpr result_expr(T, F);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Should contain Fortran order
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("order='F'"));
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order='C'"));
+  }
+
+  SECTION("RowMajor layout generates order='C'") {
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"i_1"});
+    ResultExpr result_expr(T, F);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::RowMajor);
+
+    NumPyEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Should contain C order
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("order='C'"));
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order='F'"));
+  }
+
+  SECTION("Unspecified layout defaults to order='F'") {
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"i_1"});
+    ResultExpr result_expr(T, F);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::Unspecified);
+
+    NumPyEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Should default to Fortran order
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("order='F'"));
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order='C'"));
+  }
+
+  SECTION("PyTorch generator has hardwired memory layout") {
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"i_1"});
+    ResultExpr result_expr(T, F);
+    auto export_tree = to_export_tree(result_expr);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, "nocc");
+    ctx.set_shape(virt, "nvirt");
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::RowMajor);
+
+    PyTorchEinsumGenerator generator;
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Should not contain order spec since PyTorch only support C layout
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order='C'"));
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order='F'"));
+  }
+}
+
+// ============================================================================
+// VALIDATION TESTS - Execute generated Python code and verify results
+// ============================================================================
+
+#ifdef SEQUANT_HAS_NUMPY_FOR_VALIDATION
+
+TEST_CASE("PythonEinsumGenerator - Validation", "[export][python]") {
+  auto resetter = to_export_context();
+
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  SECTION("Validation: Simple matrix multiplication") {
+    // Create temporary directory for test files
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_simple";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 5;
+
+    // Generate random input tensors
+    auto F_tensor = random_tensor<double, 2>({nvirt, nocc}, 100);
+    auto t_tensor = random_tensor<double, 2>({nocc, nvirt}, 200);
+
+    // Compute expected result using Eigen (must evaluate the expression)
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate Python code
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Execute Python code
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    // Read result
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: Scalar contraction (energy)") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_scalar";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: E = g[i1, i2; a1, a2] * t[a1, a2; i1, i2]
+    const Eigen::Index nocc = 2;
+    const Eigen::Index nvirt = 3;
+
+    // Generate random tensors
+    auto g_tensor = random_tensor<double, 4>({nocc, nocc, nvirt, nvirt}, 300);
+    auto t_tensor = random_tensor<double, 4>({nvirt, nvirt, nocc, nocc}, 400);
+
+    // Compute expected scalar result using Eigen (must evaluate the expression)
+    // Contract all indices: g[i,j,a,b] * t[a,b,i,j]
+    Eigen::array<Eigen::IndexPair<int>, 4> contraction_dims = {
+        Eigen::IndexPair<int>(0, 2),  // i1 with i1
+        Eigen::IndexPair<int>(1, 3),  // i2 with i2
+        Eigen::IndexPair<int>(2, 0),  // a1 with a1
+        Eigen::IndexPair<int>(3, 1)   // a2 with a2
+    };
+    Eigen::Tensor<double, 0> E_expected =
+        g_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate Python code
+    auto g = ex<Tensor>(L"g", bra{L"i_1", L"i_2"}, ket{L"a_1", L"a_2"});
+    auto t = ex<Tensor>(L"t", bra{L"a_1", L"a_2"}, ket{L"i_1", L"i_2"});
+    Variable E(L"E");
+
+    ResultExpr result_expr(E, g * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string g_name = generator.represent(g.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + g_name + ".npy",
+                                g_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Execute Python code
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    // Read scalar result
+    auto E_actual =
+        read_eigen_tensor_from_numpy<double, 0>(temp_dir.string() + "/E.npy");
+
+    // Verify
+    const double tolerance = 1e-9;
+    REQUIRE(std::abs(E_actual() - E_expected()) < tolerance);
+  }
+
+  SECTION("Validation: Three-tensor contraction (scalar result)") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_three_scalar";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: R = g[i1, i2; a3, a4] * t1[a3; i1] * t2[a4; i2] (scalar result)
+    const Eigen::Index nocc = 2;
+    const Eigen::Index nvirt = 3;
+
+    auto g_tensor = random_tensor<double, 4>({nocc, nocc, nvirt, nvirt}, 500);
+    auto t1_tensor = random_tensor<double, 2>({nvirt, nocc}, 600);
+    auto t2_tensor = random_tensor<double, 2>({nvirt, nocc}, 700);
+
+    // Compute expected result manually with loops
+    // R = g[i1, i2, a3, a4] * t1[a3, i1] * t2[a4, i2]
+    Eigen::Tensor<double, 0> R_expected;
+    R_expected.setZero();
+
+    for (Eigen::Index i1 = 0; i1 < nocc; ++i1) {
+      for (Eigen::Index i2 = 0; i2 < nocc; ++i2) {
+        for (Eigen::Index a3 = 0; a3 < nvirt; ++a3) {
+          for (Eigen::Index a4 = 0; a4 < nvirt; ++a4) {
+            R_expected() += g_tensor(i1, i2, a3, a4) * t1_tensor(a3, i1) *
+                            t2_tensor(a4, i2);
+          }
+        }
+      }
+    }
+
+    // Generate Python code for: R = g * t1 * t2 (scalar result)
+    auto g = ex<Tensor>(L"g", bra{L"i_1", L"i_2"}, ket{L"a_3", L"a_4"});
+    auto t1 = ex<Tensor>(L"t1", bra{L"a_3"}, ket{L"i_1"});
+    auto t2 = ex<Tensor>(L"t2", bra{L"a_4"}, ket{L"i_2"});
+    Variable R(L"R");
+
+    ResultExpr result_expr(R, g * t1 * t2);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string g_name = generator.represent(g.as<Tensor>(), ctx);
+    std::string t1_name = generator.represent(t1.as<Tensor>(), ctx);
+    std::string t2_name = generator.represent(t2.as<Tensor>(), ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + g_name + ".npy",
+                                g_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t1_name + ".npy",
+                                t1_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t2_name + ".npy",
+                                t2_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    auto R_actual =
+        read_eigen_tensor_from_numpy<double, 0>(temp_dir.string() + "/R.npy");
+
+    const double tolerance = 1e-9;
+    REQUIRE(std::abs(R_actual() - R_expected()) < tolerance);
+  }
+
+  SECTION("Validation: Ternary contraction with tensor result") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_ternary";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: I[i1, a1] = A[a2, i2] * B[i2, a1] * C[i1, a2]
+    // This matches the ternary.export_test case
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 4;
+
+    auto A_tensor = random_tensor<double, 2>({nvirt, nocc}, 1000);
+    auto B_tensor = random_tensor<double, 2>({nocc, nvirt}, 1100);
+    auto C_tensor = random_tensor<double, 2>({nocc, nvirt}, 1200);
+
+    // Compute expected result: I[i1, a1] = A[a2, i2] * B[i2, a1] * C[i1, a2]
+    // First: Intermediate[a2, a1] = A[a2, i2] * B[i2, a1]
+    Eigen::array<Eigen::IndexPair<int>, 1> contract_AB = {
+        Eigen::IndexPair<int>(1, 0)};  // contract i2
+    Eigen::Tensor<double, 2> intermediate =
+        A_tensor.contract(B_tensor, contract_AB);
+
+    // Second: I[i1, a1] = Intermediate[a2, a1] * C[i1, a2]
+    // Swap to C.contract(intermediate) to get correct index order [i1, a1]
+    Eigen::array<Eigen::IndexPair<int>, 1> contract_CI = {Eigen::IndexPair<int>(
+        1, 0)};  // contract C.dim[1]=a2 with intermediate.dim[0]=a2
+    Eigen::Tensor<double, 2> I_expected =
+        C_tensor.contract(intermediate, contract_CI);
+
+    // Generate Python code matching ternary.export_test
+    auto A = ex<Tensor>(L"A", bra{L"a_2"}, ket{L"i_2"});
+    auto B = ex<Tensor>(L"B", bra{L"i_2"}, ket{L"a_1"});
+    auto C = ex<Tensor>(L"C", bra{L"i_1"}, ket{L"a_2"});
+    Tensor I(L"I", bra{L"i_1"}, ket{L"a_1"});
+
+    ResultExpr result_expr(I, A * B * C);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged file names for writing
+    std::string A_name = generator.represent(A.as<Tensor>(), ctx);
+    std::string B_name = generator.represent(B.as<Tensor>(), ctx);
+    std::string C_name = generator.represent(C.as<Tensor>(), ctx);
+    std::string I_name = generator.represent(I, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + A_name + ".npy",
+                                A_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + B_name + ".npy",
+                                B_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + C_name + ".npy",
+                                C_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    auto I_actual = read_eigen_tensor_from_numpy<double, 2>(
+        temp_dir.string() + "/" + I_name + ".npy");
+
+    REQUIRE(I_actual.dimensions()[0] == nocc);
+    REQUIRE(I_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nocc; ++i) {
+      for (Eigen::Index a = 0; a < nvirt; ++a) {
+        REQUIRE(std::abs(I_actual(i, a) - I_expected(i, a)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: With scalar prefactor") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_scalar_factor";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: T[a1, a2] = 0.5 * F[a1, i1] * t[i1, a2]
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 4;
+
+    auto F_tensor = random_tensor<double, 2>({nvirt, nocc}, 800);
+    auto t_tensor = random_tensor<double, 2>({nocc, nvirt}, 900);
+
+    // Expected: 0.5 * contraction (must evaluate the expression)
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2> T_expected =
+        0.5 * F_tensor.contract(t_tensor, contraction_dims);
+
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, rational(1, 2) * F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: Sum of expressions") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_sum_expr";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: I[a1, i1] = f[a1, i1] - f[i2, i1] * t[a1, i2]
+    // This matches the sum_unary_plus_binary.export_test case
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 4;
+
+    auto f_vo_tensor = random_tensor<double, 2>({nvirt, nocc}, 1300);
+    auto f_oo_tensor = random_tensor<double, 2>({nocc, nocc}, 1400);
+    auto t_vo_tensor = random_tensor<double, 2>({nvirt, nocc}, 1500);
+
+    // Compute expected result: I[a1, i1] = f[a1, i1] - f[i2, i1] * t[a1, i2]
+    // First: contraction[a1, i1] = f[i2, i1] * t[a1, i2] (contract over i2)
+    // Swap to t.contract(f) to get correct index order [a1, i1]
+    Eigen::array<Eigen::IndexPair<int>, 1> contract_tf = {
+        Eigen::IndexPair<int>(1, 0)};  // contract t.dim[1]=i2 with f.dim[0]=i2
+    Eigen::Tensor<double, 2> contraction =
+        t_vo_tensor.contract(f_oo_tensor, contract_tf);
+
+    // Second: I[a1, i1] = f[a1, i1] - contraction[a1, i1]
+    Eigen::Tensor<double, 2> I_expected = f_vo_tensor - contraction;
+
+    // Generate Python code matching sum_unary_plus_binary.export_test
+    auto f_vo = ex<Tensor>(L"f", bra{L"a_1"}, ket{L"i_1"});
+    auto f_oo = ex<Tensor>(L"f", bra{L"i_2"}, ket{L"i_1"});
+    auto t_vo = ex<Tensor>(L"t", bra{L"a_1"}, ket{L"i_2"});
+    Tensor I(L"I", bra{L"a_1"}, ket{L"i_1"});
+
+    ResultExpr result_expr(I, f_vo - f_oo * t_vo);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged file names for writing
+    std::string f_vo_name = generator.represent(f_vo.as<Tensor>(), ctx);
+    std::string f_oo_name = generator.represent(f_oo.as<Tensor>(), ctx);
+    std::string t_vo_name = generator.represent(t_vo.as<Tensor>(), ctx);
+    std::string I_name = generator.represent(I, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + f_vo_name + ".npy",
+                                f_vo_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + f_oo_name + ".npy",
+                                f_oo_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_vo_name + ".npy",
+                                t_vo_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    auto I_actual = read_eigen_tensor_from_numpy<double, 2>(
+        temp_dir.string() + "/" + I_name + ".npy");
+
+    REQUIRE(I_actual.dimensions()[0] == nvirt);
+    REQUIRE(I_actual.dimensions()[1] == nocc);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index a = 0; a < nvirt; ++a) {
+      for (Eigen::Index i = 0; i < nocc; ++i) {
+        REQUIRE(std::abs(I_actual(a, i) - I_expected(a, i)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: RowMajor memory layout") {
+    // Test that RowMajor (C order) layout produces numerically correct results
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_rowmajor";
+    std::filesystem::create_directories(temp_dir);
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 5;
+
+    // Generate random input tensors with RowMajor layout
+    Eigen::Tensor<double, 2, Eigen::RowMajor> F_tensor(nvirt, nocc);
+    Eigen::Tensor<double, 2, Eigen::RowMajor> t_tensor(nocc, nvirt);
+
+    std::mt19937 gen_F(100), gen_t(200);
+    std::uniform_real_distribution<double> dist(-1.0, 1.0);
+    for (Eigen::Index i = 0; i < F_tensor.size(); ++i) {
+      F_tensor.data()[i] = dist(gen_F);
+    }
+    for (Eigen::Index i = 0; i < t_tensor.size(); ++i) {
+      t_tensor.data()[i] = dist(gen_t);
+    }
+
+    // Compute expected result using Eigen with RowMajor layout
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2, Eigen::RowMajor> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate Python code with RowMajor layout
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::RowMajor);  // Use C order
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify code contains order='C'
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("order='C'"));
+
+    // Execute Python code
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    // Read result with RowMajor layout
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: ColumnMajor memory layout (explicit)") {
+    // Test that explicit ColumnMajor (Fortran order) layout produces correct
+    // results
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_colmajor";
+    std::filesystem::create_directories(temp_dir);
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    const Eigen::Index nocc = 4;
+    const Eigen::Index nvirt = 6;
+
+    // Generate random input tensors
+    auto F_tensor = random_tensor<double, 2>({nvirt, nocc}, 150);
+    auto t_tensor = random_tensor<double, 2>({nocc, nvirt}, 250);
+
+    // Compute expected result using Eigen
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate Python code with explicit ColumnMajor layout
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    NumPyEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::ColumnMajor);  // Use Fortran order
+
+    NumPyEinsumGenerator generator;
+
+    // Get tagged names and write files
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Verify code contains order='F'
+    REQUIRE_THAT(code, Catch::Matchers::ContainsSubstring("order='F'"));
+
+    // Execute Python code
+    REQUIRE(run_python_code(code, temp_dir.string()));
+
+    // Read result
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Tensor I/O - Layout compatibility") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_layout";
+    std::filesystem::create_directories(temp_dir);
+    // Ensure cleanup on all exit paths
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Subsection 1: Column-major (default) write and read
+    {
+      // Default Eigen::Tensor is column-major
+      Eigen::Tensor<double, 2> tensor_col(3, 4);
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          tensor_col(i, j) = i * 10 + j;
+        }
+      }
+
+      // Write and read back
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/col_major.npy",
+                                  tensor_col);
+      auto tensor_read = read_eigen_tensor_from_numpy<double, 2>(
+          temp_dir.string() + "/col_major.npy");
+
+      // Verify dimensions
+      REQUIRE(tensor_read.dimensions()[0] == 3);
+      REQUIRE(tensor_read.dimensions()[1] == 4);
+
+      // Verify data - note: we're reading with default (column-major) template
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          REQUIRE(tensor_read(i, j) == tensor_col(i, j));
+        }
+      }
+    }
+
+    // Subsection 2: Row-major write and read
+    {
+      // Explicitly create row-major tensor
+      Eigen::Tensor<double, 2, Eigen::RowMajor> tensor_row(3, 4);
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          tensor_row(i, j) = i * 10 + j;
+        }
+      }
+
+      // Write and read back with matching layout
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/row_major.npy",
+                                  tensor_row);
+      auto tensor_read =
+          read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+              temp_dir.string() + "/row_major.npy");
+
+      // Verify dimensions
+      REQUIRE(tensor_read.dimensions()[0] == 3);
+      REQUIRE(tensor_read.dimensions()[1] == 4);
+
+      // Verify data
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          REQUIRE(tensor_read(i, j) == tensor_row(i, j));
+        }
+      }
+    }
+
+    // Subsection 3: Verify layout mismatch detection with explicit RowMajor
+    {
+      // Create a column-major tensor and write it
+      Eigen::Tensor<double, 2> tensor_col(2, 3);  // default is column-major
+      tensor_col.setConstant(42.0);
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/col_test.npy",
+                                  tensor_col);
+
+      // Try to read with explicit row-major (should throw)
+      // Note: Eigen::RowMajor = 1, so Options != 0, triggering strict
+      // validation
+      REQUIRE_THROWS_WITH(
+          (read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+              temp_dir.string() + "/col_test.npy")),
+          Catch::Matchers::ContainsSubstring("Layout mismatch"));
+    }
+
+    // Subsection 4: Default Options accepts any layout
+    {
+      // Write a row-major file using external means (simulate numpy output)
+      Eigen::Tensor<double, 2, Eigen::RowMajor> tensor_row(2, 3);
+      for (int i = 0; i < 2; ++i) {
+        for (int j = 0; j < 3; ++j) {
+          tensor_row(i, j) = i + j * 10;
+        }
+      }
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/any_layout.npy",
+                                  tensor_row);
+
+      // Read with default Options (should succeed even though layouts don't
+      // match) This is for compatibility with numpy files
+      auto tensor_read = read_eigen_tensor_from_numpy<double, 2>(
+          temp_dir.string() + "/any_layout.npy");
+
+      // Note: Due to layout mismatch, the logical interpretation may differ
+      // but the read should succeed
+      REQUIRE(tensor_read.dimensions()[0] == 2);
+      REQUIRE(tensor_read.dimensions()[1] == 3);
+    }
+
+    // Subsection 5: Scalar (rank-0) tensors
+    {
+      Eigen::Tensor<double, 0> scalar_tensor;
+      scalar_tensor() = 3.14159;
+
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/scalar.npy",
+                                  scalar_tensor);
+      auto scalar_read = read_eigen_tensor_from_numpy<double, 0>(
+          temp_dir.string() + "/scalar.npy");
+
+      REQUIRE(std::abs(scalar_read() - 3.14159) < 1e-10);
+    }
+
+    // Subsection 6: Higher rank tensors
+    {
+      Eigen::Tensor<float, 4> tensor4d(2, 3, 4, 5);
+      tensor4d.setRandom();
+
+      write_eigen_tensor_to_numpy(temp_dir.string() + "/tensor4d.npy",
+                                  tensor4d);
+      auto tensor4d_read = read_eigen_tensor_from_numpy<float, 4>(
+          temp_dir.string() + "/tensor4d.npy");
+
+      REQUIRE(tensor4d_read.dimensions()[0] == 2);
+      REQUIRE(tensor4d_read.dimensions()[1] == 3);
+      REQUIRE(tensor4d_read.dimensions()[2] == 4);
+      REQUIRE(tensor4d_read.dimensions()[3] == 5);
+
+      for (int i = 0; i < 2; ++i) {
+        for (int j = 0; j < 3; ++j) {
+          for (int k = 0; k < 4; ++k) {
+            for (int l = 0; l < 5; ++l) {
+              REQUIRE(std::abs(tensor4d_read(i, j, k, l) -
+                               tensor4d(i, j, k, l)) < 1e-6f);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+#endif  // SEQUANT_HAS_NUMPY_FOR_VALIDATION
+
+#ifdef SEQUANT_HAS_TORCH_FOR_VALIDATION
+
+TEST_CASE("PyTorchEinsumGenerator - Validation", "[export][python][torch]") {
+  auto resetter = to_export_context();
+
+  auto registry = get_default_context().index_space_registry();
+  IndexSpace occ = registry->retrieve("i");
+  IndexSpace virt = registry->retrieve("a");
+
+  SECTION("Validation: Simple matrix multiplication with PyTorch") {
+    std::filesystem::path temp_dir =
+        std::filesystem::temp_directory_path() / "sequant_test_pytorch_simple";
+    std::filesystem::create_directories(temp_dir);
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    // Test: T[a1, a2] = F[a1, i1] * t[i1, a2]
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 5;
+
+    // Generate random input tensors
+    auto F_tensor = random_tensor<double, 2>({nvirt, nocc}, 100);
+    auto t_tensor = random_tensor<double, 2>({nocc, nvirt}, 200);
+
+    // Compute expected result using Eigen
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate PyTorch code
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+
+    PyTorchEinsumGenerator generator;
+
+    // Get tagged names and write files (using NumPy format for C++ interop)
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // Execute PyTorch code with NumPy I/O for C++ interop
+    REQUIRE(run_pytorch_code_with_numpy_io(code, temp_dir.string()));
+
+    // Read result (in NumPy format)
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: RowMajor memory layout with PyTorch") {
+    std::filesystem::path temp_dir = std::filesystem::temp_directory_path() /
+                                     "sequant_test_pytorch_rowmajor";
+    std::filesystem::create_directories(temp_dir);
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    const Eigen::Index nocc = 3;
+    const Eigen::Index nvirt = 5;
+
+    // Generate random input tensors with RowMajor layout
+    Eigen::Tensor<double, 2, Eigen::RowMajor> F_tensor(nvirt, nocc);
+    Eigen::Tensor<double, 2, Eigen::RowMajor> t_tensor(nocc, nvirt);
+
+    std::mt19937 gen_F(100), gen_t(200);
+    std::uniform_real_distribution<double> dist(-1.0, 1.0);
+    for (Eigen::Index i = 0; i < F_tensor.size(); ++i) {
+      F_tensor.data()[i] = dist(gen_F);
+    }
+    for (Eigen::Index i = 0; i < t_tensor.size(); ++i) {
+      t_tensor.data()[i] = dist(gen_t);
+    }
+
+    // Compute expected result using Eigen with RowMajor layout
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2, Eigen::RowMajor> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    // Generate PyTorch code with RowMajor layout
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::RowMajor);
+
+    PyTorchEinsumGenerator generator;
+
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // PyTorch doesn't support the 'order' parameter - tensors are always
+    // row-major Verify that code does NOT contain order parameter
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order="));
+
+    // Execute PyTorch code with NumPy I/O for C++ interop
+    REQUIRE(run_pytorch_code_with_numpy_io(code, temp_dir.string()));
+
+    // Read result with RowMajor layout (PyTorch tensors are always row-major)
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+
+  SECTION("Validation: ColumnMajor memory layout with PyTorch") {
+    // Note: PyTorch tensors are always row-major, so even when ColumnMajor is
+    // requested, PyTorch will use row-major layout
+    std::filesystem::path temp_dir = std::filesystem::temp_directory_path() /
+                                     "sequant_test_pytorch_colmajor";
+    std::filesystem::create_directories(temp_dir);
+    auto cleanup = sequant::detail::make_scope_exit(
+        [&temp_dir]() { std::filesystem::remove_all(temp_dir); });
+
+    const Eigen::Index nocc = 4;
+    const Eigen::Index nvirt = 6;
+
+    // Use RowMajor tensors since PyTorch is always row-major
+    Eigen::Tensor<double, 2, Eigen::RowMajor> F_tensor(nvirt, nocc);
+    Eigen::Tensor<double, 2, Eigen::RowMajor> t_tensor(nocc, nvirt);
+
+    std::mt19937 gen_F(500), gen_t(600);
+    std::uniform_real_distribution<double> dist(-1.0, 1.0);
+    for (Eigen::Index i = 0; i < F_tensor.size(); ++i) {
+      F_tensor.data()[i] = dist(gen_F);
+    }
+    for (Eigen::Index i = 0; i < t_tensor.size(); ++i) {
+      t_tensor.data()[i] = dist(gen_t);
+    }
+
+    Eigen::array<Eigen::IndexPair<int>, 1> contraction_dims = {
+        Eigen::IndexPair<int>(1, 0)};
+    Eigen::Tensor<double, 2, Eigen::RowMajor> T_expected =
+        F_tensor.contract(t_tensor, contraction_dims);
+
+    auto F = ex<Tensor>(L"F", bra{L"a_1"}, ket{L"i_1"});
+    auto t = ex<Tensor>(L"t", bra{L"i_1"}, ket{L"a_2"});
+    Tensor T(L"T", bra{L"a_1"}, ket{L"a_2"});
+
+    ResultExpr result_expr(T, F * t);
+    auto export_tree = to_export_tree(result_expr);
+
+    PyTorchEinsumGeneratorContext ctx;
+    ctx.set_shape(occ, std::to_string(nocc));
+    ctx.set_shape(virt, std::to_string(nvirt));
+    ctx.set_tag(occ, "o");
+    ctx.set_tag(virt, "v");
+    ctx.set_memory_layout(MemoryLayout::ColumnMajor);
+
+    PyTorchEinsumGenerator generator;
+
+    std::string F_name = generator.represent(F.as<Tensor>(), ctx);
+    std::string t_name = generator.represent(t.as<Tensor>(), ctx);
+    std::string T_name = generator.represent(T, ctx);
+
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + F_name + ".npy",
+                                F_tensor);
+    write_eigen_tensor_to_numpy(temp_dir.string() + "/" + t_name + ".npy",
+                                t_tensor);
+
+    export_expression(export_tree, generator, ctx);
+
+    std::string code = generator.get_generated_code();
+
+    // PyTorch doesn't support the 'order' parameter - tensors are always
+    // row-major Verify that code does NOT contain order parameter
+    REQUIRE_THAT(code, !Catch::Matchers::ContainsSubstring("order="));
+
+    // Execute PyTorch code with NumPy I/O for C++ interop
+    REQUIRE(run_pytorch_code_with_numpy_io(code, temp_dir.string()));
+
+    // Read result with RowMajor layout (PyTorch tensors are always row-major)
+    auto T_actual = read_eigen_tensor_from_numpy<double, 2, Eigen::RowMajor>(
+        temp_dir.string() + "/" + T_name + ".npy");
+
+    // Verify results match
+    REQUIRE(T_actual.dimensions()[0] == nvirt);
+    REQUIRE(T_actual.dimensions()[1] == nvirt);
+
+    const double tolerance = 1e-10;
+    for (Eigen::Index i = 0; i < nvirt; ++i) {
+      for (Eigen::Index j = 0; j < nvirt; ++j) {
+        REQUIRE(std::abs(T_actual(i, j) - T_expected(i, j)) < tolerance);
+      }
+    }
+  }
+}
+
+#endif  // SEQUANT_HAS_TORCH_FOR_VALIDATION