Handle aliasing of viewed input tensors of varying shapes #2760
base: main
Conversation
|
Fixes #2756
shino16 left a comment
Looks great, thank you!
|
Your test produces the following traces.

Prologue:

# Constructed by Transform for execution (took 10 milliseconds)
import torch
from thunder.executors.torchex import no_autocast
@torch.no_grad()
@no_autocast
def prologue(*args, **kwargs):
# args: "Any"
check_len(args, 3)
# prims.check_len(args, 3)
# kwargs: "Any"
check_len(kwargs, 0)
# prims.check_len(kwargs, 0)
x: "cuda:0 f32[[IntegerProxy name=i0, value=2, static=CONSTRAINT.CONSTRAINABLE], [IntegerProxy name=i1, value=3, static=CONSTRAINT.CONSTRAINABLE]]" = args[0]
y: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]" = args[1]
z: "cuda:0 f32[[IntegerProxy name=i3, value=3, static=CONSTRAINT.CONSTRAINABLE]]" = args[2]
(i0, i1) = shape(x)
# (i0, i1) = prims.shape(x)
check_tensor_metadata(x, (i0, i1), 'cuda:0', torch.float32, False)
# prims.check_tensor_shape_and_metadata(x, (i0, i1), 'cuda:0', torch.float32, False)
(i2,) = shape(y)
# (i2,) = prims.shape(y)
check_tensor_metadata(y, (i2,), 'cuda:0', torch.float32, False)
# prims.check_tensor_shape_and_metadata(y, (i2,), 'cuda:0', torch.float32, False)
(i3,) = shape(z)
# (i3,) = prims.shape(z)
check_tensor_metadata(z, (i3,), 'cuda:0', torch.float32, False)
# prims.check_tensor_shape_and_metadata(z, (i3,), 'cuda:0', torch.float32, False)
cache_info: "Any" = thunder._get_cache_info()
cache_info_default_dtype: "<class 'torch.dtype'>" = cache_info['default_dtype']
check_literal_like(cache_info_default_dtype, torch.float32)
# prims.check_literal_like(cache_info_default_dtype, torch.float32)
cache_info_default_device: "<class 'torch.device'>" = cache_info['default_device']
check_literal_like(cache_info_default_device, torch.device("cpu"))
# prims.check_literal_like(cache_info_default_device, torch.device("cpu"))
cache_info_is_autocast_enabled: "bool False" = cache_info['is_autocast_enabled']
check_number_type_and_value(cache_info_is_autocast_enabled, False)
# prims.check_number_type_and_value(cache_info_is_autocast_enabled, False)
cache_info_alias_tensor_indices: "str" = cache_info['alias_tensor_indices']
check_string_value(cache_info_alias_tensor_indices, '0,1,2')
# prims.check_string_value(cache_info_alias_tensor_indices, '0,1,2')
cache_info_is_grad_enabled: "bool True" = cache_info['is_grad_enabled']
check_number_type_and_value(cache_info_is_grad_enabled, True)
# prims.check_number_type_and_value(cache_info_is_grad_enabled, True)
cache_info_no_grad_sync: "bool False" = cache_info['no_grad_sync']
check_number_type_and_value(cache_info_no_grad_sync, False)
# prims.check_number_type_and_value(cache_info_no_grad_sync, False)
return ((x, y, z), ())

Computation trace:

# Constructed by Unwrap the actual return value
import torch
from thunder.executors.torchex import no_autocast
@torch.no_grad()
@no_autocast
def computation(x, y, z):
# x: "cuda:0 f32[[IntegerProxy name=i0, value=2, static=CONSTRAINT.CONSTRAINABLE], [IntegerProxy name=i1, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# y: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# z: "cuda:0 f32[[IntegerProxy name=i3, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
(t27, t28, t29) = update_aliases((x, y, z))
del x, y, z
# /opt/pytorch/lightning-thunder/tmp/main.py:8: return x + 2, y.add_(z)
t35 = torch.add(t27, 2, alpha=1) # t35: "cuda:0 f32[[IntegerProxy name=i0, value=2, static=CONSTRAINT.CONSTRAINABLE], [IntegerProxy name=i1, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# t35 = ltorch.add(t27, 2, alpha=1) # t35: "cuda:0 f32[[IntegerProxy name=i0, value=2, static=CONSTRAINT.CONSTRAINABLE], [IntegerProxy name=i1, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# (i0, i1) = prims.shape(t27)
# (i0, i1) = prims.shape(t27)
# i31 = prims.eq(i1, 1) # i31: "bool False"
# i32 = prims.eq(i1, i1) # i32: "bool True"
# i33 = prims.eq(i0, 1) # i33: "bool False"
# i34 = prims.eq(i0, i0) # i34: "bool True"
# (i0, i1) = prims.shape(t27)
# (i0, i1) = prims.shape(t27)
# t35 = prims.add(t27, 2.0) # t35: "cuda:0 f32[[IntegerProxy name=i0, value=2, static=CONSTRAINT.CONSTRAINABLE], [IntegerProxy name=i1, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
(t36, t37, t38) = update_aliases((t27, t28, t29))
del t27, t28, t29
# /opt/pytorch/lightning-thunder/tmp/main.py:8: return x + 2, y.add_(z)
t49 = torch.add(t37, t38, alpha=1) # t49: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# t49 = ltorch.add(t37, t38, alpha=1) # t49: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
# (i2,) = prims.shape(t37)
# (i2,) = prims.shape(t37)
# (i3,) = prims.shape(t38)
# (i3,) = prims.shape(t38)
# i42 = prims.eq(i2, 1) # i42: "bool False"
# i43 = prims.eq(i2, i2) # i43: "bool True"
# i44 = prims.eq(i2, 1) # i44: "bool False"
# i45 = prims.eq(i3, 1) # i45: "bool False"
# i46 = prims.eq(i2, i3) # i46: "bool True"
# (i2,) = prims.shape(t37)
# (i2,) = prims.shape(t37)
# (i3,) = prims.shape(t38)
# (i3,) = prims.shape(t38)
# i47 = prims.eq(i3, i2) # i47: "bool True"
# t49 = prims.add(t37, t38) # t49: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
[t17] = nvFusion0(t49, t37)
# t17 = prims.copy_(t49, t37, grad_enabled=True) # t17: "cuda:0 f32[[IntegerProxy name=i2, value=3, static=CONSTRAINT.CONSTRAINABLE]]"
del t49, t37
return (t35, t17)

Other than so many [...]
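For context, a minimal sketch of the kind of call that could yield traces like these. The function body matches the source line recorded in the trace; the tensor setup is only an illustrative assumption (three arguments aliasing one CUDA tensor, consistent with alias_tensor_indices = '0,1,2'):

import torch, thunder

def f(x, y, z):
    return x + 2, y.add_(z)

jf = thunder.jit(f)
a = torch.randn(2, 3, device="cuda")
# x is the whole tensor; y and z are row views sharing its storage
out = jf(a, a[0], a[1])
print(thunder.last_traces(jf)[-1])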
|
Thanks for checking! I have another PR in review to clean up the symbolic traces.
mattteochen left a comment
This is great, thank you for addressing this, Beverly!
|
I've just realized this doesn't solve the problem, but I'm not seeing a fix. Consider the following program: [...] This fails on this PR. All the expected [...]
|
Hmm... Looks like both are returning nonsense when jitted, even on the main branch. So the underlying issue is how we are communicating data sharing to nvFuser.
|
I find the code here sketchy, particularly lightning-thunder/thunder/core/update_aliases.py, lines 147 to 153 in 9d24da4.
Compare lightning-thunder/thunder/core/update_aliases.py, lines 195 to 197 in 9d24da4.
This shortcut is no longer justified when [...]. Take your example:

def f(a, _):
    b = a.view(5,5)
    return a.exp_() * b.tanh_()

These are the traces before and after:

# Constructed by Remove context manager prims
import thunder
import thunder.torch as ltorch
import torch
from thunder.executors.torchex import no_autocast
@torch.no_grad()
@no_autocast
def computation(a):
# a: "cuda:0 f32[5, 5]"
# /opt/pytorch/lightning-thunder/tmp/main.py:5: b = a.view(5,5)
b = ltorch.view(a, 5, 5) # b: "cuda:0 f32[5, 5]"
# b = ltorch.reshape(a, (5, 5)) # b: "cuda:0 f32[5, 5]"
# b = prims.shallow_copy(a) # b: "cuda:0 f32[5, 5]"
# /opt/pytorch/lightning-thunder/tmp/main.py:6: return a.exp_() * b.tanh_()
t2 = ltorch.exp_(a) # t2: "cuda:0 f32[5, 5]"
# t1 = ltorch.exp(a) # t1: "cuda:0 f32[5, 5]"
# t1 = prims.exp(a) # t1: "cuda:0 f32[5, 5]"
# t2 = prims.copy_(t1, a, grad_enabled=True) # t2: "cuda:0 f32[5, 5]"
t4 = ltorch.tanh_(b) # t4: "cuda:0 f32[5, 5]"
# t3 = ltorch.tanh(b) # t3: "cuda:0 f32[5, 5]"
# t3 = prims.tanh(b) # t3: "cuda:0 f32[5, 5]"
# t4 = prims.copy_(t3, b, grad_enabled=True) # t4: "cuda:0 f32[5, 5]"
t5 = ltorch.mul(t2, t4) # t5: "cuda:0 f32[5, 5]"
# t5 = prims.mul(t2, t4) # t5: "cuda:0 f32[5, 5]"
return {'output': (t5,), 'flat_args': [a]}

Trace after:
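For reference, a hedged sketch of how one might reproduce and print these traces locally (the CUDA device and the driver code here are my assumptions, not taken verbatim from the report):

import torch, thunder

def f(a, _):
    b = a.view(5, 5)
    return a.exp_() * b.tanh_()

jf = thunder.jit(f)
a = torch.randn(5, 5, device="cuda")
jf(a, a[0])  # the second argument aliases the first, as in the example above
for tr in thunder.last_traces(jf):
    print(tr)  # includes the traces before and after the alias-update pass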
|
shino16 left a comment
This is probably a separate issue.
thunder/core/update_aliases.py (outdated)
    inplace_inputs.add(in_tensor)
    out_tensors = set()
-   for group in view_groups:
+   for group in intermediate_view_groups:
We should iterate over the groups in input_view_groups too; otherwise the following example makes {x, y} and {y, y2} separate view groups.
import torch, thunder

def f(x, y):
    y2 = y.view(-1)
    x.exp_()
    return y2.sin()

jf = thunder.jit(f)
x = torch.randn(3, device="cuda")
x_ref = x.clone().detach()
z = jf(x, x[0])
z_ref = f(x_ref, x_ref[0])
print(thunder.last_traces(jf)[-1])
torch.testing.assert_close(z, z_ref)  # AssertionError
I also thought that we should merge overlapping groups to avoid what you pointed out, but I wasn't able to break the overall trace correctness. I guess that's because I tested on CPU and not on CUDA.
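A tiny sketch of the kind of merging meant here, illustrative only and not code from this PR:

def merge_overlapping(groups):
    # Absorb every existing group that overlaps the incoming one, so
    # transitive chains like {a, b}, {b, c} collapse into {a, b, c}.
    merged = []
    for g in groups:
        g = set(g)
        keep = []
        for m in merged:
            if m & g:
                g |= m
            else:
                keep.append(m)
        keep.append(g)
        merged = keep
    return merged

assert merge_overlapping([{1, 2}, {2, 3}, {4, 5}]) == [{1, 2, 3}, {4, 5}]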
Yes, this is a good point.
I have been able to execute your problematic functions with something like the following (built on top of your branch).

Patch:

diff --git a/thunder/core/update_aliases.py b/thunder/core/update_aliases.py
index d2db3e32..62e6c1c2 100644
--- a/thunder/core/update_aliases.py
+++ b/thunder/core/update_aliases.py
@@ -66,6 +66,62 @@ def _can_be_reshaped(arg, arg_to_replace):
arg_to_replace_numel = arg_to_replace.numel
return arg_numel == arg_to_replace_numel
+def _merge_overlapping_groups(groups: list[set]) -> list[set]:
+ """
+ Merge overlapping sets in a list of sets.
+
+ When tensors share storage transitively (e.g., a→b→c), the initial grouping
+ may create overlapping sets like [{a,b}, {b,c}]. This function merges them
+ into [{a,b,c}] to preserve transitive relationships.
+
+ Args:
+ groups: List of sets, potentially with overlaps
+
+ Returns:
+ List of sets with all overlapping groups merged
+
+ Example:
+ >>> _merge_overlapping_groups([{1, 2}, {2, 3}, {4, 5}])
+ [{1, 2, 3}, {4, 5}]
+ """
+ if not groups:
+ return []
+
+ merged = []
+ for group in groups:
+ # Check if this group overlaps with any existing merged group
+ found_overlap = False
+ for existing in merged:
+ if group.intersection(existing):
+ # Merge into existing group
+ existing.update(group)
+ found_overlap = True
+ break
+
+ if not found_overlap:
+ # No overlap found, add as new group
+ merged.append(group.copy())
+
+ # Keep merging until no more overlaps exist (handles transitive overlaps)
+ # Example: [{1,2}, {2,3}, {3,4}] needs multiple passes
+ changed = True
+ while changed:
+ changed = False
+ new_merged = []
+ for group in merged:
+ found_overlap = False
+ for existing in new_merged:
+ if group.intersection(existing):
+ existing.update(group)
+ found_overlap = True
+ changed = True
+ break
+ if not found_overlap:
+ new_merged.append(group)
+ merged = new_merged
+
+ return merged
+
def replace_args_with_alias_map(
computation_trace: Trace,
@@ -150,7 +206,6 @@ def insert_alias_updates(computation_trace: Trace, alias_tensor_indices: list[li
out_tensors = set(map(variableify, filter(lambda p: isinstance(p, TensorProxy), bsym.flat_proxy_outs)))
if _is_inplace_op(bsym):
inplace_inputs.add(in_tensor)
- out_tensors = set()
for group in intermediate_view_groups:
if in_tensor in group:
group.update(out_tensors)
@@ -158,12 +213,14 @@ def insert_alias_updates(computation_trace: Trace, alias_tensor_indices: list[li
else:
intermediate_view_groups.append(out_tensors.union({in_tensor}))
- # filter out view groups that don't have any tensors involved in inplace ops
- input_view_groups = [group for group in input_view_groups if len(group.intersection(inplace_inputs)) != 0]
- intermediate_view_groups = [
- group for group in intermediate_view_groups if len(group.intersection(inplace_inputs)) != 0
- ]
+ # Merge overlapping groups first to handle transitive relationships
+ # (e.g., if x aliases y, and y.view() creates y2, then {x,y} and {y,y2} should merge to {x,y,y2})
view_groups = input_view_groups + intermediate_view_groups
+ view_groups = _merge_overlapping_groups(view_groups)
+
+ # Filter out view groups that don't have any tensors involved in inplace ops
+ # This must happen AFTER merging so we don't discard groups that are transitively related
+ view_groups = [group for group in view_groups if len(group.intersection(inplace_inputs)) != 0]
viewed = set(reduce(set.union, view_groups, set()))
encountered = set(reduce(set.union, input_view_groups, set()))
@@ -183,15 +240,29 @@ def insert_alias_updates(computation_trace: Trace, alias_tensor_indices: list[li
bsyms.append(bsym.from_bsym_swap_proxies(swap_map, skip_output=True))
continue
- new_aliases = _get_new_aliases(views_encountered, computation_trace)
+ # For view creation ops, just create the view with swapped inputs
+ # Don't insert update_aliases - the view output will be included in the next update_aliases
+ # (which happens before the inplace op)
+ if _is_view_creation_op(bsym):
+ new_bsym = bsym.from_bsym_swap_proxies(swap_map)
+ bsyms.append(new_bsym)
+ # Mark the output as encountered so it can be included in future update_aliases
+ encountered.update(out_tensors)
+ else:
+ # For inplace ops or ops involving viewed args, insert update_aliases before the op
+ if views_encountered:
+ new_aliases = _get_new_aliases(views_encountered, computation_trace)
- update_bsym, swap_map = _get_update_bsym(views_encountered, swap_map, new_aliases)
- new_bsym = bsym.from_bsym_swap_proxies(swap_map)
- if has_tags(bsym, {BoundSymbolTag.BACKWARD}):
- update_bsym.tags.add(BoundSymbolTag.BACKWARD)
- bsyms.append(update_bsym)
- encountered.update(out_tensors)
- bsyms.append(new_bsym)
+ update_bsym, swap_map = _get_update_bsym(views_encountered, swap_map, new_aliases)
+ new_bsym = bsym.from_bsym_swap_proxies(swap_map)
+ if has_tags(bsym, {BoundSymbolTag.BACKWARD}):
+ update_bsym.tags.add(BoundSymbolTag.BACKWARD)
+ bsyms.append(update_bsym)
+ encountered.update(out_tensors)
+ bsyms.append(new_bsym)
+ else:
+ # No compatible views to update, just process the operation
+ bsyms.append(bsym.from_bsym_swap_proxies(swap_map))
if _is_inplace_op(bsym) and len(out_tensors) == 1 and len(in_tensors) == 1:
# This relies on these being one element sets (ltorch.setitem_ yields no outs).
swap_map = _update_swap_map(swap_map, in_tensors.pop(), unvariableify(out_tensors.pop()))

Test repro:

import torch, thunder

passed = {}
print_traces = False

def f(x, y):
    y2 = y.view(-1)
    x.exp_()
    return y2.sin()

jf = thunder.jit(f)
x = torch.randn(3, device="cuda")
x_ref = x.clone().detach()
z = jf(x, x[0])
z_ref = f(x_ref, x_ref[0])
if print_traces:
    print(thunder.last_traces(jf)[-1])
try:
    torch.testing.assert_close(z, z_ref)
    passed["test_basic_alias"] = True
except Exception as e:
    passed["test_basic_alias"] = False

def f(a, _):
    b = a.view(5,5)
    return a.exp_().view(5,5) * b.tanh_()  # adding view after exp_() due to broadcast issue

jf = thunder.jit(f)
x = torch.randn(25, device="cuda")
x_ref = x.clone().detach()
z = jf(x, x[0])  # b input irrelevant
z_ref = f(x_ref, x_ref[0])
if print_traces:
    print(thunder.last_traces(jf)[-1])
try:
    torch.testing.assert_close(z, z_ref)
    passed["test_alias_view_reshape"] = True
except Exception as e:
    passed["test_alias_view_reshape"] = False

def f(a, _):
    b = a[0,0]
    return a.exp_() * b.tanh_()

jf = thunder.jit(f)
x = torch.randn(4, 4, device="cuda")
x_ref = x.clone().detach()
z = jf(x, x[0])  # b input irrelevant
z_ref = f(x_ref, x_ref[0])
if print_traces:
    print(thunder.last_traces(jf)[-1])
try:
    torch.testing.assert_close(z, z_ref)
    passed["test_indexing_alias"] = True
except Exception as e:
    passed["test_indexing_alias"] = False

print("TEST PASSED!" if all(passed.values()) else "TEST FAILED!")

This was partially Gemini-generated, so take it with a grain of salt. What I tried to add:
|
I'm going to state some definitions for clarity's sake, to make sure that we are on the same page. Consider the following modification of the given example [...]. It produces the post-update_aliases trace [...]. Notice that the alias [...]
I'm confused by this comment. Does the name of the variable suggest something different from what I described above? Do you have a suggestion for a better variable name?
|
@mattteochen I just pushed a commit that incorporates the view groups coming from the inputs into the collection of view groups coming from the intermediate tensors. I understand how defining [...] For the other part of your patch, following the comment [...]
|
@beverlylytle Running the repro from #2760 (comment) fails with [...]. Execution traces are attached in that comment. My understanding is that [...]. However, in this example we already have the view group [...]. Adding mappings like [...]
|
Sorry, I mistakenly re-requested @mattteochen's review.
|
def func(a, b):
    # c is a view of a.
    c = a.view(a.shape)
    # Modify 'c' in place.
    c.add_(1.0)
    # Return b. It should reflect the update to c (via a).
    return b

Thunder last trace: [...]
|
I think what I said here could answer your question in #2760 (comment), but it seems more like a separate issue that exists on [...]
I think it's dealt with in this transform: lightning-thunder/thunder/__init__.py, line 485 in 656656b.
This should appear as the first trace in [...]
|
Consider the two versions of the same function [...]. We would expect these two functions to produce essentially the same trace. However, on the main branch, one version results in a tensor-likes-not-close error when compared to the PyTorch result, whereas the other does produce the same result as PyTorch. Compare the last fusions in each trace. They both represent tanh-copy_-mul, but one is provided two inputs, while the other has only one. This is because in the second version, all instances of [...]. That being said, it is possible that view groups should include more than they do. Do you (@shino16) have an example where an alias, when included in a view group, produces a correct trace, but an incorrect trace when excluded?
I found Thunder (with the patch above) producing incorrect results in a context like:

def func(a, b):
    # c is a view of a
    c = a.view(a.shape)
    # Inplace update on c.
    # Since c is a view of a, and b is a slice of a, this MUST update b.
    c.add_(1.0)
    # Operation on b. Should see the +1.0 update.
    d = b * 2
    return d

if [...]

The input alias should be [...] The goal of that function is to unify [...]

repro:

import torch
import thunder

def main():
    def func(a, b):
        # c is a view of a
        c = a.view(a.shape)
        # Inplace update on c, which affects a, and thus should affect b
        c.add_(1.0)
        # Operation on b. Should see the 1.0.
        # If b is not updated, this might run before the add or use old values.
        d = b * 2
        return d

    device = 'cuda'
    a = torch.zeros(10, device=device)
    b = a[:5]  # b aliases a
    a_ref = a.clone().detach()
    b_ref = a_ref[:5]
    print(f"Input 'b' (should be 0): {b[:3].tolist()}")
    jf = thunder.jit(func)
    try:
        res = jf(a, b)
        res_ref = func(a_ref, b_ref)
        print(f"Thunder result (should be 1.0): {a[:3].tolist()}")
        print(f"PyTorch result: {a_ref[:3].tolist()}")
        print(f"Thunder result (should be 2.0): {res[:3].tolist()}")
        print(f"PyTorch result: {res_ref[:3].tolist()}")
        print("\nLast Trace:")
        print(thunder.last_traces(jf)[-1])
        torch.testing.assert_close(res, res_ref)
        print("\n[PASSED] Slice alias 'b' correctly saw the update!")
    except Exception as e:
        print(f"\n[FAILED] Slice alias check failed.")
        # print(e)
        raise e

if __name__ == "__main__":
    main()
|
import torch
import thunder
import traceback
def main():
    def func(a, b):
        # c is a view of a.
        c = a.view(a.shape)
        # Modify 'c' in place.
        c.add_(1.0)
        # Return b. It should reflect the update to c (via a).
        return b

    # Setup inputs
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    a = torch.zeros(10, device=device)
    b = a.view(a.shape)  # b aliases a
    a_ref = a.clone().detach()
    b_ref = a_ref.view(a_ref.shape)
    print(f"Input 'b' (first 3): {b[:3].tolist()}")
    # Compile with Thunder
    jf = thunder.jit(func)
    try:
        # Run Thunder
        # Note: we return b.
        res = jf(a, b)
        # Run PyTorch Reference
        res_ref = func(a_ref, b_ref)
        # Compare
        print(f"Thunder result (first 3): {res[:3].tolist()}")
        print(f"PyTorch result (first 3): {res_ref[:3].tolist()}")
        print("\nLast Trace:")
        print(thunder.last_traces(jf)[-1])
        torch.testing.assert_close(res, res_ref)
        print("\n[PASSED] Transitive alias 'b' was correctly updated!")
    except Exception as e:
        print(f"\n[FAILED] Transitive alias check failed.")
        print("Likely cause: 'b' did not receive the inplace update from 'c'.")
        # traceback.print_exc()
        raise e

if __name__ == "__main__":
    main()
I think I'm not properly understanding your concern. The script in #2760 (comment) produces exactly the trace that gives the correct result when [...]
|
This PR is exploding. I agree that we should address different subsections separately. I've investigated the proposed patch to address those cases where we mix [...]. This PR doesn't need to handle all the edge cases, I think.
If code doesn't directly contribute to the output of the function that is jitted, Thunder cuts it out. I think this is a good assumption for Thunder to make. We don't want to be spinning wheels on dead code. If [...]
Oh yes, if we assume that we don't cover "dead code", I agree with you. I don't have a concrete example right now, as this came to mind while trying some nested ops working on a shared memory location. I saw this case as something in the middle between dead and non-dead code, due to the shared memory.
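For concreteness, the kind of in-between case in question looks roughly like the earlier repro (a hedged sketch; the aliasing between a and b is assumed to come from the caller):

import torch

def f(a, b):           # assume the caller passes b = a[:5], so b aliases a
    c = a.view(a.shape)
    c.add_(1.0)        # c itself never feeds the return value...
    return b * 2       # ...but the write lands in storage that b also reads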
Sorry @beverlylytle, this test seems to be correct on your branch (this PR). I was testing it with the patch applied, and the [...]. This introduced an incorrect overall result (caused by the skipping policy) on cases of this type. Checking out an earlier commit, we had runtime errors:

RuntimeError: Attempting to reshape a.shape=(4,) to shape=(2,), but a.numel=4 is different from the number of elements in shape, 2
In an off-GitHub discussion, we resolved that when [...]
shino16 left a comment
Let's merge this! The issue with in-place ops on aliased memory exists even on main, so it will be discussed separately in #2766.
|
@KaelanDt Could you take a look?
Consider the following function, provided the input x = torch.randn(2,2) and y = x[0,:]. When the function is called, the in-place changes to y also affect x, as they share storage. If x and y had the same number of elements, we would replace all instances of y in the program body with the output of prims.reshape(x, y.shape). Since they do not have the same number of elements, this doesn't work. Currently this situation is skipped, and, in the case above, no special handling of x occurs. This PR addresses that case by defining view_groups not only with the data collected from view creation operations within the program body, but also with data coming from the input tensors.
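A minimal sketch of the situation described above (the exact function used in the PR's tests may differ; the function body here is only an illustration of the aliasing pattern):

import torch

def f(x, y):
    y.add_(1)      # in-place update through the smaller view...
    return x * 2   # ...must be visible through x, which owns the full storage

x = torch.randn(2, 2)
y = x[0, :]        # y aliases x but has fewer elements, so y cannot be
                   # recovered as prims.reshape(x, y.shape)
out = f(x, y)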