|
| 1 | +""" |
| 2 | +Regression test for the lost update bug in ContractProcessor.update_contract_state(). |
| 3 | +
|
| 4 | +The bug: update_contract_state() does a read-modify-write on the full JSONB blob. |
| 5 | +When two sessions call it concurrently for the same contract, the second commit |
| 6 | +silently overwrites the first's changes because it re-reads stale state. |
| 7 | +
|
| 8 | +This is the root cause of 336+ lost submissions in Rally production (March 2026). |
| 9 | +See: Rally2/docs/genvm-state-mismatch-bug.md |
| 10 | +
|
| 11 | +Production scenario: |
| 12 | + - Worker A accepts TX-A → writes accepted_state with TX-A's submission |
| 13 | + - Worker B accepts TX-B → reads the SAME pre-TX-A state → writes accepted_state |
| 14 | + with TX-B's submission → TX-A's submission is silently erased |
| 15 | +""" |
| 16 | + |
| 17 | +import threading |
| 18 | + |
| 19 | +import pytest |
| 20 | +from sqlalchemy import Engine |
| 21 | +from sqlalchemy.orm import sessionmaker |
| 22 | + |
| 23 | +from backend.database_handler.contract_processor import ContractProcessor |
| 24 | +from backend.database_handler.models import CurrentState |
| 25 | + |
| 26 | + |
# Primary key of the single CurrentState row every test in this module targets.
CONTRACT_ADDRESS = "0xrace_test_contract"

# Seed state with one slot under "accepted" and one under "finalized", so a
# test can tell whether a write to one field disturbed the other.
INITIAL_STATE = dict(
    accepted=dict(slot_a="original_a"),
    finalized=dict(slot_f="original_f"),
)
| 33 | + |
| 34 | + |
def _setup_contract(engine: Engine):
    """Seed the database with the contract row the tests operate on.

    Opens a short-lived session bound to ``engine``, adds one ``CurrentState``
    row keyed by ``CONTRACT_ADDRESS`` whose ``data`` wraps ``INITIAL_STATE``
    under the ``"state"`` key, and commits it.
    """
    make_session = sessionmaker(bind=engine)
    seed_row = CurrentState(id=CONTRACT_ADDRESS, data={"state": INITIAL_STATE})
    with make_session() as session:
        session.add(seed_row)
        session.commit()
| 45 | + |
| 46 | + |
def _read_state(engine: Engine) -> dict:
    """Return the ``"state"`` entry of the test contract's row.

    A brand-new session is created for the lookup, so nothing cached by the
    sessions the tests used for writing is involved. The row is fetched with
    ``Query.one()``, which raises unless exactly one matching row exists.
    """
    make_session = sessionmaker(bind=engine)
    with make_session() as session:
        lookup = session.query(CurrentState).filter_by(id=CONTRACT_ADDRESS)
        contract_row = lookup.one()
        return contract_row.data["state"]
| 53 | + |
| 54 | + |
| 55 | +# --------------------------------------------------------------------------- |
| 56 | +# Test 1: Two concurrent accepted_state updates — known limitation, marked xfail(strict) |
| 57 | +# --------------------------------------------------------------------------- |
| 58 | + |
| 59 | + |
@pytest.mark.xfail(
    reason="update_contract_state does full-field replacement by design. "
    "Same-field concurrent writes are prevented upstream by advisory locks "
    "in worker.py claim CTEs (pg_try_advisory_xact_lock). "
    "This test documents the limitation — will pass if state merging is added.",
    # Only the final lost-update assertion is an *expected* failure. Harness
    # problems are reported through pytest.fail(), which is not an
    # AssertionError, so they surface as real failures instead of XFAIL.
    raises=AssertionError,
    strict=True,
)
def test_concurrent_accepted_updates_preserve_both(engine: Engine):
    """
    Two workers both write accepted_state for the same contract concurrently.
    Worker A adds submission_A, Worker B adds submission_B.

    This scenario is prevented in production by advisory locks at the worker
    claim level. The update_contract_state API does full replacement, so if
    two callers pass different complete dicts, the second always wins.

    The test is therefore a strict xfail: the final "both survive" assertion
    is expected to fail. A hung worker or an exception inside a worker is a
    harness failure and is reported as a hard failure, not as the xfail.
    """
    _setup_contract(engine)

    barrier = threading.Barrier(2, timeout=5)
    errors = []

    def worker(label, accepted_state):
        """Load the row, rendezvous with the peer, then write accepted_state."""
        try:
            Session_ = sessionmaker(bind=engine)
            with Session_() as s:
                cp = ContractProcessor(s)
                # Read current state before the barrier so both workers have
                # seen the original row by the time either of them writes.
                contract = s.query(CurrentState).filter_by(id=CONTRACT_ADDRESS).one()
                _ = contract.data  # force load
                barrier.wait()  # synchronize with the other worker
                cp.update_contract_state(
                    CONTRACT_ADDRESS,
                    accepted_state=accepted_state,
                )
        except Exception as e:
            # Thread boundary: collect the error so the main thread reports it.
            errors.append((label, e))

    threads = [
        threading.Thread(
            target=worker,
            name="worker-A",
            args=("A", {"slot_a": "original_a", "submission_A": "scored"}),
            daemon=True,  # a hung worker must not block interpreter exit
        ),
        threading.Thread(
            target=worker,
            name="worker-B",
            args=("B", {"slot_a": "original_a", "submission_B": "scored"}),
            daemon=True,
        ),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout=10)

    # join(timeout=...) returns silently on timeout; check liveness explicitly
    # so state is never inspected while a writer may still be running.
    hung = [t.name for t in threads if t.is_alive()]
    if hung:
        pytest.fail(f"Worker threads did not finish within 10s: {hung}")
    if errors:
        pytest.fail(f"Worker errors: {errors}")

    state = _read_state(engine)

    has_a = "submission_A" in state["accepted"]
    has_b = "submission_B" in state["accepted"]

    assert has_a and has_b, (
        f"Lost update: concurrent accepted_state writes must both survive. "
        f"has_A={has_a}, has_B={has_b}, state={state['accepted']}"
    )
| 133 | + |
| 134 | + |
| 135 | +# --------------------------------------------------------------------------- |
| 136 | +# Test 2: accepted + finalized concurrent updates — must both survive |
| 137 | +# --------------------------------------------------------------------------- |
| 138 | + |
| 139 | + |
def test_concurrent_accepted_and_finalized_preserve_both(engine: Engine):
    """
    Worker A writes accepted_state, Worker B writes finalized_state concurrently.

    CORRECT behavior: both fields must reflect their respective updates.
    The test carries no xfail marker, so it is expected to pass; a failure of
    the final assertion means one writer's update clobbered the other field.
    """
    _setup_contract(engine)

    barrier = threading.Barrier(2, timeout=5)
    errors = []

    def writer(label, **state_update):
        """Load the row, rendezvous with the peer, then apply ``state_update``."""
        try:
            Session_ = sessionmaker(bind=engine)
            with Session_() as s:
                cp = ContractProcessor(s)
                # Load the row before the barrier so both writers have seen
                # the original state by the time either of them writes.
                contract = s.query(CurrentState).filter_by(id=CONTRACT_ADDRESS).one()
                _ = contract.data
                barrier.wait()
                cp.update_contract_state(CONTRACT_ADDRESS, **state_update)
        except Exception as e:
            # Thread boundary: collect the error so the main thread reports it.
            errors.append((label, e))

    threads = [
        threading.Thread(
            target=writer,
            name="writer-accepted",
            args=("accepted",),
            kwargs={"accepted_state": {"slot_a": "updated_by_accepted_writer"}},
            daemon=True,  # a hung writer must not block interpreter exit
        ),
        threading.Thread(
            target=writer,
            name="writer-finalized",
            args=("finalized",),
            kwargs={"finalized_state": {"slot_f": "updated_by_finalized_writer"}},
            daemon=True,
        ),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout=10)

    # join(timeout=...) returns silently on timeout; check liveness explicitly
    # so state is never inspected while a writer may still be running.
    hung = [t.name for t in threads if t.is_alive()]
    assert not hung, f"Worker threads did not finish within 10s: {hung}"
    assert not errors, f"Worker errors: {errors}"

    state = _read_state(engine)

    accepted_updated = state["accepted"].get("slot_a") == "updated_by_accepted_writer"
    finalized_updated = (
        state["finalized"].get("slot_f") == "updated_by_finalized_writer"
    )

    assert accepted_updated and finalized_updated, (
        f"Cross-field clobber: concurrent accepted + finalized writes must both survive. "
        f"accepted={state['accepted']}, finalized={state['finalized']}"
    )
| 202 | + |
| 203 | + |
| 204 | +# --------------------------------------------------------------------------- |
| 205 | +# Test 3: Sequential updates — sanity check (should always pass) |
| 206 | +# --------------------------------------------------------------------------- |
| 207 | + |
| 208 | + |
def test_sequential_updates_preserve_all_state(engine: Engine):
    """
    Sanity baseline: two accepted_state writes applied one after the other,
    each through its own session, leave the last-written accepted dict in
    place and do not touch the finalized field.
    """
    _setup_contract(engine)

    first_write = {"slot_a": "original_a", "submission_A": "scored"}
    second_write = {**first_write, "submission_B": "scored"}
    make_session = sessionmaker(bind=engine)

    # Strictly sequential: the second session opens only after the first has
    # been closed by its ``with`` block.
    for accepted_payload in (first_write, second_write):
        with make_session() as session:
            ContractProcessor(session).update_contract_state(
                CONTRACT_ADDRESS,
                accepted_state=accepted_payload,
            )

    final_state = _read_state(engine)
    accepted = final_state["accepted"]
    assert accepted["submission_A"] == "scored"
    assert accepted["submission_B"] == "scored"
    assert final_state["finalized"] == {"slot_f": "original_f"}
0 commit comments