Skip to content

Commit

Permalink
fix: start handling replayed blocks at startup
Browse files Browse the repository at this point in the history
This was an important first step. We aren't at a point where I would
normally commit, but the code compiles and the tests so far are passing,
and it's the end of my day.

Building with the debug flag currently fails at runtime in gcosmos, but
nobody should be using that yet, so I will fix it tomorrow.
  • Loading branch information
mark-rushakoff committed Aug 20, 2024
1 parent ac57c43 commit adf25c1
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 8 deletions.
43 changes: 36 additions & 7 deletions tm/tmengine/internal/tmstate/statemachine.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func (m *StateMachine) kernel(ctx context.Context) {

for {
if rlc.IsReplaying() {
if !m.handleReplayEvent(ctx, wSig, &rlc) {
if !m.handleCatchupEvent(ctx, wSig, &rlc) {
return
}
} else {
Expand All @@ -173,12 +173,39 @@ func (m *StateMachine) kernel(ctx context.Context) {
}
}

func (m *StateMachine) handleReplayEvent(
func (m *StateMachine) handleCatchupEvent(
ctx context.Context,
wSig <-chan gwatchdog.Signal,
rlc *tsi.RoundLifecycle,
) (ok bool) {
return false
defer trace.StartRegion(ctx, "handleCatchupEvent").End()

// Handle the minimal set of events that can happen during catchup.
// While at this height, we are in replay at least until we enter the next round.

for {
select {
case <-ctx.Done():
m.log.Info(
"Quitting due to context cancellation in kernel main loop (catchup)",
"cause", context.Cause(ctx),
"height", rlc.H, "round", rlc.R, "step", rlc.S,
)
return false

case resp := <-rlc.FinalizeRespCh:
// During a replay, we are blocked waiting for a finalization.

// The RLC step is kind of meaningless during replay,
// but handleFinalization expects the step to be awaiting finalization
// in order to advance to the next height,
// so we just fake it here.
rlc.S = tsi.StepAwaitingFinalization
if !m.handleFinalization(ctx, rlc, resp) {
return false
}
}
}
}

func (m *StateMachine) handleLiveEvent(
Expand All @@ -197,7 +224,7 @@ func (m *StateMachine) handleLiveEvent(
select {
case <-ctx.Done():
m.log.Info(
"Quitting due to context cancellation in kernel main loop",
"Quitting due to context cancellation in kernel main loop (live events)",
"cause", context.Cause(ctx),
"height", rlc.H, "round", rlc.R, "step", rlc.S,
"vote_summary", rlc.PrevVRV.VoteSummary,
Expand Down Expand Up @@ -277,7 +304,9 @@ func (m *StateMachine) initializeRLC(ctx context.Context) (rlc tsi.RoundLifecycl
}

if !su.IsVRV() {
return rlc, false
// We are replaying, so we don't need special begin round handling.
// sendInitialActionSet already made a finalization request.
return rlc, true
}

ok = m.beginRoundLive(ctx, &rlc, su.VRV)
Expand Down Expand Up @@ -1211,7 +1240,7 @@ func (m *StateMachine) handleFinalization(

if resp.Height != rlc.H || resp.Round != rlc.R {
panic(fmt.Errorf(
"BUG: app sent height/round %d/%d differing from current (%d/%d)",
"BUG: driver sent height/round %d/%d differing from current (%d/%d)",
resp.Height, resp.Round, rlc.H, rlc.R,
))
}
Expand All @@ -1220,7 +1249,7 @@ func (m *StateMachine) handleFinalization(
ctx,
rlc.H, rlc.R,
string(resp.BlockHash),
rlc.CurVals, // TODO: shouldn't this be rlc.FinalizedValidators?
rlc.FinalizedValidators,
string(resp.AppStateHash),
); err != nil {
glog.HRE(m.log, rlc.H, rlc.R, err).Error(
Expand Down
19 changes: 18 additions & 1 deletion tm/tmengine/internal/tmstate/statemachine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ func TestStateMachine_initialization(t *testing.T) {

require.Equal(t, 1, cap(req.Resp))

// The driver sends a response.
resp := tmdriver.FinalizeBlockResponse{
Height: 1, Round: 0,
BlockHash: pb1.Block.Hash,
Expand All @@ -313,7 +314,23 @@ func TestStateMachine_initialization(t *testing.T) {
}
gtest.SendSoon(t, req.Resp, resp)

t.Skip("TODO: assert entry in finalization store, updated round action set sent to mirror")
// Next, the state machine should send a new round entrance to the mirror.

re = gtest.ReceiveSoon(t, sfx.RoundEntranceOutCh)
require.Equal(t, uint64(2), re.H)
require.Zero(t, re.R)

// By the time the round entrance was made,
// the state machine saved a new finalization.

r, hash, vals, appHash, err := sfx.Cfg.FinalizationStore.LoadFinalizationByHeight(ctx, 1)
require.NoError(t, err)
require.Zero(t, r)
require.Equal(t, string(pb1.Block.Hash), hash)
require.True(t, tmconsensus.ValidatorSlicesEqual(vals, sfx.Fx.Vals()))
require.Equal(t, "app_state_1", appHash)

// TODO: assert behavior on another replayed block after we've finalized the initial block.
})
}

Expand Down

0 comments on commit adf25c1

Please sign in to comment.