Skip to content

Commit 509eee4

Browse files
authored
fix: avoid panic in bootstrap when late messages arrive (#949)
1 parent 0e628c0 commit 509eee4

File tree

2 files changed

+131
-12
lines changed

2 files changed

+131
-12
lines changed

v2/internal/coord/routing/bootstrap.go

+17-12
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,24 @@ func (b *Bootstrap[K, N]) Advance(ctx context.Context, ev BootstrapEvent) (out B
213213
return b.advanceQuery(ctx, &query.EventQueryPoll{})
214214

215215
case *EventBootstrapFindCloserResponse[K, N]:
216-
b.counterFindSucceeded.Add(ctx, 1)
217-
return b.advanceQuery(ctx, &query.EventQueryNodeResponse[K, N]{
218-
NodeID: tev.NodeID,
219-
CloserNodes: tev.CloserNodes,
220-
})
216+
// ignore late responses that arrive when there is no query (b.qry is nil)
217+
if b.qry != nil {
218+
b.counterFindSucceeded.Add(ctx, 1)
219+
return b.advanceQuery(ctx, &query.EventQueryNodeResponse[K, N]{
220+
NodeID: tev.NodeID,
221+
CloserNodes: tev.CloserNodes,
222+
})
223+
}
221224
case *EventBootstrapFindCloserFailure[K, N]:
222-
b.counterFindFailed.Add(ctx, 1)
223-
span.RecordError(tev.Error)
224-
return b.advanceQuery(ctx, &query.EventQueryNodeFailure[K, N]{
225-
NodeID: tev.NodeID,
226-
Error: tev.Error,
227-
})
228-
225+
// ignore late failures that arrive when there is no query (b.qry is nil)
226+
if b.qry != nil {
227+
b.counterFindFailed.Add(ctx, 1)
228+
span.RecordError(tev.Error)
229+
return b.advanceQuery(ctx, &query.EventQueryNodeFailure[K, N]{
230+
NodeID: tev.NodeID,
231+
Error: tev.Error,
232+
})
233+
}
229234
case *EventBootstrapPoll:
230235
// ignore, nothing to do
231236
default:

v2/internal/coord/routing/bootstrap_test.go

+114
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,117 @@ func TestBootstrapFinishesThenGoesIdle(t *testing.T) {
270270
// bootstrap should now be idle
271271
require.IsType(t, &StateBootstrapIdle{}, state)
272272
}
273+
274+
func TestBootstrapFinishedIgnoresLaterResponses(t *testing.T) {
275+
ctx := context.Background()
276+
clk := clock.NewMock()
277+
cfg := DefaultBootstrapConfig()
278+
cfg.Clock = clk
279+
280+
self := tiny.NewNode(0)
281+
bs, err := NewBootstrap[tiny.Key](self, cfg)
282+
require.NoError(t, err)
283+
284+
a := tiny.NewNode(4)
285+
b := tiny.NewNode(8)
286+
287+
// start the bootstrap
288+
state := bs.Advance(ctx, &EventBootstrapStart[tiny.Key, tiny.Node]{
289+
KnownClosestNodes: []tiny.Node{b},
290+
})
291+
require.IsType(t, &StateBootstrapFindCloser[tiny.Key, tiny.Node]{}, state)
292+
293+
// the bootstrap should attempt to contact the node it was given
294+
st := state.(*StateBootstrapFindCloser[tiny.Key, tiny.Node])
295+
require.Equal(t, coordt.QueryID("bootstrap"), st.QueryID)
296+
require.Equal(t, b, st.NodeID)
297+
298+
// notify bootstrap that node was contacted successfully with a closer node
299+
state = bs.Advance(ctx, &EventBootstrapFindCloserResponse[tiny.Key, tiny.Node]{
300+
NodeID: b,
301+
CloserNodes: []tiny.Node{a},
302+
})
303+
304+
// bootstrap should respond that it wants to contact the new node
305+
require.IsType(t, &StateBootstrapFindCloser[tiny.Key, tiny.Node]{}, state)
306+
307+
// poll bootstrap
308+
state = bs.Advance(ctx, &EventBootstrapPoll{})
309+
310+
// bootstrap should now be waiting
311+
require.IsType(t, &StateBootstrapWaiting{}, state)
312+
313+
// advance the clock past the timeout
314+
clk.Add(cfg.RequestTimeout * 2)
315+
316+
// poll bootstrap
317+
state = bs.Advance(ctx, &EventBootstrapPoll{})
318+
319+
// bootstrap should now be finished
320+
require.IsType(t, &StateBootstrapFinished{}, state)
321+
322+
// notify bootstrap that node was contacted successfully after the timeout
323+
state = bs.Advance(ctx, &EventBootstrapFindCloserResponse[tiny.Key, tiny.Node]{
324+
NodeID: a,
325+
})
326+
327+
// bootstrap should ignore the late response and now be idle
328+
require.IsType(t, &StateBootstrapIdle{}, state)
329+
}
330+
331+
func TestBootstrapFinishedIgnoresLaterFailures(t *testing.T) {
332+
ctx := context.Background()
333+
clk := clock.NewMock()
334+
cfg := DefaultBootstrapConfig()
335+
cfg.Clock = clk
336+
337+
self := tiny.NewNode(0)
338+
bs, err := NewBootstrap[tiny.Key](self, cfg)
339+
require.NoError(t, err)
340+
341+
a := tiny.NewNode(4)
342+
b := tiny.NewNode(8)
343+
344+
// start the bootstrap
345+
state := bs.Advance(ctx, &EventBootstrapStart[tiny.Key, tiny.Node]{
346+
KnownClosestNodes: []tiny.Node{b},
347+
})
348+
require.IsType(t, &StateBootstrapFindCloser[tiny.Key, tiny.Node]{}, state)
349+
350+
// the bootstrap should attempt to contact the node it was given
351+
st := state.(*StateBootstrapFindCloser[tiny.Key, tiny.Node])
352+
require.Equal(t, coordt.QueryID("bootstrap"), st.QueryID)
353+
require.Equal(t, b, st.NodeID)
354+
355+
// notify bootstrap that node was contacted successfully with a closer node
356+
state = bs.Advance(ctx, &EventBootstrapFindCloserResponse[tiny.Key, tiny.Node]{
357+
NodeID: b,
358+
CloserNodes: []tiny.Node{a},
359+
})
360+
361+
// bootstrap should respond that it wants to contact the new node
362+
require.IsType(t, &StateBootstrapFindCloser[tiny.Key, tiny.Node]{}, state)
363+
364+
// poll bootstrap
365+
state = bs.Advance(ctx, &EventBootstrapPoll{})
366+
367+
// bootstrap should now be waiting
368+
require.IsType(t, &StateBootstrapWaiting{}, state)
369+
370+
// advance the clock past the timeout
371+
clk.Add(cfg.RequestTimeout * 2)
372+
373+
// poll bootstrap
374+
state = bs.Advance(ctx, &EventBootstrapPoll{})
375+
376+
// bootstrap should now be finished
377+
require.IsType(t, &StateBootstrapFinished{}, state)
378+
379+
// notify bootstrap that the node failed to be contacted, after the timeout
380+
state = bs.Advance(ctx, &EventBootstrapFindCloserFailure[tiny.Key, tiny.Node]{
381+
NodeID: a,
382+
})
383+
384+
// bootstrap should ignore the late failure and now be idle
385+
require.IsType(t, &StateBootstrapIdle{}, state)
386+
}

0 commit comments

Comments
 (0)