Skip to content

Commit 4799eef

Browse files
Separate backoff strategies for peer registration vs login operations
The previous implementation used a single 10 second backoff for both registration and login operations. However, these are fundamentally different operations with different timeout requirements: Registration (new peer creation) legitimately needs longer timeouts because it involves database writes, external IdP validation, and group membership calculations. Login (existing peer authentication) is faster and should fail quickly to provide actionable feedback to users. This change: - Auto-detects operation type by checking for SetupKey or JwtToken in the request - Uses 180s timeout for registration (5s initial, 30s max interval) - Uses 45s timeout for login (3s initial, 15s max interval) - Extracts backoff creation into helper functions to keep login() complexity low - Maintains backward compatibility - no API changes to public methods The auto-detection approach eliminates the need for boolean parameters and keeps the login() function simple while providing appropriate timeouts for each scenario.
1 parent aca0398 commit 4799eef

File tree

1 file changed

+61
-5
lines changed

1 file changed

+61
-5
lines changed

shared/management/client/grpc.go

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,17 @@ import (
2525
"github.com/netbirdio/netbird/util/wsproxy"
2626
)
2727

28-
const ConnectTimeout = 10 * time.Second
28+
const (
29+
ConnectTimeout = 10 * time.Second
30+
31+
RegistrationRetryTimeout = 180 * time.Second
32+
RegistrationRetryInterval = 5 * time.Second
33+
RegistrationRetryMaxDelay = 30 * time.Second
34+
35+
LoginRetryTimeout = 45 * time.Second
36+
LoginRetryInterval = 3 * time.Second
37+
LoginRetryMaxDelay = 15 * time.Second
38+
)
2939

3040
const (
3141
errMsgMgmtPublicKey = "failed getting Management Service public key: %s"
@@ -312,6 +322,38 @@ func (c *GrpcClient) IsHealthy() bool {
312322
return true
313323
}
314324

325+
// newRegistrationBackoff creates a backoff strategy for peer registration operations
326+
func newRegistrationBackoff(ctx context.Context) backoff.BackOff {
327+
return backoff.WithContext(&backoff.ExponentialBackOff{
328+
InitialInterval: RegistrationRetryInterval,
329+
RandomizationFactor: backoff.DefaultRandomizationFactor,
330+
Multiplier: backoff.DefaultMultiplier,
331+
MaxInterval: RegistrationRetryMaxDelay,
332+
MaxElapsedTime: RegistrationRetryTimeout,
333+
Stop: backoff.Stop,
334+
Clock: backoff.SystemClock,
335+
}, ctx)
336+
}
337+
338+
// newLoginBackoff creates a backoff strategy for peer login operations
339+
func newLoginBackoff(ctx context.Context) backoff.BackOff {
340+
return backoff.WithContext(&backoff.ExponentialBackOff{
341+
InitialInterval: LoginRetryInterval,
342+
RandomizationFactor: backoff.DefaultRandomizationFactor,
343+
Multiplier: backoff.DefaultMultiplier,
344+
MaxInterval: LoginRetryMaxDelay,
345+
MaxElapsedTime: LoginRetryTimeout,
346+
Stop: backoff.Stop,
347+
Clock: backoff.SystemClock,
348+
}, ctx)
349+
}
350+
351+
// isRegistrationRequest determines if a login request is actually a registration.
352+
// Returns true if either the setup key or the JWT token is present in the request.
353+
func isRegistrationRequest(req *proto.LoginRequest) bool {
354+
return req.SetupKey != "" || req.JwtToken != ""
355+
}
356+
315357
func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*proto.LoginResponse, error) {
316358
if !c.ready() {
317359
return nil, errors.New(errMsgNoMgmtConnection)
@@ -334,17 +376,31 @@ func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*pro
334376
Body: loginReq,
335377
})
336378
if err != nil {
337-
// retry only on context canceled
338-
if s, ok := gstatus.FromError(err); ok && s.Code() == codes.Canceled {
339-
return err
379+
if s, ok := gstatus.FromError(err); ok {
380+
if s.Code() == codes.Canceled {
381+
log.Debugf("login canceled")
382+
return backoff.Permanent(err)
383+
}
384+
if s.Code() == codes.DeadlineExceeded {
385+
log.Debugf("login timeout, retrying...")
386+
return err
387+
}
340388
}
341389
return backoff.Permanent(err)
342390
}
343391

344392
return nil
345393
}
346394

347-
err = backoff.Retry(operation, nbgrpc.Backoff(c.ctx))
395+
isRegistration := isRegistrationRequest(req)
396+
var backoffStrategy backoff.BackOff
397+
if isRegistration {
398+
backoffStrategy = newRegistrationBackoff(c.ctx)
399+
} else {
400+
backoffStrategy = newLoginBackoff(c.ctx)
401+
}
402+
403+
err = backoff.Retry(operation, backoffStrategy)
348404
if err != nil {
349405
log.Errorf("failed to login to Management Service: %v", err)
350406
return nil, err

0 commit comments

Comments
 (0)