diff --git a/relay/healthcheck/peerid/peerid.go b/relay/healthcheck/peerid/peerid.go
new file mode 100644
index 00000000000..cd8696817a7
--- /dev/null
+++ b/relay/healthcheck/peerid/peerid.go
@@ -0,0 +1,31 @@
+package peerid
+
+import (
+	"crypto/sha256"
+
+	v2 "github.com/netbirdio/netbird/shared/relay/auth/hmac/v2"
+	"github.com/netbirdio/netbird/shared/relay/messages"
+)
+
+var (
+	// HealthCheckPeerID is the peer ID used by health check connections: messages.HashID of "healthcheck-agent".
+	HealthCheckPeerID = messages.HashID("healthcheck-agent")
+
+	// DummyAuthToken is the marshaled placeholder token sent by the health check; only its format is meant to be correct.
+	// Its signature is sha256.Size zero bytes rather than a computed HMAC (layout per the original author: 1 byte algo + 32 bytes signature + payload).
+	DummyAuthToken = createDummyToken()
+)
+
+func createDummyToken() []byte {
+	token := v2.Token{
+		AuthAlgo:  v2.AuthAlgoHMACSHA256,
+		Signature: make([]byte, sha256.Size), // zero-filled placeholder, not a computed signature
+		Payload:   []byte("healthcheck"),
+	}
+	return token.Marshal() // evaluated once, by the DummyAuthToken package-level initializer
+}
+
+// IsHealthCheck reports whether peerID is non-nil and equal to HealthCheckPeerID.
+func IsHealthCheck(peerID *messages.PeerID) bool {
+	return peerID != nil && *peerID == HealthCheckPeerID
+}
diff --git a/relay/healthcheck/ws.go b/relay/healthcheck/ws.go
index db61ed80262..9267096f596 100644
--- a/relay/healthcheck/ws.go
+++ b/relay/healthcheck/ws.go
@@ -7,8 +7,10 @@ import (
 
 	"github.com/coder/websocket"
 
+	"github.com/netbirdio/netbird/relay/healthcheck/peerid"
 	"github.com/netbirdio/netbird/relay/server"
 	"github.com/netbirdio/netbird/shared/relay"
+	"github.com/netbirdio/netbird/shared/relay/messages"
 )
 
 func dialWS(ctx context.Context, address url.URL) error {
@@ -30,7 +32,18 @@ func dialWS(ctx context.Context, address url.URL) error {
 	if err != nil {
 		return fmt.Errorf("failed to connect to websocket: %w", err)
 	}
+	defer func() {
+		_ = conn.CloseNow() // best-effort cleanup on every return path; the close error is deliberately ignored
+	}()
+
+	authMsg, err := messages.MarshalAuthMsg(peerid.HealthCheckPeerID, peerid.DummyAuthToken) // health check identity plus the placeholder token
+	if err != nil {
+		return fmt.Errorf("failed to marshal auth message: %w", err)
+	}
+
+	if err := conn.Write(ctx, websocket.MessageBinary, authMsg); err != nil {
+		return fmt.Errorf("failed to write auth message: %w", err)
+	}
 
-	_ = conn.Close(websocket.StatusNormalClosure, "availability check complete")
 	return nil
 }
diff --git a/relay/server/handshake.go b/relay/server/handshake.go
index 922369798c0..8c3ee1899a6 100644
--- a/relay/server/handshake.go
+++ b/relay/server/handshake.go
@@ -97,7 +97,7 @@ func (h *handshake) handshakeReceive() (*messages.PeerID, error) {
 		return nil, fmt.Errorf("invalid message type %d from %s", msgType, h.conn.RemoteAddr())
 	}
 	if err != nil {
-		return nil, err
+		return peerID, err // peerID may be non-nil despite the error; Relay.Accept inspects it to recognise health checks
 	}
 	h.peerID = peerID
 	return peerID, nil
@@ -147,7 +147,7 @@ func (h *handshake) handleAuthMsg(buf []byte) (*messages.PeerID, error) {
 	}
 
 	if err := h.validator.Validate(authPayload); err != nil {
-		return nil, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err)
+		return rawPeerID, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err) // return the ID with the error so the caller can tell which peer failed validation
 	}
 
 	return rawPeerID, nil
diff --git a/relay/server/relay.go b/relay/server/relay.go
index c1cfa13fdfe..bb355f58f22 100644
--- a/relay/server/relay.go
+++ b/relay/server/relay.go
@@ -12,6 +12,7 @@ import (
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/metric"
 
+	"github.com/netbirdio/netbird/relay/healthcheck/peerid"
 	//nolint:staticcheck
 	"github.com/netbirdio/netbird/relay/metrics"
 	"github.com/netbirdio/netbird/relay/server/store"
@@ -123,7 +124,11 @@ func (r *Relay) Accept(conn net.Conn) {
 	}
 	peerID, err := h.handshakeReceive()
 	if err != nil {
-		log.Errorf("failed to handshake: %s", err)
+		if peerid.IsHealthCheck(peerID) {
+			log.Debugf("health check connection from %s", conn.RemoteAddr()) // a failed handshake carrying the health check peer ID is logged at debug level only (the agent sends a token whose signature is not valid)
+		} else {
+			log.Errorf("failed to handshake: %s", err)
+		}
 		if cErr := conn.Close(); cErr != nil {
 			log.Errorf("failed to close connection, %s: %s", conn.RemoteAddr(), cErr)
 		}