Skip to content

Commit 3ea5b8f

Browse files
authored
Merge pull request #18 from project-aethermesh/fix/http-server-startup
fix: Start HTTP server before dependencies to ensure immediate availability
2 parents e0768cf + ebb23e6 commit 3ea5b8f

3 files changed

Lines changed: 58 additions & 28 deletions

File tree

internal/health/server.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"net"
77
"net/http"
88
"strconv"
9+
"sync"
910
"time"
1011

1112
"github.com/rs/zerolog/log"
@@ -14,6 +15,7 @@ import (
1415
// HealthCheckerServer provides HTTP endpoints for health and readiness checks
1516
type HealthCheckerServer struct {
1617
checker *Checker
18+
checkerMu sync.RWMutex // Protects checker field from concurrent access
1719
httpServer *http.Server
1820
}
1921

@@ -39,10 +41,20 @@ func NewHealthCheckerServer(port int, checker *Checker) *HealthCheckerServer {
3941
return server
4042
}
4143

44+
// SetChecker updates the checker instance after the server has started
45+
// This allows the server to start before dependencies (config, Valkey) are available
46+
func (s *HealthCheckerServer) SetChecker(checker *Checker) {
47+
s.checkerMu.Lock()
48+
defer s.checkerMu.Unlock()
49+
s.checker = checker
50+
log.Info().Msg("Health checker HTTP server: checker instance updated")
51+
}
52+
4253
// Start starts the HTTP server in a goroutine and reports bind errors via the error channel
4354
func (s *HealthCheckerServer) Start(startupErrCh chan<- error) {
4455
log.Info().Str("addr", s.httpServer.Addr).Msg("Starting health checker HTTP server")
4556
go func() {
57+
log.Debug().Str("addr", s.httpServer.Addr).Msg("HTTP server goroutine started, attempting to bind")
4658
// First attempt to bind to detect immediate errors (port in use, permission denied, etc.)
4759
listener, err := net.Listen("tcp", s.httpServer.Addr)
4860
if err != nil {
@@ -51,6 +63,7 @@ func (s *HealthCheckerServer) Start(startupErrCh chan<- error) {
5163
return
5264
}
5365

66+
log.Info().Str("addr", s.httpServer.Addr).Msg("Successfully bound to address, starting HTTP server")
5467
// Successfully bound, send nil to indicate successful startup
5568
startupErrCh <- nil
5669

@@ -90,9 +103,13 @@ func (s *HealthCheckerServer) handleReady(w http.ResponseWriter, r *http.Request
90103
var statusCode int
91104

92105
// Copy the checker reference under the read lock, then unlock before calling IsReady and doing I/O
106+
s.checkerMu.RLock()
107+
checker := s.checker
108+
s.checkerMu.RUnlock()
109+
93110
var isReady bool
94-
if s.checker != nil {
95-
isReady = s.checker.IsReady()
111+
if checker != nil {
112+
isReady = checker.IsReady()
96113
} else {
97114
isReady = false
98115
}

internal/server/server.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,6 @@ func (s *Server) checkHealthCheckerServiceReady(ctx context.Context) bool {
353353
if resp.StatusCode == http.StatusOK {
354354
// Drain response body to enable connection reuse
355355
io.Copy(io.Discard, resp.Body)
356-
log.Debug().Str("url", checkURL).Msg("Health-checker service is ready")
357356
return true
358357
}
359358

services/health-checker/main.go

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ var testCheckerPatch func(*health.Checker)
3333
// testExitAfterSetup is a test hook to exit main after setup in tests
3434
var testExitAfterSetup bool
3535

36+
// exitCode is used to track the exit code for the process
37+
var exitCode int
38+
3639
// createStandaloneRateLimitHandler creates a simple rate limit handler for the standalone health checker
3740
func createStandaloneRateLimitHandler(valkeyClient store.ValkeyClientIface) func(chain, endpointID, protocol string) {
3841
return func(chain, endpointID, protocol string) {
@@ -100,6 +103,29 @@ func RunHealthChecker(
100103
return
101104
}
102105

106+
// Start HTTP server FIRST, before any dependencies (config, Valkey)
107+
// This ensures health probes can be served as soon as the process starts
108+
log.Info().Int("port", healthCheckerServerPort).Msg("Starting HTTP server before dependencies")
109+
httpServer := health.NewHealthCheckerServer(healthCheckerServerPort, nil) // Start with nil checker
110+
startupErrCh := make(chan error, 1)
111+
httpServer.Start(startupErrCh)
112+
113+
// Wait for startup result from the HTTP server goroutine (bind errors are immediate)
114+
if err := <-startupErrCh; err != nil {
115+
log.Error().Err(err).Msg("Health checker HTTP server failed to start")
116+
exitCode = 1
117+
return
118+
}
119+
log.Info().Msg("HTTP server startup successful, proceeding with dependency initialization")
120+
121+
defer func() {
122+
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
123+
defer shutdownCancel()
124+
if err := httpServer.Shutdown(shutdownCtx); err != nil {
125+
log.Error().Err(err).Msg("Error shutting down health checker HTTP server")
126+
}
127+
}()
128+
103129
// Start the metrics server if enabled
104130
if metricsEnabled {
105131
log.Info().Int("port", metricsPort).Msg("Prometheus metrics server enabled")
@@ -108,7 +134,9 @@ func RunHealthChecker(
108134

109135
cfg, err := loadConfig(configFile)
110136
if err != nil {
111-
log.Fatal().Err(err).Msg("Failed to load configuration")
137+
log.Error().Err(err).Msg("Failed to load configuration")
138+
exitCode = 1
139+
return
112140
}
113141

114142
log.Info().Msg("Health Checker Service - Loaded configuration:")
@@ -130,7 +158,9 @@ func RunHealthChecker(
130158
valkeyAddr := valkeyHost + ":" + valkeyPort
131159
valkeyClient := newValkeyClient(valkeyAddr, valkeyPass, valkeySkipTLSCheck, valkeyUseTLS)
132160
if err := valkeyClient.Ping(context.Background()); err != nil {
133-
log.Fatal().Err(err).Msg("Failed to connect to Valkey")
161+
log.Error().Err(err).Msg("Failed to connect to Valkey")
162+
exitCode = 1
163+
return
134164
}
135165
defer valkeyClient.Close()
136166

@@ -145,29 +175,9 @@ func RunHealthChecker(
145175
testCheckerPatch(checker)
146176
}
147177

148-
// Start HTTP server for health and readiness endpoints
149-
httpServer := health.NewHealthCheckerServer(healthCheckerServerPort, checker)
150-
startupErrCh := make(chan error, 1)
151-
httpServer.Start(startupErrCh)
152-
153-
// Wait briefly to detect startup failures (bind errors should be immediate)
154-
select {
155-
case err := <-startupErrCh:
156-
if err != nil {
157-
log.Fatal().Err(err).Msg("Health checker HTTP server failed to start")
158-
}
159-
// Startup successful
160-
case <-time.After(100 * time.Millisecond):
161-
// No error received within timeout, assume startup successful
162-
// Bind errors from net.Listen() should be immediate, so this is safe
163-
}
164-
defer func() {
165-
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
166-
defer shutdownCancel()
167-
if err := httpServer.Shutdown(shutdownCtx); err != nil {
168-
log.Error().Err(err).Msg("Error shutting down health checker HTTP server")
169-
}
170-
}()
178+
// Update HTTP server with checker instance now that dependencies are loaded
179+
log.Info().Msg("Dependencies loaded, updating HTTP server with checker instance")
180+
httpServer.SetChecker(checker)
171181

172182
if healthCheckInterval == 0 {
173183
mode = "ephemeral"
@@ -241,4 +251,8 @@ func main() {
241251
config.ValkeyUseTLS,
242252
config.StandaloneHealthChecks,
243253
)
254+
255+
// Exit with the appropriate code after RunHealthChecker returns
256+
// This allows defers in RunHealthChecker to run, while not hiding panics
257+
os.Exit(exitCode)
244258
}

0 commit comments

Comments
 (0)