Commit 3668635

Refactor ConcurrencyMetrics struct in handler.go to include additional metrics and locks
1 parent 011667b commit 3668635

5 files changed (+145, -94 lines)


concurrency/adjust_concurrency.go

Lines changed: 0 additions & 70 deletions
This file was deleted.

concurrency/const.go

Lines changed: 6 additions & 0 deletions
@@ -1,3 +1,4 @@
+// concurrency/const.go
 package concurrency
 
 import "time"
@@ -16,4 +17,9 @@ const (
     // MaxAcceptableResponseTimeVariability represents the maximum acceptable variability in response times.
     // It is used as a threshold to dynamically adjust concurrency based on fluctuations in response times.
     MaxAcceptableResponseTimeVariability = 500 * time.Millisecond
+
+    // ErrorRateThreshold represents the threshold for error rate above which concurrency will be adjusted.
+    // Error rate is calculated as (TotalRateLimitErrors + 5xxErrors) / TotalRequests.
+    // Adjustments in concurrency will be made if the error rate exceeds this threshold. A threshold of 0.1 (or 10%) is common.
+    ErrorRateThreshold = 0.1
 )
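As a rough worked example of the new threshold (the numbers here are illustrative, not taken from this commit): with 200 total requests and 25 responses counted as errors, the error rate is 25 / 200 = 0.125, which exceeds ErrorRateThreshold (0.1) and would trigger a one-step reduction in concurrency; at 18 errors out of 200 the rate is 0.09, and the handler would instead be eligible to scale back up.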

concurrency/handler.go

Lines changed: 15 additions & 8 deletions
@@ -25,14 +25,11 @@ type ConcurrencyHandler struct {
     Metrics *ConcurrencyMetrics
 }
 
-// ConcurrencyMetrics captures various metrics related to managing concurrency for the client's interactions with the API.// ConcurrencyMetrics captures various metrics related to managing concurrency for the client's interactions with the API.
+// ConcurrencyMetrics captures various metrics related to managing concurrency for the client's interactions with the API.
 type ConcurrencyMetrics struct {
     TotalRequests int64 // Total number of requests made
     TotalRetries int64 // Total number of retry attempts
     TotalRateLimitErrors int64 // Total number of rate limit errors encountered
-    TotalResponseTime time.Duration // Total response time for all requests
-    AverageResponseTime time.Duration // Average response time across all requests
-    ErrorRate float64 // Error rate calculated as (TotalRateLimitErrors + 5xxErrors) / TotalRequests
     TokenWaitTime time.Duration // Total time spent waiting for tokens
     TTFB struct { // Metrics related to Time to First Byte (TTFB)
         Total time.Duration // Total Time to First Byte (TTFB) for all requests
@@ -42,11 +39,21 @@ type ConcurrencyMetrics struct {
     Throughput struct { // Metrics related to network throughput
         Total float64 // Total network throughput for all requests
         Count int64 // Count of requests used for calculating throughput
-        Lock sync.Mutex // Lock for throughput metrics
+        Lock sync.Mutex // Lock for throughput metrics/
     }
-    Variance float64 // Variance of response times
-    ResponseCount int64 // Count of responses used for calculating response time variability
-    Lock sync.Mutex // Lock for overall metrics fields
+    ResponseTimeVariability struct { // Metrics related to response time variability
+        Total time.Duration // Total response time for all requests
+        Average time.Duration // Average response time across all requests
+        Variance float64 // Variance of response times
+        Count int64 // Count of responses used for calculating response time variability
+        Lock sync.Mutex // Lock for response time variability metrics
+        StdDevThreshold float64 // Maximum acceptable standard deviation for adjusting concurrency
+    }
+    ResponseCodeMetrics struct {
+        ErrorRate float64 // Error rate calculated as (TotalRateLimitErrors + 5xxErrors) / TotalRequests
+        Lock sync.Mutex // Lock for response code metrics
+    }
+    Lock sync.Mutex // Lock for overall metrics fields
 }
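For context, a minimal sketch of how the per-group locks in the refactored struct are intended to be used; the recordThroughputSample helper below is hypothetical (it is not part of this commit) and only illustrates taking the Throughput group's own lock instead of the top-level Metrics lock.

// Hypothetical helper (not in this commit): records one throughput sample
// under the Throughput group's own lock.
func (ch *ConcurrencyHandler) recordThroughputSample(sample float64) {
    ch.Metrics.Throughput.Lock.Lock()
    defer ch.Metrics.Throughput.Lock.Unlock()

    ch.Metrics.Throughput.Total += sample
    ch.Metrics.Throughput.Count++
}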
 
 // NewConcurrencyHandler initializes a new ConcurrencyHandler with the given

concurrency/metrics.go

Lines changed: 88 additions & 16 deletions
@@ -3,27 +3,90 @@ package concurrency
 import (
     "math"
     "net/http"
+    "strconv"
     "time"
 
     "go.uber.org/zap"
 )
 
 // MonitorRateLimitHeaders monitors the rate limit headers (X-RateLimit-Remaining and Retry-After)
 // in the HTTP response and adjusts concurrency accordingly.
+// If X-RateLimit-Remaining is below a threshold or Retry-After is specified, decrease concurrency.
+// If neither condition is met, consider scaling up if concurrency is below the maximum limit.
+// - Threshold for X-RateLimit-Remaining: 10
+// - Maximum concurrency: MaxConcurrency
 func (ch *ConcurrencyHandler) MonitorRateLimitHeaders(resp *http.Response) {
     // Extract X-RateLimit-Remaining and Retry-After headers from the response
     remaining := resp.Header.Get("X-RateLimit-Remaining")
     retryAfter := resp.Header.Get("Retry-After")
 
-    // Adjust concurrency based on the values of these headers
-    // Implement your logic here to dynamically adjust concurrency
+    if remaining != "" {
+        remainingValue, err := strconv.Atoi(remaining)
+        if err == nil && remainingValue < 10 {
+            // Decrease concurrency if X-RateLimit-Remaining is below the threshold
+            if len(ch.sem) > MinConcurrency {
+                newSize := len(ch.sem) - 1
+                ch.logger.Info("Reducing concurrency due to low X-RateLimit-Remaining", zap.Int("NewSize", newSize))
+                ch.ResizeSemaphore(newSize)
+            }
+        }
+    }
+
+    if retryAfter != "" {
+        // Decrease concurrency if Retry-After is specified
+        if len(ch.sem) > MinConcurrency {
+            newSize := len(ch.sem) - 1
+            ch.logger.Info("Reducing concurrency due to Retry-After header", zap.Int("NewSize", newSize))
+            ch.ResizeSemaphore(newSize)
+        }
+    } else {
+        // Scale up if concurrency is below the maximum limit
+        if len(ch.sem) < MaxConcurrency {
+            newSize := len(ch.sem) + 1
+            ch.logger.Info("Increasing concurrency", zap.Int("NewSize", newSize))
+            ch.ResizeSemaphore(newSize)
+        }
+    }
 }
 
 // MonitorServerResponseCodes monitors server response codes and adjusts concurrency accordingly.
 func (ch *ConcurrencyHandler) MonitorServerResponseCodes(resp *http.Response) {
     statusCode := resp.StatusCode
-    // Check for 5xx errors (server errors) and 4xx errors (client errors)
-    // Implement your logic here to track increases in error rates and adjust concurrency
+
+    // Lock the metrics to ensure thread safety
+    ch.Metrics.Lock.Lock()
+    defer ch.Metrics.Lock.Unlock()
+
+    // Update the appropriate error count based on the response status code
+    switch {
+    case statusCode >= 500 && statusCode < 600:
+        ch.Metrics.TotalRateLimitErrors++
+    case statusCode >= 400 && statusCode < 500:
+        // Assuming 4xx errors as client errors
+        // Increase the TotalRetries count to indicate a client error
+        ch.Metrics.TotalRetries++
+    }
+
+    // Calculate error rate
+    totalRequests := float64(ch.Metrics.TotalRequests)
+    totalErrors := float64(ch.Metrics.TotalRateLimitErrors + ch.Metrics.TotalRetries)
+    errorRate := totalErrors / totalRequests
+
+    // Set the new error rate in the metrics
+    ch.Metrics.ResponseCodeMetrics.ErrorRate = errorRate
+
+    // Check if the error rate exceeds the threshold and adjust concurrency accordingly
+    if errorRate > ErrorRateThreshold && len(ch.sem) > MinConcurrency {
+        // Decrease concurrency
+        newSize := len(ch.sem) - 1
+        ch.logger.Info("Reducing request concurrency due to high error rate", zap.Int("NewSize", newSize))
+        ch.ResizeSemaphore(newSize)
+    } else if errorRate <= ErrorRateThreshold && len(ch.sem) < MaxConcurrency {
+        // Scale up if error rate is below the threshold and concurrency is below the maximum limit
+        newSize := len(ch.sem) + 1
+        ch.logger.Info("Increasing request concurrency due to low error rate", zap.Int("NewSize", newSize))
+        ch.ResizeSemaphore(newSize)
+    }
 }
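A short usage sketch for the two monitors above; the doRequest wrapper is an assumption for illustration only, since the commit does not show where these functions are called from.

// Hypothetical wiring (not in this commit): feed every response through the monitors.
func (ch *ConcurrencyHandler) doRequest(client *http.Client, req *http.Request) (*http.Response, error) {
    resp, err := client.Do(req)
    if err != nil {
        return nil, err
    }

    // Let the handler react to rate limit headers and response status codes.
    ch.MonitorRateLimitHeaders(resp)
    ch.MonitorServerResponseCodes(resp)
    return resp, nil
}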
 
 // MonitorResponseTimeVariability calculates the standard deviation of response times
@@ -32,21 +95,30 @@ func (ch *ConcurrencyHandler) MonitorResponseTimeVariability(responseTime time.D
     ch.Metrics.Lock.Lock()
     defer ch.Metrics.Lock.Unlock()
 
-    // Update TotalResponseTime and ResponseCount for moving average calculation
-    ch.Metrics.TotalResponseTime += responseTime
-    ch.Metrics.ResponseCount++
+    // Update ResponseTimeVariability metrics
+    ch.Metrics.ResponseTimeVariability.Lock.Lock()
+    defer ch.Metrics.ResponseTimeVariability.Lock.Unlock()
+    ch.Metrics.ResponseTimeVariability.Total += responseTime
+    ch.Metrics.ResponseTimeVariability.Count++
 
     // Calculate average response time
-    averageResponseTime := ch.Metrics.TotalResponseTime / time.Duration(ch.Metrics.ResponseCount)
+    ch.Metrics.ResponseTimeVariability.Average = ch.Metrics.ResponseTimeVariability.Total / time.Duration(ch.Metrics.ResponseTimeVariability.Count)
+
+    // Calculate variance of response times
+    ch.Metrics.ResponseTimeVariability.Variance = ch.calculateVariance(ch.Metrics.ResponseTimeVariability.Average, responseTime)
 
     // Calculate standard deviation of response times
-    variance := ch.calculateVariance(averageResponseTime, responseTime)
-    stdDev := math.Sqrt(variance)
+    stdDev := math.Sqrt(ch.Metrics.ResponseTimeVariability.Variance)
 
     // Adjust concurrency based on response time variability
-    if float64(stdDev) > MaxAcceptableResponseTimeVariability.Seconds() && len(ch.sem) > MinConcurrency {
+    if stdDev > ch.Metrics.ResponseTimeVariability.StdDevThreshold && len(ch.sem) > MinConcurrency {
         newSize := len(ch.sem) - 1
-        ch.logger.Info("Reducing concurrency due to high response time variability", zap.Int("NewSize", newSize))
+        ch.logger.Info("Reducing request concurrency due to high response time variability", zap.Int("NewSize", newSize))
+        ch.ResizeSemaphore(newSize)
+    } else if stdDev <= ch.Metrics.ResponseTimeVariability.StdDevThreshold && len(ch.sem) < MaxConcurrency {
+        // Scale up if response time variability is below the threshold and concurrency is below the maximum limit
+        newSize := len(ch.sem) + 1
+        ch.logger.Info("Increasing request concurrency due to low response time variability", zap.Int("NewSize", newSize))
         ch.ResizeSemaphore(newSize)
     }
 }
@@ -58,8 +130,8 @@ func (ch *ConcurrencyHandler) calculateVariance(averageResponseTime time.Duratio
     responseSeconds := responseTime.Seconds()
 
     // Calculate variance
-    variance := (float64(ch.Metrics.ResponseCount-1)*math.Pow(averageSeconds-responseSeconds, 2) + ch.Metrics.Variance) / float64(ch.Metrics.ResponseCount)
-    ch.Metrics.Variance = variance
+    variance := (float64(ch.Metrics.ResponseTimeVariability.Count-1)*math.Pow(averageSeconds-responseSeconds, 2) + ch.Metrics.ResponseTimeVariability.Variance) / float64(ch.Metrics.ResponseTimeVariability.Count)
+    ch.Metrics.ResponseTimeVariability.Variance = variance
     return variance
 }
 
@@ -86,11 +158,11 @@ func (ch *ConcurrencyHandler) MonitorNetworkLatency(ttfb time.Duration, throughp
     // Adjust concurrency based on TTFB and throughput moving averages
     if ttfbMovingAverage > MaxAcceptableTTFB && len(ch.sem) > MinConcurrency {
         newSize := len(ch.sem) - 1
-        ch.logger.Info("Reducing concurrency due to high TTFB", zap.Int("NewSize", newSize))
+        ch.logger.Info("Reducing request concurrency due to high TTFB", zap.Int("NewSize", newSize))
         ch.ResizeSemaphore(newSize)
     } else if throughputMovingAverage > MaxAcceptableThroughput && len(ch.sem) < MaxConcurrency {
         newSize := len(ch.sem) + 1
-        ch.logger.Info("Increasing concurrency due to high throughput", zap.Int("NewSize", newSize))
+        ch.logger.Info("Increasing request concurrency due to high throughput", zap.Int("NewSize", newSize))
         ch.ResizeSemaphore(newSize)
     }
 }
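On the calculateVariance change above: as the code reads, each sample updates a running estimate as variance = ((Count-1) * (average - sample)^2 + previousVariance) / Count, with times converted to seconds. For example (illustrative numbers), with Count = 4, an average of 1.0s, a previous variance of 0.02, and a new response time of 1.2s, the update gives (3 * 0.04 + 0.02) / 4 = 0.035; its square root (about 0.19s) is what MonitorResponseTimeVariability compares against StdDevThreshold. This is a simplified running estimate rather than a textbook Welford update.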

concurrency/resize.go

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+// concurrency/resize.go
+
+package concurrency
+
+// ResizeSemaphore adjusts the size of the semaphore used to control concurrency. This method creates a new
+// semaphore with the specified new size and closes the old semaphore to ensure that no further tokens can
+// be acquired from it. This approach helps manage the transition from the old concurrency level to the new one
+// without affecting ongoing operations significantly.
+//
+// Parameters:
+// - newSize: The new size for the semaphore, representing the updated limit on concurrent requests.
+//
+// This function should be called from within synchronization contexts, such as AdjustConcurrency, to avoid
+// race conditions and ensure that changes to the semaphore are consistent with the observed metrics.
+func (ch *ConcurrencyHandler) ResizeSemaphore(newSize int) {
+    newSem := make(chan struct{}, newSize)
+
+    // Transfer tokens from the old semaphore to the new one.
+    for {
+        select {
+        case token := <-ch.sem:
+            select {
+            case newSem <- token:
+                // Token transferred to new semaphore.
+            default:
+                // New semaphore is full, put token back to the old one to allow ongoing operations to complete.
+                ch.sem <- token
+            }
+        default:
+            // No more tokens to transfer.
+            close(ch.sem)
+            ch.sem = newSem
+            return
+        }
+    }
+}
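Finally, a minimal sketch of the acquire/release pattern this semaphore implies, assuming one token per in-flight request; the withToken helper is hypothetical and glosses over the coordination a real caller would need around a concurrent resize.

// Hypothetical usage (not in this commit): hold a token for the duration of one unit of work.
func (ch *ConcurrencyHandler) withToken(work func()) {
    ch.sem <- struct{}{}        // acquire a slot; blocks while concurrency is at its limit
    defer func() { <-ch.sem }() // release the slot when the work finishes

    work()
}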
