Skip to content

Commit ee686a1

Browse files
committed
feat(crd): Refactor prometheus metrics
Co-authored-by: Gemini AI <[email protected]> Signed-off-by: Denis Karpelevich <[email protected]>
1 parent ca2a4e0 commit ee686a1

22 files changed

+493
-229
lines changed

cmd/common.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/argoproj-labs/argocd-image-updater/internal/controller"
1515
"github.com/argoproj-labs/argocd-image-updater/pkg/argocd"
1616
"github.com/argoproj-labs/argocd-image-updater/pkg/common"
17+
"github.com/argoproj-labs/argocd-image-updater/pkg/metrics"
1718
"github.com/argoproj-labs/argocd-image-updater/pkg/webhook"
1819
"github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/registry"
1920
)
@@ -30,6 +31,9 @@ type WebhookConfig struct {
3031

3132
// SetupCommon initializes common components (logging, context, etc.)
3233
func SetupCommon(ctx context.Context, cfg *controller.ImageUpdaterConfig, setupLogger logr.Logger, commitMessagePath, kubeConfig string) error {
34+
// Initialize metrics before starting the metrics server or using any counters
35+
metrics.InitMetrics()
36+
3337
var commitMessageTpl string
3438

3539
// User can specify a path to a template used for Git commit messages

cmd/common_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/argoproj-labs/argocd-image-updater/internal/controller"
1919
"github.com/argoproj-labs/argocd-image-updater/pkg/common"
2020
aiukube "github.com/argoproj-labs/argocd-image-updater/pkg/kube"
21+
"github.com/argoproj-labs/argocd-image-updater/pkg/metrics"
2122
"github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/registry"
2223
)
2324

@@ -64,6 +65,8 @@ var setupCommonMutex sync.Mutex
6465

6566
// setupCommonStub mirrors SetupCommon behavior without starting the askpass server and without interactive kube client.
6667
func setupCommonStub(ctx context.Context, cfg *controller.ImageUpdaterConfig, setupLogger logr.Logger, commitMessagePath, kubeConfig string) error {
68+
metrics.InitMetrics()
69+
6770
var commitMessageTpl string
6871

6972
// User can specify a path to a template used for Git commit messages
@@ -231,4 +234,15 @@ func TestSetupCommon(t *testing.T) {
231234
err = setupCommonStub(context.Background(), cfg, logr.Discard(), "", invalidKubeconfigFile)
232235
assert.Nil(t, err)
233236
})
237+
238+
t.Run("should initialize metrics and kube client", func(t *testing.T) {
239+
cfg := &controller.ImageUpdaterConfig{}
240+
err := callSetupCommonWithMocks(t, cfg, logr.Discard(), "", kubeconfigFile)
241+
require.NoError(t, err)
242+
assert.NotNil(t, metrics.Endpoint())
243+
assert.NotNil(t, metrics.Applications())
244+
assert.NotNil(t, metrics.Clients())
245+
assert.NotNil(t, cfg.KubeClient)
246+
assert.IsType(t, &aiukube.ImageUpdaterKubernetesClient{}, cfg.KubeClient)
247+
})
234248
}

cmd/run.go

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
7474
default:
7575
return fmt.Errorf("invalid log format '%s'", cfg.LogFormat)
7676
}
77+
78+
if once {
79+
cfg.CheckInterval = 0
80+
probeAddr = "0"
81+
warmUpCache = true
82+
}
83+
7784
log.SetLogFormat(logFormat)
7885

7986
ctrl.SetLogger(logrusr.New(log.Log()))
@@ -85,15 +92,9 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
8592
"app", version.BinaryName()+": "+version.Version(),
8693
"loglevel", strings.ToUpper(cfg.LogLevel),
8794
"interval", argocd.GetPrintableInterval(cfg.CheckInterval),
88-
"healthPort", argocd.GetPrintableHealthPort(cfg.HealthPort),
95+
"healthPort", probeAddr,
8996
)
9097

91-
if once {
92-
cfg.CheckInterval = 0
93-
cfg.HealthPort = 0
94-
warmUpCache = true
95-
}
96-
9798
// Create context with signal handling
9899
ctx := ctrl.SetupSignalHandler()
99100
err := SetupCommon(ctx, cfg, setupLogger, commitMessagePath, kubeConfig)
@@ -152,18 +153,6 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
152153
LeaderElection: enableLeaderElection,
153154
LeaderElectionID: "c21b75f2.argoproj.io",
154155
LeaderElectionNamespace: leaderElectionNamespace,
155-
156-
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
157-
// when the Manager ends. This requires the binary to immediately end when the
158-
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
159-
// speeds up voluntary leader transitions as the new leader doesn't have to wait
160-
// LeaseDuration time first.
161-
//
162-
// In the default scaffold provided, the program ends immediately after
163-
// the manager stops, so would be fine to enable this option. However,
164-
// if you are doing or intend to do any operation, such as performing cleanups,
165-
// after the manager stops then its usage might be unsafe.
166-
// LeaderElectionReleaseOnCancel: true,
167156
})
168157
if err != nil {
169158
setupLogger.Error(err, "unable to start manager")
@@ -201,6 +190,7 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
201190
setupLogger.Info("Cache warm-up disabled, skipping cache warmer")
202191
// If warm-up is disabled, we need to signal that cache is warmed
203192
close(warmupState.Done)
193+
warmupState.isCacheWarmed.Store(true)
204194
}
205195

206196
// Start the webhook server if enabled
@@ -267,16 +257,11 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
267257
},
268258
}
269259

270-
// TODO: flags below are not documented yet and don't have env vars yet. Metrics and health checks will be implemented in GITOPS-7113
271260
controllerCmd.Flags().StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
272-
controllerCmd.Flags().StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
261+
controllerCmd.Flags().StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to. Leave as 0 to disable the probe service.")
273262
controllerCmd.Flags().BoolVar(&secureMetrics, "metrics-secure", true, "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
274263
controllerCmd.Flags().BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers")
275264

276-
// TODO: most probably legacy flags. Will be checked in GITOPS-7113
277-
controllerCmd.Flags().IntVar(&cfg.HealthPort, "health-port", 8080, "port to start the health server on, 0 to disable")
278-
controllerCmd.Flags().IntVar(&cfg.MetricsPort, "metrics-port", 8081, "port to start the metrics server on, 0 to disable")
279-
280265
controllerCmd.Flags().BoolVar(&enableLeaderElection, "leader-election", true, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
281266
controllerCmd.Flags().StringVar(&leaderElectionNamespace, "leader-election-namespace", "", "The namespace used for the leader election lease. If empty, the controller will use the namespace of the pod it is running in. When running locally this value must be set.")
282267
controllerCmd.Flags().BoolVar(&cfg.DryRun, "dry-run", false, "run in dry-run mode. If set to true, do not perform any changes")
@@ -285,21 +270,23 @@ This enables a CRD-driven approach to automated image updates with Argo CD.
285270
controllerCmd.Flags().StringVar(&cfg.LogFormat, "logformat", env.GetStringVal("IMAGE_UPDATER_LOGFORMAT", "text"), "set the log format to one of text|json")
286271
controllerCmd.Flags().StringVar(&kubeConfig, "kubeconfig", "", "full path to kubernetes client configuration, i.e. ~/.kube/config")
287272

288-
controllerCmd.Flags().BoolVar(&once, "once", false, "run only once, same as specifying --warmup-cache=true, --interval=0 and --health-port=0")
273+
controllerCmd.Flags().BoolVar(&once, "once", false, "run only once, same as specifying --warmup-cache=true, --interval=0 and --health-probe-bind-address=0")
289274
controllerCmd.Flags().StringVar(&cfg.RegistriesConf, "registries-conf-path", common.DefaultRegistriesConfPath, "path to registries configuration file")
290275
controllerCmd.Flags().IntVar(&cfg.MaxConcurrentApps, "max-concurrent-apps", env.ParseNumFromEnv("MAX_CONCURRENT_APPS", 10, 1, 100), "maximum number of ArgoCD applications that can be updated concurrently (must be >= 1)")
291276
controllerCmd.Flags().IntVar(&MaxConcurrentReconciles, "max-concurrent-reconciles", env.ParseNumFromEnv("MAX_CONCURRENT_RECONCILES", 1, 1, 10), "maximum number of concurrent Reconciles which can be run (must be >= 1)")
292277
controllerCmd.Flags().StringVar(&cfg.ArgocdNamespace, "argocd-namespace", "", "namespace where ArgoCD runs in (current namespace by default)")
293278
controllerCmd.Flags().BoolVar(&warmUpCache, "warmup-cache", true, "whether to perform a cache warm-up on startup")
279+
controllerCmd.Flags().BoolVar(&cfg.DisableKubeEvents, "disable-kube-events", env.GetBoolVal("IMAGE_UPDATER_KUBE_EVENTS", false), "Disable kubernetes events")
294280

281+
// Git flags
295282
controllerCmd.Flags().StringVar(&cfg.GitCommitUser, "git-commit-user", env.GetStringVal("GIT_COMMIT_USER", "argocd-image-updater"), "Username to use for Git commits")
296283
controllerCmd.Flags().StringVar(&cfg.GitCommitMail, "git-commit-email", env.GetStringVal("GIT_COMMIT_EMAIL", "[email protected]"), "E-Mail address to use for Git commits")
297284
controllerCmd.Flags().StringVar(&cfg.GitCommitSigningKey, "git-commit-signing-key", env.GetStringVal("GIT_COMMIT_SIGNING_KEY", ""), "GnuPG key ID or path to Private SSH Key used to sign the commits")
298285
controllerCmd.Flags().StringVar(&cfg.GitCommitSigningMethod, "git-commit-signing-method", env.GetStringVal("GIT_COMMIT_SIGNING_METHOD", "openpgp"), "Method used to sign Git commits ('openpgp' or 'ssh')")
299286
controllerCmd.Flags().BoolVar(&cfg.GitCommitSignOff, "git-commit-sign-off", env.GetBoolVal("GIT_COMMIT_SIGN_OFF", false), "Whether to sign-off git commits")
300287
controllerCmd.Flags().StringVar(&commitMessagePath, "git-commit-message-path", common.DefaultCommitTemplatePath, "Path to a template to use for Git commit messages")
301-
controllerCmd.Flags().BoolVar(&cfg.DisableKubeEvents, "disable-kube-events", env.GetBoolVal("IMAGE_UPDATER_KUBE_EVENTS", false), "Disable kubernetes events")
302288

289+
// Webhook flags
303290
controllerCmd.Flags().BoolVar(&cfg.EnableWebhook, "enable-webhook", env.GetBoolVal("ENABLE_WEBHOOK", false), "Enable webhook server for receiving registry events")
304291
controllerCmd.Flags().IntVar(&webhookCfg.Port, "webhook-port", env.ParseNumFromEnv("WEBHOOK_PORT", 8082, 0, 65535), "Port to listen on for webhook events")
305292
controllerCmd.Flags().StringVar(&webhookCfg.DockerSecret, "docker-webhook-secret", env.GetStringVal("DOCKER_WEBHOOK_SECRET", ""), "Secret for validating Docker Hub webhooks")

cmd/run_test.go

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package main
22

33
import (
44
"context"
5+
"fmt"
56
"math"
7+
"net/http"
68
"os"
79
"strconv"
810
"testing"
@@ -43,8 +45,6 @@ func TestNewRunCommand(t *testing.T) {
4345
asser.Equal(env.GetStringVal("IMAGE_UPDATER_LOGLEVEL", "info"), controllerCommand.Flag("loglevel").Value.String())
4446
asser.Equal(env.GetStringVal("IMAGE_UPDATER_LOGFORMAT", "text"), controllerCommand.Flag("logformat").Value.String())
4547
asser.Equal("", controllerCommand.Flag("kubeconfig").Value.String())
46-
asser.Equal("8080", controllerCommand.Flag("health-port").Value.String())
47-
asser.Equal("8081", controllerCommand.Flag("metrics-port").Value.String())
4848
asser.Equal("false", controllerCommand.Flag("once").Value.String())
4949
asser.Equal(common.DefaultRegistriesConfPath, controllerCommand.Flag("registries-conf-path").Value.String())
5050
asser.Equal(strconv.Itoa(env.ParseNumFromEnv("MAX_CONCURRENT_APPS", 10, 1, 100)), controllerCommand.Flag("max-concurrent-apps").Value.String())
@@ -297,3 +297,46 @@ func TestWebhookServerRunnable_Start_ContextCancelStopsServer(t *testing.T) {
297297
assert.NotNil(t, ws.webhookServer.Server)
298298
}
299299
}
300+
301+
// Assisted-by: Gemini AI
302+
// TestReadyzCheckWithWarmupStatus verifies the behavior of the warmup-check readiness probe
303+
// based on the state of WarmupStatus.
304+
func TestReadyzCheckWithWarmupStatus(t *testing.T) {
305+
// Sub-test for when cache warm-up is disabled
306+
t.Run("warmup-disabled", func(t *testing.T) {
307+
status := &WarmupStatus{Done: make(chan struct{})}
308+
309+
// Simulate the logic for when warmUpCache is false
310+
status.isCacheWarmed.Store(true)
311+
312+
// Create a fake readiness check that uses the warmup status
313+
check := func(req *http.Request) error {
314+
if !status.isCacheWarmed.Load() {
315+
return fmt.Errorf("cache is not yet warmed")
316+
}
317+
return nil
318+
}
319+
320+
// The check should pass because isCacheWarmed is true
321+
err := check(nil)
322+
assert.NoError(t, err, "readiness check should pass when cache warmup is disabled")
323+
})
324+
325+
// Sub-test for when cache warm-up is enabled but not yet complete
326+
t.Run("warmup-enabled-not-warmed", func(t *testing.T) {
327+
status := &WarmupStatus{Done: make(chan struct{})}
328+
329+
// In this case, isCacheWarmed is still false
330+
check := func(req *http.Request) error {
331+
if !status.isCacheWarmed.Load() {
332+
return fmt.Errorf("cache is not yet warmed")
333+
}
334+
return nil
335+
}
336+
337+
// The check should fail because isCacheWarmed is false
338+
err := check(nil)
339+
assert.Error(t, err, "readiness check should fail when cache is not warmed")
340+
assert.Equal(t, "cache is not yet warmed", err.Error())
341+
})
342+
}

config/default/kustomization.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,20 @@ resources:
2222
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
2323
#- ../prometheus
2424
# [METRICS] Expose the controller manager metrics service.
25-
#- metrics_service.yaml
25+
- metrics_service.yaml
2626
# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
2727
# Only Pod(s) running in a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
2828
# Only CR(s) which require webhooks and are applied on namespaces labeled with 'webhooks: enabled' will
2929
# be able to communicate with the Webhook Server.
30-
#- ../network-policy
30+
- ../network-policy
3131

3232
# Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager
33-
#patches:
33+
patches:
3434
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
3535
# More info: https://book.kubebuilder.io/reference/metrics
36-
#- path: manager_metrics_patch.yaml
37-
# target:
38-
# kind: Deployment
36+
- path: manager_metrics_patch.yaml
37+
target:
38+
kind: Deployment
3939

4040
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
4141
# crd/kustomization.yaml

config/default/metrics_service.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
app.kubernetes.io/name: argocd-image-updater
77
app.kubernetes.io/managed-by: kustomize
88
name: argocd-image-updater-controller-metrics-service
9-
namespace: system
9+
namespace: argocd-image-updater-system
1010
spec:
1111
ports:
1212
- name: https

config/install.yaml

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ metadata:
55
app.kubernetes.io/managed-by: kustomize
66
app.kubernetes.io/name: argocd-image-updater-system
77
control-plane: argocd-image-updater-controller
8+
metrics: enabled
89
name: argocd-image-updater-system
910
---
1011
apiVersion: apiextensions.k8s.io/v1
@@ -661,6 +662,34 @@ rules:
661662
- watch
662663
---
663664
apiVersion: rbac.authorization.k8s.io/v1
665+
kind: ClusterRole
666+
metadata:
667+
name: argocd-image-updater-metrics-auth-role
668+
rules:
669+
- apiGroups:
670+
- authentication.k8s.io
671+
resources:
672+
- tokenreviews
673+
verbs:
674+
- create
675+
- apiGroups:
676+
- authorization.k8s.io
677+
resources:
678+
- subjectaccessreviews
679+
verbs:
680+
- create
681+
---
682+
apiVersion: rbac.authorization.k8s.io/v1
683+
kind: ClusterRole
684+
metadata:
685+
name: argocd-image-updater-metrics-reader
686+
rules:
687+
- nonResourceURLs:
688+
- /metrics
689+
verbs:
690+
- get
691+
---
692+
apiVersion: rbac.authorization.k8s.io/v1
664693
kind: RoleBinding
665694
metadata:
666695
labels:
@@ -711,6 +740,32 @@ subjects:
711740
name: argocd-image-updater-controller
712741
namespace: argocd-image-updater-system
713742
---
743+
apiVersion: rbac.authorization.k8s.io/v1
744+
kind: ClusterRoleBinding
745+
metadata:
746+
name: argocd-image-updater-metrics-auth-rolebinding
747+
roleRef:
748+
apiGroup: rbac.authorization.k8s.io
749+
kind: ClusterRole
750+
name: argocd-image-updater-metrics-auth-role
751+
subjects:
752+
- kind: ServiceAccount
753+
name: argocd-image-updater-controller
754+
namespace: argocd-image-updater-system
755+
---
756+
apiVersion: rbac.authorization.k8s.io/v1
757+
kind: ClusterRoleBinding
758+
metadata:
759+
name: argocd-image-updater-metrics-reader-rolebinding
760+
roleRef:
761+
apiGroup: rbac.authorization.k8s.io
762+
kind: ClusterRole
763+
name: argocd-image-updater-metrics-reader
764+
subjects:
765+
- kind: ServiceAccount
766+
name: argocd-image-updater-controller
767+
namespace: argocd-image-updater-system
768+
---
714769
apiVersion: v1
715770
kind: ConfigMap
716771
metadata:
@@ -738,6 +793,24 @@ metadata:
738793
name: argocd-image-updater-secret
739794
namespace: argocd-image-updater-system
740795
---
796+
apiVersion: v1
797+
kind: Service
798+
metadata:
799+
labels:
800+
app.kubernetes.io/managed-by: kustomize
801+
app.kubernetes.io/name: argocd-image-updater
802+
control-plane: argocd-image-updater-controller
803+
name: argocd-image-updater-controller-metrics-service
804+
namespace: argocd-image-updater-system
805+
spec:
806+
ports:
807+
- name: https
808+
port: 8443
809+
protocol: TCP
810+
targetPort: 8443
811+
selector:
812+
control-plane: argocd-image-updater-controller
813+
---
741814
apiVersion: apps/v1
742815
kind: Deployment
743816
metadata:
@@ -762,6 +835,7 @@ spec:
762835
spec:
763836
containers:
764837
- args:
838+
- --metrics-bind-address=:8443
765839
- run
766840
command:
767841
- /manager
@@ -945,3 +1019,27 @@ spec:
9451019
secretName: ssh-git-creds
9461020
- emptyDir: {}
9471021
name: tmp
1022+
---
1023+
apiVersion: networking.k8s.io/v1
1024+
kind: NetworkPolicy
1025+
metadata:
1026+
labels:
1027+
app.kubernetes.io/managed-by: kustomize
1028+
app.kubernetes.io/name: argocd-image-updater-controller
1029+
control-plane: argocd-image-updater-controller
1030+
name: allow-metrics-traffic
1031+
namespace: argocd-image-updater-system
1032+
spec:
1033+
ingress:
1034+
- from:
1035+
- namespaceSelector:
1036+
matchLabels:
1037+
metrics: enabled
1038+
ports:
1039+
- port: 8443
1040+
protocol: TCP
1041+
podSelector:
1042+
matchLabels:
1043+
control-plane: argocd-image-updater-controller
1044+
policyTypes:
1045+
- Ingress

config/manager/manager.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ metadata:
55
control-plane: argocd-image-updater-controller
66
app.kubernetes.io/name: argocd-image-updater-system
77
app.kubernetes.io/managed-by: kustomize
8+
metrics: enabled
89
name: argocd-image-updater-system
910
---
1011
apiVersion: apps/v1

0 commit comments

Comments
 (0)