diff --git a/cmd/common.go b/cmd/common.go index 6acd9310..238319c6 100644 --- a/cmd/common.go +++ b/cmd/common.go @@ -14,6 +14,7 @@ import ( "github.com/argoproj-labs/argocd-image-updater/internal/controller" "github.com/argoproj-labs/argocd-image-updater/pkg/argocd" "github.com/argoproj-labs/argocd-image-updater/pkg/common" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" "github.com/argoproj-labs/argocd-image-updater/pkg/webhook" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/registry" ) @@ -30,6 +31,9 @@ type WebhookConfig struct { // SetupCommon initializes common components (logging, context, etc.) func SetupCommon(ctx context.Context, cfg *controller.ImageUpdaterConfig, setupLogger logr.Logger, commitMessagePath, kubeConfig string) error { + // Initialize metrics before starting the metrics server or using any counters + metrics.InitMetrics() + var commitMessageTpl string // User can specify a path to a template used for Git commit messages diff --git a/cmd/common_test.go b/cmd/common_test.go index 0a9594ec..e1da3631 100644 --- a/cmd/common_test.go +++ b/cmd/common_test.go @@ -18,6 +18,7 @@ import ( "github.com/argoproj-labs/argocd-image-updater/internal/controller" "github.com/argoproj-labs/argocd-image-updater/pkg/common" aiukube "github.com/argoproj-labs/argocd-image-updater/pkg/kube" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/registry" ) @@ -64,6 +65,8 @@ var setupCommonMutex sync.Mutex // setupCommonStub mirrors SetupCommon behavior without starting the askpass server and without interactive kube client. 
func setupCommonStub(ctx context.Context, cfg *controller.ImageUpdaterConfig, setupLogger logr.Logger, commitMessagePath, kubeConfig string) error { + metrics.InitMetrics() + var commitMessageTpl string // User can specify a path to a template used for Git commit messages @@ -231,4 +234,15 @@ func TestSetupCommon(t *testing.T) { err = setupCommonStub(context.Background(), cfg, logr.Discard(), "", invalidKubeconfigFile) assert.Nil(t, err) }) + + t.Run("should initialize metrics and kube client", func(t *testing.T) { + cfg := &controller.ImageUpdaterConfig{} + err := callSetupCommonWithMocks(t, cfg, logr.Discard(), "", kubeconfigFile) + require.NoError(t, err) + assert.NotNil(t, metrics.Endpoint()) + assert.NotNil(t, metrics.Applications()) + assert.NotNil(t, metrics.Clients()) + assert.NotNil(t, cfg.KubeClient) + assert.IsType(t, &aiukube.ImageUpdaterKubernetesClient{}, cfg.KubeClient) + }) } diff --git a/cmd/run.go b/cmd/run.go index f46d9564..5cd15796 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -74,6 +74,13 @@ This enables a CRD-driven approach to automated image updates with Argo CD. default: return fmt.Errorf("invalid log format '%s'", cfg.LogFormat) } + + if once { + cfg.CheckInterval = 0 + probeAddr = "0" + warmUpCache = true + } + log.SetLogFormat(logFormat) ctrl.SetLogger(logrusr.New(log.Log())) @@ -85,15 +92,9 @@ This enables a CRD-driven approach to automated image updates with Argo CD. "app", version.BinaryName()+": "+version.Version(), "loglevel", strings.ToUpper(cfg.LogLevel), "interval", argocd.GetPrintableInterval(cfg.CheckInterval), - "healthPort", argocd.GetPrintableHealthPort(cfg.HealthPort), + "healthPort", probeAddr, ) - if once { - cfg.CheckInterval = 0 - cfg.HealthPort = 0 - warmUpCache = true - } - // Create context with signal handling ctx := ctrl.SetupSignalHandler() err := SetupCommon(ctx, cfg, setupLogger, commitMessagePath, kubeConfig) @@ -152,18 +153,6 @@ This enables a CRD-driven approach to automated image updates with Argo CD. 
LeaderElection: enableLeaderElection, LeaderElectionID: "c21b75f2.argoproj.io", LeaderElectionNamespace: leaderElectionNamespace, - - // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily - // when the Manager ends. This requires the binary to immediately end when the - // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly - // speeds up voluntary leader transitions as the new leader don't have to wait - // LeaseDuration time first. - // - // In the default scaffold provided, the program ends immediately after - // the manager stops, so would be fine to enable this option. However, - // if you are doing or is intended to do any operation such as perform cleanups - // after the manager stops then its usage might be unsafe. - // LeaderElectionReleaseOnCancel: true, }) if err != nil { setupLogger.Error(err, "unable to start manager") @@ -201,6 +190,7 @@ This enables a CRD-driven approach to automated image updates with Argo CD. setupLogger.Info("Cache warm-up disabled, skipping cache warmer") // If warm-up is disabled, we need to signal that cache is warmed close(warmupState.Done) + warmupState.isCacheWarmed.Store(true) } // Start the webhook server if enabled @@ -267,16 +257,11 @@ This enables a CRD-driven approach to automated image updates with Argo CD. }, } - // TODO: flags below are not documented yet and don't have env vars yet. Metrics and health checks will be implemented in GITOPS-7113 controllerCmd.Flags().StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") - controllerCmd.Flags().StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + controllerCmd.Flags().StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to. 
Change to 0 to disable the probe service.") controllerCmd.Flags().BoolVar(&secureMetrics, "metrics-secure", true, "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") controllerCmd.Flags().BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") - // TODO: most probably legacy flags. Will be checked in GITOPS-7113 - controllerCmd.Flags().IntVar(&cfg.HealthPort, "health-port", 8080, "port to start the health server on, 0 to disable") - controllerCmd.Flags().IntVar(&cfg.MetricsPort, "metrics-port", 8081, "port to start the metrics server on, 0 to disable") - controllerCmd.Flags().BoolVar(&enableLeaderElection, "leader-election", true, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.") controllerCmd.Flags().StringVar(&leaderElectionNamespace, "leader-election-namespace", "", "The namespace used for the leader election lease. If empty, the controller will use the namespace of the pod it is running in. When running locally this value must be set.") controllerCmd.Flags().BoolVar(&cfg.DryRun, "dry-run", false, "run in dry-run mode. If set to true, do not perform any changes") @@ -285,21 +270,23 @@ This enables a CRD-driven approach to automated image updates with Argo CD. controllerCmd.Flags().StringVar(&cfg.LogFormat, "logformat", env.GetStringVal("IMAGE_UPDATER_LOGFORMAT", "text"), "set the log format to one of text|json") controllerCmd.Flags().StringVar(&kubeConfig, "kubeconfig", "", "full path to kubernetes client configuration, i.e. 
~/.kube/config") - controllerCmd.Flags().BoolVar(&once, "once", false, "run only once, same as specifying --warmup-cache=true, --interval=0 and --health-port=0") + controllerCmd.Flags().BoolVar(&once, "once", false, "run only once, same as specifying --warmup-cache=true, --interval=0 and --health-probe-bind-address=0") controllerCmd.Flags().StringVar(&cfg.RegistriesConf, "registries-conf-path", common.DefaultRegistriesConfPath, "path to registries configuration file") controllerCmd.Flags().IntVar(&cfg.MaxConcurrentApps, "max-concurrent-apps", env.ParseNumFromEnv("MAX_CONCURRENT_APPS", 10, 1, 100), "maximum number of ArgoCD applications that can be updated concurrently (must be >= 1)") controllerCmd.Flags().IntVar(&MaxConcurrentReconciles, "max-concurrent-reconciles", env.ParseNumFromEnv("MAX_CONCURRENT_RECONCILES", 1, 1, 10), "maximum number of concurrent Reconciles which can be run (must be >= 1)") controllerCmd.Flags().StringVar(&cfg.ArgocdNamespace, "argocd-namespace", "", "namespace where ArgoCD runs in (current namespace by default)") controllerCmd.Flags().BoolVar(&warmUpCache, "warmup-cache", true, "whether to perform a cache warm-up on startup") + controllerCmd.Flags().BoolVar(&cfg.DisableKubeEvents, "disable-kube-events", env.GetBoolVal("IMAGE_UPDATER_KUBE_EVENTS", false), "Disable kubernetes events") + // Git flags controllerCmd.Flags().StringVar(&cfg.GitCommitUser, "git-commit-user", env.GetStringVal("GIT_COMMIT_USER", "argocd-image-updater"), "Username to use for Git commits") controllerCmd.Flags().StringVar(&cfg.GitCommitMail, "git-commit-email", env.GetStringVal("GIT_COMMIT_EMAIL", "noreply@argoproj.io"), "E-Mail address to use for Git commits") controllerCmd.Flags().StringVar(&cfg.GitCommitSigningKey, "git-commit-signing-key", env.GetStringVal("GIT_COMMIT_SIGNING_KEY", ""), "GnuPG key ID or path to Private SSH Key used to sign the commits") controllerCmd.Flags().StringVar(&cfg.GitCommitSigningMethod, "git-commit-signing-method", 
env.GetStringVal("GIT_COMMIT_SIGNING_METHOD", "openpgp"), "Method used to sign Git commits ('openpgp' or 'ssh')") controllerCmd.Flags().BoolVar(&cfg.GitCommitSignOff, "git-commit-sign-off", env.GetBoolVal("GIT_COMMIT_SIGN_OFF", false), "Whether to sign-off git commits") controllerCmd.Flags().StringVar(&commitMessagePath, "git-commit-message-path", common.DefaultCommitTemplatePath, "Path to a template to use for Git commit messages") - controllerCmd.Flags().BoolVar(&cfg.DisableKubeEvents, "disable-kube-events", env.GetBoolVal("IMAGE_UPDATER_KUBE_EVENTS", false), "Disable kubernetes events") + // Webhook flags controllerCmd.Flags().BoolVar(&cfg.EnableWebhook, "enable-webhook", env.GetBoolVal("ENABLE_WEBHOOK", false), "Enable webhook server for receiving registry events") controllerCmd.Flags().IntVar(&webhookCfg.Port, "webhook-port", env.ParseNumFromEnv("WEBHOOK_PORT", 8082, 0, 65535), "Port to listen on for webhook events") controllerCmd.Flags().StringVar(&webhookCfg.DockerSecret, "docker-webhook-secret", env.GetStringVal("DOCKER_WEBHOOK_SECRET", ""), "Secret for validating Docker Hub webhooks") diff --git a/cmd/run_test.go b/cmd/run_test.go index ca01e0aa..f358eaa5 100644 --- a/cmd/run_test.go +++ b/cmd/run_test.go @@ -2,7 +2,9 @@ package main import ( "context" + "fmt" "math" + "net/http" "os" "strconv" "testing" @@ -43,8 +45,6 @@ func TestNewRunCommand(t *testing.T) { asser.Equal(env.GetStringVal("IMAGE_UPDATER_LOGLEVEL", "info"), controllerCommand.Flag("loglevel").Value.String()) asser.Equal(env.GetStringVal("IMAGE_UPDATER_LOGFORMAT", "text"), controllerCommand.Flag("logformat").Value.String()) asser.Equal("", controllerCommand.Flag("kubeconfig").Value.String()) - asser.Equal("8080", controllerCommand.Flag("health-port").Value.String()) - asser.Equal("8081", controllerCommand.Flag("metrics-port").Value.String()) asser.Equal("false", controllerCommand.Flag("once").Value.String()) asser.Equal(common.DefaultRegistriesConfPath, 
controllerCommand.Flag("registries-conf-path").Value.String()) asser.Equal(strconv.Itoa(env.ParseNumFromEnv("MAX_CONCURRENT_APPS", 10, 1, 100)), controllerCommand.Flag("max-concurrent-apps").Value.String()) @@ -297,3 +297,46 @@ func TestWebhookServerRunnable_Start_ContextCancelStopsServer(t *testing.T) { assert.NotNil(t, ws.webhookServer.Server) } } + +// Assisted-by: Gemini AI +// TestReadyzCheckWithWarmupStatus verifies the behavior of the warmup-check readiness probe +// based on the state of WarmupStatus. +func TestReadyzCheckWithWarmupStatus(t *testing.T) { + // Sub-test for when cache warm-up is disabled + t.Run("warmup-disabled", func(t *testing.T) { + status := &WarmupStatus{Done: make(chan struct{})} + + // Simulate the logic for when warmUpCache is false + status.isCacheWarmed.Store(true) + + // Create a fake readiness check that uses the warmup status + check := func(req *http.Request) error { + if !status.isCacheWarmed.Load() { + return fmt.Errorf("cache is not yet warmed") + } + return nil + } + + // The check should pass because isCacheWarmed is true + err := check(nil) + assert.NoError(t, err, "readiness check should pass when cache warmup is disabled") + }) + + // Sub-test for when cache warm-up is enabled but not yet complete + t.Run("warmup-enabled-not-warmed", func(t *testing.T) { + status := &WarmupStatus{Done: make(chan struct{})} + + // In this case, isCacheWarmed is still false + check := func(req *http.Request) error { + if !status.isCacheWarmed.Load() { + return fmt.Errorf("cache is not yet warmed") + } + return nil + } + + // The check should fail because isCacheWarmed is false + err := check(nil) + assert.Error(t, err, "readiness check should fail when cache is not warmed") + assert.Equal(t, "cache is not yet warmed", err.Error()) + }) +} diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index d8852a41..e30c7865 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ 
-22,20 +22,20 @@ resources: # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. #- ../prometheus # [METRICS] Expose the controller manager metrics service. -#- metrics_service.yaml +- metrics_service.yaml # [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. # Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. # Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will # be able to communicate with the Webhook Server. -#- ../network-policy +- ../network-policy # Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager -#patches: +patches: # [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443. # More info: https://book.kubebuilder.io/reference/metrics -#- path: manager_metrics_patch.yaml -# target: -# kind: Deployment +- path: manager_metrics_patch.yaml + target: + kind: Deployment # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml index 80f715d4..40abf4ca 100644 --- a/config/default/metrics_service.yaml +++ b/config/default/metrics_service.yaml @@ -6,7 +6,7 @@ metadata: app.kubernetes.io/name: argocd-image-updater app.kubernetes.io/managed-by: kustomize name: argocd-image-updater-controller-metrics-service - namespace: system + namespace: argocd-image-updater-system spec: ports: - name: https diff --git a/config/install.yaml b/config/install.yaml index a592b255..a8d84fcf 100644 --- a/config/install.yaml +++ b/config/install.yaml @@ -5,6 +5,7 @@ metadata: app.kubernetes.io/managed-by: kustomize app.kubernetes.io/name: argocd-image-updater-system control-plane: argocd-image-updater-controller + metrics: enabled name: argocd-image-updater-system --- apiVersion: 
apiextensions.k8s.io/v1 @@ -661,6 +662,34 @@ rules: - watch --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: argocd-image-updater-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: argocd-image-updater-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: labels: @@ -711,6 +740,32 @@ subjects: name: argocd-image-updater-controller namespace: argocd-image-updater-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argocd-image-updater-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argocd-image-updater-metrics-auth-role +subjects: +- kind: ServiceAccount + name: argocd-image-updater-controller + namespace: argocd-image-updater-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argocd-image-updater-metrics-reader-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argocd-image-updater-metrics-reader +subjects: +- kind: ServiceAccount + name: argocd-image-updater-controller + namespace: argocd-image-updater-system +--- apiVersion: v1 kind: ConfigMap metadata: @@ -738,6 +793,24 @@ metadata: name: argocd-image-updater-secret namespace: argocd-image-updater-system --- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: argocd-image-updater + control-plane: argocd-image-updater-controller + name: argocd-image-updater-controller-metrics-service + namespace: argocd-image-updater-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 
8443 + selector: + control-plane: argocd-image-updater-controller +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -762,6 +835,7 @@ spec: spec: containers: - args: + - --metrics-bind-address=:8443 - run command: - /manager @@ -945,3 +1019,27 @@ spec: secretName: ssh-git-creds - emptyDir: {} name: tmp +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: argocd-image-updater-controller + control-plane: argocd-image-updater-controller + name: allow-metrics-traffic + namespace: argocd-image-updater-system +spec: + ingress: + - from: + - namespaceSelector: + matchLabels: + metrics: enabled + ports: + - port: 8443 + protocol: TCP + podSelector: + matchLabels: + control-plane: argocd-image-updater-controller + policyTypes: + - Ingress diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index a48a77e0..9e8cfc70 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -5,6 +5,7 @@ metadata: control-plane: argocd-image-updater-controller app.kubernetes.io/name: argocd-image-updater-system app.kubernetes.io/managed-by: kustomize + metrics: enabled name: argocd-image-updater-system --- apiVersion: apps/v1 diff --git a/config/network-policy/allow-metrics-traffic.yaml b/config/network-policy/allow-metrics-traffic.yaml index b7271877..de874b85 100644 --- a/config/network-policy/allow-metrics-traffic.yaml +++ b/config/network-policy/allow-metrics-traffic.yaml @@ -5,10 +5,11 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: labels: - app.kubernetes.io/name: argocd-image-updater + control-plane: argocd-image-updater-controller + app.kubernetes.io/name: argocd-image-updater-controller app.kubernetes.io/managed-by: kustomize name: allow-metrics-traffic - namespace: system + namespace: argocd-image-updater-system spec: podSelector: matchLabels: diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 
858f9c7c..b50967e5 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -15,15 +15,14 @@ resources: # can access the metrics endpoint. Comment the following # permissions if you want to disable this protection. # More info: https://book.kubebuilder.io/reference/metrics.html -#- metrics_auth_role.yaml -#- metrics_auth_role_binding.yaml -#- metrics_reader_role.yaml +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +- metrics_reader_role_binding.yaml # For each CRD, "Editor" and "Viewer" roles are scaffolded by # default, aiding admins in cluster management. Those roles are # not used by the Project itself. You can comment the following lines # if you do not want those helpers be installed with your Project. -#- imageupdater_editor_role.yaml -#- imageupdater_viewer_role.yaml # argocd-image-updater Role and RoleBinding - argocd-image-updater-role.yaml - argocd-image-updater-rolebinding.yaml diff --git a/config/rbac/metrics_reader_role_binding.yaml b/config/rbac/metrics_reader_role_binding.yaml new file mode 100644 index 00000000..ad1b6e4c --- /dev/null +++ b/config/rbac/metrics_reader_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argocd-image-updater-metrics-reader-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argocd-image-updater-metrics-reader +subjects: +- kind: ServiceAccount + name: argocd-image-updater-controller + namespace: argocd-image-updater-system diff --git a/docs/install/cmd/run.md b/docs/install/cmd/run.md index 0e23c2e9..e30bfbd7 100644 --- a/docs/install/cmd/run.md +++ b/docs/install/cmd/run.md @@ -31,6 +31,10 @@ Secret for validating Docker Hub webhooks. Can also be set with the `DOCKER_WEBHOOK_SECRET` environment variable. +**--enable-http2 *disabled*** + +If set, HTTP/2 will be enabled for the metrics and webhook servers. 
+ **--enable-webhook *enabled*** Enable webhook server for receiving registry events. @@ -81,7 +85,7 @@ Secret for validating Harbor webhooks Can also be set with the `HARBOR_WEBHOOK_SECRET` environment variable. -**--health-port *port*** +**--health-probe-bind-address *address*** Specifies the local port to bind the health server to. The health server is used to provide health and readiness probes when running as K8s workload. @@ -117,6 +121,16 @@ Kubernetes cluster, i.e. `~/.kube/config`. When specified, Argo CD Image Updater will use the currently active context in the configuration to connect to the Kubernetes cluster. +**--leader-election *enabled*** + +Enable leader election for controller manager. Enabling this will ensure +there is only one active controller manager. + +**--leader-election-namespace *namespace*** + +The namespace used for the leader election lease. If empty, the controller will +use the namespace of the pod it is running in. When running locally this value must be set. + **--loglevel *level*** Set the log level to *level*, where *level* can be one of `trace`, `debug`, @@ -146,13 +160,17 @@ Higher values may improve throughput but could increase resource usage and API l Can also be set using the *MAX_CONCURRENT_RECONCILES* environment variable. -**--metrics-port *port*** +**--metrics-bind-address *address*** + +The address the metrics endpoint binds to. Use `:8443` for HTTPS or `:8080` for HTTP, or "0" to disable the metrics service (default "0"). + +**--metrics-secure *enabled*** -port to start the metrics server on, 0 to disable (default 8081) +If set, the metrics endpoint is served securely via HTTPS. Use `--metrics-secure="false"` to use HTTP instead. **--once** -A shortcut for specifying `--interval 0 --health-port 0 --warmUpCache=true`. If given, +A shortcut for specifying `--interval 0 --health-probe-bind-address 0 --warmup-cache=true`. If given, Argo CD Image Updater will exit after the first update cycle. 
**--quay-webhook-secret *secret*** diff --git a/docs/install/installation.md b/docs/install/installation.md index 89f4cb47..ccfe1887 100644 --- a/docs/install/installation.md +++ b/docs/install/installation.md @@ -70,43 +70,29 @@ If opting for such an approach, you should make sure that: ## Metrics -!!!note "Under Construction" - Please note that Prometheus metrics are not available in the initial CRD-based versions of Argo CD Image Updater. The functionality described below is planned for a future release. We are keeping this section as a reference for when metrics are re-introduced. - Starting with v0.8.0, Argo CD Image Updater exports Prometheus-compatible -metrics on a dedicated endpoint, which by default listens on TCP port 8081 -and serves data from `/metrics` path. This endpoint is exposed by a service -named `argocd-image-updater` on a port named `metrics`. - -The following metrics are being made available: - -* Number of applications processed (i.e. those with an annotation) - - * `argocd_image_updater_applications_watched_total` - -* Number of images watched for new tags - - * `argocd_image_updater_images_watched_total` - -* Number of images updated (successful and failed) - - * `argocd_image_updater_images_updated_total` - * `argocd_image_updater_images_errors_total` - -* Number of requests to Argo CD API (successful and failed) - - * `argocd_image_updater_argocd_api_requests_total` - * `argocd_image_updater_argocd_api_errors_total` - -* Number of requests to K8s API (successful and failed) - - * `argocd_image_updater_k8s_api_requests_total` - * `argocd_image_updater_k8s_api_errors_total` - -* Number of requests to the container registries (successful and failed) - - * `argocd_image_updater_registry_requests_total` - * `argocd_image_updater_registry_requests_failed_total` +metrics. This feature is disabled by default but can be enabled using the +`--metrics-bind-address` flag to specify a listening address (e.g., `:8080`). 
+Metrics are then served on the `/metrics` path. + +The following metric is currently available and populated with data: + +* `argocd_image_updater_applications_watched_total` - A gauge that shows the + number of applications watched per `ImageUpdater` CR. + +!!! note "Other Defined Metrics" + The metrics listed below are also defined within the application. However, + for various reasons, they are either not populated with data or have been + temporarily disabled. They may not appear on the `/metrics` endpoint or may + always report a value of `0`. + +* `argocd_image_updater_images_watched_total` +* `argocd_image_updater_images_updated_total` +* `argocd_image_updater_images_errors_total` +* `argocd_image_updater_k8s_api_requests_total` +* `argocd_image_updater_k8s_api_errors_total` +* `argocd_image_updater_registry_requests_total` +* `argocd_image_updater_registry_requests_failed_total` A (very) rudimentary example dashboard definition for Grafana is provided [here](https://github.com/argoproj-labs/argocd-image-updater/tree/master/config) diff --git a/internal/controller/imageupdater_controller.go b/internal/controller/imageupdater_controller.go index 8140df26..6ea774f8 100644 --- a/internal/controller/imageupdater_controller.go +++ b/internal/controller/imageupdater_controller.go @@ -35,6 +35,7 @@ import ( "github.com/argoproj-labs/argocd-image-updater/pkg/argocd" "github.com/argoproj-labs/argocd-image-updater/pkg/common" "github.com/argoproj-labs/argocd-image-updater/pkg/kube" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/log" ) @@ -48,8 +49,6 @@ type ImageUpdaterConfig struct { LogFormat string KubeClient *kube.ImageUpdaterKubernetesClient MaxConcurrentApps int - HealthPort int - MetricsPort int RegistriesConf string GitCommitUser string GitCommitMail string @@ -138,7 +137,7 @@ func (r *ImageUpdaterReconciler) Reconcile(ctx context.Context, req ctrl.Request if hasFinalizer { 
reqLogger.Debugf("ImageUpdater resource is being deleted, running finalizer.") // --- FINALIZER LOGIC --- - // Currently, there is nothing to clean up. + metrics.Applications().RemoveNumberOfApplications(imageUpdater.Name, imageUpdater.Namespace) // Remove the finalizer from the list and update the object. reqLogger.Debugf("Finalizer logic complete, removing finalizer from the resource.") diff --git a/internal/controller/imageupdater_controller_test.go b/internal/controller/imageupdater_controller_test.go new file mode 100644 index 00000000..15ba9387 --- /dev/null +++ b/internal/controller/imageupdater_controller_test.go @@ -0,0 +1,74 @@ +package controller + +import ( + "context" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/argoproj-labs/argocd-image-updater/api/v1alpha1" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" +) + +func TestReconcile_DeleteFinalizer_RemovesMetrics(t *testing.T) { + // Initialize a new prometheus registry for the test. 
+ crmetrics.Registry = prometheus.NewRegistry() + metrics.InitMetrics() + + crName := "test-iu" + crNamespace := "test-ns" + + apm := metrics.Applications() + // Pre-set a metric for our test CR + apm.SetNumberOfApplications(crName, crNamespace, 1) + if apm != nil { + assert.Equal(t, 1, testutil.CollectAndCount(apm.ApplicationsTotal)) + } + + // Create a fake ImageUpdater resource that is marked for deletion + imageUpdater := &v1alpha1.ImageUpdater{ + ObjectMeta: metav1.ObjectMeta{ + Name: crName, + Namespace: crNamespace, + DeletionTimestamp: &metav1.Time{Time: time.Now()}, + Finalizers: []string{ResourcesFinalizerName}, + }, + } + + scheme := runtime.NewScheme() + _ = v1alpha1.AddToScheme(scheme) + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(imageUpdater).Build() + + warmedCh := make(chan struct{}) + close(warmedCh) + + reconciler := &ImageUpdaterReconciler{ + Client: fakeClient, + Scheme: scheme, + Config: &ImageUpdaterConfig{}, + CacheWarmed: warmedCh, + } + + req := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: crName, + Namespace: crNamespace, + }, + } + + _, err := reconciler.Reconcile(context.Background(), req) + assert.NoError(t, err) + + // The metric should be gone after reconciliation of the deleted resource + assert.Equal(t, 0, testutil.CollectAndCount(apm.ApplicationsTotal)) +} diff --git a/internal/controller/reconcile.go b/internal/controller/reconcile.go index 50bd3bab..494fa421 100644 --- a/internal/controller/reconcile.go +++ b/internal/controller/reconcile.go @@ -10,6 +10,7 @@ import ( iuapi "github.com/argoproj-labs/argocd-image-updater/api/v1alpha1" "github.com/argoproj-labs/argocd-image-updater/pkg/argocd" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/image" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/log" "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/registry" @@ 
-33,10 +34,10 @@ func (r *ImageUpdaterReconciler) RunImageUpdater(ctx context.Context, cr *iuapi. return result, err } - // TODO: metrics will be implemented in GITOPS-7113 - //metrics.Applications().SetNumberOfApplications(len(appList)) - if !warmUp { + if metrics.Applications() != nil { + metrics.Applications().SetNumberOfApplications(cr.Name, cr.Namespace, len(appList)) + } baseLogger.Infof("Starting image update cycle, considering %d application(s) for update", len(appList)) } @@ -95,7 +96,8 @@ func (r *ImageUpdaterReconciler) RunImageUpdater(ctx context.Context, cr *iuapi. result.NumImagesConsidered += res.NumImagesConsidered result.NumImagesUpdated += res.NumImagesUpdated result.NumSkipped += res.NumSkipped - // TODO: metrics will be implemnted in GITOPS-7113 + // TODO: images metrics will be implemented in GITOPS-8068 + // TODO: these metrics were commented out because there is no proper garbage collector that will handle metrics deletion //if !warmUp && !r.Config.DryRun { // metrics.Applications().IncreaseImageUpdate(app, res.NumImagesUpdated) //} diff --git a/internal/controller/reconcile_test.go b/internal/controller/reconcile_test.go index 27548cf4..3bf3c963 100644 --- a/internal/controller/reconcile_test.go +++ b/internal/controller/reconcile_test.go @@ -7,6 +7,8 @@ import ( "time" argocdapi "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -16,12 +18,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clifake "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/reconcile" argocdimageupdaterv1alpha1 
"github.com/argoproj-labs/argocd-image-updater/api/v1alpha1" "github.com/argoproj-labs/argocd-image-updater/pkg/argocd" "github.com/argoproj-labs/argocd-image-updater/pkg/argocd/mocks" "github.com/argoproj-labs/argocd-image-updater/pkg/kube" + "github.com/argoproj-labs/argocd-image-updater/pkg/metrics" regokube "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/kube" ) @@ -244,6 +248,7 @@ func TestImageUpdaterReconciler_Reconcile(t *testing.T) { }, } + metrics.InitMetrics() for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Setup @@ -2127,6 +2132,7 @@ func TestImageUpdaterReconciler_RunImageUpdater(t *testing.T) { require.NoError(t, err) ctx := context.Background() fakeClientset := fake.NewClientset() + metrics.InitMetrics() // Base CR for tests baseCr := &argocdimageupdaterv1alpha1.ImageUpdater{ @@ -2205,6 +2211,7 @@ func TestImageUpdaterReconciler_RunImageUpdater(t *testing.T) { expectedResult argocd.ImageUpdaterResult expectErr bool expectedErrContains string + postCheck func(t *testing.T, r *ImageUpdaterReconciler, cr *argocdimageupdaterv1alpha1.ImageUpdater, res argocd.ImageUpdaterResult) }{ { name: "one matching application", @@ -2217,6 +2224,21 @@ func TestImageUpdaterReconciler_RunImageUpdater(t *testing.T) { NumImagesUpdated: 1, }, }, + { + name: "sets number of applications metric", + cr: baseCr, + apps: []client.Object{matchingApp, nonMatchingApp}, + postCheck: func(t *testing.T, r *ImageUpdaterReconciler, cr *argocdimageupdaterv1alpha1.ImageUpdater, res argocd.ImageUpdaterResult) { + expectedVal := float64(1) + metricVal := testutil.ToFloat64(metrics.Applications().ApplicationsTotal.WithLabelValues(cr.Name, cr.Namespace)) + assert.Equal(t, expectedVal, metricVal) + }, + expectedResult: argocd.ImageUpdaterResult{ + NumApplicationsProcessed: 1, + NumImagesConsidered: 1, + NumImagesUpdated: 1, + }, + }, { name: "dry run false", cr: baseCr, @@ -2347,6 +2369,10 @@ func TestImageUpdaterReconciler_RunImageUpdater(t *testing.T) { 
for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + // Swap in a fresh registry for every test run; InitMetrics is guarded by sync.Once, so after the first call it is a no-op and the existing collectors are reused + crmetrics.Registry = prometheus.NewRegistry() + metrics.InitMetrics() + fakeKubeClient := clifake.NewClientBuilder().WithScheme(s).WithObjects(tt.apps...).Build() reconciler := &ImageUpdaterReconciler{ Client: fakeKubeClient, @@ -2372,6 +2398,10 @@ func TestImageUpdaterReconciler_RunImageUpdater(t *testing.T) { require.NoError(t, err) assert.Equal(t, tt.expectedResult, result) } + + if tt.postCheck != nil { + tt.postCheck(t, reconciler, tt.cr, result) + } }) } } @@ -2388,6 +2418,7 @@ func TestImageUpdaterReconciler_ProcessImageUpdaterCRs(t *testing.T) { require.NoError(t, err) ctx := context.Background() fakeClientset := fake.NewClientset() + metrics.InitMetrics() // A helper function to create a new reconciler for each test run newTestReconciler := func(cli client.Client) *ImageUpdaterReconciler { diff --git a/pkg/health/health.go b/pkg/health/health.go deleted file mode 100644 index cbc4977c..00000000 --- a/pkg/health/health.go +++ /dev/null @@ -1,25 +0,0 @@ -package health - -// Most simple health check probe to see whether our server is still alive - -import ( - "fmt" - "net/http" - - "github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/log" -) - -func StartHealthServer(port int) chan error { - errCh := make(chan error) - go func() { - sm := http.NewServeMux() - sm.HandleFunc("/healthz", HealthProbe) - errCh <- http.ListenAndServe(fmt.Sprintf(":%d", port), sm) - }() - return errCh -} - -func HealthProbe(w http.ResponseWriter, r *http.Request) { - log.Tracef("/healthz ping request received, replying with pong") - fmt.Fprintf(w, "OK\n") -} diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go deleted file mode 100644 index b5670f57..00000000 --- a/pkg/health/health_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package health - -import ( - "fmt" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -// 
Unit test function -func TestStartHealthServer_InvalidPort(t *testing.T) { - // Use an invalid port number - port := -1 - errCh := StartHealthServer(port) - defer close(errCh) // Close the error channel after the test completes - select { - case err := <-errCh: - if err == nil { - t.Error("Expected error, got nil") - } else if err.Error() != fmt.Sprintf("listen tcp: address %d: invalid port", port) { - t.Errorf("Expected error message about invalid port, got %v", err) - } - case <-time.After(2 * time.Second): - t.Error("Timed out waiting for error") - } -} - -func TestHealthProbe(t *testing.T) { - // Create a mock HTTP request - req, err := http.NewRequest("GET", "/healthz", nil) - if err != nil { - t.Fatalf("Failed to create request: %v", err) - } - - // Create a mock HTTP response recorder - w := httptest.NewRecorder() - - // Call the HealthProbe function directly - HealthProbe(w, req) - - // Check the response status code - if w.Code != http.StatusOK { - t.Errorf("Expected status OK; got %d", w.Code) - } - - // Check the response body - expectedBody := "OK\n" - if body := w.Body.String(); body != expectedBody { - t.Errorf("Expected body %q; got %q", expectedBody, body) - } -} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index be47bfb2..467b6eac 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,12 +1,12 @@ package metrics import ( - "fmt" - "net/http" + "sync" + + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/client_golang/prometheus/promhttp" ) type Metrics struct { @@ -15,7 +15,10 @@ type Metrics struct { Clients *ClientMetrics } -var defaultMetrics *Metrics +var ( + defaultMetrics *Metrics + initMetricsOnce sync.Once +) // EndpointMetrics stores metrics for registry endpoints type EndpointMetrics struct { @@ -25,40 +28,27 @@ type EndpointMetrics struct { // ApplicationMetrics stores 
metrics for applications type ApplicationMetrics struct { - applicationsTotal prometheus.Gauge + ApplicationsTotal *prometheus.GaugeVec imagesWatchedTotal *prometheus.GaugeVec imagesUpdatedTotal *prometheus.CounterVec imagesUpdatedErrorsTotal *prometheus.CounterVec } -// ClientMetrics stores metrics for K8s and ArgoCD clients +// ClientMetrics stores metrics for K8s client type ClientMetrics struct { - argoCDRequestsTotal *prometheus.CounterVec - argoCDRequestsErrorsTotal *prometheus.CounterVec kubeAPIRequestsTotal prometheus.Counter kubeAPIRequestsErrorsTotal prometheus.Counter } -// StartMetricsServer starts a new HTTP server for metrics on given port -func StartMetricsServer(port int) chan error { - errCh := make(chan error) - go func() { - sm := http.NewServeMux() - sm.Handle("/metrics", promhttp.Handler()) - errCh <- http.ListenAndServe(fmt.Sprintf(":%d", port), sm) - }() - return errCh -} - // NewEndpointMetrics returns a new endpoint metrics object func NewEndpointMetrics() *EndpointMetrics { metrics := &EndpointMetrics{} - metrics.requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.requestsTotal = promauto.With(crmetrics.Registry).NewCounterVec(prometheus.CounterOpts{ Name: "argocd_image_updater_registry_requests_total", Help: "The total number of requests to this endpoint", }, []string{"registry"}) - metrics.requestsFailed = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.requestsFailed = promauto.With(crmetrics.Registry).NewCounterVec(prometheus.CounterOpts{ Name: "argocd_image_updater_registry_requests_failed_total", Help: "The number of failed requests to this endpoint", }, []string{"registry"}) @@ -70,22 +60,22 @@ func NewEndpointMetrics() *EndpointMetrics { func NewApplicationsMetrics() *ApplicationMetrics { metrics := &ApplicationMetrics{} - metrics.applicationsTotal = promauto.NewGauge(prometheus.GaugeOpts{ + metrics.ApplicationsTotal = promauto.With(crmetrics.Registry).NewGaugeVec(prometheus.GaugeOpts{ Name: 
"argocd_image_updater_applications_watched_total", - Help: "The total number of applications watched by Argo CD Image Updater", - }) + Help: "The total number of applications watched by Argo CD Image Updater CR", + }, []string{"image_updater_cr_name", "image_updater_cr_namespace"}) - metrics.imagesWatchedTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{ + metrics.imagesWatchedTotal = promauto.With(crmetrics.Registry).NewGaugeVec(prometheus.GaugeOpts{ Name: "argocd_image_updater_images_watched_total", Help: "Number of images watched by Argo CD Image Updater", }, []string{"application"}) - metrics.imagesUpdatedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.imagesUpdatedTotal = promauto.With(crmetrics.Registry).NewCounterVec(prometheus.CounterOpts{ Name: "argocd_image_updater_images_updated_total", Help: "Number of images updates by Argo CD Image Updater", }, []string{"application"}) - metrics.imagesUpdatedErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + metrics.imagesUpdatedErrorsTotal = promauto.With(crmetrics.Registry).NewCounterVec(prometheus.CounterOpts{ Name: "argocd_image_updater_images_errors_total", Help: "Number of errors reported by Argo CD Image Updater", }, []string{"application"}) @@ -97,24 +87,14 @@ func NewApplicationsMetrics() *ApplicationMetrics { func NewClientMetrics() *ClientMetrics { metrics := &ClientMetrics{} - metrics.argoCDRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "argocd_image_updater_argocd_api_requests_total", - Help: "The total number of Argo CD API requests performed by the Argo CD Image Updater", - }, []string{"argocd_server"}) - - metrics.argoCDRequestsErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "argocd_image_updater_argocd_api_errors_total", - Help: "The total number of Argo CD API requests resulting in error", - }, []string{"argocd_server"}) - - metrics.kubeAPIRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{ + 
metrics.kubeAPIRequestsTotal = promauto.With(crmetrics.Registry).NewCounter(prometheus.CounterOpts{ Name: "argocd_image_updater_k8s_api_requests_total", - Help: "The total number of Argo CD API requests resulting in error", + Help: "The total number of K8S API requests performed by the Argo CD Image Updater", }) - metrics.kubeAPIRequestsErrorsTotal = promauto.NewCounter(prometheus.CounterOpts{ + metrics.kubeAPIRequestsErrorsTotal = promauto.With(crmetrics.Registry).NewCounter(prometheus.CounterOpts{ Name: "argocd_image_updater_k8s_api_errors_total", - Help: "The total number of Argo CD API requests resulting in error", + Help: "The total number of K8S API requests resulting in error", }) return metrics @@ -161,8 +141,18 @@ func (epm *EndpointMetrics) IncreaseRequest(registryURL string, isFailed bool) { } // SetNumberOfApplications sets the total number of currently watched applications -func (apm *ApplicationMetrics) SetNumberOfApplications(num int) { - apm.applicationsTotal.Set(float64(num)) +func (apm *ApplicationMetrics) SetNumberOfApplications(name, namespace string, num int) { + apm.ApplicationsTotal.WithLabelValues(name, namespace).Set(float64(num)) +} + +// RemoveNumberOfApplications removes the application gauge for a given CR +func (apm *ApplicationMetrics) RemoveNumberOfApplications(name, namespace string) { + apm.ApplicationsTotal.DeleteLabelValues(name, namespace) +} + +// ResetApplicationsTotal resets the total number of applications to handle deletion +func (apm *ApplicationMetrics) ResetApplicationsTotal() { + apm.ApplicationsTotal.Reset() } // SetNumberOfImagesWatched sets the total number of currently watched images for given application @@ -180,14 +170,11 @@ func (apm *ApplicationMetrics) IncreaseUpdateErrors(application string, by int) apm.imagesUpdatedErrorsTotal.WithLabelValues(application).Add(float64(by)) } -// IncreaseArgoCDClientRequest increases the number of Argo CD API requests for given server -func (cpm *ClientMetrics) 
IncreaseArgoCDClientRequest(server string, by int) { - cpm.argoCDRequestsTotal.WithLabelValues(server).Add(float64(by)) -} - -// IncreaseArgoCDClientError increases the number of failed Argo CD API requests for given server -func (cpm *ClientMetrics) IncreaseArgoCDClientError(server string, by int) { - cpm.argoCDRequestsErrorsTotal.WithLabelValues(server).Add(float64(by)) +// RemoveNumberOfImages removes the watched, updated and update-error image metrics for a given application +func (apm *ApplicationMetrics) RemoveNumberOfImages(application string) { + apm.imagesWatchedTotal.DeleteLabelValues(application) + apm.imagesUpdatedTotal.DeleteLabelValues(application) + apm.imagesUpdatedErrorsTotal.DeleteLabelValues(application) } // IncreaseK8sClientRequest increases the number of K8s API requests @@ -195,12 +182,14 @@ func (cpm *ClientMetrics) IncreaseK8sClientRequest(by int) { cpm.kubeAPIRequestsTotal.Add(float64(by)) } -// IncreaseK8sClientRequest increases the number of failed K8s API requests +// IncreaseK8sClientError increases the number of failed K8s API requests func (cpm *ClientMetrics) IncreaseK8sClientError(by int) { cpm.kubeAPIRequestsErrorsTotal.Add(float64(by)) } // InitMetrics initializes the global metrics objects func InitMetrics() { - defaultMetrics = NewMetrics() + initMetricsOnce.Do(func() { + defaultMetrics = NewMetrics() + }) } diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 05af83f1..6c444cec 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -3,18 +3,23 @@ package metrics import ( "testing" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" ) func TestMetricsInitialization(t *testing.T) { t.Run("NewEndpointMetrics", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() prometheus.DefaultRegisterer = prometheus.NewRegistry() epm := NewEndpointMetrics() assert.NotNil(t, 
epm) assert.NotNil(t, epm.requestsTotal) assert.NotNil(t, epm.requestsFailed) + crmetrics.Registry = prometheus.NewRegistry() prometheus.DefaultRegisterer = nil epm = NewEndpointMetrics() assert.NotNil(t, epm) @@ -23,48 +28,119 @@ func TestMetricsInitialization(t *testing.T) { }) t.Run("NewClientMetrics", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() prometheus.DefaultRegisterer = prometheus.NewRegistry() cpm := NewClientMetrics() assert.NotNil(t, cpm) - assert.NotNil(t, cpm.argoCDRequestsTotal) - assert.NotNil(t, cpm.argoCDRequestsErrorsTotal) assert.NotNil(t, cpm.kubeAPIRequestsTotal) assert.NotNil(t, cpm.kubeAPIRequestsErrorsTotal) + crmetrics.Registry = prometheus.NewRegistry() prometheus.DefaultRegisterer = nil cpm = NewClientMetrics() assert.NotNil(t, cpm) - assert.NotNil(t, cpm.argoCDRequestsTotal) - assert.NotNil(t, cpm.argoCDRequestsErrorsTotal) assert.NotNil(t, cpm.kubeAPIRequestsTotal) assert.NotNil(t, cpm.kubeAPIRequestsErrorsTotal) }) t.Run("NewApplicationsMetrics", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() apm := NewApplicationsMetrics() assert.NotNil(t, apm) - assert.NotNil(t, apm.applicationsTotal) + assert.NotNil(t, apm.ApplicationsTotal) assert.NotNil(t, apm.imagesWatchedTotal) assert.NotNil(t, apm.imagesUpdatedTotal) assert.NotNil(t, apm.imagesUpdatedErrorsTotal) }) + + t.Run("InitMetrics is idempotent", func(t *testing.T) { + // Replace the default registry with a new one for this test. + crmetrics.Registry = prometheus.NewRegistry() + prometheus.DefaultRegisterer = crmetrics.Registry + + // We cannot reset initMetricsOnce, so we test for idempotency. + // defaultMetrics may or may not be nil at this point, depending on test execution order. + InitMetrics() + firstInstance := defaultMetrics + assert.NotNil(t, firstInstance) + + // Calling it again should have no effect. + InitMetrics() + secondInstance := defaultMetrics + + // The key is that the instance must be the same. 
+ assert.Same(t, firstInstance, secondInstance) + }) } func TestMetricsOperations(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() + InitMetrics() epm := Endpoint() epm.IncreaseRequest("/registry1", false) epm.IncreaseRequest("/registry1", true) cpm := Clients() - cpm.IncreaseArgoCDClientRequest("server1", 1) - cpm.IncreaseArgoCDClientError("server1", 2) cpm.IncreaseK8sClientRequest(3) cpm.IncreaseK8sClientError(4) apm := Applications() apm.IncreaseImageUpdate("app1", 1) apm.IncreaseUpdateErrors("app1", 2) - apm.SetNumberOfApplications(3) + apm.SetNumberOfApplications("cr1", "ns1", 3) apm.SetNumberOfImagesWatched("app1", 4) } + +func TestApplicationMetricsRemovals(t *testing.T) { + t.Run("RemoveNumberOfApplications", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() + apm := NewApplicationsMetrics() + apm.SetNumberOfApplications("cr1", "ns1", 5) + apm.SetNumberOfApplications("cr2", "ns2", 10) + assert.Equal(t, 2, testutil.CollectAndCount(apm.ApplicationsTotal)) + + apm.RemoveNumberOfApplications("cr1", "ns1") + assert.Equal(t, 1, testutil.CollectAndCount(apm.ApplicationsTotal)) + assert.Equal(t, float64(10), testutil.ToFloat64(apm.ApplicationsTotal.WithLabelValues("cr2", "ns2"))) + }) + + t.Run("ResetApplicationsTotal", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() + apm := NewApplicationsMetrics() + apm.SetNumberOfApplications("cr1", "ns1", 5) + apm.SetNumberOfApplications("cr2", "ns2", 10) + assert.Equal(t, 2, testutil.CollectAndCount(apm.ApplicationsTotal)) + + apm.ResetApplicationsTotal() + assert.Equal(t, 0, testutil.CollectAndCount(apm.ApplicationsTotal)) + }) + + t.Run("RemoveNumberOfImages", func(t *testing.T) { + crmetrics.Registry = prometheus.NewRegistry() + apm := NewApplicationsMetrics() + + apm.SetNumberOfImagesWatched("app1", 10) + apm.IncreaseImageUpdate("app1", 5) + apm.IncreaseUpdateErrors("app1", 2) + + apm.SetNumberOfImagesWatched("app2", 20) + apm.IncreaseImageUpdate("app2", 6) + 
apm.IncreaseUpdateErrors("app2", 3) + + assert.Equal(t, 2, testutil.CollectAndCount(apm.imagesWatchedTotal)) + assert.Equal(t, 2, testutil.CollectAndCount(apm.imagesUpdatedTotal)) + assert.Equal(t, 2, testutil.CollectAndCount(apm.imagesUpdatedErrorsTotal)) + + apm.RemoveNumberOfImages("app1") + + assert.Equal(t, 1, testutil.CollectAndCount(apm.imagesWatchedTotal)) + assert.Equal(t, float64(20), testutil.ToFloat64(apm.imagesWatchedTotal.WithLabelValues("app2"))) + + assert.Equal(t, 1, testutil.CollectAndCount(apm.imagesUpdatedTotal)) + assert.Equal(t, float64(6), testutil.ToFloat64(apm.imagesUpdatedTotal.WithLabelValues("app2"))) + + assert.Equal(t, 1, testutil.CollectAndCount(apm.imagesUpdatedErrorsTotal)) + assert.Equal(t, float64(3), testutil.ToFloat64(apm.imagesUpdatedErrorsTotal.WithLabelValues("app2"))) + }) +} diff --git a/pkg/webhook/server.go b/pkg/webhook/server.go index 4c248cb6..533bed5e 100644 --- a/pkg/webhook/server.go +++ b/pkg/webhook/server.go @@ -83,9 +83,16 @@ func (s *WebhookServer) Stop(ctx context.Context) error { // handleHealth handles health check requests func (s *WebhookServer) handleHealth(w http.ResponseWriter, r *http.Request) { + webhookLogger := log.Log().WithFields(logrus.Fields{ + "logger": "webhook", + }) + ctx := log.ContextWithLogger(r.Context(), webhookLogger) + baseLogger := log.LoggerFromContext(ctx). + WithField("webhook_remote", r.RemoteAddr) + w.WriteHeader(http.StatusOK) if _, err := w.Write([]byte("OK")); err != nil { - log.Errorf("Failed to write health check response: %v", err) + baseLogger.Errorf("Failed to write health check response: %v", err) } }