From b40e1a28eb08e06c7db6f509f81ec48493473c90 Mon Sep 17 00:00:00 2001 From: Bernard Kim Date: Thu, 25 Apr 2024 11:43:50 -0700 Subject: [PATCH] Add automatic_updates label (#40563) --- lib/auth/auth.go | 83 ++++-------- lib/auth/periodic.go | 104 ++++++++++++--- lib/auth/periodic_test.go | 264 +++++++++++++++++++++++++++++--------- metrics.go | 4 + 4 files changed, 318 insertions(+), 137 deletions(-) diff --git a/lib/auth/auth.go b/lib/auth/auth.go index 5c4c16756c2d6..0f3fcc7725f13 100644 --- a/lib/auth/auth.go +++ b/lib/auth/auth.go @@ -686,9 +686,12 @@ var ( prometheus.GaugeOpts{ Namespace: teleport.MetricNamespace, Name: teleport.MetricRegisteredServers, - Help: "The number of Teleport services that are connected to an auth server by version.", + Help: "The number of Teleport services that are connected to an auth server.", + }, + []string{ + teleport.TagVersion, + teleport.TagAutomaticUpdates, }, - []string{teleport.TagVersion}, ) registeredAgentsInstallMethod = prometheus.NewGaugeVec( @@ -1304,8 +1307,7 @@ func (a *Server) runPeriodicOperations() { // Update prometheus gauge heartbeatsMissedByAuth.Set(float64(missedKeepAliveCount)) case <-promTicker.Next(): - a.updateVersionMetrics() - a.updateInstallMethodsMetrics() + a.updateAgentMetrics() case <-releaseCheck.Next(): a.syncReleaseAlerts(ctx, true) case <-localReleaseCheck.Next(): @@ -1376,8 +1378,6 @@ func (a *Server) doInstancePeriodics(ctx context.Context) { return } - a.updateUpdaterVersionMetrics() - // create/delete upgrade enroll prompt as appropriate enrollMsg, shouldPrompt := uep.GenerateEnrollPrompt() a.handleUpgradeEnrollPrompt(ctx, enrollMsg, shouldPrompt) @@ -1563,76 +1563,39 @@ func (a *Server) doReleaseAlertSync(ctx context.Context, current vc.Target, visi } } -// updateUpdaterVersionMetrics leverages the inventory control stream to report the -// number of teleport updaters installed and their versions. 
To get an accurate representation -// of versions in an entire cluster the metric must be aggregated with all auth instances. -func (a *Server) updateUpdaterVersionMetrics() { +func (a *Server) updateAgentMetrics() { imp := newInstanceMetricsPeriodic() - // record versions for all connected resources a.inventory.Iter(func(handle inventory.UpstreamHandle) { - imp.VisitInstance(handle.Hello()) + imp.VisitInstance(handle.Hello(), handle.AgentMetadata()) }) totalInstancesMetric.Set(float64(imp.TotalInstances())) enrolledInUpgradesMetric.Set(float64(imp.TotalEnrolledInUpgrades())) - // reset the gauges so that any versions that fall off are removed from exported metrics - upgraderCountsMetric.Reset() - for upgraderType, upgraderVersions := range imp.upgraderCounts { - for version, count := range upgraderVersions { - upgraderCountsMetric.With(prometheus.Labels{ - teleport.TagUpgrader: upgraderType, - teleport.TagVersion: version, - }).Set(float64(count)) - } - } -} - -// updateVersionMetrics leverages the inventory control stream to report the versions of -// all instances that are connected to a single auth server via prometheus metrics. To -// get an accurate representation of versions in an entire cluster the metric must be aggregated -// with all auth instances. 
-func (a *Server) updateVersionMetrics() { - versionCount := make(map[string]int) - - // record versions for all connected resources - a.inventory.Iter(func(handle inventory.UpstreamHandle) { - versionCount[handle.Hello().Version]++ - }) - // reset the gauges so that any versions that fall off are removed from exported metrics registeredAgents.Reset() - for version, count := range versionCount { - registeredAgents.WithLabelValues(version).Set(float64(count)) + for agent, count := range imp.RegisteredAgentsCount() { + registeredAgents.With(prometheus.Labels{ + teleport.TagVersion: agent.version, + teleport.TagAutomaticUpdates: agent.automaticUpdates, + }).Set(float64(count)) } -} - -// updateInstallMethodsMetrics leverages the inventory control stream to report the install methods -// of all instances that are connected to a single auth server via prometheus metrics. -// To get an accurate representation of install methods in an entire cluster the metric must be aggregated -// with all auth instances. -func (a *Server) updateInstallMethodsMetrics() { - installMethodCount := make(map[string]int) - - // record install methods for all connected resources - a.inventory.Iter(func(handle inventory.UpstreamHandle) { - installMethod := "unknown" - installMethods := append([]string{}, handle.AgentMetadata().InstallMethods...) 
- - if len(installMethods) > 0 { - slices.Sort(installMethods) - installMethod = strings.Join(installMethods, ",") - } - - installMethodCount[installMethod]++ - }) // reset the gauges so that any versions that fall off are removed from exported metrics registeredAgentsInstallMethod.Reset() - for installMethod, count := range installMethodCount { + for installMethod, count := range imp.InstallMethodCounts() { registeredAgentsInstallMethod.WithLabelValues(installMethod).Set(float64(count)) } + + // reset the gauges so that any type+version that fall off are removed from exported metrics + upgraderCountsMetric.Reset() + for metadata, count := range imp.UpgraderCounts() { + upgraderCountsMetric.With(prometheus.Labels{ + teleport.TagUpgrader: metadata.upgraderType, + teleport.TagVersion: metadata.version, + }).Set(float64(count)) + } } var ( diff --git a/lib/auth/periodic.go b/lib/auth/periodic.go index 40823f689707a..ce36934f02d01 100644 --- a/lib/auth/periodic.go +++ b/lib/auth/periodic.go @@ -20,6 +20,8 @@ package auth import ( "fmt" + "slices" + "strings" "golang.org/x/mod/semver" @@ -104,40 +106,112 @@ func inspectVersionCounts(counts map[string]int) (median string, total int, ok b // instanceMetricsPeriodic is an aggregator for general instance metrics. type instanceMetricsPeriodic struct { - upgraderCounts map[string]map[string]int - totalInstances int + metadata []instanceMetadata +} + +// instanceMetadata contains instance metadata to be exported. 
+type instanceMetadata struct { + // version specifies the version of the Teleport instance + version string + // installMethod specifies the Teleport agent installation method + installMethod string + // upgraderType specifies the upgrader type + upgraderType string + // upgraderVersion specifies the upgrader version + upgraderVersion string } func newInstanceMetricsPeriodic() *instanceMetricsPeriodic { return &instanceMetricsPeriodic{ - upgraderCounts: make(map[string]map[string]int), + metadata: []instanceMetadata{}, } } -// VisitInstance adds an instance to ongoing aggregations. -func (i *instanceMetricsPeriodic) VisitInstance(instance proto.UpstreamInventoryHello) { - i.totalInstances++ - if upgrader := instance.GetExternalUpgrader(); upgrader != "" { - if _, exists := i.upgraderCounts[upgrader]; !exists { - i.upgraderCounts[upgrader] = make(map[string]int) +func (i *instanceMetricsPeriodic) VisitInstance(instance proto.UpstreamInventoryHello, metadata proto.UpstreamInventoryAgentMetadata) { + // Sort install methods if multiple methods are specified. + installMethod := "unknown" + installMethods := append([]string{}, metadata.GetInstallMethods()...) + if len(installMethods) > 0 { + slices.Sort(installMethods) + installMethod = strings.Join(installMethods, ",") + } + + iMetadata := instanceMetadata{ + version: instance.GetVersion(), + installMethod: installMethod, + upgraderType: instance.GetExternalUpgrader(), + upgraderVersion: instance.GetExternalUpgraderVersion(), + } + i.metadata = append(i.metadata, iMetadata) +} + +type registeredAgent struct { + version string + automaticUpdates string +} + +// RegisteredAgentsCount returns the number of registered agents, grouped by version and automatic updates status. 
+func (i *instanceMetricsPeriodic) RegisteredAgentsCount() map[registeredAgent]int { + result := make(map[registeredAgent]int) + for _, metadata := range i.metadata { + automaticUpdates := "false" + if metadata.upgraderType != "" { + automaticUpdates = "true" + } + + agent := registeredAgent{ + version: metadata.version, + automaticUpdates: automaticUpdates, } - i.upgraderCounts[upgrader][instance.GetExternalUpgraderVersion()]++ + result[agent]++ } + return result +} + +// InstallMethodCounts returns the count of each install method. +func (i *instanceMetricsPeriodic) InstallMethodCounts() map[string]int { + installMethodCount := make(map[string]int) + for _, metadata := range i.metadata { + installMethodCount[metadata.installMethod]++ + } + return installMethodCount +} + +type upgrader struct { + upgraderType string + version string +} + +// UpgraderCounts returns the count for the different upgrader version and type combinations. +func (i *instanceMetricsPeriodic) UpgraderCounts() map[upgrader]int { + result := make(map[upgrader]int) + for _, metadata := range i.metadata { + // Do not count the instance if a type is not specified + if metadata.upgraderType == "" { + continue + } + + upgrader := upgrader{ + upgraderType: metadata.upgraderType, + version: metadata.upgraderVersion, + } + result[upgrader]++ + } + return result } // TotalEnrolledInUpgrades gets the total number of instances that have some upgrader defined. func (i *instanceMetricsPeriodic) TotalEnrolledInUpgrades() int { var total int - for _, upgraderVersion := range i.upgraderCounts { - for _, count := range upgraderVersion { - total += count + for _, metadata := range i.metadata { + if metadata.upgraderType != "" { + total++ } } - return total } // TotalInstances gets the total number of known instances. 
func (i *instanceMetricsPeriodic) TotalInstances() int { - return i.totalInstances + return len(i.metadata) } diff --git a/lib/auth/periodic_test.go b/lib/auth/periodic_test.go index 05e49a65fb16b..6800f3e90d0d7 100644 --- a/lib/auth/periodic_test.go +++ b/lib/auth/periodic_test.go @@ -29,13 +29,28 @@ import ( "github.com/gravitational/teleport/api/types" ) -func TestInstanceMetricsPeriodic(t *testing.T) { +func TestTotalInstances(t *testing.T) { + instances := []proto.UpstreamInventoryHello{ + {}, + {Version: "15.0.0"}, + {ServerID: "id"}, + {ExternalUpgrader: "kube"}, + {ExternalUpgraderVersion: "14.0.0"}, + } + + periodic := newInstanceMetricsPeriodic() + for _, instance := range instances { + periodic.VisitInstance(instance, proto.UpstreamInventoryAgentMetadata{}) + } + + require.Equal(t, 5, periodic.TotalInstances()) +} + +func TestTotalEnrolledInUpgrades(t *testing.T) { tts := []struct { - desc string - instances []proto.UpstreamInventoryHello - expectedCounts map[string]map[string]int - upgraders []string - expectEnrolled int + desc string + instances []proto.UpstreamInventoryHello + expected int }{ { desc: "mixed", @@ -47,25 +62,15 @@ func TestInstanceMetricsPeriodic(t *testing.T) { {ExternalUpgrader: "unit", ExternalUpgraderVersion: "14.0.0"}, {}, }, - upgraders: []string{ - "kube", - "kube", - "unit", - "", - "unit", - "", - }, - expectedCounts: map[string]map[string]int{ - "kube": { - "13.0.0": 1, - "14.0.0": 1, - }, - "unit": { - "13.0.0": 1, - "14.0.0": 1, - }, - }, - expectEnrolled: 4, + expected: 4, + }, + { + desc: "version omitted", + instances: []proto.UpstreamInventoryHello{ + {ExternalUpgrader: "kube"}, + {ExternalUpgrader: "unit"}, + }, + expected: 2, }, { desc: "all-unenrolled", @@ -73,11 +78,55 @@ func TestInstanceMetricsPeriodic(t *testing.T) { {}, {}, }, - upgraders: []string{ - "", - "", + expected: 0, + }, + { + desc: "none", + instances: []proto.UpstreamInventoryHello{}, + expected: 0, + }, + } + for _, tt := range tts { + t.Run(tt.desc, 
func(t *testing.T) { + periodic := newInstanceMetricsPeriodic() + for _, instance := range tt.instances { + periodic.VisitInstance(instance, proto.UpstreamInventoryAgentMetadata{}) + } + require.Equal(t, tt.expected, periodic.TotalEnrolledInUpgrades(), "tt=%q", tt.desc) + }) + } +} + +func TestUpgraderCounts(t *testing.T) { + tts := []struct { + desc string + instances []proto.UpstreamInventoryHello + expected map[upgrader]int + }{ + { + desc: "mixed", + instances: []proto.UpstreamInventoryHello{ + {ExternalUpgrader: "kube", ExternalUpgraderVersion: "13.0.0"}, + {ExternalUpgrader: "kube", ExternalUpgraderVersion: "14.0.0"}, + {ExternalUpgrader: "unit", ExternalUpgraderVersion: "13.0.0"}, + {}, + {ExternalUpgrader: "unit", ExternalUpgraderVersion: "14.0.0"}, + {}, + }, + expected: map[upgrader]int{ + {"kube", "13.0.0"}: 1, + {"kube", "14.0.0"}: 1, + {"unit", "13.0.0"}: 1, + {"unit", "14.0.0"}: 1, }, - expectedCounts: map[string]map[string]int{}, + }, + { + desc: "all-unenrolled", + instances: []proto.UpstreamInventoryHello{ + {}, + {}, + }, + expected: map[upgrader]int{}, }, { desc: "all-enrolled", @@ -87,21 +136,10 @@ func TestInstanceMetricsPeriodic(t *testing.T) { {ExternalUpgrader: "unit", ExternalUpgraderVersion: "13.0.0"}, {ExternalUpgrader: "unit", ExternalUpgraderVersion: "13.0.0"}, }, - upgraders: []string{ - "kube", - "kube", - "unit", - "unit", - }, - expectedCounts: map[string]map[string]int{ - "kube": { - "13.0.0": 2, - }, - "unit": { - "13.0.0": 2, - }, - }, - expectEnrolled: 4, + expected: map[upgrader]int{ + {"kube", "13.0.0"}: 2, + {"unit", "13.0.0"}: 2, + }, }, { desc: "nil version", @@ -109,23 +147,14 @@ func TestInstanceMetricsPeriodic(t *testing.T) { {ExternalUpgrader: "kube"}, {ExternalUpgrader: "unit"}, }, - upgraders: []string{ - "kube", - "unit", - }, - expectedCounts: map[string]map[string]int{ - "kube": { - "": 1, - }, - "unit": { - "": 1, - }, + expected: map[upgrader]int{ + {"kube", ""}: 1, + {"unit", ""}: 1, }, - expectEnrolled: 2, }, { 
- desc: "nothing", - expectedCounts: map[string]map[string]int{}, + desc: "nothing", + expected: map[upgrader]int{}, }, } @@ -134,14 +163,125 @@ func TestInstanceMetricsPeriodic(t *testing.T) { periodic := newInstanceMetricsPeriodic() for _, instance := range tt.instances { - periodic.VisitInstance(instance) + periodic.VisitInstance(instance, proto.UpstreamInventoryAgentMetadata{}) } + require.Equal(t, tt.expected, periodic.UpgraderCounts(), "tt=%q", tt.desc) + }) + } +} + +func TestInstallMethodCounts(t *testing.T) { + tts := []struct { + desc string + metadata []proto.UpstreamInventoryAgentMetadata + expected map[string]int + }{ + { + desc: "none", + metadata: []proto.UpstreamInventoryAgentMetadata{}, + expected: map[string]int{}, + }, + { + desc: "unknown install method", + metadata: []proto.UpstreamInventoryAgentMetadata{ + {}, + }, + expected: map[string]int{ + "unknown": 1, + }, + }, + { + desc: "various install methods", + metadata: []proto.UpstreamInventoryAgentMetadata{ + {InstallMethods: []string{"systemctl"}}, + {InstallMethods: []string{"systemctl"}}, + {InstallMethods: []string{"helm_kube_agent"}}, + {InstallMethods: []string{"dockerfile"}}, + }, + expected: map[string]int{ + "systemctl": 2, + "helm_kube_agent": 1, + "dockerfile": 1, + }, + }, + { + desc: "multiple install methods", + metadata: []proto.UpstreamInventoryAgentMetadata{ + {InstallMethods: []string{"dockerfile", "helm_kube_agent"}}, + {InstallMethods: []string{"helm_kube_agent", "dockerfile"}}, + }, + expected: map[string]int{ + "dockerfile,helm_kube_agent": 2, + }, + }, + } + for _, tt := range tts { + t.Run(tt.desc, func(t *testing.T) { + periodic := newInstanceMetricsPeriodic() - require.Equal(t, tt.expectedCounts, periodic.upgraderCounts, "tt=%q", tt.desc) + for _, metadata := range tt.metadata { + periodic.VisitInstance(proto.UpstreamInventoryHello{}, metadata) + } + require.Equal(t, tt.expected, periodic.InstallMethodCounts(), "tt=%q", tt.desc) + }) + } +} - require.Equal(t, 
tt.expectEnrolled, periodic.TotalEnrolledInUpgrades(), "tt=%q", tt.desc) +func TestRegisteredAgentsCounts(t *testing.T) { + tts := []struct { + desc string + instance []proto.UpstreamInventoryHello + expected map[registeredAgent]int + }{ + { + desc: "none", + instance: []proto.UpstreamInventoryHello{}, + expected: map[registeredAgent]int{}, + }, + { + desc: "automatic updates disabled", + instance: []proto.UpstreamInventoryHello{ + {Version: "13.0.0"}, + {Version: "14.0.0"}, + {Version: "15.0.0"}, + }, + expected: map[registeredAgent]int{ + {"13.0.0", "false"}: 1, + {"14.0.0", "false"}: 1, + {"15.0.0", "false"}: 1, + }, + }, + { + desc: "automatic updates enabled", + instance: []proto.UpstreamInventoryHello{ + {Version: "13.0.0", ExternalUpgrader: "unit"}, + {Version: "13.0.0", ExternalUpgrader: "kube"}, + {Version: "13.0.0"}, + {Version: "14.0.0", ExternalUpgrader: "unit"}, + {Version: "14.0.0", ExternalUpgrader: "kube"}, + {Version: "14.0.0"}, + {Version: "15.0.0", ExternalUpgrader: "unit"}, + {Version: "15.0.0", ExternalUpgrader: "kube"}, + {Version: "15.0.0"}, + }, + expected: map[registeredAgent]int{ + {"13.0.0", "true"}: 2, + {"13.0.0", "false"}: 1, + {"14.0.0", "true"}: 2, + {"14.0.0", "false"}: 1, + {"15.0.0", "true"}: 2, + {"15.0.0", "false"}: 1, + }, + }, + } + for _, tt := range tts { + t.Run(tt.desc, func(t *testing.T) { + periodic := newInstanceMetricsPeriodic() - require.Len(t, tt.upgraders, periodic.TotalInstances(), "tt=%q", tt.desc) + for _, instance := range tt.instance { + periodic.VisitInstance(instance, proto.UpstreamInventoryAgentMetadata{}) + } + require.Equal(t, tt.expected, periodic.RegisteredAgentsCount(), "tt=%q", tt.desc) }) } } diff --git a/metrics.go b/metrics.go index a59ef6746fe6c..f8a113128c001 100644 --- a/metrics.go +++ b/metrics.go @@ -323,6 +323,10 @@ const ( // Only a subset of services are monitored. 
See [lib/service.metricsServicesRunningMap] // Eg, discovery_service TagServiceName = "service_name" + + // TagAutomaticUpdates is a prometheus label to indicate whether the instance + // is enrolled in automatic updates. + TagAutomaticUpdates = "automatic_updates" ) const (