From 62ad3fe6c7bdfa23fe770e4f3c842d4252f49485 Mon Sep 17 00:00:00 2001
From: Marco Dinis
Date: Mon, 13 Jan 2025 19:57:38 +0000
Subject: [PATCH] Fix EKS Discover User Task reporting (#50989)

* Fix EKS Discover User Task reporting

The `clusterNames` slice and the key set of `clustersByName` must contain
the same clusters.

When there were two groups of EKS clusters, one with App Discovery enabled
and one with it disabled, the two collections diverged: `clusterNames`
accumulated all the EKS clusters, while `clustersByName` only held the EKS
clusters of the current processing group (either AppDiscovery=on or
AppDiscovery=off).

This meant that when `EnrollEKSClusters` returned an error, we looked up the
map even though that particular EKS cluster might not be configured for the
current processing group. In that case `clustersByName[r.EksClusterName]`
returned a nil value, and using it resulted in a panic. A minimal sketch of
this failure mode follows the patch.

* add test

* check if cluster exists in local map
---
 lib/srv/discovery/discovery_test.go           | 108 +++++++++++++++++-
 lib/srv/discovery/kube_integration_watcher.go |  10 +-
 2 files changed, 113 insertions(+), 5 deletions(-)

diff --git a/lib/srv/discovery/discovery_test.go b/lib/srv/discovery/discovery_test.go
index 2948e10cdb916..5e9d3d1acf7e6 100644
--- a/lib/srv/discovery/discovery_test.go
+++ b/lib/srv/discovery/discovery_test.go
@@ -322,6 +322,31 @@ func TestDiscoveryServer(t *testing.T) {
     )
     require.NoError(t, err)
 
+    discoveryConfigWithAndWithoutAppDiscoveryTestName := uuid.NewString()
+    discoveryConfigWithAndWithoutAppDiscovery, err := discoveryconfig.NewDiscoveryConfig(
+        header.Metadata{Name: discoveryConfigWithAndWithoutAppDiscoveryTestName},
+        discoveryconfig.Spec{
+            DiscoveryGroup: defaultDiscoveryGroup,
+            AWS: []types.AWSMatcher{
+                {
+                    Types:            []string{"eks"},
+                    Regions:          []string{"eu-west-2"},
+                    Tags:             map[string]utils.Strings{"EnableAppDiscovery": {"No"}},
+                    Integration:      "my-integration",
+                    KubeAppDiscovery: false,
+                },
+                {
+                    Types:            []string{"eks"},
+                    Regions:          []string{"eu-west-2"},
+                    Tags:             map[string]utils.Strings{"EnableAppDiscovery": {"Yes"}},
+                    Integration:      "my-integration",
+                    KubeAppDiscovery: true,
+                },
+            },
+        },
+    )
+    require.NoError(t, err)
+
     tcs := []struct {
         name string
         // presentInstances is a list of servers already present in teleport.
@@ -754,6 +779,74 @@ func TestDiscoveryServer(t *testing.T) {
                 require.Equal(t, defaultDiscoveryGroup, taskCluster.DiscoveryGroup)
             },
         },
+        {
+            name:              "multiple EKS clusters with different KubeAppDiscovery setting failed to autoenroll and user tasks are created",
+            presentInstances:  []types.Server{},
+            foundEC2Instances: []ec2types.Instance{},
+            ssm:               &mockSSMClient{},
+            eksClusters: []*ekstypes.Cluster{
+                {
+                    Name:   aws.String("cluster01"),
+                    Arn:    aws.String("arn:aws:eks:us-west-2:123456789012:cluster/cluster01"),
+                    Status: ekstypes.ClusterStatusActive,
+                    Tags: map[string]string{
+                        "EnableAppDiscovery": "Yes",
+                    },
+                },
+                {
+                    Name:   aws.String("cluster02"),
+                    Arn:    aws.String("arn:aws:eks:us-west-2:123456789012:cluster/cluster02"),
+                    Status: ekstypes.ClusterStatusActive,
+                    Tags: map[string]string{
+                        "EnableAppDiscovery": "No",
+                    },
+                },
+            },
+            eksEnroller: &mockEKSClusterEnroller{
+                resp: &integrationpb.EnrollEKSClustersResponse{
+                    Results: []*integrationpb.EnrollEKSClusterResult{
+                        {
+                            EksClusterName: "cluster01",
+                            Error:          "access endpoint is not reachable",
+                            IssueType:      "eks-cluster-unreachable",
+                        },
+                        {
+                            EksClusterName: "cluster02",
+                            Error:          "access endpoint is not reachable",
+                            IssueType:      "eks-cluster-unreachable",
+                        },
+                    },
+                },
+                err: nil,
+            },
+            emitter:                &mockEmitter{},
+            staticMatchers:         Matchers{},
+            discoveryConfig:        discoveryConfigWithAndWithoutAppDiscovery,
+            wantInstalledInstances: []string{},
+            userTasksDiscoverCheck: func(t require.TestingT, i1 interface{}, i2 ...interface{}) {
+                existingTasks, ok := i1.([]*usertasksv1.UserTask)
+                require.True(t, ok, "failed to get existing tasks: %T", i1)
+                require.Len(t, existingTasks, 2)
+                existingTask := existingTasks[0]
+                if existingTask.Spec.DiscoverEks.AppAutoDiscover == false {
+                    existingTask = existingTasks[1]
+                }
+
+                require.Equal(t, "OPEN", existingTask.GetSpec().State)
+                require.Equal(t, "my-integration", existingTask.GetSpec().Integration)
+                require.Equal(t, "eks-cluster-unreachable", existingTask.GetSpec().IssueType)
+                require.Equal(t, "123456789012", existingTask.GetSpec().GetDiscoverEks().GetAccountId())
+                require.Equal(t, "us-west-2", existingTask.GetSpec().GetDiscoverEks().GetRegion())
+
+                taskClusters := existingTask.GetSpec().GetDiscoverEks().Clusters
+                require.Contains(t, taskClusters, "cluster01")
+                taskCluster := taskClusters["cluster01"]
+
+                require.Equal(t, "cluster01", taskCluster.Name)
+                require.Equal(t, discoveryConfigWithAndWithoutAppDiscoveryTestName, taskCluster.DiscoveryConfig)
+                require.Equal(t, defaultDiscoveryGroup, taskCluster.DiscoveryGroup)
+            },
+        },
     }
 
     for _, tc := range tcs {
@@ -3528,8 +3621,19 @@ type mockEKSClusterEnroller struct {
     err  error
 }
 
-func (m *mockEKSClusterEnroller) EnrollEKSClusters(context.Context, *integrationpb.EnrollEKSClustersRequest, ...grpc.CallOption) (*integrationpb.EnrollEKSClustersResponse, error) {
-    return m.resp, m.err
+func (m *mockEKSClusterEnroller) EnrollEKSClusters(ctx context.Context, req *integrationpb.EnrollEKSClustersRequest, opt ...grpc.CallOption) (*integrationpb.EnrollEKSClustersResponse, error) {
+    ret := &integrationpb.EnrollEKSClustersResponse{
+        Results: []*integrationpb.EnrollEKSClusterResult{},
+    }
+    // Filter out non-requested clusters.
+    for _, clusterName := range req.EksClusterNames {
+        for _, mockClusterResult := range m.resp.Results {
+            if clusterName == mockClusterResult.EksClusterName {
+                ret.Results = append(ret.Results, mockClusterResult)
+            }
+        }
+    }
+    return ret, m.err
 }
 
 type combinedDiscoveryClient struct {
diff --git a/lib/srv/discovery/kube_integration_watcher.go b/lib/srv/discovery/kube_integration_watcher.go
index ffbecf6497359..88d89f258f8c4 100644
--- a/lib/srv/discovery/kube_integration_watcher.go
+++ b/lib/srv/discovery/kube_integration_watcher.go
@@ -21,6 +21,7 @@ package discovery
 import (
     "context"
     "fmt"
+    "maps"
     "slices"
     "strings"
     "sync"
@@ -243,14 +244,13 @@ func (s *Server) enrollEKSClusters(region, integration, discoveryConfigName stri
     }
     ctx, cancel := context.WithTimeout(s.ctx, time.Duration(len(clusters))*30*time.Second)
     defer cancel()
-    var clusterNames []string
 
     for _, kubeAppDiscovery := range []bool{true, false} {
         clustersByName := make(map[string]types.DiscoveredEKSCluster)
         for _, c := range batchedClusters[kubeAppDiscovery] {
-            clusterNames = append(clusterNames, c.GetAWSConfig().Name)
             clustersByName[c.GetAWSConfig().Name] = c
         }
+        clusterNames := slices.Collect(maps.Keys(clustersByName))
         if len(clusterNames) == 0 {
             continue
         }
@@ -283,7 +283,11 @@ func (s *Server) enrollEKSClusters(region, integration, discoveryConfigName stri
             s.Log.DebugContext(ctx, "EKS cluster already has installed kube agent", "cluster_name", r.EksClusterName)
         }
 
-        cluster := clustersByName[r.EksClusterName]
+        cluster, ok := clustersByName[r.EksClusterName]
+        if !ok {
+            s.Log.WarnContext(ctx, "Received an EnrollEKSCluster result for a cluster which was not part of the requested clusters", "cluster_name", r.EksClusterName, "clusters_install_request", clusterNames)
+            continue
+        }
         s.awsEKSTasks.addFailedEnrollment(
             awsEKSTaskKey{
                 integration: integration,
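
In Go, reading a missing key from a map returns the value type's zero value; for a map of interface values such as `map[string]types.DiscoveredEKSCluster`, that zero value is nil, and calling a method on it panics at runtime. Below is a minimal, self-contained sketch of the failure mode described in the commit message, using a hypothetical `Cluster` interface in place of Teleport's `types.DiscoveredEKSCluster`; it also shows the comma-ok guard the fix relies on:

package main

import "fmt"

// Cluster is a stand-in for types.DiscoveredEKSCluster (illustration only).
type Cluster interface {
    GetName() string
}

type eksCluster struct{ name string }

func (c *eksCluster) GetName() string { return c.name }

func main() {
    // Only the current processing group's clusters are in the map.
    clustersByName := map[string]Cluster{
        "cluster01": &eksCluster{name: "cluster01"},
    }

    // A plain lookup of a missing key yields a nil interface value, and
    // calling a method on it panics:
    //   c := clustersByName["cluster02"]
    //   c.GetName() // panic: invalid memory address or nil pointer dereference

    // The comma-ok idiom detects the missing key instead of panicking.
    if c, ok := clustersByName["cluster02"]; ok {
        fmt.Println("recording failed enrollment for", c.GetName())
    } else {
        fmt.Println("cluster02 was not part of the requested clusters; skipping")
    }
}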
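
The fix also stops accumulating `clusterNames` separately and instead derives it from the keys of `clustersByName`, so the slice and the map cannot drift apart again. A sketch of that pattern, assuming Go 1.23+ for the iterator-based `maps.Keys`/`slices.Collect` pair used in the patch:

package main

import (
    "fmt"
    "maps"
    "slices"
)

func main() {
    // Build the per-group map first...
    clustersByName := map[string]string{
        "cluster01": "arn:aws:eks:us-west-2:123456789012:cluster/cluster01",
        "cluster02": "arn:aws:eks:us-west-2:123456789012:cluster/cluster02",
    }

    // ...then derive the name slice from its keys: maps.Keys returns an
    // iterator, slices.Collect materializes it, and the two views agree
    // by construction.
    clusterNames := slices.Collect(maps.Keys(clustersByName))
    slices.Sort(clusterNames) // map iteration order is randomized; sort for stable output
    fmt.Println(clusterNames) // [cluster01 cluster02]
}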