diff --git a/.github/actions/e2e/cleanup/action.yaml b/.github/actions/e2e/cleanup/action.yaml index c5d56a871294..bb1bb683fdc2 100644 --- a/.github/actions/e2e/cleanup/action.yaml +++ b/.github/actions/e2e/cleanup/action.yaml @@ -42,6 +42,20 @@ runs: break fi done + - name: delete-security-group + shell: bash + # For drift testing, we create a security group and need to clean it up here + # to avoid leaks if the tests are not fully completed + run: | + aws ec2 describe-security-groups \ + --filters Name=group-name,Values=security-group-drift Name=tag:karpenter.sh/discovery,Values=${{ inputs.cluster_name }} \ + --query "SecurityGroups[*].{ID:GroupId}" \ + --output text | + xargs \ + -n 1 \ + -r \ + aws ec2 delete-security-group \ + --group-id - name: delete-cluster shell: bash run: | diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 6a6048c11ce6..ef9841ff5369 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -125,6 +125,18 @@ runs: wellKnownPolicies: ebsCSIController: true EOF + - name: tag oidc provider of the cluster + if: always() + shell: bash + run: | + for arn in $(aws iam list-open-id-connect-providers --query "OpenIDConnectProviderList[*].{ARN:Arn}" --output text); do + tags=$(aws iam list-open-id-connect-provider-tags --open-id-connect-provider-arn $arn --output json) + if [[ $(echo $tags | jq -r '.Tags[] | select(.Key == "alpha.eksctl.io/cluster-name") | .Value') == "${{ inputs.cluster_name }}" ]]; then + aws iam tag-open-id-connect-provider --open-id-connect-provider-arn $arn \ + --tags Key=testing.karpenter.sh/type,Value=e2e Key=github.com/run-url,Value=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + break + fi + done - name: give KarpenterNodeRole permission to bootstrap shell: bash run: | diff --git a/.github/workflows/sweeper.yaml b/.github/workflows/sweeper.yaml index 
0ad247a5ccbd..24745257198b 100644 --- a/.github/workflows/sweeper.yaml +++ b/.github/workflows/sweeper.yaml @@ -9,6 +9,10 @@ permissions: jobs: sweeper: if: github.repository == 'aws/karpenter' || github.event_name == 'workflow_dispatch' + strategy: + fail-fast: false + matrix: + region: [us-east-2, us-west-2, eu-west-1] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -16,7 +20,7 @@ jobs: uses: aws-actions/configure-aws-credentials@v2 with: role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }} - aws-region: ${{ vars.AWS_REGION }} + aws-region: ${{ matrix.region }} - uses: actions/setup-go@v4 with: go-version-file: test/hack/cleanup/go.mod diff --git a/test/cloudformation/iam_cloudformation.yaml b/test/cloudformation/iam_cloudformation.yaml index 99c47daabc24..b48a396ea60a 100644 --- a/test/cloudformation/iam_cloudformation.yaml +++ b/test/cloudformation/iam_cloudformation.yaml @@ -98,6 +98,9 @@ Resources: Action: - iam:CreateOpenIDConnectProvider - iam:DeleteOpenIDConnectProvider + - iam:ListOpenIDConnectProviders + - iam:ListOpenIDConnectProviderTags + - iam:TagOpenIDConnectProvider - iam:GetOpenIDConnectProvider - iam:TagOpenIDConnectProvider Resource: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:oidc-provider/*" diff --git a/test/hack/cleanup/go.mod b/test/hack/cleanup/go.mod index 6913122a71d0..8ad6dd340716 100644 --- a/test/hack/cleanup/go.mod +++ b/test/hack/cleanup/go.mod @@ -3,12 +3,16 @@ module github.com/aws/karpenter/test/hack/cleanup go 1.20 require ( + github.com/aws/aws-sdk-go v1.44.309 github.com/aws/aws-sdk-go-v2/config v1.18.27 github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 + github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 github.com/samber/lo v1.38.1 + go.uber.org/multierr v1.11.0 go.uber.org/zap v1.24.0 + k8s.io/client-go v0.27.4 ) require ( @@ -23,8 +27,12 @@ require ( 
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect github.com/aws/smithy-go v1.13.5 // indirect + github.com/go-logr/logr v1.2.3 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect + golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect + k8s.io/apimachinery v0.27.4 // indirect + k8s.io/klog/v2 v2.90.1 // indirect + k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect ) diff --git a/test/hack/cleanup/go.sum b/test/hack/cleanup/go.sum index 838f7547d5f3..7c432e98aac4 100644 --- a/test/hack/cleanup/go.sum +++ b/test/hack/cleanup/go.sum @@ -1,3 +1,5 @@ +github.com/aws/aws-sdk-go v1.44.309 h1:IPJOFBzXekakxmEpDwd4RTKmmBR6LIAiXgNsM51bWbU= +github.com/aws/aws-sdk-go v1.44.309/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v1.18.1 h1:+tefE750oAb7ZQGzla6bLkOwfcQCEtC5y2RqoqCeqKo= github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA= @@ -18,6 +20,8 @@ github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 h1:PWGu2JhCb/XJlJ7SSFJq7 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2/go.mod h1:2KOZkkzMDZCo/aLzPhys06mHNkiU74u85aMJA3PLRvg= github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 h1:P4dyjm49F2kKws0FpouBC6fjVImACXKt752+CWa01lM= github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0/go.mod h1:tIctCeX9IbzsUTKHt53SVEcgyfxV2ElxJeEB+QUbc4M= +github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 h1:8hEpu60CWlrp7iEBUFRZhgPoX6+gadaGL1sD4LoRYS0= +github.com/aws/aws-sdk-go-v2/service/iam v1.21.0/go.mod h1:aQZ8BI+reeaY7RI/QQp7TKCSUHOesTdrzzylp3CW85c= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 h1:bkRyG4a929RCnpVSTvLM2j/T4ls015ZhhYApbmYs15s= 
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28/go.mod h1:jj7znCIg05jXlaGBlFMGP8+7UN3VtCkRBG2spnmRQkU= github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 h1:nneMBM2p79PGWBQovYO/6Xnc2ryRMw3InnDJq1FHkSY= @@ -32,20 +36,25 @@ github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLj github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo 
v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= @@ -53,9 +62,45 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.1.0/go.mod 
h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44= +golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +k8s.io/apimachinery v0.27.4 h1:CdxflD4AF61yewuid0fLl6bM4a3q04jWel0IlP+aYjs= +k8s.io/apimachinery v0.27.4/go.mod h1:XNfZ6xklnMCOGGFNqXG7bUrQCoR04dh/E7FprV6pb+E= +k8s.io/client-go v0.27.4 h1:vj2YTtSJ6J4KxaC88P4pMPEQECWMY8gqPqsTgUKzvjk= +k8s.io/client-go v0.27.4/go.mod h1:ragcly7lUlN0SRPk5/ZkGnDjPknzb37TICq07WhI6Xc= +k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= +k8s.io/klog/v2 v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +k8s.io/utils v0.0.0-20230209194617-a36077c30491 h1:r0BAOLElQnnFhE/ApUsg3iHdVYYPBjNSSOMowRZxxsY= +k8s.io/utils v0.0.0-20230209194617-a36077c30491/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= diff --git a/test/hack/cleanup/main.go b/test/hack/cleanup/main.go index 7890e34dda72..a61b5b223061 100644 --- a/test/hack/cleanup/main.go +++ b/test/hack/cleanup/main.go @@ -16,6 +16,7 @@ package main import ( "context" + "fmt" "time" "github.com/aws/aws-sdk-go-v2/config" @@ -25,8 +26,12 @@ import ( cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/iam" + "github.com/aws/aws-sdk-go/aws" "github.com/samber/lo" + "go.uber.org/multierr" "go.uber.org/zap" + "k8s.io/client-go/util/workqueue" ) const ( @@ -35,9 +40,16 @@ const ( karpenterProvisionerNameTag = "karpenter.sh/provisioner-name" 
karpenterLaunchTemplateTag = "karpenter.k8s.aws/cluster" + karpenterSecurityGroupTag = "karpenter.sh/discovery" githubRunURLTag = "github.com/run-url" ) +type CleanableResourceType interface { + Type() string + Get(context.Context, time.Time) ([]string, error) + Cleanup(context.Context, []string) ([]string, error) +} + func main() { ctx := context.Background() cfg := lo.Must(config.LoadDefaultConfig(ctx)) @@ -51,79 +63,46 @@ func main() { ec2Client := ec2.NewFromConfig(cfg) cloudFormationClient := cloudformation.NewFromConfig(cfg) cloudWatchClient := cloudwatch.NewFromConfig(cfg) + iamClient := iam.NewFromConfig(cfg) - // Terminate any old instances that were provisioned by Karpenter as part of testing - // We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively - ids := getOldInstances(ctx, ec2Client, expirationTime) - logger.With("ids", ids, "count", len(ids)).Infof("discovered test instances to delete") - if len(ids) > 0 { - if _, err := ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ - InstanceIds: ids, - }); err != nil { - logger.With("ids", ids, "count", len(ids)).Errorf("terminating test instances, %v", err) - } else { - logger.With("ids", ids, "count", len(ids)).Infof("terminated test instances") - if err = fireMetric(ctx, cloudWatchClient, "InstancesDeleted", float64(len(ids))); err != nil { - logger.With("name", "InstancesDeleted").Errorf("firing metric, %v", err) - } - } + resources := []CleanableResourceType{ + &instance{ec2Client: ec2Client}, + &securitygroup{ec2Client: ec2Client}, + &stack{cloudFormationClient: cloudFormationClient}, + &launchtemplate{ec2Client: ec2Client}, + &oidc{iamClient: iamClient}, } - - // Terminate any old stacks that were provisioned as part of testing - // We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively - names := getOldStacks(ctx, cloudFormationClient, expirationTime) - 
logger.With("names", names, "count", len(names)).Infof("discovered test stacks to delete") - deleted := 0 - for i := range names { - if _, err := cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{ - StackName: lo.ToPtr(names[i]), - }); err != nil { - logger.With("name", names[i]).Errorf("deleting test stack, %v", err) - } else { - logger.With("name", names[i]).Infof("deleted test stack") - deleted++ + workqueue.ParallelizeUntil(ctx, len(resources), len(resources), func(i int) { + ids, err := resources[i].Get(ctx, expirationTime) + if err != nil { + logger.With("type", resources[i].Type()).Errorf("%v", err) } - } - if err := fireMetric(ctx, cloudWatchClient, "StacksDeleted", float64(deleted)); err != nil { - logger.With("name", "StacksDeleted").Errorf("firing metric, %v", err) - } - - // Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing - names = getOldLaunchTemplates(ctx, ec2Client, expirationTime) - logger.With("names", names, "count", len(names)).Infof("discovered test launch templates to delete") - deleted = 0 - for i := range names { - if _, err := ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{ - LaunchTemplateName: lo.ToPtr(names[i]), - }); err != nil { - logger.With("name", names[i]).Errorf("deleting test launch template, %v", err) - } else { - logger.With("name", names[i]).Infof("deleted test launch template") - deleted++ + logger.With("type", resources[i].Type(), "ids", ids, "count", len(ids)).Infof("discovered resources") + if len(ids) > 0 { + cleaned, err := resources[i].Cleanup(ctx, ids) + if err != nil { + logger.With("type", resources[i].Type()).Errorf("%v", err) + } + if err = fireMetric(ctx, cloudWatchClient, fmt.Sprintf("%sDeleted", resources[i].Type()), float64(len(cleaned))); err != nil { + logger.With("type", resources[i].Type()).Errorf("%v", err) + } + logger.With("type", resources[i].Type(), "ids", cleaned, "count", len(cleaned)).Infof("deleted 
resources") } - } - if err := fireMetric(ctx, cloudWatchClient, "LaunchTemplatesDeleted", float64(deleted)); err != nil { - logger.With("name", "LaunchTemplatesDeleted").Errorf("firing metric, %v", err) - } + }) } -func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name string, value float64) error { - _, err := cloudWatchClient.PutMetricData(ctx, &cloudwatch.PutMetricDataInput{ - Namespace: lo.ToPtr(karpenterMetricNamespace), - MetricData: []cloudwatchtypes.MetricDatum{ - { - MetricName: lo.ToPtr(name), - Value: lo.ToPtr(value), - }, - }, - }) - return err +type instance struct { + ec2Client *ec2.Client } -func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (ids []string) { +func (i *instance) Type() string { + return "Instances" +} + +func (i *instance) Get(ctx context.Context, expirationTime time.Time) (ids []string, err error) { var nextToken *string for { - out := lo.Must(ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ + out, err := i.ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ Filters: []ec2types.Filter{ { Name: lo.ToPtr("instance-state-name"), @@ -135,7 +114,10 @@ func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime }, }, NextToken: nextToken, - })) + }) + if err != nil { + return ids, err + } for _, res := range out.Reservations { for _, instance := range res.Instances { @@ -152,15 +134,101 @@ func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime break } } - return ids + return ids, err +} + +// Terminate any old instances that were provisioned by Karpenter as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (i *instance) Cleanup(ctx context.Context, ids []string) ([]string, error) { + if _, err := i.ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ + InstanceIds: ids, + }); err != nil { + return nil, err + } + 
return ids, nil +} + +type securitygroup struct { + ec2Client *ec2.Client +} + +func (sg *securitygroup) Type() string { + return "SecurityGroup" +} + +func (sg *securitygroup) Get(ctx context.Context, expirationTime time.Time) (ids []string, err error) { + var nextToken *string + for { + out, err := sg.ec2Client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{ + Filters: []ec2types.Filter{ + { + Name: lo.ToPtr("group-name"), + Values: []string{"security-group-drift"}, + }, + }, + NextToken: nextToken, + }) + if err != nil { + return ids, err + } + + for _, sgroup := range out.SecurityGroups { + creationDate, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool { + return *tag.Key == "creation-date" + }) + if !found { + continue + } + time, err := time.Parse(time.RFC3339, *creationDate.Value) + if err != nil { + continue + } + if time.Before(expirationTime) { + ids = append(ids, lo.FromPtr(sgroup.GroupId)) + } + } + + nextToken = out.NextToken + if nextToken == nil { + break + } + } + return ids, err +} + +func (sg *securitygroup) Cleanup(ctx context.Context, ids []string) ([]string, error) { + deleted := []string{} + var errs error + for i := range ids { + _, err := sg.ec2Client.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{ + GroupId: aws.String(ids[i]), + }) + if err != nil { + errs = multierr.Append(errs, err) + } + deleted = append(deleted, ids[i]) + } + + return deleted, errs +} + +type stack struct { + cloudFormationClient *cloudformation.Client +} + +func (s *stack) Type() string { + return "CloudformationStacks" } -func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Client, expirationTime time.Time) (names []string) { +func (s *stack) Get(ctx context.Context, expirationTime time.Time) (names []string, err error) { var nextToken *string for { - out := lo.Must(cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ + out, err := s.cloudFormationClient.DescribeStacks(ctx, 
&cloudformation.DescribeStacksInput{ NextToken: nextToken, - })) + }) + if err != nil { + return names, err + } stacks := lo.Reject(out.Stacks, func(s cloudformationtypes.Stack, _ int) bool { return s.StackStatus == cloudformationtypes.StackStatusDeleteComplete || @@ -179,13 +247,38 @@ func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Clie break } } - return names + return names, err +} + +// Terminate any old stacks that were provisioned as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (s *stack) Cleanup(ctx context.Context, names []string) ([]string, error) { + var errs error + deleted := []string{} + for i := range names { + _, err := s.cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{ + StackName: lo.ToPtr(names[i]), + }) + if err != nil { + errs = multierr.Append(errs, err) + } + deleted = append(deleted, names[i]) + } + return deleted, errs +} + +type launchtemplate struct { + ec2Client *ec2.Client +} + +func (lt *launchtemplate) Type() string { + return "LaunchTemplates" } -func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (names []string) { +func (lt *launchtemplate) Get(ctx context.Context, expirationTime time.Time) (names []string, err error) { var nextToken *string for { - out := lo.Must(ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ + out, err := lt.ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ Filters: []ec2types.Filter{ { Name: lo.ToPtr("tag-key"), @@ -193,7 +286,10 @@ func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expiratio }, }, NextToken: nextToken, - })) + }) + if err != nil { + return names, err + } for _, launchTemplate := range out.LaunchTemplates { if lo.FromPtr(launchTemplate.CreateTime).Before(expirationTime) { @@ -206,5 +302,84 @@ func getOldLaunchTemplates(ctx context.Context, 
ec2Client *ec2.Client, expiratio break } } - return names + return names, err +} + +// Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (lt *launchtemplate) Cleanup(ctx context.Context, names []string) ([]string, error) { + var errs error + deleted := []string{} + for i := range names { + _, err := lt.ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{ + LaunchTemplateName: lo.ToPtr(names[i]), + }) + if err != nil { + errs = multierr.Append(errs, err) + } + deleted = append(deleted, names[i]) + } + return deleted, errs +} + +type oidc struct { + iamClient *iam.Client +} + +func (o *oidc) Type() string { + return "OpenIDConnectProvider" +} + +func (o *oidc) Get(ctx context.Context, expirationTime time.Time) (names []string, err error) { + out, err := o.iamClient.ListOpenIDConnectProviders(ctx, &iam.ListOpenIDConnectProvidersInput{}) + if err != nil { + return names, err + } + + errs := make([]error, len(out.OpenIDConnectProviderList)) + for i := range out.OpenIDConnectProviderList { + oicd, err := o.iamClient.GetOpenIDConnectProvider(ctx, &iam.GetOpenIDConnectProviderInput{ + OpenIDConnectProviderArn: out.OpenIDConnectProviderList[i].Arn, + }) + if err != nil { + errs[i] = err + } + + for _, t := range oicd.Tags { + if lo.FromPtr(t.Key) == githubRunURLTag && oicd.CreateDate.Before(expirationTime) { + names = append(names, lo.FromPtr(out.OpenIDConnectProviderList[i].Arn)) + } + } + } + + return names, multierr.Combine(errs...) 
+} + +// Terminate any old OIDC providers that are remaining as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (o *oidc) Cleanup(ctx context.Context, arns []string) ([]string, error) { + var errs error + deleted := []string{} + for i := range arns { + _, err := o.iamClient.DeleteOpenIDConnectProvider(ctx, &iam.DeleteOpenIDConnectProviderInput{ + OpenIDConnectProviderArn: lo.ToPtr(arns[i]), + }) + if err != nil { + errs = multierr.Append(errs, err) + } + } + return deleted, errs +} + +func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name string, value float64) error { + _, err := cloudWatchClient.PutMetricData(ctx, &cloudwatch.PutMetricDataInput{ + Namespace: lo.ToPtr(karpenterMetricNamespace), + MetricData: []cloudwatchtypes.MetricDatum{ + { + MetricName: lo.ToPtr(name), + Value: lo.ToPtr(value), + }, + }, + }) + return err } diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 6e2fe77c9ab0..f21fc7cdcef7 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -166,6 +166,10 @@ var _ = Describe("Drift", Label("AWS"), func() { Key: awssdk.String(test.DiscoveryLabel), Value: awssdk.String(settings.FromContext(env.Context).ClusterName), }, + { + Key: awssdk.String("creation-date"), + Value: awssdk.String(time.Now().Format(time.RFC3339)), + }, }, }, },