diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index 2e9e2f357..c6adf1a29 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -9,6 +9,10 @@ on: jobs: integration-tests-multi-region: runs-on: ubuntu-latest-4-core + strategy: + fail-fast: false + matrix: + provider: [k3d, gcp] timeout-minutes: 90 permissions: contents: 'read' @@ -35,11 +39,11 @@ jobs: gcloud components install gke-gcloud-auth-plugin - name: Run tests (multi-region) env: - isNightly: true + PROVIDER: ${{ matrix.provider }} USE_GKE_GCLOUD_AUTH_PLUGIN: True run: | - set -e - make test/nightly-e2e/multi-region | tee test_output.log || true + set -euo pipefail + make test/e2e/multi-region | tee test_output.log - name: Archive test results if: ${{ always() }} uses: actions/upload-artifact@v4 @@ -123,6 +127,10 @@ jobs: "${{ secrets.SLACK_WEBHOOK_URL }}" integration-tests-single-region: runs-on: ubuntu-latest-4-core + strategy: + fail-fast: false + matrix: + provider: [k3d, gcp] timeout-minutes: 90 permissions: contents: read @@ -150,11 +158,11 @@ jobs: gcloud components install gke-gcloud-auth-plugin - name: Run tests (single-region) env: - isNightly: true + PROVIDER: ${{ matrix.provider }} USE_GKE_GCLOUD_AUTH_PLUGIN: True run: | - set -e - make test/nightly-e2e/single-region | tee test_output.log || true + set -euo pipefail + make test/e2e/single-region | tee test_output.log - name: Archive test results if: ${{ always() }} uses: actions/upload-artifact@v4 diff --git a/Makefile b/Makefile index 7bfe7ab35..d31e99c13 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ ifeq ($(UNAME_S),Linux) COCKROACH_BIN ?= https://binaries.cockroachdb.com/cockroach-v23.2.0.linux-amd64.tgz HELM_BIN ?= https://get.helm.sh/helm-v3.14.0-linux-amd64.tar.gz K3D_BIN ?= https://github.com/k3d-io/k3d/releases/download/v5.7.4/k3d-linux-amd64 + KIND_BIN ?= https://kind.sigs.k8s.io/dl/v0.29.0/kind-linux-amd64 KUBECTL_BIN ?= https://dl.k8s.io/release/v1.29.1/bin/linux/amd64/kubectl YQ_BIN ?= https://github.com/mikefarah/yq/releases/download/v4.31.2/yq_linux_amd64 JQ_BIN ?= https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 @@ -14,6 +15,7 @@ ifeq ($(UNAME_S),Darwin) COCKROACH_BIN ?= https://binaries.cockroachdb.com/cockroach-v23.2.0.darwin-10.9-amd64.tgz HELM_BIN ?= https://get.helm.sh/helm-v3.14.0-darwin-amd64.tar.gz K3D_BIN ?= https://github.com/k3d-io/k3d/releases/download/v5.7.4/k3d-darwin-arm64 + KIND_BIN ?= https://kind.sigs.k8s.io/dl/v0.29.0/kind-darwin-arm64 KUBECTL_BIN ?= https://dl.k8s.io/release/v1.29.1/bin/darwin/amd64/kubectl YQ_BIN ?= https://github.com/mikefarah/yq/releases/download/v4.31.2/yq_darwin_amd64 JQ_BIN ?= https://github.com/stedolan/jq/releases/download/jq-1.6/jq-osx-amd64 @@ -118,10 +120,10 @@ test/e2e/%: bin/cockroach bin/kubectl bin/helm build/self-signer test/cluster/up $(MAKE) test/cluster/down; \ exit $${EXIT_CODE:-0} -test/e2e/multi-region: bin/cockroach bin/kubectl bin/helm build/self-signer +test/e2e/multi-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d @PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInMultiRegion ./tests/e2e/operator/multiRegion/... || (echo "Multi region tests failed with exit code $$?" 
&& exit 1) -test/e2e/single-region: bin/cockroach bin/kubectl bin/helm build/self-signer +test/e2e/single-region: bin/cockroach bin/kubectl bin/helm build/self-signer bin/k3d @PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInSingleRegion ./tests/e2e/operator/singleRegion/... || (echo "Single region tests failed with exit code $$?" && exit 1) test/e2e/migrate: bin/cockroach bin/kubectl bin/helm bin/migration-helper build/self-signer test/cluster/up/3 @@ -135,13 +137,6 @@ test/single-cluster/up: bin/k3d test/multi-cluster/down: bin/k3d ./tests/k3d/dev-multi-cluster.sh down -test/nightly-e2e/single-region: bin/cockroach bin/kubectl bin/helm build/self-signer - @PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInSingleRegion ./tests/e2e/operator/singleRegion/... || (echo "Single region tests failed with exit code $$?" && exit 1) - -test/nightly-e2e/multi-region: bin/cockroach bin/kubectl bin/helm build/self-signer - @PATH="$(PWD)/bin:${PATH}" go test -timeout 60m -v -test.run TestOperatorInMultiRegion ./tests/e2e/operator/multiRegion/... || (echo "Multi region tests failed with exit code $$?" && exit 1) - - test/lint: bin/helm ## lint the helm chart @build/lint.sh && \ bin/helm lint cockroachdb && \ @@ -176,6 +171,11 @@ bin/k3d: ## install k3d @curl -Lo bin/k3d $(K3D_BIN) @chmod +x bin/k3d +bin/kind: ## install kind + @mkdir -p bin + @curl -Lo bin/kind $(KIND_BIN) + @chmod +x bin/kind + bin/kubectl: ## install kubectl @mkdir -p bin @curl -Lo bin/kubectl $(KUBECTL_BIN) diff --git a/cockroachdb-parent/charts/operator/templates/operator.yaml b/cockroachdb-parent/charts/operator/templates/operator.yaml index c45455915..e5a744821 100644 --- a/cockroachdb-parent/charts/operator/templates/operator.yaml +++ b/cockroachdb-parent/charts/operator/templates/operator.yaml @@ -254,6 +254,7 @@ rules: - admissionregistration.k8s.io resources: - validatingwebhookconfigurations + - mutatingwebhookconfigurations verbs: - create - apiGroups: @@ -265,6 +266,15 @@ rules: verbs: - get - patch + - apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + resourceNames: + - cockroach-mutating-webhook-config + verbs: + - get + - patch # The "create" verb cannot be qualified with resourceNames, so grant the # unqualified permission so that the operator can create new CRDs. But only # allow the operator to get and patch its own CRDs. diff --git a/cockroachdb-parent/charts/operator/values.yaml b/cockroachdb-parent/charts/operator/values.yaml index f28414bcb..e48b92fb5 100644 --- a/cockroachdb-parent/charts/operator/values.yaml +++ b/cockroachdb-parent/charts/operator/values.yaml @@ -4,13 +4,13 @@ # image captures the container image settings for Operator pods. image: # registry is the container registry where the image is stored. - registry: "us-docker.pkg.dev/releases-prod/self-hosted" + registry: "us-docker.pkg.dev/cockroach-cloud-images/development" # repository defines the image repository. - repository: "cockroachdb-operator@sha256" + repository: "cockroach-operator@sha256" # pullPolicy specifies the image pull policy. pullPolicy: IfNotPresent # tag is the image tag. - tag: "6f62639e9fee99d99b0387a9dccda84daa1a489b592b008f2f354ec57eae09ac" + tag: "72844b85354fd55b9a487abbd6e253b7d5081f65513c3813fde0ceb7d3ee2f70" # certificate defines the certificate settings for the Operator. certificate: # validForDays specifies the number of days the certificate is valid for. 
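The new CI matrix simply exports PROVIDER before calling the renamed make targets, so the same flow can be reproduced outside of GitHub Actions. A minimal sketch, assuming the Makefile has already fetched the bin/ tooling and, for the GCP leg, that gcloud credentials and the gke-gcloud-auth-plugin are configured as in the workflow:

# k3d leg of the matrix, run locally
PROVIDER=k3d make test/e2e/single-region

# GCP leg; mirrors the workflow env (requires an authenticated gcloud setup)
PROVIDER=gcp USE_GKE_GCLOUD_AUTH_PLUGIN=True make test/e2e/multi-region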
diff --git a/tests/e2e/migrate/helm_chart_to_cockroach_enterprise_operator_test.go b/tests/e2e/migrate/helm_chart_to_cockroach_enterprise_operator_test.go index 11b643f18..078a812c3 100644 --- a/tests/e2e/migrate/helm_chart_to_cockroach_enterprise_operator_test.go +++ b/tests/e2e/migrate/helm_chart_to_cockroach_enterprise_operator_test.go @@ -112,7 +112,7 @@ func (h *HelmChartToOperator) TestDefaultMigration(t *testing.T) { k8s.RunKubectl(t, kubectlOptions, "delete", "priorityclass", "crdb-critical") }() - operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions) + operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions, nil) defer func() { t.Log("Uninstall the cockroachdb enterprise operator") operator.UninstallCockroachDBEnterpriseOperator(t, kubectlOptions) @@ -221,7 +221,7 @@ func (h *HelmChartToOperator) TestCertManagerMigration(t *testing.T) { k8s.RunKubectl(t, kubectlOptions, "delete", "priorityclass", "crdb-critical") }() - operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions) + operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions, nil) defer func() { t.Log("Uninstall the cockroachdb enterprise operator") operator.UninstallCockroachDBEnterpriseOperator(t, kubectlOptions) @@ -308,7 +308,7 @@ func (h *HelmChartToOperator) TestPCRPrimaryMigration(t *testing.T) { k8s.RunKubectl(t, kubectlOptions, "delete", "priorityclass", "crdb-critical") }() - operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions) + operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions, nil) defer func() { t.Log("Uninstall the cockroachdb enterprise operator") operator.UninstallCockroachDBEnterpriseOperator(t, kubectlOptions) diff --git a/tests/e2e/migrate/public_operator_to_cockroach_enterprise_operator_test.go b/tests/e2e/migrate/public_operator_to_cockroach_enterprise_operator_test.go index 7a5ff0b32..598ac8295 100644 --- a/tests/e2e/migrate/public_operator_to_cockroach_enterprise_operator_test.go +++ b/tests/e2e/migrate/public_operator_to_cockroach_enterprise_operator_test.go @@ -127,7 +127,7 @@ func (o *PublicOperatorToCockroachEnterpriseOperator) TestDefaultMigration(t *te k8s.KubectlApply(t, kubectlOptions, filepath.Join(manifestsDirPath, "rbac.yaml")) t.Log("Install the cockroachdb enterprise operator") - operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions) + operator.InstallCockroachDBEnterpriseOperator(t, kubectlOptions, nil) defer func() { t.Log("Uninstall the cockroachdb enterprise operator") operator.UninstallCockroachDBEnterpriseOperator(t, kubectlOptions) diff --git a/tests/e2e/operator/infra/common.go b/tests/e2e/operator/infra/common.go index 1e38dc4c1..a2fda1f16 100644 --- a/tests/e2e/operator/infra/common.go +++ b/tests/e2e/operator/infra/common.go @@ -24,8 +24,11 @@ const ( // Common constants. 
const ( - defaultRetries = 30 - defaultRetryInterval = 10 * time.Second + defaultRetries = 30 + defaultRetryInterval = 10 * time.Second + // Load balancer specific retry settings (extended for AWS) + loadBalancerRetries = 60 // 10 minutes total + loadBalancerInterval = 10 * time.Second coreDNSDeploymentName = "coredns" coreDNSServiceName = "crl-core-dns" coreDNSNamespace = "kube-system" @@ -237,7 +240,7 @@ func finalizeCoreDNSDeployment(t *testing.T, kubectlOpts *k8s.KubectlOptions) er func WaitForCoreDNSServiceIPs(t *testing.T, kubectlOpts *k8s.KubectlOptions) ([]string, error) { var ips []string - _, err := retry.DoWithRetryE(t, "waiting for CoreDNS service IPs", defaultRetries, defaultRetryInterval, + _, err := retry.DoWithRetryE(t, "waiting for CoreDNS service IPs", loadBalancerRetries, loadBalancerInterval, func() (string, error) { svc, err := k8s.GetServiceE(t, kubectlOpts, coreDNSServiceName) if err != nil { diff --git a/tests/e2e/operator/infra/k3d.go b/tests/e2e/operator/infra/k3d.go deleted file mode 100644 index 691af2593..000000000 --- a/tests/e2e/operator/infra/k3d.go +++ /dev/null @@ -1,257 +0,0 @@ -package infra - -import ( - "context" - "fmt" - "os" - "strings" - "testing" - "time" - - "github.com/cockroachdb/errors" - "github.com/cockroachdb/helm-charts/tests/e2e/calico" - "github.com/cockroachdb/helm-charts/tests/e2e/coredns" - "github.com/cockroachdb/helm-charts/tests/e2e/operator" - "github.com/cockroachdb/helm-charts/tests/testutil" - "github.com/gruntwork-io/terratest/modules/k8s" - "github.com/gruntwork-io/terratest/modules/retry" - "github.com/gruntwork-io/terratest/modules/shell" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/config" -) - -// K3dRegion implements CloudProvider for K3D -type K3dRegion struct { - *operator.Region -} - -// SetUpInfra Creates K3d clusters, deploy calico CNI, deploy coredns in each cluster. -func (r *K3dRegion) SetUpInfra(t *testing.T) { - // If using existing infra return clients. - if r.ReusingInfra { - t.Logf("[%s] Reusing existing infrastructure", ProviderK3D) - return - } - - t.Logf("[%s] Setting up infrastructure", ProviderK3D) - - var clients = make(map[string]client.Client) - r.CorednsClusterOptions = make(map[string]coredns.CoreDNSClusterOption) - - // Get the current context name. - kubeConfig, rawConfig := r.GetCurrentContext(t) - - for i, cluster := range r.Clusters { - if _, ok := rawConfig.Contexts[cluster]; !ok { - // Create a cluster using shell command. - err := createK3DCluster(t, cluster, r.NodeCount) - require.NoError(t, err) - } - - cfg, err := config.GetConfigWithContext(cluster) - require.NoError(t, err) - k8sClient, err := client.New(cfg, client.Options{}) - require.NoError(t, err) - clients[cluster] = k8sClient - - // Add the apiextensions scheme to the client's scheme. - _ = apiextv1.AddToScheme(k8sClient.Scheme()) - - kubectlOptions := k8s.NewKubectlOptions(cluster, kubeConfig, coreDNSNamespace) - - // Install Calico. - calico.RegisterCalicoGVK(k8sClient.Scheme()) - objects := calico.K3DCalicoCNI(calico.K3dClusterBGPConfig{ - AddressAllocation: i, - }) - - for _, obj := range objects { - err = k8sClient.Create(context.Background(), obj) - require.NoError(t, err) - } - - // Create or update CoreDNS deployment. 
- deployment := coredns.CoreDNSDeployment(coreDNSReplicas) - // Apply deployment. - deploymentYaml := coredns.ToYAML(t, deployment) - err = k8s.KubectlApplyFromStringE(t, kubectlOptions, deploymentYaml) - require.NoError(t, err) - - // Wait for deployment to be ready. - _, err = retry.DoWithRetryE(t, "waiting for coredns deployment", - defaultRetries, defaultRetryInterval, - func() (string, error) { - return k8s.RunKubectlAndGetOutputE(t, kubectlOptions, - "wait", "--for=condition=Available", fmt.Sprintf("deployment/%s", coreDNSDeploymentName)) - }) - require.NoError(t, err) - - // Create a CoreDNS service. - service := coredns.CoreDNSService(nil, GetLoadBalancerAnnotations(ProviderK3D)) - serviceYaml := coredns.ToYAML(t, service) - // Apply service. - err = k8s.KubectlApplyFromStringE(t, kubectlOptions, serviceYaml) - require.NoError(t, err) - - // Now get the DNS IPs. - ips, err := WaitForCoreDNSServiceIPs(t, kubectlOptions) - require.NoError(t, err) - - r.CorednsClusterOptions[operator.CustomDomains[i]] = coredns.CoreDNSClusterOption{ - IPs: ips, - Namespace: r.Namespace[cluster], - Domain: operator.CustomDomains[i], - } - if !r.IsMultiRegion { - break - } - } - - // Update Coredns config. - for i, cluster := range r.Clusters { - // Create or update CoreDNS configmap. - kubectlOptions := k8s.NewKubectlOptions(cluster, kubeConfig, coreDNSNamespace) - cm := coredns.CoreDNSConfigMap(operator.CustomDomains[i], r.CorednsClusterOptions) - - // Apply the updated ConfigMap to Kubernetes. - cmYaml := coredns.ToYAML(t, cm) - err := k8s.KubectlApplyFromStringE(t, kubectlOptions, cmYaml) - require.NoError(t, err) - - // restart coredns pods. - err = k8s.RunKubectlE(t, kubectlOptions, "rollout", "restart", "deployment", coreDNSDeploymentName) - require.NoError(t, err) - if !r.IsMultiRegion { - r.Clients = clients - r.ReusingInfra = true - return - } - } - r.Clients = clients - r.ReusingInfra = true - - netConfig := calico.K3dCalicoBGPPeeringOptions{ - ClusterConfig: map[string]calico.K3dClusterBGPConfig{}, - } - - // Update network config for each region. - for i, region := range r.RegionCodes { - rawConfig.CurrentContext = r.Clusters[i] - kubectlOptions := k8s.NewKubectlOptions(r.Clusters[i], kubeConfig, coreDNSNamespace) - err := r.setupNetworking(t, context.TODO(), region, netConfig, kubectlOptions, i) - if err != nil { - t.Logf("[%s] Failed to setup networking for region %s: %v", ProviderK3D, region, err) - } - } - - objectsByRegion := calico.K3dCalicoBGPPeeringObjects(netConfig) - // Apply all the objects for each region on to the cluster. 
- for i, region := range r.RegionCodes { - ctl := clients[r.Clusters[i]] - for _, obj := range objectsByRegion[region] { - err := ctl.Create(context.Background(), obj) - require.NoError(t, err) - } - } -} - -// TeardownInfra cleans up all resources created by SetUpInfra -func (r *K3dRegion) TeardownInfra(t *testing.T) { - t.Logf("[%s] Tearing down K3D infrastructure", ProviderK3D) - - cmd := shell.Command{ - Command: "make", - Args: []string{ - "test/multi-cluster/down", - }, - WorkingDir: testutil.GetGitRoot(), - } - - output, err := shell.RunCommandAndGetOutputE(t, cmd) - if err != nil { - t.Logf("[%s] Warning: Failed to tear down K3D clusters: %v\nOutput: %s", - ProviderK3D, err, output) - } else { - t.Logf("[%s] Successfully tore down K3D clusters", ProviderK3D) - } -} - -// ScaleNodePool scales the node pool in a K3D cluster -func (r *K3dRegion) ScaleNodePool(t *testing.T, location string, nodeCount, index int) { - t.Logf("[%s] K3D scaling not implemented - K3D doesn't support scaling node pools", ProviderK3D) -} - -func (r *K3dRegion) CanScale() bool { - return false -} - -// setupNetworking ensures there is cross-k3d-cluster network connectivity and -// service discovery. -func (r *K3dRegion) setupNetworking(t *testing.T, ctx context.Context, region string, netConfig calico.K3dCalicoBGPPeeringOptions, options *k8s.KubectlOptions, clusterId int) error { - // Mark the master nodes as our bgp edge. These nodes will act as our bgp - // peers. - clusterConfig := netConfig.ClusterConfig[region] - clusterConfig.AddressAllocation = clusterId - - ctl := r.Clients[options.ContextName] - - // Get master nodes. - var nodes []corev1.Node - nodes, err := k8s.GetNodesByFilterE(t, options, metav1.ListOptions{ - LabelSelector: fmt.Sprintf("%s=%s", "node-role.kubernetes.io/master", "true"), - }) - if err != nil { - return errors.Wrapf(err, "list nodes in %s", region) - } - - // Patch server nodes with new annotation. - for _, node := range nodes { - patch := []byte(`{"metadata": {"annotations": {"projectcalico.org/labels": "{\"edge\":\"true\"}"}}}`) - if err := ctl.Patch(ctx, &node, client.RawPatch(types.StrategicMergePatchType, patch)); err != nil { - return errors.Wrapf(err, "annotate node for calico edge") - } - - time.Sleep(15 * time.Second) - - for _, nodeAddress := range node.Status.Addresses { - if nodeAddress.Type == corev1.NodeInternalIP { - clusterConfig.PeeringNodes = append(clusterConfig.PeeringNodes, nodeAddress.Address) - } - } - } - netConfig.ClusterConfig[region] = clusterConfig - return nil -} - -// createK3DCluster creates a new k3d cluster -// by calling the make command which will create -// a single k3d cluster. 
-func createK3DCluster(t *testing.T, clusterName string, nodeCount int) error { - t.Logf("[%s] Creating new K3D cluster: %s with %d nodes", ProviderK3D, clusterName, nodeCount) - cmd := shell.Command{ - Command: "make", - Args: []string{ - "test/single-cluster/up", - fmt.Sprintf("name=%s", strings.TrimLeft(clusterName, "k3d-")), - fmt.Sprintf("nodes=%d", nodeCount), - }, - WorkingDir: testutil.GetGitRoot(), - } - if version := os.Getenv("K3DVersion"); version != "" { - cmd.Args = append(cmd.Args, fmt.Sprintf("version=%s", version)) - } - - output, err := shell.RunCommandAndGetOutputE(t, cmd) - if err != nil { - t.Logf("[%s] Failed to create cluster: %v", ProviderK3D, err) - return fmt.Errorf("failed to create cluster: %v\nOutput: %s", err, output) - } - - t.Logf("[%s] Successfully created new K3D cluster: %s", ProviderK3D, clusterName) - return nil -} diff --git a/tests/e2e/operator/infra/local.go b/tests/e2e/operator/infra/local.go new file mode 100644 index 000000000..a4556887f --- /dev/null +++ b/tests/e2e/operator/infra/local.go @@ -0,0 +1,299 @@ +package infra + +import ( + "context" + "fmt" + "os" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/helm-charts/tests/e2e/calico" + "github.com/cockroachdb/helm-charts/tests/e2e/coredns" + "github.com/cockroachdb/helm-charts/tests/e2e/operator" + "github.com/cockroachdb/helm-charts/tests/testutil" + "github.com/gruntwork-io/terratest/modules/k8s" + "github.com/gruntwork-io/terratest/modules/retry" + "github.com/gruntwork-io/terratest/modules/shell" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" +) + +// LocalRegion implements CloudProvider for a local Kubernetes provider (K3d) +type LocalRegion struct { + *operator.Region + // "k3d" + ProviderType string +} + +// SetUpInfra Creates local k3d clusters, deploy CNI, deploy coredns in each cluster. +// +// Multi-region networking approach: +// - K3D: Calico CNI with BGP for cross-cluster pod routing, built-in ServiceLB for LBs. +// - CoreDNS instances forward requests for other cluster domains; endpoints can be +// ClusterIP/pod IPs. +func (r *LocalRegion) SetUpInfra(t *testing.T) { + // If using existing infra return clients. + if r.ReusingInfra { + t.Logf("[%s] Reusing existing infrastructure", r.ProviderType) + return + } + + t.Logf("[%s] Setting up infrastructure", r.ProviderType) + + var clients = make(map[string]client.Client) + r.CorednsClusterOptions = make(map[string]coredns.CoreDNSClusterOption) + + // Get the current context name. + kubeConfig, rawConfig := r.GetCurrentContext(t) + + for i, cluster := range r.Clusters { + if _, ok := rawConfig.Contexts[cluster]; !ok { + // Create a cluster using shell command. + err := r.createLocalCluster(t, cluster, r.NodeCount) + require.NoError(t, err) + } + + cfg, err := config.GetConfigWithContext(cluster) + require.NoError(t, err) + k8sClient, err := client.New(cfg, client.Options{}) + require.NoError(t, err) + clients[cluster] = k8sClient + + // Add the apiextensions scheme to the client's scheme. 
+ _ = apiextv1.AddToScheme(k8sClient.Scheme()) + + kubectlOptions := k8s.NewKubectlOptions(cluster, kubeConfig, coreDNSNamespace) + + // Install Calico for multi-cluster networking (k3d) + if r.ProviderType == ProviderK3D { + calico.RegisterCalicoGVK(k8sClient.Scheme()) + objects := calico.K3DCalicoCNI(calico.K3dClusterBGPConfig{ + AddressAllocation: i, + }) + + for _, obj := range objects { + err = k8sClient.Create(context.Background(), obj) + require.NoError(t, err) + } + } + + // Create or update CoreDNS deployment. + deployment := coredns.CoreDNSDeployment(coreDNSReplicas) + // Apply deployment. + deploymentYaml := coredns.ToYAML(t, deployment) + err = k8s.KubectlApplyFromStringE(t, kubectlOptions, deploymentYaml) + require.NoError(t, err) + + // Wait for deployment to be ready. + _, err = retry.DoWithRetryE(t, "waiting for coredns deployment", + defaultRetries, defaultRetryInterval, + func() (string, error) { + return k8s.RunKubectlAndGetOutputE(t, kubectlOptions, + "wait", "--for=condition=Available", fmt.Sprintf("deployment/%s", coreDNSDeploymentName)) + }) + require.NoError(t, err) + + // Create a CoreDNS service. + service := coredns.CoreDNSService(nil, GetLoadBalancerAnnotations(r.ProviderType)) + serviceYaml := coredns.ToYAML(t, service) + // Apply service. + err = k8s.KubectlApplyFromStringE(t, kubectlOptions, serviceYaml) + require.NoError(t, err) + + // Get the DNS IPs. + var ips []string + // Wait for LoadBalancer to get external IPs + ips, err = WaitForCoreDNSServiceIPs(t, kubectlOptions) + require.NoError(t, err) + + // Log the assigned IP for debugging + t.Logf("[%s] CoreDNS service assigned LoadBalancer IP(s): %v", r.ProviderType, ips) + + r.CorednsClusterOptions[operator.CustomDomains[i]] = coredns.CoreDNSClusterOption{ + IPs: ips, + Namespace: r.Namespace[cluster], + Domain: operator.CustomDomains[i], + } + if !r.IsMultiRegion { + break + } + } + + // Update Coredns config. + for i, cluster := range r.Clusters { + // Create or update CoreDNS configmap. + kubectlOptions := k8s.NewKubectlOptions(cluster, kubeConfig, coreDNSNamespace) + cm := coredns.CoreDNSConfigMap(operator.CustomDomains[i], r.CorednsClusterOptions) + + // Apply the updated ConfigMap to Kubernetes + cmYaml := coredns.ToYAML(t, cm) + err := k8s.KubectlApplyFromStringE(t, kubectlOptions, cmYaml) + require.NoError(t, err) + + // Scale down existing kube-dns to prevent conflicts + // Note: We don't scale down "coredns" deployment because that's our custom CoreDNS deployment + t.Logf("[%s] Scaling down default kube-dns in cluster %s", r.ProviderType, cluster) + _ = k8s.RunKubectlE(t, kubectlOptions, "scale", "deployment", "kube-dns-autoscaler", "--replicas=0") + _ = k8s.RunKubectlE(t, kubectlOptions, "scale", "deployment", "kube-dns", "--replicas=0") + + // Restart our custom coredns deployment to pick up the updated ConfigMap + err = k8s.RunKubectlE(t, kubectlOptions, "rollout", "restart", "deployment", coreDNSDeploymentName) + require.NoError(t, err) + + if !r.IsMultiRegion { + break + } + } + r.Clients = clients + r.ReusingInfra = true + + // BGP peering setup for multi-region Calico setups (K3D) + if r.IsMultiRegion && (r.ProviderType == ProviderK3D) { + netConfig := calico.K3dCalicoBGPPeeringOptions{ + ClusterConfig: map[string]calico.K3dClusterBGPConfig{}, + } + + // Update network config for each region. 
+ for i, region := range r.RegionCodes { + rawConfig.CurrentContext = r.Clusters[i] + kubectlOptions := k8s.NewKubectlOptions(r.Clusters[i], kubeConfig, coreDNSNamespace) + err := r.setupNetworking(t, context.TODO(), region, netConfig, kubectlOptions, i) + if err != nil { + t.Logf("[%s] Failed to setup networking for region %s: %v", r.ProviderType, region, err) + } + } + + objectsByRegion := calico.K3dCalicoBGPPeeringObjects(netConfig) + // Apply all the objects for each region on to the cluster. + for i, region := range r.RegionCodes { + ctl := clients[r.Clusters[i]] + for _, obj := range objectsByRegion[region] { + err := ctl.Create(context.Background(), obj) + require.NoError(t, err) + } + } + } else { + t.Logf("[%s] Skipping BGP peering setup (either single region or provider not using Calico)", r.ProviderType) + } +} + +// TeardownInfra cleans up all resources created by SetUpInfra +func (r *LocalRegion) TeardownInfra(t *testing.T) { + t.Logf("[%s] Tearing down %s infrastructure", r.ProviderType, r.ProviderType) + + var cmd shell.Command + switch r.ProviderType { + case ProviderK3D: + cmd = shell.Command{ + Command: "make", + Args: []string{ + "test/multi-cluster/down", + }, + WorkingDir: testutil.GetGitRoot(), + } + default: + t.Logf("[%s] Unknown provider type for teardown", r.ProviderType) + return + } + + output, err := shell.RunCommandAndGetOutputE(t, cmd) + if err != nil { + t.Logf("[%s] Warning: Failed to tear down %s clusters: %v\nOutput: %s", + r.ProviderType, r.ProviderType, err, output) + } else { + t.Logf("[%s] Successfully tore down %s clusters", r.ProviderType, r.ProviderType) + } +} + +// ScaleNodePool scales the node pool in a local cluster +func (r *LocalRegion) ScaleNodePool(t *testing.T, location string, nodeCount, index int) { + t.Logf("[%s] %s scaling not implemented - %s doesn't support scaling node pools", r.ProviderType, r.ProviderType, r.ProviderType) +} + +func (r *LocalRegion) CanScale() bool { + return false +} + +// setupNetworking ensures there is cross-cluster network connectivity and +// service discovery. +func (r *LocalRegion) setupNetworking(t *testing.T, ctx context.Context, region string, netConfig calico.K3dCalicoBGPPeeringOptions, options *k8s.KubectlOptions, clusterId int) error { + // Mark the control-plane/master nodes as our bgp edge. These nodes will act as our bgp + // peers. + clusterConfig := netConfig.ClusterConfig[region] + clusterConfig.AddressAllocation = clusterId + + ctl := r.Clients[options.ContextName] + + // Get control-plane/master nodes based on provider type. + var nodes []corev1.Node + var labelSelector string + switch r.ProviderType { + case ProviderK3D: + labelSelector = fmt.Sprintf("%s=%s", "node-role.kubernetes.io/master", "true") + default: + return fmt.Errorf("unknown provider type: %s", r.ProviderType) + } + + nodes, err := k8s.GetNodesByFilterE(t, options, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return errors.Wrapf(err, "list nodes in %s", region) + } + + // Patch control-plane/master nodes with new annotation. 
+ for _, node := range nodes { + patch := []byte(`{"metadata": {"annotations": {"projectcalico.org/labels": "{\"edge\":\"true\"}"}}}`) + if err := ctl.Patch(ctx, &node, client.RawPatch(types.StrategicMergePatchType, patch)); err != nil { + return errors.Wrapf(err, "annotate node for calico edge") + } + + time.Sleep(15 * time.Second) + + for _, nodeAddress := range node.Status.Addresses { + if nodeAddress.Type == corev1.NodeInternalIP { + clusterConfig.PeeringNodes = append(clusterConfig.PeeringNodes, nodeAddress.Address) + } + } + } + netConfig.ClusterConfig[region] = clusterConfig + return nil +} + +// createLocalCluster creates a new local cluster (k3d) +// by calling the appropriate shell command. +func (r *LocalRegion) createLocalCluster(t *testing.T, clusterName string, nodeCount int) error { + t.Logf("[%s] Creating new %s cluster: %s with %d nodes", r.ProviderType, r.ProviderType, clusterName, nodeCount) + + var cmd shell.Command + switch r.ProviderType { + case ProviderK3D: + cmd = shell.Command{ + Command: "make", + Args: []string{ + "test/single-cluster/up", + }, + WorkingDir: testutil.GetGitRoot(), + } + default: + return fmt.Errorf("unknown provider type: %s", r.ProviderType) + } + if version := os.Getenv("K3DVersion"); version != "" { + cmd.Args = append(cmd.Args, fmt.Sprintf("version=%s", version)) + } + + output, err := shell.RunCommandAndGetOutputE(t, cmd) + if err != nil { + t.Logf("[%s] Failed to create cluster: %v", r.ProviderType, err) + return fmt.Errorf("failed to create cluster: %v\nOutput: %s", err, output) + } + + t.Logf("[%s] Successfully created new %s cluster: %s", r.ProviderType, r.ProviderType, clusterName) + return nil +} diff --git a/tests/e2e/operator/infra/provider.go b/tests/e2e/operator/infra/provider.go index 33a6124c7..f45469c2b 100644 --- a/tests/e2e/operator/infra/provider.go +++ b/tests/e2e/operator/infra/provider.go @@ -28,7 +28,7 @@ type CloudProvider interface { func ProviderFactory(providerType string, region *operator.Region) CloudProvider { switch providerType { case ProviderK3D: - provider := K3dRegion{Region: region} + provider := LocalRegion{Region: region, ProviderType: ProviderK3D} provider.RegionCodes = GetRegionCodes(providerType) return &provider case ProviderGCP: diff --git a/tests/e2e/operator/multiRegion/cockroachdb_multi_region_e2e_test.go b/tests/e2e/operator/multiRegion/cockroachdb_multi_region_e2e_test.go index 2c6739b95..9322a21f3 100644 --- a/tests/e2e/operator/multiRegion/cockroachdb_multi_region_e2e_test.go +++ b/tests/e2e/operator/multiRegion/cockroachdb_multi_region_e2e_test.go @@ -18,9 +18,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// Environment variable name to check if running in nightly mode -const isNightlyEnvVar = "isNightly" - // Region codes for each provider are now centralized in infra.RegionCodes type multiRegion struct { operator.OperatorUseCases @@ -33,87 +30,90 @@ func newMultiRegion() *multiRegion { // TestOperatorInMultiRegion tests CockroachDB operator functionality across multiple regions func TestOperatorInMultiRegion(t *testing.T) { - var providers []string - if os.Getenv(isNightlyEnvVar) == "true" { - providers = []string{infra.ProviderGCP} + providerRegion := newMultiRegion() + // Fetch provider from env + var provider string + if p := strings.TrimSpace(strings.ToLower(os.Getenv("PROVIDER"))); p != "" { + switch p { + case "k3d": + provider = infra.ProviderK3D + providerRegion.UseCustomNodeLabels = true + case "gcp": + provider = infra.ProviderGCP + default: + t.Fatalf("Unsupported 
provider override: %s", p) + } } else { - providers = []string{infra.ProviderK3D} + provider = infra.ProviderK3D } - for _, provider := range providers { - // Create a new variable to avoid closure issues - provider := provider - t.Run(provider, func(t *testing.T) { - t.Parallel() - - // Create a provider-specific instance to avoid race conditions. - providerRegion := newMultiRegion() - providerRegion.Region = operator.Region{ - IsMultiRegion: true, - NodeCount: 3, - ReusingInfra: false, - } - providerRegion.Clients = make(map[string]client.Client) - providerRegion.Namespace = make(map[string]string) - - providerRegion.Provider = provider - for _, cluster := range operator.Clusters { - clusterName := fmt.Sprintf("%s-%s", providerRegion.Provider, cluster) - if providerRegion.Provider != infra.ProviderK3D { - clusterName = fmt.Sprintf("%s-%s", clusterName, strings.ToLower(random.UniqueId())) - } - providerRegion.Clusters = append(providerRegion.Clusters, clusterName) - } + t.Run(provider, func(t *testing.T) { + t.Parallel() - // Create and reuse the same provider instance for both setup and teardown. - cloudProvider := infra.ProviderFactory(providerRegion.Provider, &providerRegion.Region) - if cloudProvider == nil { - t.Fatalf("Unsupported provider: %s", provider) + providerRegion.Region = operator.Region{ + IsMultiRegion: true, + NodeCount: 3, + ReusingInfra: false, + } + providerRegion.Clients = make(map[string]client.Client) + providerRegion.Namespace = make(map[string]string) + providerRegion.Provider = provider + for _, cluster := range operator.Clusters { + clusterName := fmt.Sprintf("%s-%s", providerRegion.Provider, cluster) + if providerRegion.Provider != infra.ProviderK3D { + clusterName = fmt.Sprintf("%s-%s", clusterName, strings.ToLower(random.UniqueId())) } + providerRegion.Clusters = append(providerRegion.Clusters, clusterName) + } - // Use t.Cleanup for guaranteed cleanup even on test timeout/panic. - t.Cleanup(func() { - t.Logf("Starting infrastructure cleanup for provider: %s", provider) - cloudProvider.TeardownInfra(t) - t.Logf("Completed infrastructure cleanup for provider: %s", provider) - }) + // Create and reuse the same provider instance for both setup and teardown. + cloudProvider := infra.ProviderFactory(providerRegion.Provider, &providerRegion.Region) + if cloudProvider == nil { + t.Fatalf("Unsupported provider: %s", provider) + } - // Set up infrastructure for this provider once. - cloudProvider.SetUpInfra(t) + // Use t.Cleanup for guaranteed cleanup even on test timeout/panic. + t.Cleanup(func() { + t.Logf("Starting infrastructure cleanup for provider: %s", provider) + cloudProvider.TeardownInfra(t) + t.Logf("Completed infrastructure cleanup for provider: %s", provider) + }) - testCases := map[string]func(*testing.T){ - "TestHelmInstall": providerRegion.TestHelmInstall, - "TestHelmUpgrade": providerRegion.TestHelmUpgrade, - "TestClusterRollingRestart": providerRegion.TestClusterRollingRestart, - "TestKillingCockroachNode": providerRegion.TestKillingCockroachNode, - "TestClusterScaleUp": func(t *testing.T) { providerRegion.TestClusterScaleUp(t, cloudProvider) }, - } + // Set up infrastructure for this provider once. 
+ cloudProvider.SetUpInfra(t) + + testCases := map[string]func(*testing.T){ + "TestHelmInstall": providerRegion.TestHelmInstall, + "TestHelmUpgrade": providerRegion.TestHelmUpgrade, + "TestClusterRollingRestart": providerRegion.TestClusterRollingRestart, + "TestKillingCockroachNode": providerRegion.TestKillingCockroachNode, + "TestClusterScaleUp": func(t *testing.T) { providerRegion.TestClusterScaleUp(t, cloudProvider) }, + } - // Run tests sequentially within a provider. - var testFailed bool - for name, method := range testCases { - // Skip remaining tests if a previous test failed to save time - if testFailed { - t.Logf("Skipping test %s due to previous test failure", name) - continue - } - - t.Run(name, func(t *testing.T) { - // Add immediate cleanup trigger if this individual test fails - defer func() { - if t.Failed() { - testFailed = true - t.Logf("Test %s failed, triggering immediate infrastructure cleanup", name) - cloudProvider.TeardownInfra(t) - t.Logf("Infrastructure cleanup completed due to test failure") - } - }() - - method(t) - }) + // Run tests sequentially within a provider. + var testFailed bool + for name, method := range testCases { + // Skip remaining tests if a previous test failed to save time + if testFailed { + t.Logf("Skipping test %s due to previous test failure", name) + continue } - }) - } + + t.Run(name, func(t *testing.T) { + // Add immediate cleanup trigger if this individual test fails + defer func() { + if t.Failed() { + testFailed = true + t.Logf("Test %s failed, triggering immediate infrastructure cleanup", name) + cloudProvider.TeardownInfra(t) + t.Logf("Infrastructure cleanup completed due to test failure") + } + }() + + method(t) + }) + } + }) } // TestHelmInstall will install Operator and CockroachDB charts in multiple regions, diff --git a/tests/e2e/operator/region.go b/tests/e2e/operator/region.go index 9ccc5e29b..4be319813 100644 --- a/tests/e2e/operator/region.go +++ b/tests/e2e/operator/region.go @@ -81,6 +81,8 @@ type Region struct { VirtualClusterModePrimary bool VirtualClusterModeStandby bool IsOperatorInstalled bool + // UseCustomNodeLabels indicates to have custom k3d node labels + UseCustomNodeLabels bool } // InstallCharts Installs both Operator and CockroachDB charts by providing custom CA secret @@ -116,15 +118,15 @@ func (r *Region) InstallCharts(t *testing.T, cluster string, index int) { testutil.CreateBundle(t, kubectlOptions, testutil.CASecretName, testutil.CAConfigMapName) } else { // create CA Secret. - err := k8s.RunKubectlE(t, kubectlOptions, "create", "secret", "generic", customCASecret, "--from-file=ca.crt", - "--from-file=ca.key") + err := k8s.RunKubectlE(t, kubectlOptions, "create", "secret", "generic", customCASecret, fmt.Sprintf("--from-file=ca.crt=%s-ca.crt", r.Provider), + fmt.Sprintf("--from-file=ca.key=%s-ca.key", r.Provider)) require.NoError(t, err) } // Setup kubectl options for this cluster. kubectlOptions = k8s.NewKubectlOptions(cluster, kubeConfig, r.Namespace[cluster]) if !r.IsOperatorInstalled { - InstallCockroachDBEnterpriseOperator(t, kubectlOptions) + InstallCockroachDBEnterpriseOperator(t, kubectlOptions, map[string]string{"cloudRegion": r.RegionCodes[index]}) } if r.IsCertManager { @@ -143,6 +145,19 @@ func (r *Region) InstallCharts(t *testing.T, cluster string, index int) { "cockroachdb.tls.selfSigner.caSecret": customCASecret, }) } + + // when provider is k3d and custom labels are enabled, use localityMappings. 
+ if r.Provider == "k3d" && r.UseCustomNodeLabels { + crdbOp = PatchHelmValues(map[string]string{ + "cockroachdb.crdbCluster.localityMappings[0].nodeLabel": "topology.kubernetes.io/dc", + "cockroachdb.crdbCluster.localityMappings[0].localityLabel": "region", + "cockroachdb.crdbCluster.localityMappings[1].nodeLabel": "topology.kubernetes.io/rack", + "cockroachdb.crdbCluster.localityMappings[1].localityLabel": "zone", + "cockroachdb.crdbCluster.topologySpreadConstraints[0].topologyKey": "topology.kubernetes.io/rack", + "cockroachdb.crdbCluster.topologySpreadConstraints[0].maxSkew": "1", + "cockroachdb.crdbCluster.topologySpreadConstraints[0].whenUnsatisfiable": "DoNotSchedule", + }) + } if r.VirtualClusterModePrimary { crdbOp = PatchHelmValues(map[string]string{ "cockroachdb.clusterDomain": CustomDomains[index], @@ -366,8 +381,9 @@ func (r *Region) ValidateCRDBContainerResources(t *testing.T, kubectlOptions *k8 func (r *Region) CreateCACertificate(t *testing.T) error { // Create CA secret in all regions. cmd := shell.Command{ - Command: "cockroach", - Args: []string{"cert", "create-ca", "--certs-dir=.", "--ca-key=ca.key"}, + Command: "cockroach", + Args: []string{"cert", "create-ca", "--certs-dir=.", fmt.Sprintf("--ca-key=%s-ca.key", r.Provider), + "--allow-ca-key-reuse"}, WorkingDir: ".", Env: nil, Logger: nil, @@ -375,13 +391,29 @@ func (r *Region) CreateCACertificate(t *testing.T) error { certOutput, err := shell.RunCommandAndGetOutputE(t, cmd) t.Log(certOutput) + + // The cockroach cert create-ca command creates ca.crt by default + // We need to rename it to match the provider-specific naming + if err == nil { + // Rename the default ca.crt to a provider-specific name + renameCmd := shell.Command{ + Command: "mv", + Args: []string{"ca.crt", fmt.Sprintf("%s-ca.crt", r.Provider)}, + WorkingDir: ".", + } + _, renameErr := shell.RunCommandAndGetOutputE(t, renameCmd) + if renameErr != nil { + return renameErr + } + } + return err } func (r *Region) CleanUpCACertificate(t *testing.T) { cmd := shell.Command{ Command: "rm", - Args: []string{"-rf", "ca.crt", "ca.key"}, + Args: []string{"-rf", fmt.Sprintf("%s-ca.crt", r.Provider), fmt.Sprintf("%s-ca.key", r.Provider)}, WorkingDir: ".", } @@ -522,12 +554,13 @@ func (r Region) VerifyInitCommandInOperatorLogs(t *testing.T, kubectlOptions *k8 require.Contains(t, logs, expected, "operator logs did not contain expected init command") } -func InstallCockroachDBEnterpriseOperator(t *testing.T, kubectlOptions *k8s.KubectlOptions) { +func InstallCockroachDBEnterpriseOperator(t *testing.T, kubectlOptions *k8s.KubectlOptions, values map[string]string) { _, operatorChartPath := HelmChartPaths() operatorOpts := &helm.Options{ KubectlOptions: kubectlOptions, ExtraArgs: helmExtraArgs, + SetValues: values, } // Install Operator on the cluster. 
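With the extra values argument, InstallCockroachDBEnterpriseOperator now forwards the map to helm via SetValues, which is how the region suites pass cloudRegion per cluster. A rough helm-CLI equivalent of that plumbing is sketched below; the release name and region value are placeholders, only the chart path and the cloudRegion key come from this change:

# Hypothetical equivalent of SetValues{"cloudRegion": <region code>}
helm install crdb-operator ./cockroachdb-parent/charts/operator --set cloudRegion=us-east1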
diff --git a/tests/e2e/operator/singleRegion/cockroachdb_single_region_e2e_test.go b/tests/e2e/operator/singleRegion/cockroachdb_single_region_e2e_test.go index d32122a92..b758c06f9 100644 --- a/tests/e2e/operator/singleRegion/cockroachdb_single_region_e2e_test.go +++ b/tests/e2e/operator/singleRegion/cockroachdb_single_region_e2e_test.go @@ -18,9 +18,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// Environment variable name to check if running in nightly mode -const isNightlyEnvVar = "isNightly" - type singleRegion struct { operator.OperatorUseCases operator.Region @@ -30,87 +27,92 @@ func newSingleRegion() *singleRegion { return &singleRegion{} } func TestOperatorInSingleRegion(t *testing.T) { - var providers []string - if os.Getenv(isNightlyEnvVar) == "true" { - providers = []string{infra.ProviderGCP} + providerRegion := newSingleRegion() + // Fetch provider from env + var provider string + if p := strings.TrimSpace(strings.ToLower(os.Getenv("PROVIDER"))); p != "" { + switch p { + case "k3d": + provider = infra.ProviderK3D + providerRegion.UseCustomNodeLabels = true + case "gcp": + provider = infra.ProviderGCP + default: + t.Fatalf("Unsupported provider override: %s", p) + } } else { - providers = []string{infra.ProviderK3D} + provider = infra.ProviderK3D } - for _, provider := range providers { - provider := provider // Create a new variable to avoid closure issues - t.Run(provider, func(t *testing.T) { - // Run tests for different providers in parallel. - t.Parallel() - - // Create a provider-specific instance to avoid race conditions. - providerRegion := newSingleRegion() - providerRegion.Region = operator.Region{ - IsMultiRegion: false, - NodeCount: 3, - ReusingInfra: false, - } - providerRegion.Clients = make(map[string]client.Client) - providerRegion.Namespace = make(map[string]string) + t.Run(provider, func(t *testing.T) { + // Run tests for different providers in parallel. + t.Parallel() + + // Create a provider-specific instance to avoid race conditions. + providerRegion.Region = operator.Region{ + IsMultiRegion: false, + NodeCount: 3, + ReusingInfra: false, + } + providerRegion.Clients = make(map[string]client.Client) + providerRegion.Namespace = make(map[string]string) + providerRegion.Provider = provider + clusterName := fmt.Sprintf("%s-%s", providerRegion.Provider, operator.Clusters[0]) + if provider != infra.ProviderK3D { + clusterName = fmt.Sprintf("%s-%s", clusterName, strings.ToLower(random.UniqueId())) + } + providerRegion.Clusters = append(providerRegion.Clusters, clusterName) + + // Create and reuse the same provider instance for both setup and teardown. + cloudProvider := infra.ProviderFactory(providerRegion.Provider, &providerRegion.Region) + if cloudProvider == nil { + t.Fatalf("Unsupported provider: %s", provider) + } + + // Use t.Cleanup for guaranteed cleanup even on test timeout/panic + t.Cleanup(func() { + t.Logf("Starting infrastructure cleanup for provider: %s", provider) + cloudProvider.TeardownInfra(t) + t.Logf("Completed infrastructure cleanup for provider: %s", provider) + }) - providerRegion.Provider = provider - clusterName := fmt.Sprintf("%s-%s", providerRegion.Provider, operator.Clusters[0]) - if provider != infra.ProviderK3D { - clusterName = fmt.Sprintf("%s-%s", clusterName, strings.ToLower(random.UniqueId())) + // Set up infrastructure for this provider once. 
+ cloudProvider.SetUpInfra(t) + + testCases := map[string]func(*testing.T){ + "TestHelmInstall": providerRegion.TestHelmInstall, + "TestHelmInstallVirtualCluster": providerRegion.TestHelmInstallVirtualCluster, + "TestHelmUpgrade": providerRegion.TestHelmUpgrade, + "TestClusterRollingRestart": providerRegion.TestClusterRollingRestart, + "TestKillingCockroachNode": providerRegion.TestKillingCockroachNode, + "TestClusterScaleUp": func(t *testing.T) { providerRegion.TestClusterScaleUp(t, cloudProvider) }, + "TestInstallWithCertManager": providerRegion.TestInstallWithCertManager, + } + + // Run tests sequentially within a provider. + var testFailed bool + for name, method := range testCases { + // Skip remaining tests if a previous test failed to save time + if testFailed { + t.Logf("Skipping test %s due to previous test failure", name) + continue } - providerRegion.Clusters = append(providerRegion.Clusters, clusterName) - // Create and reuse the same provider instance for both setup and teardown. - cloudProvider := infra.ProviderFactory(providerRegion.Provider, &providerRegion.Region) - if cloudProvider == nil { - t.Fatalf("Unsupported provider: %s", provider) - } + t.Run(name, func(t *testing.T) { + // Add immediate cleanup trigger if this individual test fails + defer func() { + if t.Failed() { + testFailed = true + t.Logf("Test %s failed, triggering immediate infrastructure cleanup", name) + cloudProvider.TeardownInfra(t) + t.Logf("Infrastructure cleanup completed due to test failure") + } + }() - // Use t.Cleanup for guaranteed cleanup even on test timeout/panic - t.Cleanup(func() { - t.Logf("Starting infrastructure cleanup for provider: %s", provider) - cloudProvider.TeardownInfra(t) - t.Logf("Completed infrastructure cleanup for provider: %s", provider) + method(t) }) - - // Set up infrastructure for this provider once. - cloudProvider.SetUpInfra(t) - - testCases := map[string]func(*testing.T){ - "TestHelmInstall": providerRegion.TestHelmInstall, - "TestHelmInstallVirtualCluster": providerRegion.TestHelmInstallVirtualCluster, - "TestHelmUpgrade": providerRegion.TestHelmUpgrade, - "TestClusterRollingRestart": providerRegion.TestClusterRollingRestart, - "TestKillingCockroachNode": providerRegion.TestKillingCockroachNode, - "TestClusterScaleUp": func(t *testing.T) { providerRegion.TestClusterScaleUp(t, cloudProvider) }, - "TestInstallWithCertManager": providerRegion.TestInstallWithCertManager, - } - - // Run tests sequentially within a provider. 
- var testFailed bool - for name, method := range testCases { - // Skip remaining tests if a previous test failed to save time - if testFailed { - t.Logf("Skipping test %s due to previous test failure", name) - continue - } - - t.Run(name, func(t *testing.T) { - // Add immediate cleanup trigger if this individual test fails - defer func() { - if t.Failed() { - testFailed = true - t.Logf("Test %s failed, triggering immediate infrastructure cleanup", name) - cloudProvider.TeardownInfra(t) - t.Logf("Infrastructure cleanup completed due to test failure") - } - }() - - method(t) - }) - } - }) - } + } + }) } // TestHelmInstall will install Operator and CockroachDB charts @@ -501,5 +503,4 @@ func (r *singleRegion) TestInstallWithCertManager(t *testing.T) { } rawConfig.CurrentContext = cluster r.ValidateCRDB(t, cluster) - } diff --git a/tests/k3d/dev-multi-cluster.sh b/tests/k3d/dev-multi-cluster.sh index 6a377d070..00b3058d6 100755 --- a/tests/k3d/dev-multi-cluster.sh +++ b/tests/k3d/dev-multi-cluster.sh @@ -155,12 +155,17 @@ configure_node_labels() { # Label server node with region. server_node=$(kubectl --context "k3d-${cluster_name}" get nodes -l node-role.kubernetes.io/control-plane=true -o jsonpath='{.items[0].metadata.name}') - kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/region=${region}" # Label server node with a default zone, e.g. using the first zone from the list. # We want the server node also to be a schedulable node for cockroachdb pod. server_zone="${region}${AVAILABILITY_ZONES[0]}" - kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/zone=${server_zone}" + if [[ "${PROVIDER:-}" == "k3d" ]]; then + kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/dc=${region}" + kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/rack=${server_zone}" + else + kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/region=${region}" + kubectl --context "k3d-${cluster_name}" label node "$server_node" "topology.kubernetes.io/zone=${server_zone}" + fi # Remove the zone labeled for server node. available_agent_zones=("${AVAILABILITY_ZONES[@]:1}") @@ -174,9 +179,15 @@ configure_node_labels() { zone_suffix="${available_agent_zones[$(( (cluster_index * zones) + agent_index )) % ${#available_agent_zones[@]}]}" zone="${region}${zone_suffix}" - kubectl --context "k3d-${cluster_name}" label node "$node" \ - "topology.kubernetes.io/region=${region}" \ - "topology.kubernetes.io/zone=${zone}" + if [[ "${PROVIDER:-}" == "k3d" ]]; then + kubectl --context "k3d-${cluster_name}" label node "$node" \ + "topology.kubernetes.io/dc=${region}" \ + "topology.kubernetes.io/rack=${zone}" + else + kubectl --context "k3d-${cluster_name}" label node "$node" \ + "topology.kubernetes.io/region=${region}" \ + "topology.kubernetes.io/zone=${zone}" + fi agent_index=$((agent_index + 1)) done
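When PROVIDER=k3d, the script above labels nodes with topology.kubernetes.io/dc and topology.kubernetes.io/rack instead of the standard region/zone keys, and the chart's localityMappings are expected to translate those back into region/zone locality tiers. A quick spot check after cluster bring-up, reusing the script's own context naming (cluster name is whatever was passed to the script):

# List nodes with the custom k3d topology labels as columns
kubectl --context "k3d-${cluster_name}" get nodes -L topology.kubernetes.io/dc,topology.kubernetes.io/rack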