From 9db56990b2bdcd21f37b689636223dd83df8f46e Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:12:35 +0300 Subject: [PATCH 01/13] ci: add nested matrix test job with targeted cleanup - Add tests job that runs e2e tests in matrix - Use storage_name in RUN_ID for individual cleanup per matrix leg - Fix VirtualMachineClass to use v1alpha3 instead of deprecated v1alpha2 - Increase kubectl timeout to 30s for webhook validation Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 135 +++++++++++++++++++-- ci/dvp-e2e/Taskfile.yaml | 2 +- ci/dvp-e2e/charts/infra/templates/vmc.yaml | 2 +- 3 files changed, 130 insertions(+), 9 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 447cc00b76..cceeb71368 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -17,12 +17,11 @@ name: E2E Storage Matrix on: push: branches: - - chore/ci/e2e-matrix-skeleton + - ci-nested-matrix-test-run pull_request: types: [opened, reopened, synchronize, labeled, unlabeled] branches: - - main - - chore/ci/e2e-matrix-skeleton + - ci-e2e-nested-sds schedule: - cron: "30 2 * * *" workflow_dispatch: @@ -58,6 +57,12 @@ jobs: IMAGE_STORAGE_CLASS: ${{ matrix.image_storage_class }} ATTACH_DISK_SIZE: ${{ matrix.attach_disk_size }} DATA_DISK_COUNT: ${{ matrix.data_disk_count }} + outputs: + run_id: ${{ steps.setup-output.outputs.run_id }} + run_artifact: ${{ steps.setup-output.outputs.run_artifact }} + profile: ${{ steps.setup-output.outputs.profile }} + storage_name: ${{ steps.setup-output.outputs.storage_name }} + storage_class: ${{ steps.setup-output.outputs.storage_class }} steps: - uses: actions/checkout@v4 @@ -109,9 +114,125 @@ jobs: run: | task ci:setup-nested-env + - name: Export setup outputs + id: setup-output + run: | + set -euo pipefail + echo "run_id=${RUN_ID}" >> "$GITHUB_OUTPUT" + echo "run_artifact=nested-run-${RUN_ID}" >> "$GITHUB_OUTPUT" + echo "profile=${PROFILE}" >> "$GITHUB_OUTPUT" + echo "storage_name=${{ matrix.storage_name }}" >> "$GITHUB_OUTPUT" + echo "storage_class=${STORAGE_CLASS}" >> "$GITHUB_OUTPUT" + + - name: Upload nested artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.setup-output.outputs.run_artifact }} + path: | + ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }} + if-no-files-found: error + + tests: + name: Run E2E (${{ matrix.profile }} / ${{ matrix.storage_class }}) + runs-on: ubuntu-latest + needs: setup + timeout-minutes: 300 + strategy: + matrix: + include: + - profile: sds-replicated-volume + storage_name: sds + storage_class: linstor-thin-r2 + parent_storage_class: linstor-thin-r1-immediate + image_storage_class: linstor-thin-r1-immediate + attach_disk_size: 10Gi + data_disk_count: 2 + env: + GO_VERSION: "1.24.6" + TIMEOUT: 4h + RUN_ID: nightly-nested-e2e-${{ matrix.storage_name }}-${{ github.run_number }} + steps: + - uses: actions/checkout@v4 + + - name: Set up Go ${{ env.GO_VERSION }} + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Install Task + uses: arduino/setup-task@v2 + with: + version: 3.x + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: "latest" + + - name: Setup d8 CLI + uses: werf/trdl/actions/setup-app@v0.12.2 + with: + repo: d8 + url: https://deckhouse.ru/downloads/deckhouse-cli-trdl/ + root-version: 1 + root-sha512: 343bd5f0d8811254e5f0b6fe2923729df3515cb11372dc3899c70df172a4e54c8a596a73d67ae790466a0491 + group: 0 + channel: stable + + - name: Install ginkgo + working-directory: test/e2e + run: | + go install tool + + - name: Download dependencies + working-directory: test/e2e + run: | + go mod download + + - name: Download nested run artifacts + uses: actions/download-artifact@v4 + with: + name: nested-run-${{ env.RUN_ID }} + path: ci/dvp-e2e/tmp/runs + + - name: Configure kubeconfig env + run: | + set -euo pipefail + NESTED_KUBECONFIG="${GITHUB_WORKSPACE}/ci/dvp-e2e/tmp/runs/${RUN_ID}/nested/kubeconfig" + if [ ! -s "$NESTED_KUBECONFIG" ]; then + echo "[ERR] Nested kubeconfig not found at $NESTED_KUBECONFIG" >&2 + exit 1 + fi + echo "KUBECONFIG=$NESTED_KUBECONFIG" >> "$GITHUB_ENV" + + - name: Wait for nested API server + run: | + set -euo pipefail + for i in $(seq 1 30); do + if kubectl --request-timeout=15s get nodes >/dev/null 2>&1; then + echo "[INFO] Nested cluster is reachable" + exit 0 + fi + echo "[INFO] Waiting for nested API... ($i/30)" + sleep 10 + done + echo "[ERR] Nested API server did not become ready in time" >&2 + exit 1 + + - name: Run E2E tests + working-directory: test/e2e + env: + STORAGE_CLASS_NAME: ${{ matrix.storage_class }} + run: | + task run:ci -v + cleanup: name: Cleanup (${{ matrix.profile }}) - needs: setup + needs: + - setup + - tests if: always() runs-on: ubuntu-latest strategy: @@ -124,8 +245,6 @@ jobs: image_storage_class: linstor-thin-r1-immediate attach_disk_size: 10Gi data_disk_count: 2 - env: - CLEANUP_PREFIX: ${{ vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }} steps: - uses: actions/checkout@v4 @@ -142,7 +261,9 @@ jobs: - name: Cleanup test namespaces working-directory: ci/dvp-e2e run: | + # Cleanup specific RUN_ID for this matrix leg + RUN_ID="nightly-nested-e2e-${{ matrix.storage_name }}-${{ github.run_number }}" task cleanup:namespaces \ - PREFIX="${CLEANUP_PREFIX}" \ + PREFIX="${RUN_ID}" \ API_URL="${E2E_K8S_URL}" \ SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}" diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index e4ff030c00..cdc8fd63fd 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -238,7 +238,7 @@ tasks: env: KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' cmds: - - kubectl apply --server-side --force-conflicts --validate=false -f {{ .TMP_DIR }}/infra.yaml || kubectl apply --validate=false -f {{ .TMP_DIR }}/infra.yaml + - kubectl apply --server-side --force-conflicts --validate=false --request-timeout=30s -f {{ .TMP_DIR }}/infra.yaml || kubectl apply --validate=false --request-timeout=30s -f {{ .TMP_DIR }}/infra.yaml - | # Persist SSH keypair in parent cluster namespace for diagnostics tools (nested_diag.sh) # Secret contains private and public parts; will be removed with namespace cleanup diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml index 39330ced39..9a8d7deaf8 100644 --- a/ci/dvp-e2e/charts/infra/templates/vmc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -1,4 +1,4 @@ -apiVersion: virtualization.deckhouse.io/v1alpha2 +apiVersion: virtualization.deckhouse.io/v1alpha3 kind: VirtualMachineClass metadata: name: "{{ .Values.namespace }}-cpu" From 74f2a915615f89244eb20b5f999914402b48b993 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:17:46 +0300 Subject: [PATCH 02/13] ci: fix workflow triggers and revert unnecessary changes - Remove push trigger to avoid duplicate workflow runs - Revert VirtualMachineClass to v1alpha2 (v1alpha3 requires conversion webhook) - Revert Taskfile timeout changes (webhook timeout is server-side) Signed-off-by: Anton Yachmenev --- ci/dvp-e2e/Taskfile.yaml | 2 +- ci/dvp-e2e/charts/infra/templates/vmc.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index cdc8fd63fd..e4ff030c00 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -238,7 +238,7 @@ tasks: env: KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' cmds: - - kubectl apply --server-side --force-conflicts --validate=false --request-timeout=30s -f {{ .TMP_DIR }}/infra.yaml || kubectl apply --validate=false --request-timeout=30s -f {{ .TMP_DIR }}/infra.yaml + - kubectl apply --server-side --force-conflicts --validate=false -f {{ .TMP_DIR }}/infra.yaml || kubectl apply --validate=false -f {{ .TMP_DIR }}/infra.yaml - | # Persist SSH keypair in parent cluster namespace for diagnostics tools (nested_diag.sh) # Secret contains private and public parts; will be removed with namespace cleanup diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml index 9a8d7deaf8..39330ced39 100644 --- a/ci/dvp-e2e/charts/infra/templates/vmc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -1,4 +1,4 @@ -apiVersion: virtualization.deckhouse.io/v1alpha3 +apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualMachineClass metadata: name: "{{ .Values.namespace }}-cpu" From 1c60f6b581e358127a6b3b73fa136f86f9f9b10d Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:19:09 +0300 Subject: [PATCH 03/13] ci: remove pull_request trigger to avoid duplicate workflow runs Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index cceeb71368..f0aae8a258 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -18,10 +18,6 @@ on: push: branches: - ci-nested-matrix-test-run - pull_request: - types: [opened, reopened, synchronize, labeled, unlabeled] - branches: - - ci-e2e-nested-sds schedule: - cron: "30 2 * * *" workflow_dispatch: From d0422aa62ff729f195c9f07d4d175b41c6aa4629 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:25:40 +0300 Subject: [PATCH 04/13] ci: fix outputs order and artifact upload - Move Export setup outputs before task execution to ensure outputs are set - Change if-no-files-found to ignore to avoid errors when task fails Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index f0aae8a258..4bf80f32bb 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -94,6 +94,16 @@ jobs: curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 chmod +x /usr/local/bin/yq + - name: Export setup outputs + id: setup-output + run: | + set -euo pipefail + echo "run_id=${RUN_ID}" >> "$GITHUB_OUTPUT" + echo "run_artifact=nested-run-${RUN_ID}" >> "$GITHUB_OUTPUT" + echo "profile=${PROFILE}" >> "$GITHUB_OUTPUT" + echo "storage_name=${{ matrix.storage_name }}" >> "$GITHUB_OUTPUT" + echo "storage_class=${STORAGE_CLASS}" >> "$GITHUB_OUTPUT" + - name: Setup nested environment env: RUN_ID: ${{ env.RUN_ID }} @@ -110,16 +120,6 @@ jobs: run: | task ci:setup-nested-env - - name: Export setup outputs - id: setup-output - run: | - set -euo pipefail - echo "run_id=${RUN_ID}" >> "$GITHUB_OUTPUT" - echo "run_artifact=nested-run-${RUN_ID}" >> "$GITHUB_OUTPUT" - echo "profile=${PROFILE}" >> "$GITHUB_OUTPUT" - echo "storage_name=${{ matrix.storage_name }}" >> "$GITHUB_OUTPUT" - echo "storage_class=${STORAGE_CLASS}" >> "$GITHUB_OUTPUT" - - name: Upload nested artifacts if: always() uses: actions/upload-artifact@v4 @@ -127,7 +127,7 @@ jobs: name: ${{ steps.setup-output.outputs.run_artifact }} path: | ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }} - if-no-files-found: error + if-no-files-found: ignore tests: name: Run E2E (${{ matrix.profile }} / ${{ matrix.storage_class }}) From af6980d430b4439fe96802b815cf51ba168d4312 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:46:06 +0300 Subject: [PATCH 05/13] ci: try v1alpha3 for VirtualMachineClass to avoid webhook timeout Signed-off-by: Anton Yachmenev --- ci/dvp-e2e/charts/infra/templates/vmc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml index 39330ced39..9a8d7deaf8 100644 --- a/ci/dvp-e2e/charts/infra/templates/vmc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -1,4 +1,4 @@ -apiVersion: virtualization.deckhouse.io/v1alpha2 +apiVersion: virtualization.deckhouse.io/v1alpha3 kind: VirtualMachineClass metadata: name: "{{ .Values.namespace }}-cpu" From f13b3d431af761dbac57a07f8856de14397bc8eb Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 15:51:42 +0300 Subject: [PATCH 06/13] Revert "ci: try v1alpha3 for VirtualMachineClass to avoid webhook timeout" This reverts commit af6980d430b4439fe96802b815cf51ba168d4312. --- ci/dvp-e2e/charts/infra/templates/vmc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml index 9a8d7deaf8..39330ced39 100644 --- a/ci/dvp-e2e/charts/infra/templates/vmc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -1,4 +1,4 @@ -apiVersion: virtualization.deckhouse.io/v1alpha3 +apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualMachineClass metadata: name: "{{ .Values.namespace }}-cpu" From c183633726053ed367e16c85b1da9b92930426f9 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 19:58:24 +0300 Subject: [PATCH 07/13] ci(e2e): add run-id label to infra resources for cleanup Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 40 +++---------------- ci/dvp-e2e/Taskfile.yaml | 29 ++++++++------ .../charts/infra/templates/ingress.yaml | 4 ++ .../infra/templates/jump-host/deploy.yaml | 3 ++ .../charts/infra/templates/jump-host/svc.yaml | 2 + ci/dvp-e2e/charts/infra/templates/ns.yaml | 1 + .../charts/infra/templates/rbac/rbac.yaml | 8 ++++ ci/dvp-e2e/charts/infra/templates/vi.yaml | 2 + ci/dvp-e2e/charts/infra/templates/vmc.yaml | 2 + 9 files changed, 44 insertions(+), 47 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 4bf80f32bb..de1b1ac560 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -79,14 +79,7 @@ jobs: version: "latest" - name: Setup d8 - uses: werf/trdl/actions/setup-app@v0.12.2 - with: - repo: d8 - url: https://deckhouse.ru/downloads/deckhouse-cli-trdl/ - root-version: 1 - root-sha512: 343bd5f0d8811254e5f0b6fe292372a7b7eda08d276ff255229200f84e58a8151ab2729df3515cb11372dc3899c70df172a4e54c8a596a73d67ae790466a0491 - group: 0 - channel: stable + uses: ./.github/actions/install-d8 - name: Install yq run: | @@ -147,7 +140,7 @@ jobs: env: GO_VERSION: "1.24.6" TIMEOUT: 4h - RUN_ID: nightly-nested-e2e-${{ matrix.storage_name }}-${{ github.run_number }} + RUN_ID: ${{ needs.setup.outputs.run_id }} steps: - uses: actions/checkout@v4 @@ -167,20 +160,13 @@ jobs: with: version: "latest" - - name: Setup d8 CLI - uses: werf/trdl/actions/setup-app@v0.12.2 - with: - repo: d8 - url: https://deckhouse.ru/downloads/deckhouse-cli-trdl/ - root-version: 1 - root-sha512: 343bd5f0d8811254e5f0b6fe2923729df3515cb11372dc3899c70df172a4e54c8a596a73d67ae790466a0491 - group: 0 - channel: stable + - name: Setup d8 + uses: ./.github/actions/install-d8 - name: Install ginkgo working-directory: test/e2e run: | - go install tool + go install github.com/onsi/ginkgo/v2/ginkgo@latest - name: Download dependencies working-directory: test/e2e @@ -191,7 +177,7 @@ jobs: uses: actions/download-artifact@v4 with: name: nested-run-${{ env.RUN_ID }} - path: ci/dvp-e2e/tmp/runs + path: . - name: Configure kubeconfig env run: | @@ -203,20 +189,6 @@ jobs: fi echo "KUBECONFIG=$NESTED_KUBECONFIG" >> "$GITHUB_ENV" - - name: Wait for nested API server - run: | - set -euo pipefail - for i in $(seq 1 30); do - if kubectl --request-timeout=15s get nodes >/dev/null 2>&1; then - echo "[INFO] Nested cluster is reachable" - exit 0 - fi - echo "[INFO] Waiting for nested API... ($i/30)" - sleep 10 - done - echo "[ERR] Nested API server did not become ready in time" >&2 - exit 1 - - name: Run E2E tests working-directory: test/e2e env: diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index e4ff030c00..1f411976d6 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -127,6 +127,7 @@ tasks: ATTACH_DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" NAMESPACE: "{{ .RUN_ID }}" + NESTED_DIR: "{{ .RUN_DIR }}/nested" NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG_PATH }}" SDS_SC_NAME: "{{ .STORAGE_CLASS }}" DATA_DISK_COUNT: "{{ .DATA_DISK_COUNT }}" @@ -481,10 +482,7 @@ tasks: set -euo pipefail NESTED_DIR="{{ .NESTED_DIR }}" NESTED_KUBECONFIG="{{ .NESTED_KUBECONFIG }}" - if ! mkdir -p "${NESTED_DIR}"; then - echo "[ERR] Failed to create nested directory: ${NESTED_DIR}" >&2 - exit 1 - fi + mkdir -p "${NESTED_DIR}" "$(dirname "${NESTED_KUBECONFIG}")" - chmod +x scripts/build_nested_kubeconfig.sh - | scripts/build_nested_kubeconfig.sh \ @@ -535,16 +533,21 @@ tasks: echo "[CLEANUP] Prefix='{{ .PREFIX }}'" ns_list=$(kubectl get ns -o json | jq -r --arg p '{{ .PREFIX }}' '.items[].metadata.name | select(startswith($p))') if [ -z "${ns_list}" ]; then - echo "[INFO] No namespaces to delete"; exit 0 + echo "[INFO] No namespaces to delete" + else + for ns in $ns_list; do + echo "[CLEANUP] Deleting namespace $ns ..." + kubectl delete ns "$ns" --wait=false || true + done + echo "[CLEANUP] Waiting for namespaces to be deleted..." + for ns in $ns_list; do + kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" + done fi - for ns in $ns_list; do - echo "[CLEANUP] Deleting namespace $ns ..." - kubectl delete ns "$ns" --wait=false || true - done - echo "[CLEANUP] Waiting for namespaces to be deleted..." - for ns in $ns_list; do - kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" - done + # Cleanup cluster-scoped resources for this run-id (if any) + echo "[CLEANUP] Deleting cluster-scoped resources labeled with run-id='{{ .PREFIX }}'" + kubectl delete virtualmachineclass -l e2e.deckhouse.io/run-id='{{ .PREFIX }}' --ignore-not-found || true + kubectl delete clusterrolebinding -l e2e.deckhouse.io/run-id='{{ .PREFIX }}' --ignore-not-found || true # ------------------------------------------------------------ # CI helpers: kubeconfig + registry diff --git a/ci/dvp-e2e/charts/infra/templates/ingress.yaml b/ci/dvp-e2e/charts/infra/templates/ingress.yaml index b419188353..113142af11 100644 --- a/ci/dvp-e2e/charts/infra/templates/ingress.yaml +++ b/ci/dvp-e2e/charts/infra/templates/ingress.yaml @@ -3,6 +3,8 @@ kind: Service metadata: name: dvp-over-dvp-443 namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: ports: - port: 443 @@ -20,6 +22,8 @@ metadata: annotations: nginx.ingress.kubernetes.io/ssl-passthrough: "true" nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: ingressClassName: nginx rules: diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml index a6bee4278a..7ff0c3e8d5 100644 --- a/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml @@ -4,6 +4,8 @@ kind: Deployment metadata: name: jump-host namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: replicas: 1 selector: @@ -13,6 +15,7 @@ spec: metadata: labels: app: jump-host + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: containers: - name: jump-host diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml index e2b809dcab..09adddbc27 100644 --- a/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml @@ -4,6 +4,8 @@ kind: Service metadata: name: jump-host namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: type: NodePort selector: diff --git a/ci/dvp-e2e/charts/infra/templates/ns.yaml b/ci/dvp-e2e/charts/infra/templates/ns.yaml index 064087cab7..2ddec28c58 100644 --- a/ci/dvp-e2e/charts/infra/templates/ns.yaml +++ b/ci/dvp-e2e/charts/infra/templates/ns.yaml @@ -4,3 +4,4 @@ metadata: name: {{ .Values.namespace }} labels: heritage: deckhouse + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" diff --git a/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml index 1a6a4b9846..6e1c531459 100644 --- a/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml +++ b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml @@ -3,6 +3,8 @@ kind: ServiceAccount metadata: name: dkp-sa namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" --- apiVersion: v1 kind: Secret @@ -11,6 +13,8 @@ metadata: namespace: {{ .Values.namespace }} annotations: kubernetes.io/service-account.name: dkp-sa + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" type: kubernetes.io/service-account-token --- apiVersion: rbac.authorization.k8s.io/v1 @@ -18,6 +22,8 @@ kind: RoleBinding metadata: name: dkp-sa-rb namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" subjects: - kind: ServiceAccount name: dkp-sa @@ -31,6 +37,8 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: dkp-sa-cluster-admin-{{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" subjects: - kind: ServiceAccount name: dkp-sa diff --git a/ci/dvp-e2e/charts/infra/templates/vi.yaml b/ci/dvp-e2e/charts/infra/templates/vi.yaml index 66034a649d..3aa7acec04 100644 --- a/ci/dvp-e2e/charts/infra/templates/vi.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vi.yaml @@ -4,6 +4,8 @@ kind: VirtualImage metadata: name: image namespace: {{ .Values.namespace }} + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: storage: ContainerRegistry dataSource: diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml index 39330ced39..db7d46cb74 100644 --- a/ci/dvp-e2e/charts/infra/templates/vmc.yaml +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -2,6 +2,8 @@ apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualMachineClass metadata: name: "{{ .Values.namespace }}-cpu" + labels: + e2e.deckhouse.io/run-id: "{{ .Values.namespace }}" spec: cpu: type: Discovery From aa9fa84c3b50a5c0df2ef8506d893413dad70b25 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 19:58:24 +0300 Subject: [PATCH 08/13] ci(e2e): add run-id label to infra resources for cleanup Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 28 +++++---- ci/dvp-e2e/Taskfile.yaml | 68 +++++++++++++++++++++ ci/dvp-e2e/scripts/attach_worker_disks.sh | 32 +--------- ci/dvp-e2e/scripts/configure_sds_storage.sh | 21 ++++--- 4 files changed, 102 insertions(+), 47 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index de1b1ac560..c8c5e4b499 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -41,7 +41,7 @@ jobs: parent_storage_class: linstor-thin-r1-immediate image_storage_class: linstor-thin-r1-immediate attach_disk_size: 10Gi - data_disk_count: 2 + data_disk_count: 1 concurrency: group: setup-${{ github.head_ref || github.ref_name }}-${{ matrix.profile }} cancel-in-progress: true @@ -136,7 +136,7 @@ jobs: parent_storage_class: linstor-thin-r1-immediate image_storage_class: linstor-thin-r1-immediate attach_disk_size: 10Gi - data_disk_count: 2 + data_disk_count: 1 env: GO_VERSION: "1.24.6" TIMEOUT: 4h @@ -177,17 +177,23 @@ jobs: uses: actions/download-artifact@v4 with: name: nested-run-${{ env.RUN_ID }} - path: . + path: ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }} + merge-multiple: true - name: Configure kubeconfig env + working-directory: ci/dvp-e2e + env: + RUN_ID: ${{ env.RUN_ID }} run: | - set -euo pipefail - NESTED_KUBECONFIG="${GITHUB_WORKSPACE}/ci/dvp-e2e/tmp/runs/${RUN_ID}/nested/kubeconfig" - if [ ! -s "$NESTED_KUBECONFIG" ]; then - echo "[ERR] Nested kubeconfig not found at $NESTED_KUBECONFIG" >&2 - exit 1 - fi - echo "KUBECONFIG=$NESTED_KUBECONFIG" >> "$GITHUB_ENV" + task ci:kubeconfig:ensure + + - name: Pause for manual inspection + working-directory: ci/dvp-e2e + env: + RUN_ID: ${{ env.RUN_ID }} + MANUAL_WAIT_SECONDS: ${{ vars.MANUAL_WAIT_SECONDS || '7200' }} + run: | + task ci:manual-wait - name: Run E2E tests working-directory: test/e2e @@ -212,7 +218,7 @@ jobs: parent_storage_class: linstor-thin-r1-immediate image_storage_class: linstor-thin-r1-immediate attach_disk_size: 10Gi - data_disk_count: 2 + data_disk_count: 1 steps: - uses: actions/checkout@v4 diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 1f411976d6..a8eea641f9 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -623,3 +623,71 @@ tasks: TMP_DIR: "{{ .TMP_DIR }}" NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" SDS_SC_NAME: "{{ .SDS_SC_NAME }}" + ci:kubeconfig:ensure: + desc: Ensure nested kubeconfig exists at the expected path + vars: + RUN_ID: '{{ .RUN_ID | default (env "RUN_ID") | default "" }}' + WORKSPACE: + sh: git rev-parse --show-toplevel 2>/dev/null || pwd + cmds: + - | + set -euo pipefail + RUN_ID="{{ .RUN_ID }}" + if [ -z "$RUN_ID" ]; then + echo "[ERR] RUN_ID must be provided to locate nested kubeconfig" >&2 + exit 1 + fi + WORKSPACE="${GITHUB_WORKSPACE:-{{ .WORKSPACE }}}" + TARGET_PATH="$WORKSPACE/ci/dvp-e2e/tmp/runs/$RUN_ID/nested/kubeconfig" + if [ ! -s "$TARGET_PATH" ]; then + echo "[ERR] Nested kubeconfig not found at $TARGET_PATH" >&2 + exit 1 + fi + echo "[INFO] Using nested kubeconfig at $TARGET_PATH" + [ -n "${GITHUB_ENV:-}" ] && echo "KUBECONFIG=$TARGET_PATH" >> "$GITHUB_ENV" + ci:manual-wait: + desc: Pause execution to allow manual SSH inspection of nested cluster + vars: + RUN_ID: '{{ .RUN_ID | default (env "RUN_ID") | default "" }}' + WORKSPACE: + sh: git rev-parse --show-toplevel 2>/dev/null || pwd + WAIT_SECONDS: '{{ .WAIT_SECONDS | default (env "MANUAL_WAIT_SECONDS") | default "18000" }}' + cmds: + - | + set -euo pipefail + RUN_ID="{{ .RUN_ID }}" + WAIT="{{ .WAIT_SECONDS }}" + if [ -z "$RUN_ID" ]; then + echo "[ERR] RUN_ID must be set for ci:manual-wait" >&2 + exit 1 + fi + if ! [[ "$WAIT" =~ ^[0-9]+$ ]]; then + echo "[ERR] WAIT_SECONDS must be numeric (got '$WAIT')" >&2 + exit 1 + fi + if [ "$WAIT" -le 0 ]; then + echo "[INFO] Manual wait skipped (WAIT_SECONDS=$WAIT)" + exit 0 + fi + WORKSPACE='{{ .WORKSPACE }}' + PARENT_KUBECONFIG="$WORKSPACE/ci/dvp-e2e/tmp/runs/$RUN_ID/parent.kubeconfig" + echo "[INFO] Pausing for $WAIT seconds before running tests." + echo "[INFO] Use parent kubeconfig for SSH tunneling:" + echo " export KUBECONFIG=$PARENT_KUBECONFIG" + echo " d8 v ssh --namespace $RUN_ID --username ubuntu " + echo "[INFO] Press Ctrl+C in the workflow run to cancel wait early." + START_TS=$(date +%s) + END=$((START_TS + WAIT)) + LAST_NOTE=$START_TS + while true; do + NOW=$(date +%s) + [ "$NOW" -ge "$END" ] && break + REM=$((END - NOW)) + printf '[INFO] Manual wait: %d seconds remaining...\n' "$REM" + if [ $((NOW - LAST_NOTE)) -ge 300 ]; then + echo "[INFO] Cluster should be ready for manual SSH troubleshooting." + LAST_NOTE=$NOW + fi + sleep 60 || true + done + echo "[INFO] Manual wait finished; proceeding to tests." diff --git a/ci/dvp-e2e/scripts/attach_worker_disks.sh b/ci/dvp-e2e/scripts/attach_worker_disks.sh index f6d0b2ca94..5ea133442e 100755 --- a/ci/dvp-e2e/scripts/attach_worker_disks.sh +++ b/ci/dvp-e2e/scripts/attach_worker_disks.sh @@ -22,7 +22,7 @@ set -euo pipefail namespace="" storage_class="" disk_size="10Gi" -disk_count="2" +disk_count="1" kubeconfig="${KUBECONFIG:-}" while getopts ":n:s:z:c:k:" opt; do @@ -90,7 +90,7 @@ for vm in "${workers[@]}"; do vd="storage-disk-${disk_num}-$vm" echo "[INFRA] Creating VirtualDisk $vd (${disk_size}, sc=${storage_class})" - cat > "/tmp/vd-$vd.yaml" </dev/null 2>&1 || kubectl -n "${namespace}" apply -f - </dev/null 2>&1 || kubectl -n "${namespace}" apply -f "/tmp/vd-$vd.yaml" # Wait for VirtualDisk to be Ready echo "[INFRA] Waiting for VirtualDisk $vd to be Ready..." @@ -123,34 +122,10 @@ EOF exit 1 fi - # Wait for PVC - pvc_name="" - for j in $(seq 1 50); do - pvc_name=$(kubectl -n "${namespace}" get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) - [ -n "$pvc_name" ] && break - echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/50" - sleep 3 - done - - if [ -n "$pvc_name" ]; then - echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." - for j in $(seq 1 120); do - pvc_phase=$(kubectl -n "${namespace}" get pvc "$pvc_name" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$pvc_phase" = "Bound" ]; then - break - fi - [ $((j % 10)) -eq 0 ] && echo "[INFRA] PVC $pvc_name phase=$pvc_phase; retry $j/120" - sleep 2 - done - if [ "$pvc_phase" != "Bound" ]; then - echo "[WARN] PVC $pvc_name not Bound after waiting" - fi - fi - # Create hotplug attachment att="att-$vd" echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment $att for VM $vm" - cat > "/tmp/att-$att.yaml" </dev/null 2>&1 || kubectl -n "${namespace}" apply -f - </dev/null 2>&1 || kubectl -n "${namespace}" apply -f "/tmp/att-$att.yaml" # Wait for attachment echo "[INFRA] Waiting for VMBDA $att to be Attached..." diff --git a/ci/dvp-e2e/scripts/configure_sds_storage.sh b/ci/dvp-e2e/scripts/configure_sds_storage.sh index 8f3b6d6ff6..f1a7e11fb2 100755 --- a/ci/dvp-e2e/scripts/configure_sds_storage.sh +++ b/ci/dvp-e2e/scripts/configure_sds_storage.sh @@ -123,21 +123,28 @@ if [ -z "$NODES" ]; then NODES=$(kubectl get nodes -o json | jq -r '.items[].metadata.name') fi +MATCH_EXPR_TYPE=$(yq eval -n ' + .key = "status.blockdevice.storage.deckhouse.io/type" | + .operator = "In" | + .values = ["disk"] +') + +MATCH_EXPR_MODEL=$(yq eval -n ' + .key = "status.blockdevice.storage.deckhouse.io/model" | + .operator = "In" | + .values = ["QEMU-HARDDISK"] +') + for node in $NODES; do [ -z "$node" ] && continue - MATCH_EXPR=$(yq eval -n ' - .key = "storage.deckhouse.io/device-path" | - .operator = "In" | - .values = ["/dev/sdb","/dev/vdb","/dev/xvdb","/dev/sdc","/dev/vdc","/dev/xvdc","/dev/sdd","/dev/vdd","/dev/xvdd"] - ') - NODE="$node" MATCH_EXPR="$MATCH_EXPR" yq eval -n ' + NODE="$node" MATCH_EXPR_TYPE="$MATCH_EXPR_TYPE" MATCH_EXPR_MODEL="$MATCH_EXPR_MODEL" yq eval -n ' .apiVersion = "storage.deckhouse.io/v1alpha1" | .kind = "LVMVolumeGroup" | .metadata.name = "data-" + env(NODE) | .spec.type = "Local" | .spec.local.nodeName = env(NODE) | .spec.actualVGNameOnTheNode = "data" | - .spec.blockDeviceSelector.matchExpressions = [ env(MATCH_EXPR) ] + .spec.blockDeviceSelector.matchExpressions = [ env(MATCH_EXPR_TYPE), env(MATCH_EXPR_MODEL) ] ' | kubectl apply -f - done From d6fc1e77dd3ed3b27ef92cbd5852afc38b5c8cf3 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Fri, 21 Nov 2025 14:18:19 +0300 Subject: [PATCH 09/13] ci(e2e): bump manual wait to 10h --- ci/dvp-e2e/Taskfile.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index a8eea641f9..038fcc2947 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -651,7 +651,7 @@ tasks: RUN_ID: '{{ .RUN_ID | default (env "RUN_ID") | default "" }}' WORKSPACE: sh: git rev-parse --show-toplevel 2>/dev/null || pwd - WAIT_SECONDS: '{{ .WAIT_SECONDS | default (env "MANUAL_WAIT_SECONDS") | default "18000" }}' + WAIT_SECONDS: '{{ .WAIT_SECONDS | default (env "MANUAL_WAIT_SECONDS") | default "36000" }}' cmds: - | set -euo pipefail From 52e20820fb49eea166514c629319c9f7fa21641b Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sat, 22 Nov 2025 13:11:50 +0300 Subject: [PATCH 10/13] ci: fix hotplug storage class and extend wait --- .github/workflows/e2e-matrix.yml | 2 +- ci/dvp-e2e/Taskfile.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index c8c5e4b499..2bfb758b55 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -191,7 +191,7 @@ jobs: working-directory: ci/dvp-e2e env: RUN_ID: ${{ env.RUN_ID }} - MANUAL_WAIT_SECONDS: ${{ vars.MANUAL_WAIT_SECONDS || '7200' }} + MANUAL_WAIT_SECONDS: ${{ vars.MANUAL_WAIT_SECONDS || '36000' }} run: | task ci:manual-wait diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 038fcc2947..0995f1fd60 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -124,6 +124,7 @@ tasks: PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG_PATH }}" REGISTRY_DOCKER_CFG: "{{ .REGISTRY_DOCKER_CFG }}" TARGET_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" + PARENT_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" ATTACH_DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" NAMESPACE: "{{ .RUN_ID }}" @@ -577,6 +578,7 @@ tasks: PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") }}' REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" + PARENT_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS | default .TARGET_STORAGE_CLASS }}" ATTACH_DISK_SIZE: '{{ .ATTACH_DISK_SIZE | default "10Gi" }}' EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" NAMESPACE: "{{ .NAMESPACE }}" @@ -606,7 +608,7 @@ tasks: VALUES_FILE: "{{ .VALUES_FILE }}" PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" - STORAGE_CLASS: "{{ .EFFECTIVE_DISK_SC }}" + STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" DISK_COUNT: "{{ .DATA_DISK_COUNT }}" - echo "🔐 Building nested kubeconfig" - task: nested:kubeconfig From c76051d07c5aaba5c82bda4d77fa76edf92688f4 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sat, 22 Nov 2025 13:52:27 +0300 Subject: [PATCH 11/13] ci: separate hotplug storage class --- .github/workflows/e2e-matrix.yml | 21 ++++++++++++--------- ci/dvp-e2e/Taskfile.yaml | 5 ++++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 2bfb758b55..9675ca51e0 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -40,6 +40,7 @@ jobs: storage_class: linstor-thin-r2 parent_storage_class: linstor-thin-r1-immediate image_storage_class: linstor-thin-r1-immediate + hotplug_storage_class: linstor-thin-r1-immediate attach_disk_size: 10Gi data_disk_count: 1 concurrency: @@ -51,6 +52,7 @@ jobs: STORAGE_CLASS: ${{ matrix.storage_class }} PARENT_STORAGE_CLASS: ${{ matrix.parent_storage_class }} IMAGE_STORAGE_CLASS: ${{ matrix.image_storage_class }} + HOTPLUG_STORAGE_CLASS: ${{ matrix.hotplug_storage_class || matrix.parent_storage_class }} ATTACH_DISK_SIZE: ${{ matrix.attach_disk_size }} DATA_DISK_COUNT: ${{ matrix.data_disk_count }} outputs: @@ -100,15 +102,16 @@ jobs: - name: Setup nested environment env: RUN_ID: ${{ env.RUN_ID }} - PROFILE: ${{ env.PROFILE }} - STORAGE_CLASS: ${{ env.STORAGE_CLASS }} - PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} - IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} - ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} - DATA_DISK_COUNT: ${{ matrix.data_disk_count }} - REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - API_URL: ${{ env.E2E_K8S_URL }} - SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }} + PROFILE: ${{ env.PROFILE }} + STORAGE_CLASS: ${{ env.STORAGE_CLASS }} + PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} + IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} + HOTPLUG_STORAGE_CLASS: ${{ env.HOTPLUG_STORAGE_CLASS || env.PARENT_STORAGE_CLASS }} + ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} + DATA_DISK_COUNT: ${{ matrix.data_disk_count }} + REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + API_URL: ${{ env.E2E_K8S_URL }} + SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }} working-directory: ci/dvp-e2e run: | task ci:setup-nested-env diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 0995f1fd60..0255414e57 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -96,6 +96,7 @@ tasks: STORAGE_CLASS: '{{ .STORAGE_CLASS | default (env "STORAGE_CLASS") | default "" }}' IMAGE_STORAGE_CLASS: '{{ .IMAGE_STORAGE_CLASS | default (env "IMAGE_STORAGE_CLASS") | default "" }}' PARENT_STORAGE_CLASS: '{{ .PARENT_STORAGE_CLASS | default (env "PARENT_STORAGE_CLASS") | default "" }}' + HOTPLUG_STORAGE_CLASS: '{{ .HOTPLUG_STORAGE_CLASS | default (env "HOTPLUG_STORAGE_CLASS") | default "" }}' ATTACH_DISK_SIZE: '{{ .ATTACH_DISK_SIZE | default (env "ATTACH_DISK_SIZE") | default "10Gi" }}' DATA_DISK_COUNT: '{{ .DATA_DISK_COUNT | default (env "DATA_DISK_COUNT") | default "2" }}' REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' @@ -105,7 +106,7 @@ tasks: VALUES_FILE_PATH: '{{ printf "%s/values.yaml" .RUN_DIR }}' PARENT_KUBECONFIG_PATH: '{{ printf "%s/parent.kubeconfig" .RUN_DIR }}' NESTED_KUBECONFIG_PATH: '{{ printf "%s/nested/kubeconfig" .RUN_DIR }}' - EFFECTIVE_DISK_SC: "{{ if .IMAGE_STORAGE_CLASS }}{{ .IMAGE_STORAGE_CLASS }}{{ else }}{{ .STORAGE_CLASS }}{{ end }}" + EFFECTIVE_DISK_SC: "{{ if .HOTPLUG_STORAGE_CLASS }}{{ .HOTPLUG_STORAGE_CLASS }}{{ else if .IMAGE_STORAGE_CLASS }}{{ .IMAGE_STORAGE_CLASS }}{{ else }}{{ .STORAGE_CLASS }}{{ end }}" cmds: - task: ci:prepare-env vars: @@ -114,6 +115,7 @@ tasks: PROFILE: "{{ .PROFILE }}" STORAGE_CLASS: "{{ .STORAGE_CLASS }}" PARENT_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" + HOTPLUG_STORAGE_CLASS: "{{ .HOTPLUG_STORAGE_CLASS }}" REGISTRY_DOCKER_CFG: "{{ .REGISTRY_DOCKER_CFG }}" API_URL: "{{ .API_URL }}" SA_TOKEN: "{{ .SA_TOKEN }}" @@ -141,6 +143,7 @@ tasks: PROFILE: '{{ .PROFILE | default (env "PROFILE") | default "" }}' STORAGE_CLASS: '{{ .STORAGE_CLASS | default (env "STORAGE_CLASS") | default "" }}' PARENT_STORAGE_CLASS: '{{ .PARENT_STORAGE_CLASS | default (env "PARENT_STORAGE_CLASS") | default "" }}' + HOTPLUG_STORAGE_CLASS: '{{ .HOTPLUG_STORAGE_CLASS | default (env "HOTPLUG_STORAGE_CLASS") | default "" }}' REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' API_URL: '{{ .API_URL | default (env "API_URL") | default (env "E2E_K8S_URL") | default "" }}' SA_TOKEN: '{{ .SA_TOKEN | default (env "SA_TOKEN") | default (env "E2E_NESTED_SA_SECRET") | default "" }}' From ab848cf25dd049fbc368f9378ef46e5a93fdb054 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sat, 22 Nov 2025 13:58:03 +0300 Subject: [PATCH 12/13] ci: fix workflow indentation for hotplug env --- .github/workflows/e2e-matrix.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 9675ca51e0..bbe3d21e5d 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -102,16 +102,16 @@ jobs: - name: Setup nested environment env: RUN_ID: ${{ env.RUN_ID }} - PROFILE: ${{ env.PROFILE }} - STORAGE_CLASS: ${{ env.STORAGE_CLASS }} - PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} - IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} - HOTPLUG_STORAGE_CLASS: ${{ env.HOTPLUG_STORAGE_CLASS || env.PARENT_STORAGE_CLASS }} - ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} - DATA_DISK_COUNT: ${{ matrix.data_disk_count }} - REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - API_URL: ${{ env.E2E_K8S_URL }} - SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }} + PROFILE: ${{ env.PROFILE }} + STORAGE_CLASS: ${{ env.STORAGE_CLASS }} + PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} + IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} + HOTPLUG_STORAGE_CLASS: ${{ env.HOTPLUG_STORAGE_CLASS || env.PARENT_STORAGE_CLASS }} + ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} + DATA_DISK_COUNT: ${{ matrix.data_disk_count }} + REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + API_URL: ${{ env.E2E_K8S_URL }} + SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }} working-directory: ci/dvp-e2e run: | task ci:setup-nested-env From 9de6019fda6b40fbc51b80a20f44437c4c3fc48d Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sat, 22 Nov 2025 20:18:58 +0300 Subject: [PATCH 13/13] ci: add manual wait and vmclass ensure before tests --- ci/dvp-e2e/Taskfile.yaml | 2 +- ci/dvp-e2e/scripts/attach_worker_disks.sh | 67 ++++++++++++++++++++++- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 0255414e57..f5a02029f3 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -611,7 +611,7 @@ tasks: VALUES_FILE: "{{ .VALUES_FILE }}" PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" - STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" + STORAGE_CLASS: "{{ .EFFECTIVE_DISK_SC }}" DISK_COUNT: "{{ .DATA_DISK_COUNT }}" - echo "🔐 Building nested kubeconfig" - task: nested:kubeconfig diff --git a/ci/dvp-e2e/scripts/attach_worker_disks.sh b/ci/dvp-e2e/scripts/attach_worker_disks.sh index 5ea133442e..e11ab63fe2 100755 --- a/ci/dvp-e2e/scripts/attach_worker_disks.sh +++ b/ci/dvp-e2e/scripts/attach_worker_disks.sh @@ -50,6 +50,12 @@ fi echo "[INFRA] Attaching ${disk_count} storage disks to worker VMs using hotplug in namespace ${namespace}" +# Cleanup stale hp-volume pods (older than 10 minutes) to avoid interference +echo "[INFRA] Cleaning up stale hp-volume pods (older than 10m) before attachment" +kubectl -n "${namespace}" get pods --no-headers 2>/dev/null \ + | awk '$1 ~ /^hp-volume-/ && $3 == "Running" && $5 ~ /[0-9]+m/ { split($5,t,"m"); if (t[1] > 10) print $1 }' \ + | xargs -r kubectl -n "${namespace}" delete pod --force --grace-period=0 2>/dev/null || true + # Wait for worker VMs for i in $(seq 1 50); do worker_count=$(kubectl -n "${namespace}" get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep -c worker || echo "0") @@ -114,7 +120,7 @@ EOF echo "[INFRA] VD $vd phase=$vd_phase; retry $j/50" sleep 5 done - + if [ "$vd_phase" != "Ready" ]; then echo "[ERROR] VirtualDisk $vd not Ready" kubectl -n "${namespace}" get vd "$vd" -o yaml || true @@ -122,6 +128,47 @@ EOF exit 1 fi + # Ensure VirtualDisk is not marked in use before attaching + in_use="false" + for j in $(seq 1 30); do + in_use=$(kubectl -n "${namespace}" get vd "$vd" -o json 2>/dev/null | jq -r '.status.inUse // false' || echo "false") + if [ "$in_use" = "false" ]; then + break + fi + echo "[INFRA] VD $vd inUse=$in_use; retry $j/30" + sleep 5 + done + + if [ "$in_use" != "false" ]; then + echo "[ERROR] VirtualDisk $vd remains InUse; aborting attachment" + kubectl -n "${namespace}" get vd "$vd" -o yaml || true + kubectl -n "${namespace}" get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + + # Skip if VM already reports this disk attached/hotplugged + if kubectl -n "${namespace}" get vm "$vm" -o json 2>/dev/null \ + | jq -e --arg disk "$vd" ' + ([.status.blockDeviceRefs[]? + | select(.name == $disk and .attached == true) + ] | length) > 0' >/dev/null; then + echo "[INFO] VM $vm already has disk $vd attached; skipping VMBDA creation" + continue + fi + + # Skip if there is an existing non-failed VMBDA for this disk + conflict_vmbda=$(kubectl -n "${namespace}" get vmbda -o json 2>/dev/null \ + | jq -r --arg name "$vd" ' + .items[]? + | select(.spec.blockDeviceRef.kind == "VirtualDisk" + and .spec.blockDeviceRef.name == $name + and (.status.phase != "" and .status.phase != "Failed")) + | .metadata.name' | head -n 1) + if [ -n "${conflict_vmbda:-}" ]; then + echo "[WARN] Found existing VMBDA $conflict_vmbda for disk $vd; skipping" + continue + fi + # Create hotplug attachment att="att-$vd" echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment $att for VM $vm" @@ -138,11 +185,14 @@ spec: name: $vd EOF + # Give controller time to react on creation + sleep 60 + # Wait for attachment echo "[INFRA] Waiting for VMBDA $att to be Attached..." att_phase="" success_by_vm=0 - for i in $(seq 1 50); do + for i in $(seq 1 100); do att_phase=$(kubectl -n "${namespace}" get vmbda "$att" -o jsonpath='{.status.phase}' 2>/dev/null || true) if [ "$att_phase" = "Attached" ]; then echo "[INFRA] Disk $vd attached to VM $vm" @@ -161,7 +211,7 @@ EOF success_by_vm=1 break fi - [ $((i % 10)) -eq 0 ] && echo "[INFRA] Disk $vd phase=$att_phase; retry $i/50" + [ $((i % 10)) -eq 0 ] && echo "[INFRA] Disk $vd phase=$att_phase; retry $i/100" sleep 5 done @@ -175,6 +225,17 @@ EOF done echo "[INFRA] VM $vm configured with hotplug disks" + + echo "[DEBUG] BlockDeviceRefs for VM $vm" + kubectl -n "${namespace}" get vm "$vm" -o json 2>/dev/null | jq '.status.blockDeviceRefs' || true + echo "[DEBUG] BlockDevices in cluster (all namespaces)" + kubectl get blockdevices.storage.deckhouse.io -A 2>/dev/null || true + + # Throttle between VMs to avoid concurrent hotplug flaps + if [ ${#workers[@]} -gt 1 ]; then + echo "[INFRA] Waiting 60s before processing next VM..." + sleep 60 + fi done echo "[INFRA] All worker VMs configured with storage disks via hotplug"