diff --git a/.github/workflows/build-universe-images.yml b/.github/workflows/build-universe-images.yml index 48a046c404..d66277283c 100644 --- a/.github/workflows/build-universe-images.yml +++ b/.github/workflows/build-universe-images.yml @@ -12,6 +12,8 @@ on: - 'bots/**' - 'libs/**' - 'messages/**' + - 'contrib/docker/docker-compose.universe.yaml' + - 'scripts/*universe*' - '.github/workflows/build-universe-images.yml' workflow_dispatch: inputs: diff --git a/.github/workflows/test-universe-images.yml b/.github/workflows/test-universe-images.yml index 17367a969c..98a3876677 100644 --- a/.github/workflows/test-universe-images.yml +++ b/.github/workflows/test-universe-images.yml @@ -5,12 +5,18 @@ on: workflows: ["Build Universe Images for Coolify"] types: - completed + branches: [universe] workflow_dispatch: inputs: docker_tag: - description: 'Docker tag to test (defaults to latest)' + description: 'Docker tag to test (defaults to universe)' required: false - default: 'latest' + default: 'universe' + deploy: + description: 'Trigger production deploy after tests' + required: false + default: false + type: boolean env: REGISTRY: ghcr.io @@ -37,6 +43,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + ref: ${{ github.event.workflow_run.head_sha || github.sha }} - name: Set up Node.js uses: actions/setup-node@v3 @@ -86,7 +94,7 @@ jobs: fi echo "tag=${BRANCH_NAME}" >> $GITHUB_OUTPUT else - echo "tag=latest" >> $GITHUB_OUTPUT + echo "tag=universe" >> $GITHUB_OUTPUT fi - name: Login to GHCR @@ -183,3 +191,30 @@ jobs: path: tests/playwright-report/ retention-days: 30 + deploy-universe: + name: "Deploy Universe" + needs: test-universe-images + runs-on: ubuntu-latest + if: ${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && github.event.inputs.deploy == 'true') }} + environment: production + concurrency: + group: deploy-universe + cancel-in-progress: false + steps: + - name: Trigger production deploy webhook + env: + 
UNIVERSE_DEPLOY_WEBHOOK: ${{ secrets.UNIVERSE_DEPLOY_WEBHOOK }} + run: | + if [ -z "$UNIVERSE_DEPLOY_WEBHOOK" ]; then + echo "::notice::UNIVERSE_DEPLOY_WEBHOOK is not configured. Universe images were tested successfully; configure this secret to trigger the production host automatically." + exit 0 + fi + + curl \ + --fail-with-body \ + --silent --show-error \ + --request POST \ + --max-time 30 \ + --retry 2 --retry-delay 5 --retry-connrefused \ + "$UNIVERSE_DEPLOY_WEBHOOK" + diff --git a/docs/cicd.md b/docs/cicd.md new file mode 100644 index 0000000000..10d7b433c3 --- /dev/null +++ b/docs/cicd.md @@ -0,0 +1,146 @@ +# BAWES Universe CI/CD + +This document audits the Universe build, test, and deployment path for the `universe` branch. + +## Summary + +BAWES Universe uses a dedicated GitHub Actions path for the `universe` branch: + +1. Pull requests to `universe` run the upstream WorkAdventure CI plus the mobile shell validation when `mobile/**` changes. +2. Pushes to `universe` build Universe Docker images and publish them to GitHub Container Registry. +3. A follow-up workflow tests the published Universe images with the production docker-compose test stack. +4. After the image tests pass, GitHub Actions can trigger the production host through `UNIVERSE_DEPLOY_WEBHOOK`. + +The repository does not currently contain SSH host, user, or key secrets for a direct production deploy from GitHub Actions. Adding a blind SSH deploy job would create an untestable deployment path, so the safe handoff is to publish and test immutable images, then notify the production host through a deploy webhook when that secret is configured. + +## Workflow Inventory + +### `.github/workflows/continuous_integration.yml` + +Runs on all pull requests and on pushes to `master` and `develop`. + +Important validation jobs: + +- `continuous-integration-play`: installs the `workadventure-play` workspace, builds `play`, runs typecheck, Svelte check, lint, prettier, and unit tests. 
+- `continuous-integration-back`: installs `workadventureback`, builds messages, then runs typecheck, lint, unit tests, and prettier in `back`. +- `continuous-integration-uploader`: runs lint and tests in `uploader`. +- `continuous-integration-map-storage`: builds messages, then runs typecheck, lint, tests, and prettier in `map-storage`. +- `continuous-integration-end-to-end-tests`: validates the end-to-end test package and Play API typings. + +This workflow is the PR validation gate for the web/backend services. It does not deploy. + +### `.github/workflows/mobile-ci.yml` + +Runs on pull requests and pushes to `universe` that touch `mobile/**` or the workflow itself. + +Validation jobs: + +- `validate`: checks that `mobile/capacitor.config.js` is loadable and has the required app id and server URL. +- `fastlane-setup`: installs Ruby dependencies from `mobile/Gemfile` and verifies Fastlane is available. + +`npx cap doctor` is intentionally informational because the scaffold stage does not commit native `android/` or `ios/` folders. + +### `.github/workflows/build-universe-images.yml` + +Runs on pushes to `universe` when service, shared library, message, Universe compose, Universe script, or workflow files change. It can also be launched manually. + +Image build jobs: + +- `build-play`: builds `play/Dockerfile.universe` and pushes `ghcr.io/<owner>/play-universe`. +- `build-back`: builds `back/Dockerfile.universe` and pushes `ghcr.io/<owner>/back-universe`. +- `build-map-storage`: builds `map-storage/Dockerfile.universe` and pushes `ghcr.io/<owner>/map-storage-universe`. +- `build-uploader`: builds `uploader/Dockerfile.universe` and pushes `ghcr.io/<owner>/uploader-universe`. +- `build-discord-bot`: builds `discord-bot/Dockerfile` and pushes `ghcr.io/<owner>/discord-bot-universe`. +- `build-bot-server`: builds `bots/Dockerfile` and pushes `ghcr.io/<owner>/bot-server-universe`. + +The issue mentions `pusher`, but this fork does not have a standalone `pusher/` package or Dockerfile.
The browser-facing service is built through `play`. + +The workflow publishes branch tags such as `universe` and SHA tags such as `universe-<sha>`. It only publishes `latest` when `universe` is the repository default branch, so manual tests should default to the `universe` tag instead of `latest`. + +### `.github/workflows/test-universe-images.yml` + +Runs after `Build Universe Images for Coolify` completes successfully on `universe`, or manually with a selected Docker tag. + +The workflow: + +- Checks out the exact commit that triggered the build workflow. +- Installs the Playwright test package and Room API client dependencies. +- Starts the production docker-compose stack with `contrib/docker/docker-compose.universe.yaml` image overrides. +- Runs `npm run test-single-domain-install` in two shards. +- Uploads the Playwright report and docker-compose logs on failure. +- Runs a single `deploy-universe` job after all shards pass. This job calls `UNIVERSE_DEPLOY_WEBHOOK` when configured and otherwise exits successfully with a notice. + +Manual runs are test-only by default. To intentionally deploy after a manual image test, launch the workflow with the `deploy` input set to `true`. + +### `.github/workflows/build-test-and-deploy.yml` + +This is the upstream WorkAdventure build/test/deploy workflow for `master`, `develop`, releases, and labeled pull requests. It builds the upstream `workadventure/*` images, runs production-like tests, can trigger GitLab SaaS tests, and can deploy preview environments with Helm. + +It is not the Universe production pipeline because it does not run on pushes to `universe` and it publishes upstream image names instead of the `*-universe` GHCR images. Keeping Universe deployment in the dedicated workflows avoids accidental deployment to upstream WorkAdventure infrastructure.
+ +## Deployment Contract + +The GitHub-side contract for `universe.bawes.net` is: + +- `ghcr.io/<owner>/play-universe:universe` +- `ghcr.io/<owner>/back-universe:universe` +- `ghcr.io/<owner>/map-storage-universe:universe` +- `ghcr.io/<owner>/uploader-universe:universe` + +`discord-bot-universe` and `bot-server-universe` are built by the Universe image workflow for auxiliary bot deployments, but they are intentionally excluded from this `universe.bawes.net` production contract because the current `contrib/docker/docker-compose.universe.yaml` runtime override only wires `play-universe`, `back-universe`, `map-storage-universe`, and `uploader-universe`. + +The production host should watch those tags or the matching `universe-<sha>` tags. A typical Coolify setup should point each service to the GHCR image, keep the runtime environment variables in Coolify, and expose a deploy webhook saved in GitHub as `UNIVERSE_DEPLOY_WEBHOOK`. + +If the project later wants direct SSH deployment from GitHub Actions instead of the webhook handoff, add it as a separate explicit job after these secrets exist: + +- `UNIVERSE_DEPLOY_HOST` +- `UNIVERSE_DEPLOY_USER` +- `UNIVERSE_DEPLOY_KEY` +- `UNIVERSE_DEPLOY_PATH` + +Do not add a production SSH job before those secrets and rollback steps are documented. + +## Rollback Contract + +The build workflow publishes immutable `universe-<sha>` tags in addition to the moving `universe` branch tag. Rollbacks should use those immutable tags so the production host can return to a known image set. + +Current rollback procedure: + +1. Identify the last successful `Build Universe Images for Coolify` run before the bad deploy. +2. Copy its commit SHA and use the matching `universe-<sha>` tag for each production service. +3. In Coolify or the host deployment configuration, point these runtime images back to that tag: + - `ghcr.io/<owner>/play-universe:universe-<sha>` + - `ghcr.io/<owner>/back-universe:universe-<sha>` + - `ghcr.io/<owner>/map-storage-universe:universe-<sha>` + - `ghcr.io/<owner>/uploader-universe:universe-<sha>` +4.
Trigger the host deploy through Coolify or the configured host webhook. +5. Confirm `universe.bawes.net` and the backend health endpoint after the rollout. + +The GitHub workflow intentionally does not include a blind SSH rollback job yet. The repository does not currently define production SSH secrets or a tested rollback script, and the webhook contract does not currently accept a tag payload. Once the production receiver supports tag-specific deploys, a separate `workflow_dispatch` rollback job can be added to validate the requested tag and call that receiver. + +## Operational Checklist + +Before merging into `universe`: + +- CI passes for touched web/backend packages. +- `mobile-ci.yml` passes when `mobile/**` is touched. +- No unrelated service files are changed. +- For daily development steps, use `docs/dev-workflow.md`. + +After merging into `universe`: + +- `Build Universe Images for Coolify` completes successfully. +- `Test Universe Images` runs against the same triggering commit and the `universe` image tag. +- GHCR contains the updated `*-universe:universe` images. +- `Deploy Universe` either calls `UNIVERSE_DEPLOY_WEBHOOK` successfully after a `workflow_run`, or after a manual `workflow_dispatch` only when the `deploy` input is set to `true`. +- The production host reports a successful rollout of the new image set after the webhook is configured. +- If rollback is needed, redeploy the last known good `universe-<sha>` image set from the production host. + +## Audit Notes + +- `test-universe-images.yml` now checks out the triggering build commit on `workflow_run`, which keeps the test compose files aligned with the images under test. +- Manual Universe image tests default to the `universe` tag because `latest` is not guaranteed unless `universe` is the default branch. +- `build-universe-images.yml` now also triggers when the Universe compose override or Universe helper scripts change.
+- `test-universe-images.yml` now has a post-test `deploy-universe` job that triggers a configured production deploy webhook exactly once after all image-test shards pass. +- The deploy handoff is protected with the `production` GitHub Environment, serialized through the `deploy-universe` concurrency group, disabled by default for manual test dispatches, and bounded with curl timeout/retry flags. +- `docs/dev-workflow.md` now documents branch naming, PR flow, local docker-compose smoke tests, deploy status checks, and host-side rollback. diff --git a/docs/dev-workflow.md b/docs/dev-workflow.md new file mode 100644 index 0000000000..af868e6183 --- /dev/null +++ b/docs/dev-workflow.md @@ -0,0 +1,88 @@ +# BAWES Universe Developer Workflow + +This document describes the day-to-day development loop for the `universe` branch. + +## Branching + +Use short branch prefixes that describe the kind of work: + +- `feat/` for new product or infrastructure behavior. +- `fix/` for bug fixes. +- `chore/` for maintenance-only changes. + +Create branches from `universe` for Universe-specific work: + +```bash +git checkout universe +git pull --ff-only origin universe +git checkout -b feat/my-universe-change +``` + +## Pull Requests + +Open pull requests against `universe`. + +Every pull request to `universe` should keep unrelated upstream WorkAdventure changes out of scope. The normal validation path is: + +1. Update the code, docs, workflow, or mobile files required by the task. +2. Run the smallest relevant local checks before pushing. +3. Open a PR against `universe`. +4. Wait for GitHub Actions and review feedback. +5. Address review comments with small follow-up commits. + +When `mobile/**` changes, the mobile validation workflow checks the Capacitor config and Fastlane setup. Web/backend changes are covered by the existing WorkAdventure CI jobs. 
+ +## Local Docker Smoke Test + +For production-image smoke tests, use the Universe compose override with the production compose file: + +```bash +cd contrib/docker +export GITHUB_REPOSITORY_OWNER=BAWES-Universe +export VERSION=universe +docker compose -f docker-compose.prod.yaml -f docker-compose.universe.yaml up -d +docker compose -f docker-compose.prod.yaml -f docker-compose.universe.yaml ps +``` + +The automated image-test workflow adds `tests/docker-compose.test.yaml` and runs the WorkAdventure Playwright single-domain install tests after the images are published. + +## Merge And Deploy + +After a PR is merged into `universe`: + +1. `Build Universe Images for Coolify` builds the `*-universe` GHCR images. +2. `Test Universe Images` checks out the same commit and tests those images. +3. `Deploy Universe` calls `UNIVERSE_DEPLOY_WEBHOOK` if the secret is configured. + +Manual image tests are safe by default. A manual `workflow_dispatch` run only deploys when the `deploy` input is explicitly set to `true`. + +## Checking Deploy Status + +Check GitHub first: + +- `Build Universe Images for Coolify` completed successfully. +- `Test Universe Images` completed successfully. +- `Deploy Universe` either called the webhook or logged the notice that `UNIVERSE_DEPLOY_WEBHOOK` is not configured. + +Then check the production host: + +- Coolify or the host deployment log shows a rollout for the new `universe` or `universe-<sha>` tag. +- `https://universe.bawes.net` responds. +- The backend health endpoint used by the host responds successfully. + +## Rollback + +The image workflow publishes both the moving `universe` tag and immutable `universe-<sha>` tags. Prefer immutable tags for rollback records. + +To roll back from the production host: + +1. Find the last known good `universe-<sha>` tag in GHCR or the previous successful GitHub Actions run. +2.
In Coolify or the host service configuration, point each runtime service back to the matching tag: + - `ghcr.io/<owner>/play-universe:universe-<sha>` + - `ghcr.io/<owner>/back-universe:universe-<sha>` + - `ghcr.io/<owner>/map-storage-universe:universe-<sha>` + - `ghcr.io/<owner>/uploader-universe:universe-<sha>` +3. Trigger the host deploy from Coolify or the configured host webhook. +4. Confirm the site and backend health endpoint after the rollout. + +Do not add a GitHub-hosted SSH rollback job until the production SSH secrets, rollback path, and receiver behavior are documented and tested. The current GitHub-side contract is a webhook handoff, so tag selection remains host-side.