# Skip to content

# refactor: streamline file service retrieval and enhance spider templa… #726

# refactor: streamline file service retrieval and enhance spider templa…

# refactor: streamline file service retrieval and enhance spider templa… #726

# Builds, tests and publishes the Crawlab Docker images.
name: "Docker Image CI: crawlab"

# Triggers: pushes to the three long-lived branches, published releases,
# manual runs, and `docker-crawlab` repository_dispatch events.
on:
  push:
    branches: [develop, test, main]
  release:
    types: [published]
  workflow_dispatch:
  repository_dispatch:
    types: [docker-crawlab]

env:
  # Source directories used as Docker build context / Dockerfile location.
  IMAGE_PATH_CRAWLAB_BACKEND: backend
  IMAGE_PATH_CRAWLAB_FRONTEND: frontend

  # GitHub package names (owner/name). Not referenced by the jobs in this
  # file as far as visible here.
  GH_PKG_NAME_CRAWLAB_BASE: ${{ github.repository_owner }}/crawlab-base
  GH_PKG_NAME_CRAWLAB_BACKEND: ${{ github.repository_owner }}/crawlab-backend
  GH_PKG_NAME_CRAWLAB_FRONTEND: ${{ github.repository_owner }}/crawlab-frontend
  GH_PKG_NAME_CRAWLAB: ${{ github.repository_owner }}/crawlab

  # Image repositories on GitHub Container Registry.
  IMAGE_NAME_CRAWLAB_BASE: ghcr.io/${{ github.repository_owner }}/crawlab-base
  IMAGE_NAME_CRAWLAB_BACKEND: ghcr.io/${{ github.repository_owner }}/crawlab-backend
  IMAGE_NAME_CRAWLAB_FRONTEND: ghcr.io/${{ github.repository_owner }}/crawlab-frontend
  IMAGE_NAME_CRAWLAB_GH: ghcr.io/${{ github.repository_owner }}/crawlab

  # External registries the final image is re-tagged and pushed to.
  IMAGE_NAME_CRAWLAB_DOCKERHUB: crawlabteam/crawlab
  IMAGE_NAME_CRAWLAB_TENCENT: ccr.ccs.tencentyun.com/crawlab/crawlab

  # End-to-end test image and the host directory its report is written to.
  E2E_TESTS_IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/e2e-tests
  E2E_TESTS_WORKSPACE: ${{ github.workspace }}/playwright-report

jobs:
# Computes the values the later jobs key off: per-component "changed" flags,
# the image version tag, and the e2e test script to run.
setup:
  name: Setup
  runs-on: ubuntu-latest
  outputs:
    backend_changed: ${{ steps.check_changed_files.outputs.backend_changed }}
    frontend_changed: ${{ steps.check_changed_files.outputs.frontend_changed }}
    docker_changed: ${{ steps.check_changed_files.outputs.docker_changed }}
    workflow_changed: ${{ steps.check_changed_files.outputs.workflow_changed }}
    base_image_changed: ${{ steps.check_changed_files.outputs.base_image_changed }}
    version: ${{ steps.version.outputs.version }}
    test_script: ${{ steps.test_config.outputs.test_script }}
  steps:
    - uses: actions/checkout@v4

    # NOTE(review): third-party action referenced by a mutable tag; consider
    # pinning it to a full commit SHA.
    - name: Get changed files
      id: changed_files
      uses: tj-actions/changed-files@v45
      with:
        files_yaml: |
          backend:
            - 'backend/**'
            - 'core/**'
            - 'fs/**'
            - 'grpc/**'
            - 'vcs/**'
            - 'trace/**'
          frontend:
            - '${{ env.IMAGE_PATH_CRAWLAB_FRONTEND }}/**'
          docker:
            - 'Dockerfile'
            - 'docker/bin/**'
            - 'docker/nginx/**'
          workflow:
            - '.github/workflows/docker-crawlab.yml'
          base_image:
            - 'docker/base-image/**'

    - id: check_changed_files
      name: Check changed files
      # Step outputs are handed to the script through env vars instead of
      # being interpolated into the script text.
      env:
        BACKEND_ANY_CHANGED: ${{ steps.changed_files.outputs.backend_any_changed }}
        FRONTEND_ANY_CHANGED: ${{ steps.changed_files.outputs.frontend_any_changed }}
        DOCKER_ANY_CHANGED: ${{ steps.changed_files.outputs.docker_any_changed }}
        WORKFLOW_ANY_CHANGED: ${{ steps.changed_files.outputs.workflow_any_changed }}
        BASE_IMAGE_ANY_CHANGED: ${{ steps.changed_files.outputs.base_image_any_changed }}
      run: |
        # Tag refs force every component to count as changed; otherwise the
        # detected values are passed through unchanged.
        if [[ "$GITHUB_REF" == "refs/tags/"* ]]; then
          backend_changed=true
          frontend_changed=true
          docker_changed=true
          workflow_changed=true
          base_image_changed=true
        else
          backend_changed="$BACKEND_ANY_CHANGED"
          frontend_changed="$FRONTEND_ANY_CHANGED"
          docker_changed="$DOCKER_ANY_CHANGED"
          workflow_changed="$WORKFLOW_ANY_CHANGED"
          base_image_changed="$BASE_IMAGE_ANY_CHANGED"
        fi

        # set outputs
        {
          echo "backend_changed=$backend_changed"
          echo "frontend_changed=$frontend_changed"
          echo "docker_changed=$docker_changed"
          echo "workflow_changed=$workflow_changed"
          echo "base_image_changed=$base_image_changed"
        } >> "$GITHUB_OUTPUT"

        # Display the effective change status for each component, i.e. the
        # same values that were just written as outputs.
        echo "Backend changed: $backend_changed"
        echo "Frontend changed: $frontend_changed"
        echo "Docker changed: $docker_changed"
        echo "Workflow changed: $workflow_changed"
        echo "Base image changed: $base_image_changed"

    - id: version
      name: Get version
      run: |
        # Strip git ref prefix from version (keep the last path segment)
        VERSION="${GITHUB_REF##*/}"
        # Strip "v" prefix from tag name
        if [[ "$GITHUB_REF" == "refs/tags/"* ]]; then
          VERSION="${VERSION#v}"
        fi
        # Use Docker `latest` tag convention
        if [[ "$VERSION" == "main" ]]; then
          VERSION=latest
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"

    - id: test_config
      name: Set test configuration
      run: |
        # main -> full suite, test -> extended suite, anything else -> normal
        case "$GITHUB_REF" in
          refs/heads/main) test_script="test:full" ;;
          refs/heads/test) test_script="test:extended" ;;
          *) test_script="test:normal" ;;
        esac
        echo "test_script=$test_script" >> "$GITHUB_OUTPUT"
# Builds the base image from ./docker/base-image and pushes it to ghcr.io.
# Runs only when the base-image sources or this workflow changed.
build_base_image:
  name: Build base image
  needs: [setup]
  if: needs.setup.outputs.base_image_changed == 'true' || needs.setup.outputs.workflow_changed == 'true'
  runs-on: ubuntu-latest
  outputs:
    # 'true' only when an earlier step of this job failed; empty otherwise.
    failed: ${{ steps.set_output.outputs.failed }}
  steps:
    - uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Build and push image
      uses: docker/build-push-action@v5
      with:
        context: ./docker/base-image
        push: true
        tags: ${{ env.IMAGE_NAME_CRAWLAB_BASE }}:${{ needs.setup.outputs.version }}

    # Records the failure so build_crawlab can tell "failed" from "skipped".
    - name: Set output
      id: set_output
      if: failure()
      run: echo "failed=true" >> "$GITHUB_OUTPUT"
# Runs the Go tests of each package in the matrix against a MongoDB service.
# Runs only when backend sources or this workflow changed.
test_backend:
  name: Test backend
  needs: [setup]
  if: needs.setup.outputs.backend_changed == 'true' || needs.setup.outputs.workflow_changed == 'true'
  runs-on: ubuntu-latest
  outputs:
    # 'true' only when an earlier step of this job failed; empty otherwise.
    failed: ${{ steps.set_output.outputs.failed }}
  services:
    mongo:
      image: mongo:5
      ports:
        - "27017:27017"
  strategy:
    matrix:
      package: [core]
  steps:
    - uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version-file: '${{ matrix.package }}/go.mod'
        cache-dependency-path: '${{ matrix.package }}/go.sum'

    - name: Run tests
      working-directory: ${{ matrix.package }}
      run: |
        # Find all directories containing *_test.go files (one per line, so
        # paths are not subject to word splitting)
        mapfile -t test_dirs < <(find . -name "*_test.go" -exec dirname {} \; | sort -u)
        # Run go test on each directory
        for dir in "${test_dirs[@]}"; do
          echo "Running tests in $dir"
          go test "./$dir"
        done

    # Records the failure so build_crawlab can tell "failed" from "skipped".
    - name: Set output
      id: set_output
      if: failure()
      run: echo "failed=true" >> "$GITHUB_OUTPUT"
# Builds the backend image (repository root as context, Dockerfile from the
# backend directory) and pushes it to ghcr.io.
build_backend:
  name: Build backend
  needs: [setup, test_backend]
  # NOTE(review): this condition contains no status-check function, so the
  # implicit success() check still applies and the job is skipped whenever
  # test_backend failed or was skipped; the `|| workflow_changed` branch then
  # looks unreachable on its own. Confirm the intended behavior.
  if: needs.test_backend.result == 'success' || needs.setup.outputs.workflow_changed == 'true'
  runs-on: ubuntu-latest
  outputs:
    # 'true' only when an earlier step of this job failed; empty otherwise.
    failed: ${{ steps.set_output.outputs.failed }}
  steps:
    - uses: actions/checkout@v4

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Build and push image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ${{ env.IMAGE_PATH_CRAWLAB_BACKEND }}/Dockerfile
        push: true
        tags: ${{ env.IMAGE_NAME_CRAWLAB_BACKEND }}:${{ needs.setup.outputs.version }}

    # Records the failure so build_crawlab can tell "failed" from "skipped".
    - name: Set output
      id: set_output
      if: failure()
      run: echo "failed=true" >> "$GITHUB_OUTPUT"
# Builds the frontend image from the frontend directory and pushes it to
# ghcr.io. Runs only when frontend sources or this workflow changed.
build_frontend:
  name: Build frontend
  needs: [setup]
  if: needs.setup.outputs.frontend_changed == 'true' || needs.setup.outputs.workflow_changed == 'true'
  runs-on: ubuntu-latest
  outputs:
    # 'true' only when an earlier step of this job failed; empty otherwise.
    failed: ${{ steps.set_output.outputs.failed }}
  steps:
    # Submodules are checked out recursively for this job.
    - uses: actions/checkout@v4
      with:
        submodules: 'recursive'

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Build and push image
      uses: docker/build-push-action@v5
      with:
        context: ${{ env.IMAGE_PATH_CRAWLAB_FRONTEND }}
        push: true
        tags: ${{ env.IMAGE_NAME_CRAWLAB_FRONTEND }}:${{ needs.setup.outputs.version }}

    # Records the failure so build_crawlab can tell "failed" from "skipped".
    - name: Set output
      id: set_output
      if: failure()
      run: echo "failed=true" >> "$GITHUB_OUTPUT"
# Builds the combined crawlab image from the root Dockerfile and pushes it
# to ghcr.io.
build_crawlab:
  name: Build crawlab
  needs: [setup, build_base_image, test_backend, build_backend, build_frontend]
  # always() lets this job run even when some upstream jobs were skipped.
  # It still requires that no upstream job reported `failed` and that at
  # least one component (or this workflow) changed.
  if: |
    always() &&
    (
      needs.test_backend.outputs.failed != 'true' &&
      needs.build_backend.outputs.failed != 'true' &&
      needs.build_frontend.outputs.failed != 'true' &&
      needs.build_base_image.outputs.failed != 'true'
    ) &&
    (
      needs.setup.outputs.backend_changed == 'true' ||
      needs.setup.outputs.frontend_changed == 'true' ||
      needs.setup.outputs.docker_changed == 'true' ||
      needs.setup.outputs.base_image_changed == 'true' ||
      needs.setup.outputs.workflow_changed == 'true'
    )
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Update Dockerfile
      run: |
        # Rewrite `crawlabteam/<name>:` image references in the Dockerfile to
        # the matching ghcr.io/<owner>/<name>: image (first match per line).
        for name in crawlab-base crawlab-backend crawlab-frontend; do
          sed -i "s|crawlabteam/${name}:|ghcr.io/${GITHUB_REPOSITORY_OWNER}/${name}:|" Dockerfile
        done

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Build and push image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile
        build-args: |
          CRAWLAB_TAG=${{ needs.setup.outputs.version }}
        push: true
        tags: |
          ${{ env.IMAGE_NAME_CRAWLAB_GH }}:${{ needs.setup.outputs.version }}
# Starts MongoDB plus a master and a worker container of the freshly built
# crawlab image as services, then runs the e2e test image against the master.
test_crawlab:
  name: Test crawlab
  needs: [setup, build_crawlab]
  if: ${{ always() && needs.build_crawlab.result == 'success' }}
  runs-on: ubuntu-latest
  # NOTE(review): service images are pulled when the job starts, before the
  # ghcr.io login step below runs; if the crawlab package is private the
  # services would need a `credentials` entry. Confirm package visibility.
  services:
    mongo:
      image: mongo:5
      options: >-
        --health-cmd "mongosh --eval 'db.adminCommand(\"ping\")' || exit 1"
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
    master:
      image: ghcr.io/${{ github.repository_owner }}/crawlab:${{ needs.setup.outputs.version }}
      env:
        # Quoted so the values stay the literal strings Y / N / 27017 and are
        # never read as YAML booleans or integers.
        CRAWLAB_NODE_MASTER: "Y"
        CRAWLAB_MONGO_HOST: mongo
        CRAWLAB_MONGO_PORT: "27017"
      ports:
        - "8080:8080"
    worker:
      image: ghcr.io/${{ github.repository_owner }}/crawlab:${{ needs.setup.outputs.version }}
      env:
        CRAWLAB_NODE_MASTER: "N"
        CRAWLAB_MASTER_HOST: master
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Pull e2e test image
      run: |
        docker pull ${{ env.E2E_TESTS_IMAGE_NAME }}:latest

    # Host networking lets the test container reach the master service via
    # the 8080 port published on the runner.
    - name: Run e2e test image
      run: |
        docker run --network host \
          -e BASE_URL=http://localhost:8080 \
          -e TEST_SCRIPT=${{ needs.setup.outputs.test_script }} \
          -v ${{ env.E2E_TESTS_WORKSPACE }}:/app/playwright-report \
          ${{ env.E2E_TESTS_IMAGE_NAME }}:latest

    # Uploaded on success and on failure, but not when the run is cancelled.
    - name: Upload test results
      if: ${{ success() || failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: playwright-report-${{ needs.setup.outputs.version }}
        path: ${{ env.E2E_TESTS_WORKSPACE }}
        retention-days: 1
        overwrite: true
# Re-tags the tested ghcr.io crawlab image and pushes it to each external
# registry in the matrix.
push_images:
  name: Push images
  if: ${{ always() && needs.test_crawlab.result == 'success' }}
  needs: [setup, test_crawlab]
  runs-on: ubuntu-latest
  strategy:
    matrix:
      registry: [dockerhub, tencent]
  steps:
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Pull Docker image from GitHub Container Registry
      run: docker pull ${{ env.IMAGE_NAME_CRAWLAB_GH }}:${{ needs.setup.outputs.version }}

    # `a && b || c && d` selects the per-registry value for the matrix entry.
    - name: Login to DockerHub or Tencent Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ (matrix.registry == 'dockerhub' && 'https://index.docker.io/v1/') || (matrix.registry == 'tencent' && 'ccr.ccs.tencentyun.com') }}
        username: ${{ (matrix.registry == 'dockerhub' && secrets.DOCKER_USERNAME) || (matrix.registry == 'tencent' && secrets.DOCKER_TENCENT_USERNAME) }}
        password: ${{ (matrix.registry == 'dockerhub' && secrets.DOCKER_PASSWORD) || (matrix.registry == 'tencent' && secrets.DOCKER_TENCENT_PASSWORD) }}

    # DockerHub is always pushed; Tencent is skipped for the develop branch.
    - name: Tag and push image
      if: ${{ matrix.registry == 'dockerhub' || (matrix.registry == 'tencent' && github.ref != 'refs/heads/develop') }}
      # Image references are resolved once here and passed as env vars, so the
      # script body contains no interpolated expressions.
      env:
        SOURCE_IMAGE: ${{ env.IMAGE_NAME_CRAWLAB_GH }}:${{ needs.setup.outputs.version }}
        TARGET_IMAGE: ${{ (matrix.registry == 'dockerhub' && env.IMAGE_NAME_CRAWLAB_DOCKERHUB) || (matrix.registry == 'tencent' && env.IMAGE_NAME_CRAWLAB_TENCENT) }}:${{ needs.setup.outputs.version }}
      run: |
        docker tag "$SOURCE_IMAGE" "$TARGET_IMAGE"
        docker push "$TARGET_IMAGE"