Skip to content

Commit 72b6698

Browse files
committed
build-workflow
1 parent 86f456c commit 72b6698

File tree

1 file changed

+293
-0
lines changed

1 file changed

+293
-0
lines changed
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
name: Docker Build, Scan, Test

on:
  # Manual runs may choose which profile the smoke-test uses.
  workflow_dispatch:
    inputs:
      profileName:
        description: "Profile name for the smoke-test. Defaults to quickstart-consumers if not specified"
        type: string
        required: false
        default: "quickstart-consumers"
  # NOTE(review): only this feature branch triggers on push, while the publish
  # check in the setup job tests for a push to refs/heads/master -- confirm
  # whether master should be listed here as well.
  push:
    branches:
      - cr-oss-web-react-build-caching
  # Every pull request, whatever its base branch.
  pull_request:
    branches:
      - "**"
  release:
    types:
      - published
18+
concurrency:
  # Pull requests share one group per PR number, so a newer run cancels the
  # in-progress run of the same PR. Every other event falls back to
  # `github.run_id`, which is unique per run (unlike `github.ref`), so those
  # runs (e.g. on master) never cancel each other; keeping them makes
  # reproducing issues easier.
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}"
  cancel-in-progress: true
25+
env:
  # Registry namespace handed to gradle as -PdockerRegistry.
  DOCKER_REGISTRY: 'acryldata'
  # Uses the workflow_dispatch input when present, otherwise the default profile.
  PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"

  # Depot settings: the setup job selects depot runners only when DOCKER_CACHE
  # is "DEPOT" and DEPOT_PROJECT_ID is non-empty.
  DOCKER_CACHE: 'DEPOT'
  DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
  DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
33+
# Token scopes granted to the jobs of this workflow.
permissions:
  # Read-only access to the repository contents.
  contents: read
  # Lets jobs request an OIDC token -- presumably for Depot authentication; TODO confirm.
  id-token: write
37+
38+
jobs:
# Computes the tags, publish flags, change-detection flags, runner labels and
# cache keys that other jobs read through `needs.setup.outputs.*`.
setup:
  runs-on: depot-ubuntu-24.04-small
  outputs:
    # TODO: Many of the vars below should not be required anymore.
    tag: ${{ steps.tag.outputs.tag }}
    slim_tag: ${{ steps.tag.outputs.slim_tag }}
    full_tag: ${{ steps.tag.outputs.full_tag }}
    short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
    unique_tag: ${{ steps.tag.outputs.unique_tag }}
    unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
    unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
    docker-login: ${{ steps.docker-login.outputs.docker-login }}
    publish: ${{ steps.publish.outputs.publish }}
    pr-publish: ${{ steps.pr-publish.outputs.publish }}
    python_release_version: ${{ steps.tag.outputs.python_release_version }}
    branch_name: ${{ steps.tag.outputs.branch_name }}
    repository_name: ${{ steps.tag.outputs.repository_name }}
    # The `|| github.event_name != 'pull_request'` clauses force the flag to
    # true for every non-PR event.
    frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
    actions_change: ${{ steps.ci-optimize.outputs.actions-change == 'true' || github.event_name != 'pull_request' }}
    ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name != 'pull_request' }}
    ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }}
    backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request' }}
    frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
    ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
    backend_only: ${{ steps.ci-optimize.outputs.backend-only == 'true' }}
    kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
    mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
    postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
    elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
    smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
    # Hard-coded: these two flags are always reported as "false".
    integrations_service_change: "false"
    datahub_executor_change: "false"

    build_runner_type: ${{ steps.set-runner.outputs.build_runner_type }}
    test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
    test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
    use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
    uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
    uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
    yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
    yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
  steps:
    - name: Check out the repo
      uses: acryldata/sane-checkout-action@v4

    - name: Compute Tag
      id: tag
      env:
        # On release events the ref is rebuilt from the release tag name;
        # otherwise github.ref is used as-is.
        GITHUB_REF_FALLBACK: ${{ github.event_name == 'release' && format('refs/tags/{0}', github.event.release.tag_name) || github.ref }}
        GITHUB_EVENT_NAME: ${{ github.event_name }}
      run: |
        # Supplies the get_* helpers used below; SHORT_SHA is presumably
        # defined by this script as well -- TODO confirm.
        source .github/scripts/docker_helpers.sh
        {
          echo "short_sha=${SHORT_SHA}"
          echo "tag=$(get_tag)"
          echo "slim_tag=$(get_tag_slim)"
          echo "full_tag=$(get_tag_full)"
          echo "unique_tag=$(get_unique_tag)"
          echo "unique_slim_tag=$(get_unique_tag_slim)"
          echo "unique_full_tag=$(get_unique_tag_full)"
          echo "python_release_version=$(get_python_docker_release_v)"
          echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
          echo "repository_name=${GITHUB_REPOSITORY#*/}"
        } >> "$GITHUB_OUTPUT"

    - name: Check whether docker login is possible
      id: docker-login
      env:
        # "true" only when the docker password secret is available to this run.
        ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
      run: |
        # Read the value from the step environment rather than splicing an
        # expression into the script source.
        echo "Enable Docker Login: ${ENABLE_DOCKER_LOGIN}"
        echo "docker-login=${ENABLE_DOCKER_LOGIN}" >> "$GITHUB_OUTPUT"

    - name: Check whether publishing enabled
      id: publish
      env:
        # Releases always qualify; workflow_dispatch and push qualify only on
        # master. In every case the docker password secret must be present.
        ENABLE_PUBLISH: >-
          ${{
          (github.event_name == 'release' || ((github.event_name == 'workflow_dispatch' || github.event_name == 'push') && github.ref == 'refs/heads/master'))
          && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
          }}
      run: |
        echo "Enable publish: ${ENABLE_PUBLISH}"
        echo "publish=${ENABLE_PUBLISH}" >> "$GITHUB_OUTPUT"

    - name: Check whether PR publishing enabled
      id: pr-publish
      env:
        # Pull requests labelled `publish` or `publish-docker`, provided the
        # docker password secret is present.
        ENABLE_PUBLISH: >-
          ${{
          (github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'publish') || contains(github.event.pull_request.labels.*.name, 'publish-docker')))
          && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
          }}
      run: |
        echo "Enable PR publish: ${ENABLE_PUBLISH}"
        echo "publish=${ENABLE_PUBLISH}" >> "$GITHUB_OUTPUT"

    - uses: ./.github/actions/ci-optimization
      id: ci-optimize

    - name: Determine runner type
      id: set-runner
      # This needs to handle two scenarios:
      # 1. Running on a PR from a fork. There are some auth issues that prevent us from using depot in that case.
      #    So, it's easier to just use the regular github actions cache and build all images for each parallel job running smoke test.
      #    Note, concurrency is lower when using github runners, queue times can be longer, test time is longer due to fewer parallel jobs.
      # 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
      #    Depot is used here for remote container builds in base_build and also for all runners. Depot runners support unlimited concurrency
      #    and hence short queue times and higher parallelism of smoke tests.
      run: |
        # DOCKER_CACHE and DEPOT_PROJECT_ID come from the workflow-level env.
        {
          if [[ "$DOCKER_CACHE" == "DEPOT" && -n "$DEPOT_PROJECT_ID" ]]; then
            echo "build_runner_type=depot-ubuntu-24.04-4"
            echo "test_runner_type=depot-ubuntu-24.04-4"
            echo "test_runner_type_small=depot-ubuntu-24.04-small"
            echo "use_depot_cache=true"
          else
            # GitHub-hosted fallback. Note: publishing is currently only
            # supported via depot.
            echo "build_runner_type=ubuntu-latest"
            echo "test_runner_type=ubuntu-latest"
            echo "test_runner_type_small=ubuntu-latest"
            echo "use_depot_cache=false"
          fi
        } >> "$GITHUB_OUTPUT"

    - name: Compute UV Cache Key
      id: uv-cache-key
      env:
        # Hash of the Python dependency manifests that feed the uv cache.
        DEPS_HASH: >-
          ${{ hashFiles(
          './datahub-actions/pyproject.toml',
          './datahub-actions/setup.py',
          './smoke-test/requirements.txt',
          './smoke-test/pyproject.toml',
          './metadata-ingestion/pyproject.toml',
          './metadata-ingestion/setup.py') }}
      run: |
        # RUNNER_OS is the default environment variable mirroring `runner.os`.
        key_prefix="docker-unified-${RUNNER_OS}-uv-"
        {
          echo "uv_cache_key=${key_prefix}${DEPS_HASH}"
          echo "uv_cache_key_prefix=${key_prefix}"
        } >> "$GITHUB_OUTPUT"

    - name: Compute Yarn Cache Key
      id: yarn-cache-key
      env:
        # Hash of the yarn lockfiles that feed the yarn cache.
        DEPS_HASH: ${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}
      run: |
        key_prefix="docker-unified-${RUNNER_OS}-yarn-"
        {
          echo "yarn_cache_key=${key_prefix}${DEPS_HASH}"
          echo "yarn_cache_key_prefix=${key_prefix}"
        } >> "$GITHUB_OUTPUT"
175+
# Builds the docker images through gradle and exposes the depot build id and
# the target matrix (both read from build/build-metadata.json) as job outputs.
base_build:
  name: Build all images
  runs-on: ${{ needs.setup.outputs.build_runner_type }}
  needs: setup
  # On fork, smoke test job does the build since depot cache is not available
  if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
  outputs:
    build_id: ${{ steps.capture-build-id.outputs.build_id }}
    matrix: ${{ steps.capture-build-id.outputs.matrix }}
  steps:
    - name: Set up JDK 17
      uses: actions/setup-java@v5
      with:
        distribution: "zulu"
        java-version: 17

    # Restore the uv cache; falls back to any cache sharing the key prefix.
    - uses: actions/cache/restore@v4
      with:
        path: |
          ~/.cache/uv
        key: ${{ needs.setup.outputs.uv_cache_key }}
        restore-keys: |
          ${{ needs.setup.outputs.uv_cache_key_prefix }}

    # Restore the yarn cache; falls back to any cache sharing the key prefix.
    - uses: actions/cache/restore@v4
      with:
        path: |
          ~/.cache/yarn
        key: ${{ needs.setup.outputs.yarn_cache_key }}
        restore-keys: |
          ${{ needs.setup.outputs.yarn_cache_key_prefix }}

    # Restore the gradle cache, which uses a single static key.
    - uses: actions/cache/restore@v4
      with:
        path: |
          ~/.gradle
        key: gradle-plugins-cache
        restore-keys: |
          gradle-plugins-cache

    - name: Set up Depot CLI
      if: ${{ env.DOCKER_CACHE == 'DEPOT' }}
      uses: depot/setup-action@v1

    - name: Check out the repo
      uses: acryldata/sane-checkout-action@v4
      with:
        checkout-head-only: false

    - uses: actions/setup-python@v6
      with:
        python-version: "3.11"
        cache: "pip"

    - name: Login to DockerHub
      uses: docker/login-action@v3
      if: ${{ needs.setup.outputs.docker-login == 'true' }}
      with:
        username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
        password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

    - name: Build all Images (For Smoke tests)
      if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
      # If not publishing, just a subset of images required for smoke tests is sufficient.
      # Use buildImagesAll for workflow_dispatch, otherwise buildImagesQuickstart.
      env:
        # Passed through the environment so the values are never spliced into
        # the script source.
        SETUP_TAG: ${{ needs.setup.outputs.tag }}
        SETUP_PYTHON_RELEASE_VERSION: ${{ needs.setup.outputs.python_release_version }}
      run: |
        if [[ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
          # If triggered via workflow_dispatch, this can run other quickstart variants, so let's build all images to allow that.
          # We still don't need matrixed builds since this is for smoke test only.
          BUILD_TASK=":docker:buildImagesAll"
        else
          BUILD_TASK=":docker:buildImagesQuickstart"
        fi
        ./gradlew "$BUILD_TASK" \
          -Ptag="$SETUP_TAG" \
          -PpythonDockerVersion="$SETUP_PYTHON_RELEASE_VERSION" \
          -PdockerRegistry="$DOCKER_REGISTRY"

    - name: Build all Images (Publish)
      if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
      # Since this is for publishing, we will build all images, not just those for smoke tests. But will publish only if tests pass for publish (head images, releases).
      # For pr-publish, publish images without waiting for tests to pass.
      env:
        SETUP_TAG: ${{ needs.setup.outputs.tag }}
        SETUP_SHORT_SHA: ${{ needs.setup.outputs.short_sha }}
        SETUP_PYTHON_RELEASE_VERSION: ${{ needs.setup.outputs.python_release_version }}
        # Images are pushed by this step only for pr-publish runs.
        SETUP_PR_PUBLISH: ${{ needs.setup.outputs.pr-publish }}
      run: |
        ./gradlew :docker:buildImagesAll \
          -PmatrixBuild=true \
          -Ptag="$SETUP_TAG" \
          -PshaTag="$SETUP_SHORT_SHA" \
          -PpythonDockerVersion="$SETUP_PYTHON_RELEASE_VERSION" \
          -PdockerRegistry="$DOCKER_REGISTRY" \
          -PdockerPush="$SETUP_PR_PUBLISH"

    - name: Capture build Id
      id: capture-build-id
      run: |
        # This step needs the jq command-line tool. `pip install jq` only
        # installs Python bindings, so install the CLI itself when the runner
        # image does not already ship it.
        if ! command -v jq > /dev/null 2>&1; then
          sudo apt-get update
          sudo apt-get install -y jq
        fi

        metadata_file="${GITHUB_WORKSPACE}/build/build-metadata.json"

        # Plain assignments (not command substitutions inside echo) so that a
        # failing jq call fails the step instead of emitting an empty output.
        build_id="$(jq -r '.["depot.build"]?.buildID' "$metadata_file")"
        matrix="$(jq -c '{"target":.["depot.build"].targets}' "$metadata_file")"

        {
          echo "build_id=${build_id}"
          echo "matrix=${matrix}"
        } >> "$GITHUB_OUTPUT"

    - name: Save build Metadata
      if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
      uses: actions/upload-artifact@v4
      with:
        name: build-metadata-${{ needs.setup.outputs.tag }}
        path: |
          ${{ github.workspace }}/build/build-metadata.json
          ${{ github.workspace }}/build/bake-spec-allImages.json

    # The uv and yarn caches are only written from master.
    - uses: actions/cache/save@v4
      if: ${{ github.ref == 'refs/heads/master' }}
      with:
        path: |
          ~/.cache/uv
        key: ${{ needs.setup.outputs.uv_cache_key }}

    - uses: actions/cache/save@v4
      if: ${{ github.ref == 'refs/heads/master' }}
      with:
        path: |
          ~/.cache/yarn
        key: ${{ needs.setup.outputs.yarn_cache_key }}

    # NOTE(review): unlike the two saves above, this one runs on every ref and
    # uses a static key; an existing cache entry presumably cannot be
    # overwritten under the same key, so later saves may be no-ops -- confirm
    # this is intended.
    - uses: actions/cache/save@v4
      with:
        path: |
          ~/.gradle
        key: gradle-plugins-cache

0 commit comments

Comments
 (0)