Skip to content

Commit d60da1b

Browse files
committed
Cache dryrun results locally
1 parent 5cb73be commit d60da1b

File tree

7 files changed

+415
-15
lines changed

7 files changed

+415
-15
lines changed

.circleci/workflows.yml

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,16 @@ executors:
3535
machine:
3636
image: ubuntu-2404:current
3737

38+
# Environment variables that enable the dry run cache and set its directory (shared via the cache_environment anchor)
39+
environment: &cache_environment
40+
DRYRUN_CACHE_ENABLED: "true"
41+
DRYRUN_CACHE_DIR: "/tmp/workspace/dryrun_cache"
42+
3843
jobs:
3944
build:
4045
docker: &docker
4146
- image: python:<< pipeline.parameters.python-version >>
47+
environment: *cache_environment
4248
steps:
4349
- checkout
4450
- &restore_venv_cache
@@ -166,7 +172,7 @@ jobs:
166172
- checkout
167173
- *restore_venv_cache
168174
- *build
169-
- &attach_generated_sql
175+
- &attach_workspace_artifacts
170176
attach_workspace:
171177
at: /tmp/workspace
172178
- &copy_staged_sql
@@ -181,6 +187,11 @@ jobs:
181187
name: Run SQL tests
182188
command: |
183189
PATH="venv/bin:$PATH" script/entrypoint -m sql -n 8 -p no:bigquery_etl.pytest_plugin.routine
190+
- &persist_dryrun_cache
191+
persist_to_workspace:
192+
root: /tmp/workspace
193+
paths:
194+
- dryrun_cache
184195
- &copy_debug_sql
185196
run:
186197
name: Copy generated SQL to save for debugging
@@ -214,7 +225,7 @@ jobs:
214225
- checkout
215226
- *restore_venv_cache
216227
- *build
217-
- *attach_generated_sql
228+
- *attach_workspace_artifacts
218229
- *copy_staged_sql
219230
- *authenticate
220231
- run:
@@ -244,6 +255,7 @@ jobs:
244255
echo $PATHS
245256
PATH="venv/bin:$PATH" script/bqetl dryrun --validate-schemas $PATHS
246257
# yamllint enable rule:line-length
258+
- *persist_dryrun_cache
247259
- *copy_debug_sql
248260
- *store_debug_artifacts
249261
- unless:
@@ -262,7 +274,7 @@ jobs:
262274
- checkout
263275
- *restore_venv_cache
264276
- *build
265-
- *attach_generated_sql
277+
- *attach_workspace_artifacts
266278
- &copy_generated_sql
267279
run:
268280
name: Move generated-sql into place
@@ -293,7 +305,7 @@ jobs:
293305
name: Validate workgroup access configuration on main
294306
command: |
295307
PATH="venv/bin:$PATH" script/bqetl metadata validate-workgroups sql/
296-
- *attach_generated_sql
308+
- *attach_workspace_artifacts
297309
- *copy_staged_sql
298310
- *authenticate
299311
- run:
@@ -341,7 +353,7 @@ jobs:
341353
- checkout
342354
- *restore_venv_cache
343355
- *build
344-
- *attach_generated_sql
356+
- *attach_workspace_artifacts
345357
- *authenticate
346358
- &add_private_bigquery_etl_ssh_keys
347359
add_ssh_keys:
@@ -391,7 +403,7 @@ jobs:
391403
name: Pull telemetry-airflow
392404
command: |
393405
git clone https://github.com/mozilla/telemetry-airflow.git ~/telemetry-airflow
394-
- *attach_generated_sql
406+
- *attach_workspace_artifacts
395407
- *copy_generated_sql
396408
- run:
397409
name: Replace telemetry-airflow DAGs with BigQuery ETL DAGs
@@ -441,7 +453,7 @@ jobs:
441453
- checkout
442454
- *restore_venv_cache
443455
- *build
444-
- *attach_generated_sql
456+
- *attach_workspace_artifacts
445457
- *copy_staged_sql
446458
- run:
447459
name: Run routine tests
@@ -468,7 +480,7 @@ jobs:
468480
- checkout
469481
- *restore_venv_cache
470482
- *build
471-
- *attach_generated_sql
483+
- *attach_workspace_artifacts
472484
- *copy_staged_sql
473485
- *authenticate
474486
- run:
@@ -491,7 +503,7 @@ jobs:
491503
- checkout
492504
- *restore_venv_cache
493505
- *build
494-
- *attach_generated_sql
506+
- *attach_workspace_artifacts
495507
- add_ssh_keys:
496508
fingerprints:
497509
- "22:b9:3c:1b:82:ab:3f:e4:b5:79:70:d1:7b:b9:28:d2"
@@ -520,6 +532,7 @@ jobs:
520532
- *restore_venv_cache
521533
- *build
522534
- *authenticate
535+
- *attach_workspace_artifacts
523536
- run:
524537
name: Generate SQL content
525538
command: |
@@ -614,6 +627,7 @@ jobs:
614627
root: /tmp/workspace
615628
paths:
616629
- generated-sql
630+
- dryrun_cache
617631
- unless:
618632
condition: *validate-sql-or-routines
619633
steps:
@@ -628,7 +642,7 @@ jobs:
628642
- checkout
629643
- *restore_venv_cache
630644
- *build
631-
- *attach_generated_sql
645+
- *attach_workspace_artifacts
632646
- *copy_generated_sql
633647
- add_ssh_keys:
634648
fingerprints:
@@ -664,6 +678,7 @@ jobs:
664678
root: /tmp/workspace
665679
paths:
666680
- staged-generated-sql
681+
- dryrun_cache
667682
- *copy_debug_sql
668683
- *store_debug_artifacts
669684
- unless:
@@ -678,7 +693,7 @@ jobs:
678693
or:
679694
- << pipeline.parameters.deploy >>
680695
steps:
681-
- *attach_generated_sql
696+
- *attach_workspace_artifacts
682697
- add_ssh_keys:
683698
fingerprints:
684699
- "22:b9:3c:1b:82:ab:3f:e4:b5:79:70:d1:7b:b9:28:d2"
@@ -790,7 +805,7 @@ jobs:
790805
- checkout
791806
- *restore_venv_cache
792807
- *build
793-
- *attach_generated_sql
808+
- *attach_workspace_artifacts
794809
- *authenticate
795810
- *add_private_bigquery_etl_ssh_keys
796811
- run:
@@ -824,6 +839,7 @@ jobs:
824839
paths:
825840
- private-generated-sql
826841
- PRIVATE_BIGQUERY_ETL_SHA
842+
- dryrun_cache
827843
- unless:
828844
condition: *validate-sql
829845
steps:
@@ -834,7 +850,7 @@ jobs:
834850
- when:
835851
condition: *deploy
836852
steps:
837-
- *attach_generated_sql
853+
- *attach_workspace_artifacts
838854
- *add_private_bigquery_etl_ssh_keys
839855
- run:
840856
name: Push to private-generated-sql branch
@@ -871,7 +887,7 @@ jobs:
871887
condition: *deploy
872888
steps:
873889
- checkout
874-
- *attach_generated_sql
890+
- *attach_workspace_artifacts
875891
- run:
876892
name: Move generated-sql into place
877893
command: |

bigquery_etl/cli/dryrun.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,33 @@
6464
help="GCP project to perform dry run in when --use_cloud_function=False",
6565
default=ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod"),
6666
)
67+
@click.option(
68+
"--use-cache/--no-cache",
69+
help="Enable or disable local caching of dry run results. Default is --no-cache.",
70+
default=ConfigLoader.get("dry_run", "cache_enabled", fallback=False),
71+
)
72+
@click.option(
73+
"--cache-ttl-hours",
74+
help="Cache time-to-live in hours. Default is 1 hour.",
75+
type=int,
76+
default=ConfigLoader.get("dry_run", "cache_ttl_hours", fallback=1),
77+
)
78+
@click.option(
79+
"--cache-dir",
80+
help="Directory to store cache files. Default is system temp directory.",
81+
type=click.Path(),
82+
default=None,
83+
)
6784
@billing_project_option()
6885
def dryrun(
6986
paths: List[str],
7087
use_cloud_function: bool,
7188
validate_schemas: bool,
7289
respect_skip: bool,
7390
project: str,
91+
use_cache: bool,
92+
cache_ttl_hours: int,
93+
cache_dir: str,
7494
billing_project: str,
7595
):
7696
"""Perform a dry run."""
@@ -118,6 +138,9 @@ def dryrun(
118138
use_cloud_function,
119139
respect_skip,
120140
validate_schemas,
141+
use_cache,
142+
cache_ttl_hours,
143+
cache_dir,
121144
credentials=credentials,
122145
id_token=id_token,
123146
billing_project=billing_project,
@@ -141,6 +164,9 @@ def _sql_file_valid(
141164
use_cloud_function,
142165
respect_skip,
143166
validate_schemas,
167+
use_cache,
168+
cache_ttl_hours,
169+
cache_dir,
144170
sqlfile,
145171
credentials,
146172
id_token,
@@ -154,6 +180,9 @@ def _sql_file_valid(
154180
respect_skip=respect_skip,
155181
id_token=id_token,
156182
billing_project=billing_project,
183+
cache_enabled=use_cache,
184+
cache_ttl_hours=cache_ttl_hours,
185+
cache_dir=cache_dir,
157186
)
158187
if validate_schemas:
159188
try:

0 commit comments

Comments
 (0)