84 | 84 | repo_retry = float(os.environ.get("REPO_RETRY_DELAY", 30))
85 | 85 |
86 | 86 |
87 | | -@connect.retry(2, sqlalchemy.exc.OperationalError, wait=repo_retry) |
| 87 | +# TODO: revisit which cases should be retried after DM-50934 |
| 88 | +# TODO: catch ButlerConnectionError once it's available |
| 89 | +SQL_EXCEPTIONS = (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InterfaceError) |
| 90 | +DATASTORE_EXCEPTIONS = SQL_EXCEPTIONS + (botocore.exceptions.ClientError, ) |
| 91 | + |
| 92 | + |
| 93 | +@connect.retry(2, SQL_EXCEPTIONS, wait=repo_retry) |
88 | 94 | def get_central_butler(central_repo: str, instrument_class: str): |
89 | 95 | """Provide a Butler that can access the given repository and read and write |
90 | 96 | data for the given instrument. |
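
All of these call sites go through `connect.retry`, whose implementation is not part of this diff. As a rough sketch of the assumed semantics — retry the wrapped call up to the given number of extra attempts when one of the listed exception types is raised, sleeping `wait` seconds between tries — something like the following; the real decorator may count attempts, log, or back off differently:

```python
import functools
import time


def retry(extra_tries, exceptions, wait=0.0):
    """Minimal sketch of a connect.retry-style decorator (assumed behavior)."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(extra_tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    # Give up after the final allowed attempt.
                    if attempt == extra_tries:
                        raise
                    time.sleep(wait)
        return wrapper
    return decorator
```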
@@ -360,7 +366,7 @@ def _init_visit_definer(self): |
360 | 366 | define_visits_config.groupExposures = "one-to-one" |
361 | 367 | self.define_visits = lsst.obs.base.DefineVisitsTask(config=define_visits_config, butler=self.butler) |
362 | 368 |
363 | | - @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) |
| 369 | + @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) |
364 | 370 | def _init_governor_datasets(self, timestamp, skymap): |
365 | 371 | """Load and store the camera and skymap for later use. |
366 | 372 |
@@ -537,7 +543,7 @@ def prep_butler(self) -> None: |
537 | 543 | detector=self.visit.detector, |
538 | 544 | group=self.visit.groupId) |
539 | 545 |
540 | | - @connect.retry(2, sqlalchemy.exc.OperationalError, wait=repo_retry) |
| 546 | + @connect.retry(2, SQL_EXCEPTIONS, wait=repo_retry) |
541 | 547 | def _find_data_to_preload(self, region): |
542 | 548 | """Identify the datasets to export from the central repo. |
543 | 549 |
@@ -912,7 +918,7 @@ def _find_init_outputs(self): |
912 | 918 | _log.debug("Found %d new init-output datasets from %s.", n_datasets, run) |
913 | 919 | return datasets |
914 | 920 |
915 | | - @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) |
| 921 | + @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) |
916 | 922 | def _transfer_data(self, datasets, calibs): |
917 | 923 | """Transfer datasets and all associated collections from the central |
918 | 924 | repo to the local repo. |
@@ -1602,7 +1608,7 @@ def _get_safe_dataset_types(butler): |
1602 | 1608 | return [dstype.name for dstype in butler.registry.queryDatasetTypes(...) |
1603 | 1609 | if "detector" in dstype.dimensions] |
1604 | 1610 |
1605 | | - @connect.retry(2, (sqlalchemy.exc.OperationalError, botocore.exceptions.ClientError), wait=repo_retry) |
| 1611 | + @connect.retry(2, DATASTORE_EXCEPTIONS, wait=repo_retry) |
1606 | 1612 | def _export_subset(self, exposure_ids: set[int], |
1607 | 1613 | dataset_types: typing.Any, in_collections: typing.Any) -> None: |
1608 | 1614 | """Copy datasets associated with a processing run back to the |