Skip to content

Commit

Permalink
Merge pull request #294 from zillow/tz/AIP-8176-pvc-retry-count
Browse files: browse the repository at this point in the history
AIP-8176 PVC & Sensor retry count 7->3
  • Loading branch information
talebzeghmi authored Mar 13, 2024
2 parents b15a7fa + 7e56e10 commit 4f72b07
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 2 additions & 0 deletions metaflow/metaflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def from_conf(name, default=None):
AIP_MAX_PARALLELISM = int(from_conf("AIP_MAX_PARALLELISM", 10))
AIP_MAX_RUN_CONCURRENCY = int(from_conf("AIP_MAX_RUN_CONCURRENCY", 10))
# NOTE(review): bool() of any non-empty string is True, so if from_conf can
# return a string such as "false" for an overridden value, this flag would
# still evaluate to True -- confirm what from_conf returns here.
AIP_SHOW_METAFLOW_UI_URL = bool(from_conf("AIP_SHOW_METAFLOW_UI_URL", False))
# Retry counts handed to set_retry() in metaflow/plugins/aip/aip.py:
# AIP_PVC_CREATE_RETRY_COUNT is used for the resource built in _create_volume,
# AIP_S3_SENSOR_RETRY_COUNT for the op built in _create_s3_sensor_op.
# Both fall back to 3 when from_conf finds no configured value; int() will
# raise ValueError if a configured value is not an integer literal.
AIP_PVC_CREATE_RETRY_COUNT = int(from_conf("AIP_PVC_CREATE_RETRY_COUNT", 3))
AIP_S3_SENSOR_RETRY_COUNT = int(from_conf("AIP_S3_SENSOR_RETRY_COUNT", 3))
ZILLOW_ZODIAC_SERVICE = from_conf("ZODIAC_SERVICE")
ZILLOW_ZODIAC_TEAM = from_conf("ZODIAC_TEAM")
# NOTE(review): default here is the string "false", not a bool -- consumers
# presumably compare against a string; verify at the use site.
ZILLOW_INDIVIDUAL_NAMESPACE = from_conf("INDIVIDUAL_NAMESPACE", "false")
Expand Down
8 changes: 4 additions & 4 deletions metaflow/plugins/aip/aip.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
from metaflow.metaflow_config import (
DATASTORE_SYSROOT_S3,
AIP_TTL_SECONDS_AFTER_FINISHED,
AIP_PVC_CREATE_RETRY_COUNT,
AIP_S3_SENSOR_RETRY_COUNT,
KUBERNETES_SERVICE_ACCOUNT,
METAFLOW_USER,
ZILLOW_INDIVIDUAL_NAMESPACE,
Expand All @@ -48,8 +50,6 @@
)
from metaflow.plugins import EnvironmentDecorator, AIPInternalDecorator
from metaflow.plugins.aip.aip_constants import (
S3_SENSOR_RETRY_COUNT,
PVC_CREATE_RETRY_COUNT,
EXIT_HANDLER_RETRY_COUNT,
BACKOFF_DURATION,
BACKOFF_DURATION_INT,
Expand Down Expand Up @@ -993,7 +993,7 @@ def _create_volume(
attribute_outputs=attribute_outputs,
)
resource.set_retry(
PVC_CREATE_RETRY_COUNT,
AIP_PVC_CREATE_RETRY_COUNT,
policy="Always",
backoff_duration=BACKOFF_DURATION,
backoff_factor=RETRY_BACKOFF_FACTOR,
Expand Down Expand Up @@ -1568,7 +1568,7 @@ def _create_s3_sensor_op(

KubeflowPipelines._set_minimal_container_resources(s3_sensor_op)
s3_sensor_op.set_retry(
S3_SENSOR_RETRY_COUNT,
AIP_S3_SENSOR_RETRY_COUNT,
policy="Always",
backoff_duration=BACKOFF_DURATION,
backoff_factor=RETRY_BACKOFF_FACTOR,
Expand Down
2 changes: 0 additions & 2 deletions metaflow/plugins/aip/aip_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
SPLIT_INDEX_ENV_NAME = "SPLIT_INDEX_ENV_NAME"
INPUT_PATHS_ENV_NAME = "INPUT_PATHS_ENV_NAME"
RETRY_COUNT = "MF_ATTEMPT"
S3_SENSOR_RETRY_COUNT = 7
PVC_CREATE_RETRY_COUNT = 7
EXIT_HANDLER_RETRY_COUNT = 7
BACKOFF_DURATION_INT = 2
BACKOFF_DURATION = f"{BACKOFF_DURATION_INT}m"
Expand Down

0 comments on commit 4f72b07

Please sign in to comment.