@@ -294,6 +294,59 @@ def _create_workflow_yaml(
294
294
295
295
KubeflowPipelines ._add_archive_section_to_cards_artifacts (workflow )
296
296
297
+ if "onExit" in workflow ["spec" ]:
298
+ # replace the entrypoint template's content with the exit handler's content
299
+ """
300
+ # What it looks like beforehand...
301
+ entrypoint: helloflow
302
+ templates:
303
+ - name: exit-handler-1
304
+ dag:
305
+ tasks:
306
+ - name: end
307
+ template: end
308
+ dependencies: [start]
309
+ - {name: start, template: start}
310
+ - name: helloflow
311
+ dag:
312
+ tasks:
313
+ - {name: exit-handler-1, template: exit-handler-1}
314
+ - {name: sqs-exit-handler, template: sqs-exit-handler}
315
+ """
316
+ # find the exit-handler-1 template
317
+ exit_handler_template : dict = [
318
+ template
319
+ for template in workflow ["spec" ]["templates" ]
320
+ if template ["name" ] == "exit-handler-1"
321
+ ][0 ]
322
+
323
+ # find the entrypoint template
324
+ entrypoint_template : dict = [
325
+ template
326
+ for template in workflow ["spec" ]["templates" ]
327
+ if template ["name" ] == workflow ["spec" ]["entrypoint" ]
328
+ ][0 ]
329
+
330
+ # replace the entrypoint template with the exit handler template
331
+ entrypoint_template ["dag" ] = exit_handler_template ["dag" ]
332
+
333
+ # rename exit-handler-1 to exit-handler
334
+ exit_handler_template ["name" ] = "exit-handler"
335
+ workflow ["spec" ]["onExit" ] = "exit-handler"
336
+ exit_handler_template ["dag" ] = {
337
+ "tasks" : [
338
+ {
339
+ "name" : "sqs-exit-handler" ,
340
+ "template" : "sqs-exit-handler" ,
341
+ "dependencies" : ["notify-email-exit-handler" ],
342
+ },
343
+ {
344
+ "name" : "notify-email-exit-handler" ,
345
+ "template" : "notify-email-exit-handler" ,
346
+ },
347
+ ]
348
+ }
349
+
297
350
return workflow
298
351
299
352
@staticmethod
@@ -1239,11 +1292,18 @@ def call_build_kfp_dag(workflow_uid_op: ContainerOp):
1239
1292
)
1240
1293
1241
1294
if self .notify or self .sqs_url_on_error :
1242
- with dsl .ExitHandler (
1243
- self ._create_exit_handler_op (
1244
- flow_variables .package_commands , flow_parameters
1245
- )
1246
- ):
1295
+ op = self ._create_notify_exit_handler_op (
1296
+ flow_variables .package_commands , flow_parameters
1297
+ )
1298
+
1299
+ # The following exit handler gets created and added as a ContainerOp
1300
+ # and also as a parallel task to the Argo template "exit-handler-1"
1301
+ # (the hardcoded kfp compiler name of the exit handler)
1302
+ # We replace and rename this parallel-task DAG with a DAG of sequential steps in _create_workflow_yaml().
1303
+ self ._create_sqs_exit_handler_op (
1304
+ flow_variables .package_commands , flow_parameters
1305
+ )
1306
+ with dsl .ExitHandler (op ):
1247
1307
s3_sensor_op : Optional [ContainerOp ] = self .create_s3_sensor_op (
1248
1308
flow_variables ,
1249
1309
)
@@ -1551,12 +1611,36 @@ def _create_s3_sensor_op(
1551
1611
)
1552
1612
return s3_sensor_op
1553
1613
1554
- def _create_exit_handler_op (
1614
def _create_sqs_exit_handler_op(
    self,
    package_commands: str,
    flow_parameters: Dict,
) -> ContainerOp:
    """Build the exit-handler op that is run with ``--run_sqs_on_error``.

    Gathers the settings the handler needs into ``env_variables`` and
    delegates construction of the op to ``self._get_aip_exit_handler_op``
    under the name ``sqs-exit-handler``.

    Args:
        package_commands: passed through to ``_get_aip_exit_handler_op``.
        flow_parameters: passed through to ``_get_aip_exit_handler_op``.

    Returns:
        The op produced by ``_get_aip_exit_handler_op``.
    """
    env_variables: dict = {}

    # Forward only the config values that are actually set; look each one
    # up once instead of once for the filter and once for the value.
    for key in ["ARGO_RUN_URL_PREFIX"]:
        value = from_conf(key)
        if value:
            env_variables[key] = value

    # Forward the queue URL alongside the role ARN so the handler receives
    # both SQS settings through --env_variables_json.
    # NOTE(review): confirm aip_exit_handler reads METAFLOW_SQS_URL_ON_ERROR
    # from env_variables_json rather than from some other source.
    if self.sqs_url_on_error:
        env_variables["METAFLOW_SQS_URL_ON_ERROR"] = self.sqs_url_on_error

    if self.sqs_role_arn_on_error:
        env_variables["METAFLOW_SQS_ROLE_ARN_ON_ERROR"] = self.sqs_role_arn_on_error

    return self._get_aip_exit_handler_op(
        flow_parameters,
        env_variables,
        package_commands,
        name="sqs-exit-handler",
        flag="--run_sqs_on_error",
    )
1637
+
1638
+ def _create_notify_exit_handler_op (
1555
1639
self ,
1556
1640
package_commands : str ,
1557
1641
flow_parameters : Dict ,
1558
1642
) -> ContainerOp :
1559
- notify_variables : dict = {
1643
+ env_variables : dict = {
1560
1644
key : from_conf (key )
1561
1645
for key in [
1562
1646
"METAFLOW_NOTIFY_EMAIL_FROM" ,
@@ -1569,19 +1653,27 @@ def _create_exit_handler_op(
1569
1653
}
1570
1654
1571
1655
if self .notify_on_error :
1572
- notify_variables ["METAFLOW_NOTIFY_ON_ERROR" ] = self .notify_on_error
1656
+ env_variables ["METAFLOW_NOTIFY_ON_ERROR" ] = self .notify_on_error
1573
1657
1574
1658
if self .notify_on_success :
1575
- notify_variables ["METAFLOW_NOTIFY_ON_SUCCESS" ] = self .notify_on_success
1576
-
1577
- if self .sqs_url_on_error :
1578
- notify_variables [ "METAFLOW_SQS_URL_ON_ERROR" ] = self . sqs_url_on_error
1579
-
1580
- if self . sqs_role_arn_on_error :
1581
- notify_variables [
1582
- "METAFLOW_SQS_ROLE_ARN_ON_ERROR"
1583
- ] = self . sqs_role_arn_on_error
1659
+ env_variables ["METAFLOW_NOTIFY_ON_SUCCESS" ] = self .notify_on_success
1660
+
1661
+ return self ._get_aip_exit_handler_op (
1662
+ flow_parameters ,
1663
+ env_variables ,
1664
+ package_commands ,
1665
+ name = "notify-email-exit-handler" ,
1666
+ flag = "--run_email_notify" ,
1667
+ )
1584
1668
1669
+ def _get_aip_exit_handler_op (
1670
+ self ,
1671
+ flow_parameters : Dict ,
1672
+ env_variables : Dict ,
1673
+ package_commands : str ,
1674
+ name : str ,
1675
+ flag : str = "" ,
1676
+ ) -> ContainerOp :
1585
1677
# when there are no flow parameters argo complains
1586
1678
# that {{workflow.parameters}} failed to resolve
1587
1679
# see https://github.com/argoproj/argo-workflows/issues/6036
@@ -1594,19 +1686,19 @@ def _create_exit_handler_op(
1594
1686
" && python -m metaflow.plugins.aip.aip_exit_handler"
1595
1687
f" --flow_name { self .name } "
1596
1688
" --run_id {{workflow.name}}"
1597
- f" --notify_variables_json { json .dumps (json .dumps (notify_variables ))} "
1689
+ f" --env_variables_json { json .dumps (json .dumps (env_variables ))} "
1598
1690
f" --flow_parameters_json { flow_parameters_json if flow_parameters else '{}' } "
1599
1691
" --status {{workflow.status}}"
1692
+ f" { flag } "
1600
1693
),
1601
1694
]
1602
-
1603
1695
return (
1604
1696
dsl .ContainerOp (
1605
- name = "exit_handler" ,
1697
+ name = name ,
1606
1698
image = self .base_image ,
1607
1699
command = exit_handler_command ,
1608
1700
)
1609
- .set_display_name ("exit_handler" )
1701
+ .set_display_name (name )
1610
1702
.set_retry (
1611
1703
EXIT_HANDLER_RETRY_COUNT ,
1612
1704
policy = "Always" ,
0 commit comments