@@ -252,6 +252,7 @@ def create(
252
252
reservation_affinity_values : Optional [List [str ]] = None ,
253
253
spot : bool = False ,
254
254
required_replica_count : Optional [int ] = 0 ,
255
+ multihost_gpu_node_count : Optional [int ] = None ,
255
256
) -> "DeploymentResourcePool" :
256
257
"""Creates a new DeploymentResourcePool.
257
258
@@ -332,6 +333,9 @@ def create(
332
333
set, the model deploy/mutate operation will succeed once
333
334
available_replica_count reaches required_replica_count, and the
334
335
rest of the replicas will be retried.
336
+ multihost_gpu_node_count (int):
337
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
338
+ Required for multihost GPU deployments.
335
339
336
340
Returns:
337
341
DeploymentResourcePool
@@ -363,6 +367,7 @@ def create(
363
367
sync = sync ,
364
368
create_request_timeout = create_request_timeout ,
365
369
required_replica_count = required_replica_count ,
370
+ multihost_gpu_node_count = multihost_gpu_node_count ,
366
371
)
367
372
368
373
@classmethod
@@ -389,6 +394,7 @@ def _create(
389
394
sync = True ,
390
395
create_request_timeout : Optional [float ] = None ,
391
396
required_replica_count : Optional [int ] = 0 ,
397
+ multihost_gpu_node_count : Optional [int ] = None ,
392
398
) -> "DeploymentResourcePool" :
393
399
"""Creates a new DeploymentResourcePool.
394
400
@@ -472,6 +478,9 @@ def _create(
472
478
set, the model deploy/mutate operation will succeed once
473
479
available_replica_count reaches required_replica_count, and the
474
480
rest of the replicas will be retried.
481
+ multihost_gpu_node_count (int):
482
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
483
+ Required for multihost GPU deployments.
475
484
476
485
Returns:
477
486
DeploymentResourcePool
@@ -505,6 +514,7 @@ def _create(
505
514
[autoscaling_metric_spec ]
506
515
)
507
516
517
+ # TODO(joelletiangco): accelerator_type present here
508
518
if accelerator_type and accelerator_count :
509
519
utils .validate_accelerator_type (accelerator_type )
510
520
machine_spec .accelerator_type = accelerator_type
@@ -1327,6 +1337,7 @@ def deploy(
1327
1337
accelerator_type : Optional [str ] = None ,
1328
1338
accelerator_count : Optional [int ] = None ,
1329
1339
tpu_topology : Optional [str ] = None ,
1340
+ multihost_gpu_node_count : Optional [int ] = None ,
1330
1341
service_account : Optional [str ] = None ,
1331
1342
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
1332
1343
explanation_parameters : Optional [
@@ -1399,6 +1410,9 @@ def deploy(
1399
1410
tpu_topology (str):
1400
1411
Optional. The TPU topology to use for the DeployedModel.
1401
1412
Required for CloudTPU multihost deployments.
1413
+ multihost_gpu_node_count (int):
1414
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1415
+ Required for multihost GPU deployments.
1402
1416
service_account (str):
1403
1417
The service account that the DeployedModel's container runs as. Specify the
1404
1418
email address of the service account. If this service account is not
@@ -1500,6 +1514,7 @@ def deploy(
1500
1514
accelerator_type = accelerator_type ,
1501
1515
accelerator_count = accelerator_count ,
1502
1516
tpu_topology = tpu_topology ,
1517
+ multihost_gpu_node_count = multihost_gpu_node_count ,
1503
1518
reservation_affinity_type = reservation_affinity_type ,
1504
1519
reservation_affinity_key = reservation_affinity_key ,
1505
1520
reservation_affinity_values = reservation_affinity_values ,
@@ -1532,6 +1547,7 @@ def _deploy(
1532
1547
accelerator_type : Optional [str ] = None ,
1533
1548
accelerator_count : Optional [int ] = None ,
1534
1549
tpu_topology : Optional [str ] = None ,
1550
+ multihost_gpu_node_count : Optional [int ] = None ,
1535
1551
reservation_affinity_type : Optional [str ] = None ,
1536
1552
reservation_affinity_key : Optional [str ] = None ,
1537
1553
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -1601,6 +1617,9 @@ def _deploy(
1601
1617
tpu_topology (str):
1602
1618
Optional. The TPU topology to use for the DeployedModel.
1603
1619
Required for CloudTPU multihost deployments.
1620
+ multihost_gpu_node_count (int):
1621
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1622
+ Required for multihost GPU deployments.
1604
1623
reservation_affinity_type (str):
1605
1624
Optional. The type of reservation affinity.
1606
1625
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1686,6 +1705,7 @@ def _deploy(
1686
1705
accelerator_type = accelerator_type ,
1687
1706
accelerator_count = accelerator_count ,
1688
1707
tpu_topology = tpu_topology ,
1708
+ multihost_gpu_node_count = multihost_gpu_node_count ,
1689
1709
reservation_affinity_type = reservation_affinity_type ,
1690
1710
reservation_affinity_key = reservation_affinity_key ,
1691
1711
reservation_affinity_values = reservation_affinity_values ,
@@ -1725,6 +1745,7 @@ def _deploy_call(
1725
1745
accelerator_type : Optional [str ] = None ,
1726
1746
accelerator_count : Optional [int ] = None ,
1727
1747
tpu_topology : Optional [str ] = None ,
1748
+ multihost_gpu_node_count : Optional [int ] = None ,
1728
1749
reservation_affinity_type : Optional [str ] = None ,
1729
1750
reservation_affinity_key : Optional [str ] = None ,
1730
1751
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -1803,6 +1824,9 @@ def _deploy_call(
1803
1824
tpu_topology (str):
1804
1825
Optional. The TPU topology to use for the DeployedModel.
1805
1826
Required for CloudTPU multihost deployments.
1827
+ multihost_gpu_node_count (int):
1828
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1829
+ Required for multihost GPU deployments.
1806
1830
reservation_affinity_type (str):
1807
1831
Optional. The type of reservation affinity.
1808
1832
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -2030,6 +2054,9 @@ def _deploy_call(
2030
2054
if tpu_topology is not None :
2031
2055
machine_spec .tpu_topology = tpu_topology
2032
2056
2057
+ if multihost_gpu_node_count is not None :
2058
+ machine_spec .multihost_gpu_node_count = multihost_gpu_node_count
2059
+
2033
2060
dedicated_resources .machine_spec = machine_spec
2034
2061
deployed_model .dedicated_resources = dedicated_resources
2035
2062
if fast_tryout_enabled :
@@ -4012,6 +4039,7 @@ def deploy(
4012
4039
accelerator_type : Optional [str ] = None ,
4013
4040
accelerator_count : Optional [int ] = None ,
4014
4041
tpu_topology : Optional [str ] = None ,
4042
+ multihost_gpu_node_count : Optional [int ] = None ,
4015
4043
service_account : Optional [str ] = None ,
4016
4044
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
4017
4045
explanation_parameters : Optional [
@@ -4089,6 +4117,9 @@ def deploy(
4089
4117
tpu_topology (str):
4090
4118
Optional. The TPU topology to use for the DeployedModel.
4091
4119
Required for CloudTPU multihost deployments.
4120
+ multihost_gpu_node_count (int):
4121
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
4122
+ Required for multihost GPU deployments.
4092
4123
service_account (str):
4093
4124
The service account that the DeployedModel's container runs as. Specify the
4094
4125
email address of the service account. If this service account is not
@@ -4190,6 +4221,7 @@ def deploy(
4190
4221
accelerator_type = accelerator_type ,
4191
4222
accelerator_count = accelerator_count ,
4192
4223
tpu_topology = tpu_topology ,
4224
+ multihost_gpu_node_count = multihost_gpu_node_count ,
4193
4225
reservation_affinity_type = reservation_affinity_type ,
4194
4226
reservation_affinity_key = reservation_affinity_key ,
4195
4227
reservation_affinity_values = reservation_affinity_values ,
@@ -5241,6 +5273,7 @@ def deploy(
5241
5273
accelerator_type : Optional [str ] = None ,
5242
5274
accelerator_count : Optional [int ] = None ,
5243
5275
tpu_topology : Optional [str ] = None ,
5276
+ multihost_gpu_node_count : Optional [int ] = None ,
5244
5277
service_account : Optional [str ] = None ,
5245
5278
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
5246
5279
explanation_parameters : Optional [
@@ -5318,6 +5351,9 @@ def deploy(
5318
5351
tpu_topology (str):
5319
5352
Optional. The TPU topology to use for the DeployedModel.
5320
5353
Required for CloudTPU multihost deployments.
5354
+ multihost_gpu_node_count (int):
5355
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
5356
+ Required for multihost GPU deployments.
5321
5357
service_account (str):
5322
5358
The service account that the DeployedModel's container runs as. Specify the
5323
5359
email address of the service account. If this service account is not
@@ -5462,6 +5498,7 @@ def deploy(
5462
5498
accelerator_type = accelerator_type ,
5463
5499
accelerator_count = accelerator_count ,
5464
5500
tpu_topology = tpu_topology ,
5501
+ multihost_gpu_node_count = multihost_gpu_node_count ,
5465
5502
reservation_affinity_type = reservation_affinity_type ,
5466
5503
reservation_affinity_key = reservation_affinity_key ,
5467
5504
reservation_affinity_values = reservation_affinity_values ,
@@ -5505,6 +5542,7 @@ def _deploy(
5505
5542
accelerator_type : Optional [str ] = None ,
5506
5543
accelerator_count : Optional [int ] = None ,
5507
5544
tpu_topology : Optional [str ] = None ,
5545
+ multihost_gpu_node_count : Optional [int ] = None ,
5508
5546
reservation_affinity_type : Optional [str ] = None ,
5509
5547
reservation_affinity_key : Optional [str ] = None ,
5510
5548
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -5579,6 +5617,9 @@ def _deploy(
5579
5617
tpu_topology (str):
5580
5618
Optional. The TPU topology to use for the DeployedModel.
5581
5619
Required for CloudTPU multihost deployments.
5620
+ multihost_gpu_node_count (int):
5621
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
5622
+ Required for multihost GPU deployments.
5582
5623
reservation_affinity_type (str):
5583
5624
Optional. The type of reservation affinity.
5584
5625
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -5713,6 +5754,7 @@ def _deploy(
5713
5754
accelerator_type = accelerator_type ,
5714
5755
accelerator_count = accelerator_count ,
5715
5756
tpu_topology = tpu_topology ,
5757
+ multihost_gpu_node_count = multihost_gpu_node_count ,
5716
5758
reservation_affinity_type = reservation_affinity_type ,
5717
5759
reservation_affinity_key = reservation_affinity_key ,
5718
5760
reservation_affinity_values = reservation_affinity_values ,
0 commit comments