@@ -1327,6 +1327,7 @@ def deploy(
1327
1327
accelerator_type : Optional [str ] = None ,
1328
1328
accelerator_count : Optional [int ] = None ,
1329
1329
tpu_topology : Optional [str ] = None ,
1330
+ multihost_gpu_node_count : Optional [int ] = None ,
1330
1331
service_account : Optional [str ] = None ,
1331
1332
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
1332
1333
explanation_parameters : Optional [
@@ -1399,6 +1400,9 @@ def deploy(
1399
1400
tpu_topology (str):
1400
1401
Optional. The TPU topology to use for the DeployedModel.
1401
1402
Required for CloudTPU multihost deployments.
1403
+ multihost_gpu_node_count (int):
1404
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1405
+ Required for multihost GPU deployments.
1402
1406
service_account (str):
1403
1407
The service account that the DeployedModel's container runs as. Specify the
1404
1408
email address of the service account. If this service account is not
@@ -1500,6 +1504,7 @@ def deploy(
1500
1504
accelerator_type = accelerator_type ,
1501
1505
accelerator_count = accelerator_count ,
1502
1506
tpu_topology = tpu_topology ,
1507
+ multihost_gpu_node_count = multihost_gpu_node_count ,
1503
1508
reservation_affinity_type = reservation_affinity_type ,
1504
1509
reservation_affinity_key = reservation_affinity_key ,
1505
1510
reservation_affinity_values = reservation_affinity_values ,
@@ -1532,6 +1537,7 @@ def _deploy(
1532
1537
accelerator_type : Optional [str ] = None ,
1533
1538
accelerator_count : Optional [int ] = None ,
1534
1539
tpu_topology : Optional [str ] = None ,
1540
+ multihost_gpu_node_count : Optional [int ] = None ,
1535
1541
reservation_affinity_type : Optional [str ] = None ,
1536
1542
reservation_affinity_key : Optional [str ] = None ,
1537
1543
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -1601,6 +1607,9 @@ def _deploy(
1601
1607
tpu_topology (str):
1602
1608
Optional. The TPU topology to use for the DeployedModel.
1603
1609
Required for CloudTPU multihost deployments.
1610
+ multihost_gpu_node_count (int):
1611
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1612
+ Required for multihost GPU deployments.
1604
1613
reservation_affinity_type (str):
1605
1614
Optional. The type of reservation affinity.
1606
1615
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1686,6 +1695,7 @@ def _deploy(
1686
1695
accelerator_type = accelerator_type ,
1687
1696
accelerator_count = accelerator_count ,
1688
1697
tpu_topology = tpu_topology ,
1698
+ multihost_gpu_node_count = multihost_gpu_node_count ,
1689
1699
reservation_affinity_type = reservation_affinity_type ,
1690
1700
reservation_affinity_key = reservation_affinity_key ,
1691
1701
reservation_affinity_values = reservation_affinity_values ,
@@ -1725,6 +1735,7 @@ def _deploy_call(
1725
1735
accelerator_type : Optional [str ] = None ,
1726
1736
accelerator_count : Optional [int ] = None ,
1727
1737
tpu_topology : Optional [str ] = None ,
1738
+ multihost_gpu_node_count : Optional [int ] = None ,
1728
1739
reservation_affinity_type : Optional [str ] = None ,
1729
1740
reservation_affinity_key : Optional [str ] = None ,
1730
1741
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -1803,6 +1814,9 @@ def _deploy_call(
1803
1814
tpu_topology (str):
1804
1815
Optional. The TPU topology to use for the DeployedModel.
1805
1816
Required for CloudTPU multihost deployments.
1817
+ multihost_gpu_node_count (int):
1818
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
1819
+ Required for multihost GPU deployments.
1806
1820
reservation_affinity_type (str):
1807
1821
Optional. The type of reservation affinity.
1808
1822
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -2030,6 +2044,9 @@ def _deploy_call(
2030
2044
if tpu_topology is not None :
2031
2045
machine_spec .tpu_topology = tpu_topology
2032
2046
2047
+ if multihost_gpu_node_count is not None :
2048
+ machine_spec .multihost_gpu_node_count = multihost_gpu_node_count
2049
+
2033
2050
dedicated_resources .machine_spec = machine_spec
2034
2051
deployed_model .dedicated_resources = dedicated_resources
2035
2052
if fast_tryout_enabled :
@@ -4012,6 +4029,7 @@ def deploy(
4012
4029
accelerator_type : Optional [str ] = None ,
4013
4030
accelerator_count : Optional [int ] = None ,
4014
4031
tpu_topology : Optional [str ] = None ,
4032
+ multihost_gpu_node_count : Optional [int ] = None ,
4015
4033
service_account : Optional [str ] = None ,
4016
4034
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
4017
4035
explanation_parameters : Optional [
@@ -4089,6 +4107,9 @@ def deploy(
4089
4107
tpu_topology (str):
4090
4108
Optional. The TPU topology to use for the DeployedModel.
4091
4109
Required for CloudTPU multihost deployments.
4110
+ multihost_gpu_node_count (int):
4111
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
4112
+ Required for multihost GPU deployments.
4092
4113
service_account (str):
4093
4114
The service account that the DeployedModel's container runs as. Specify the
4094
4115
email address of the service account. If this service account is not
@@ -4190,6 +4211,7 @@ def deploy(
4190
4211
accelerator_type = accelerator_type ,
4191
4212
accelerator_count = accelerator_count ,
4192
4213
tpu_topology = tpu_topology ,
4214
+ multihost_gpu_node_count = multihost_gpu_node_count ,
4193
4215
reservation_affinity_type = reservation_affinity_type ,
4194
4216
reservation_affinity_key = reservation_affinity_key ,
4195
4217
reservation_affinity_values = reservation_affinity_values ,
@@ -5241,6 +5263,7 @@ def deploy(
5241
5263
accelerator_type : Optional [str ] = None ,
5242
5264
accelerator_count : Optional [int ] = None ,
5243
5265
tpu_topology : Optional [str ] = None ,
5266
+ multihost_gpu_node_count : Optional [int ] = None ,
5244
5267
service_account : Optional [str ] = None ,
5245
5268
explanation_metadata : Optional [aiplatform .explain .ExplanationMetadata ] = None ,
5246
5269
explanation_parameters : Optional [
@@ -5318,6 +5341,9 @@ def deploy(
5318
5341
tpu_topology (str):
5319
5342
Optional. The TPU topology to use for the DeployedModel.
5320
5343
Required for CloudTPU multihost deployments.
5344
+ multihost_gpu_node_count (int):
5345
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
5346
+ Required for multihost GPU deployments.
5321
5347
service_account (str):
5322
5348
The service account that the DeployedModel's container runs as. Specify the
5323
5349
email address of the service account. If this service account is not
@@ -5462,6 +5488,7 @@ def deploy(
5462
5488
accelerator_type = accelerator_type ,
5463
5489
accelerator_count = accelerator_count ,
5464
5490
tpu_topology = tpu_topology ,
5491
+ multihost_gpu_node_count = multihost_gpu_node_count ,
5465
5492
reservation_affinity_type = reservation_affinity_type ,
5466
5493
reservation_affinity_key = reservation_affinity_key ,
5467
5494
reservation_affinity_values = reservation_affinity_values ,
@@ -5505,6 +5532,7 @@ def _deploy(
5505
5532
accelerator_type : Optional [str ] = None ,
5506
5533
accelerator_count : Optional [int ] = None ,
5507
5534
tpu_topology : Optional [str ] = None ,
5535
+ multihost_gpu_node_count : Optional [int ] = None ,
5508
5536
reservation_affinity_type : Optional [str ] = None ,
5509
5537
reservation_affinity_key : Optional [str ] = None ,
5510
5538
reservation_affinity_values : Optional [List [str ]] = None ,
@@ -5579,6 +5607,9 @@ def _deploy(
5579
5607
tpu_topology (str):
5580
5608
Optional. The TPU topology to use for the DeployedModel.
5581
5609
Required for CloudTPU multihost deployments.
5610
+ multihost_gpu_node_count (int):
5611
+ Optional. The number of nodes per replica for multihost GPU DeployedModel.
5612
+ Required for multihost GPU deployments.
5582
5613
reservation_affinity_type (str):
5583
5614
Optional. The type of reservation affinity.
5584
5615
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -5713,6 +5744,7 @@ def _deploy(
5713
5744
accelerator_type = accelerator_type ,
5714
5745
accelerator_count = accelerator_count ,
5715
5746
tpu_topology = tpu_topology ,
5747
+ multihost_gpu_node_count = multihost_gpu_node_count ,
5716
5748
reservation_affinity_type = reservation_affinity_type ,
5717
5749
reservation_affinity_key = reservation_affinity_key ,
5718
5750
reservation_affinity_values = reservation_affinity_values ,
0 commit comments