Skip to content

Commit deb33fd

Browse files
vertex-sdk-bot
authored and copybara-github committed
feat: Add multihost_gpu_node_count to Vertex SDK.
PiperOrigin-RevId: 703635901
1 parent ef596f5 commit deb33fd

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

google/cloud/aiplatform/models.py

+42
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def create(
252252
reservation_affinity_values: Optional[List[str]] = None,
253253
spot: bool = False,
254254
required_replica_count: Optional[int] = 0,
255+
multihost_gpu_node_count: Optional[int] = None,
255256
) -> "DeploymentResourcePool":
256257
"""Creates a new DeploymentResourcePool.
257258
@@ -332,6 +333,9 @@ def create(
332333
set, the model deploy/mutate operation will succeed once
333334
available_replica_count reaches required_replica_count, and the
334335
rest of the replicas will be retried.
336+
multihost_gpu_node_count (int):
337+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
338+
Required for multihost GPU deployments.
335339
336340
Returns:
337341
DeploymentResourcePool
@@ -363,6 +367,7 @@ def create(
363367
sync=sync,
364368
create_request_timeout=create_request_timeout,
365369
required_replica_count=required_replica_count,
370+
multihost_gpu_node_count=multihost_gpu_node_count,
366371
)
367372

368373
@classmethod
@@ -389,6 +394,7 @@ def _create(
389394
sync=True,
390395
create_request_timeout: Optional[float] = None,
391396
required_replica_count: Optional[int] = 0,
397+
multihost_gpu_node_count: Optional[int] = None,
392398
) -> "DeploymentResourcePool":
393399
"""Creates a new DeploymentResourcePool.
394400
@@ -472,6 +478,9 @@ def _create(
472478
set, the model deploy/mutate operation will succeed once
473479
available_replica_count reaches required_replica_count, and the
474480
rest of the replicas will be retried.
481+
multihost_gpu_node_count (int):
482+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
483+
Required for multihost GPU deployments.
475484
476485
Returns:
477486
DeploymentResourcePool
@@ -505,6 +514,7 @@ def _create(
505514
[autoscaling_metric_spec]
506515
)
507516

517+
# TODO(joelletiangco): accelerator_type present here
508518
if accelerator_type and accelerator_count:
509519
utils.validate_accelerator_type(accelerator_type)
510520
machine_spec.accelerator_type = accelerator_type
@@ -1327,6 +1337,7 @@ def deploy(
13271337
accelerator_type: Optional[str] = None,
13281338
accelerator_count: Optional[int] = None,
13291339
tpu_topology: Optional[str] = None,
1340+
multihost_gpu_node_count: Optional[int] = None,
13301341
service_account: Optional[str] = None,
13311342
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
13321343
explanation_parameters: Optional[
@@ -1399,6 +1410,9 @@ def deploy(
13991410
tpu_topology (str):
14001411
Optional. The TPU topology to use for the DeployedModel.
14011412
Required for CloudTPU multihost deployments.
1413+
multihost_gpu_node_count (int):
1414+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1415+
Required for multihost GPU deployments.
14021416
service_account (str):
14031417
The service account that the DeployedModel's container runs as. Specify the
14041418
email address of the service account. If this service account is not
@@ -1500,6 +1514,7 @@ def deploy(
15001514
accelerator_type=accelerator_type,
15011515
accelerator_count=accelerator_count,
15021516
tpu_topology=tpu_topology,
1517+
multihost_gpu_node_count=multihost_gpu_node_count,
15031518
reservation_affinity_type=reservation_affinity_type,
15041519
reservation_affinity_key=reservation_affinity_key,
15051520
reservation_affinity_values=reservation_affinity_values,
@@ -1532,6 +1547,7 @@ def _deploy(
15321547
accelerator_type: Optional[str] = None,
15331548
accelerator_count: Optional[int] = None,
15341549
tpu_topology: Optional[str] = None,
1550+
multihost_gpu_node_count: Optional[int] = None,
15351551
reservation_affinity_type: Optional[str] = None,
15361552
reservation_affinity_key: Optional[str] = None,
15371553
reservation_affinity_values: Optional[List[str]] = None,
@@ -1601,6 +1617,9 @@ def _deploy(
16011617
tpu_topology (str):
16021618
Optional. The TPU topology to use for the DeployedModel.
16031619
Required for CloudTPU multihost deployments.
1620+
multihost_gpu_node_count (int):
1621+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1622+
Required for multihost GPU deployments.
16041623
reservation_affinity_type (str):
16051624
Optional. The type of reservation affinity.
16061625
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1686,6 +1705,7 @@ def _deploy(
16861705
accelerator_type=accelerator_type,
16871706
accelerator_count=accelerator_count,
16881707
tpu_topology=tpu_topology,
1708+
multihost_gpu_node_count=multihost_gpu_node_count,
16891709
reservation_affinity_type=reservation_affinity_type,
16901710
reservation_affinity_key=reservation_affinity_key,
16911711
reservation_affinity_values=reservation_affinity_values,
@@ -1725,6 +1745,7 @@ def _deploy_call(
17251745
accelerator_type: Optional[str] = None,
17261746
accelerator_count: Optional[int] = None,
17271747
tpu_topology: Optional[str] = None,
1748+
multihost_gpu_node_count: Optional[int] = None,
17281749
reservation_affinity_type: Optional[str] = None,
17291750
reservation_affinity_key: Optional[str] = None,
17301751
reservation_affinity_values: Optional[List[str]] = None,
@@ -1803,6 +1824,9 @@ def _deploy_call(
18031824
tpu_topology (str):
18041825
Optional. The TPU topology to use for the DeployedModel.
18051826
Required for CloudTPU multihost deployments.
1827+
multihost_gpu_node_count (int):
1828+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1829+
Required for multihost GPU deployments.
18061830
reservation_affinity_type (str):
18071831
Optional. The type of reservation affinity.
18081832
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -2030,6 +2054,9 @@ def _deploy_call(
20302054
if tpu_topology is not None:
20312055
machine_spec.tpu_topology = tpu_topology
20322056

2057+
if multihost_gpu_node_count is not None:
2058+
machine_spec.multihost_gpu_node_count = multihost_gpu_node_count
2059+
20332060
dedicated_resources.machine_spec = machine_spec
20342061
deployed_model.dedicated_resources = dedicated_resources
20352062
if fast_tryout_enabled:
@@ -4012,6 +4039,7 @@ def deploy(
40124039
accelerator_type: Optional[str] = None,
40134040
accelerator_count: Optional[int] = None,
40144041
tpu_topology: Optional[str] = None,
4042+
multihost_gpu_node_count: Optional[int] = None,
40154043
service_account: Optional[str] = None,
40164044
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
40174045
explanation_parameters: Optional[
@@ -4089,6 +4117,9 @@ def deploy(
40894117
tpu_topology (str):
40904118
Optional. The TPU topology to use for the DeployedModel.
40914119
Required for CloudTPU multihost deployments.
4120+
multihost_gpu_node_count (int):
4121+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
4122+
Required for multihost GPU deployments.
40924123
service_account (str):
40934124
The service account that the DeployedModel's container runs as. Specify the
40944125
email address of the service account. If this service account is not
@@ -4190,6 +4221,7 @@ def deploy(
41904221
accelerator_type=accelerator_type,
41914222
accelerator_count=accelerator_count,
41924223
tpu_topology=tpu_topology,
4224+
multihost_gpu_node_count=multihost_gpu_node_count,
41934225
reservation_affinity_type=reservation_affinity_type,
41944226
reservation_affinity_key=reservation_affinity_key,
41954227
reservation_affinity_values=reservation_affinity_values,
@@ -5241,6 +5273,7 @@ def deploy(
52415273
accelerator_type: Optional[str] = None,
52425274
accelerator_count: Optional[int] = None,
52435275
tpu_topology: Optional[str] = None,
5276+
multihost_gpu_node_count: Optional[int] = None,
52445277
service_account: Optional[str] = None,
52455278
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
52465279
explanation_parameters: Optional[
@@ -5318,6 +5351,9 @@ def deploy(
53185351
tpu_topology (str):
53195352
Optional. The TPU topology to use for the DeployedModel.
53205353
Required for CloudTPU multihost deployments.
5354+
multihost_gpu_node_count (int):
5355+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5356+
Required for multihost GPU deployments.
53215357
service_account (str):
53225358
The service account that the DeployedModel's container runs as. Specify the
53235359
email address of the service account. If this service account is not
@@ -5462,6 +5498,7 @@ def deploy(
54625498
accelerator_type=accelerator_type,
54635499
accelerator_count=accelerator_count,
54645500
tpu_topology=tpu_topology,
5501+
multihost_gpu_node_count=multihost_gpu_node_count,
54655502
reservation_affinity_type=reservation_affinity_type,
54665503
reservation_affinity_key=reservation_affinity_key,
54675504
reservation_affinity_values=reservation_affinity_values,
@@ -5505,6 +5542,7 @@ def _deploy(
55055542
accelerator_type: Optional[str] = None,
55065543
accelerator_count: Optional[int] = None,
55075544
tpu_topology: Optional[str] = None,
5545+
multihost_gpu_node_count: Optional[int] = None,
55085546
reservation_affinity_type: Optional[str] = None,
55095547
reservation_affinity_key: Optional[str] = None,
55105548
reservation_affinity_values: Optional[List[str]] = None,
@@ -5579,6 +5617,9 @@ def _deploy(
55795617
tpu_topology (str):
55805618
Optional. The TPU topology to use for the DeployedModel.
55815619
Required for CloudTPU multihost deployments.
5620+
multihost_gpu_node_count (int):
5621+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5622+
Required for multihost GPU deployments.
55825623
reservation_affinity_type (str):
55835624
Optional. The type of reservation affinity.
55845625
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -5713,6 +5754,7 @@ def _deploy(
57135754
accelerator_type=accelerator_type,
57145755
accelerator_count=accelerator_count,
57155756
tpu_topology=tpu_topology,
5757+
multihost_gpu_node_count=multihost_gpu_node_count,
57165758
reservation_affinity_type=reservation_affinity_type,
57175759
reservation_affinity_key=reservation_affinity_key,
57185760
reservation_affinity_values=reservation_affinity_values,

tests/unit/aiplatform/test_models.py

+52
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@
146146
_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
147147
_TEST_TPU_TOPOLOGY = "2x2"
148148

149+
_TEST_GPU_MACHINE_TYPE = "a3-highgpu-8g"
150+
_TEST_GPU_ACCELERATOR_TYPE = "NVIDIA_TESLA_A100"
151+
_TEST_GPU_ACCELERATOR_COUNT = 8
152+
_TEST_MULTIHOST_GPU_NODE_COUNT = 2
153+
149154
_TEST_BATCH_SIZE = 16
150155

151156
_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2239,6 +2244,53 @@ def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
22392244
timeout=None,
22402245
)
22412246

2247+
@pytest.mark.usefixtures(
2248+
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
2249+
)
2250+
@pytest.mark.parametrize("sync", [True, False])
2251+
def test_deploy_no_endpoint_with_multihost_gpu_node_count(self, deploy_model_mock, sync):
2252+
test_model = models.Model(_TEST_ID)
2253+
test_model._gca_resource.supported_deployment_resources_types.append(
2254+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2255+
)
2256+
test_endpoint = test_model.deploy(
2257+
machine_type=_TEST_GPU_MACHINE_TYPE,
2258+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2259+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2260+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2261+
sync=sync,
2262+
deploy_request_timeout=None,
2263+
)
2264+
2265+
if not sync:
2266+
test_endpoint.wait()
2267+
2268+
expected_machine_spec = gca_machine_resources.MachineSpec(
2269+
machine_type=_TEST_GPU_MACHINE_TYPE,
2270+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2271+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2272+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2273+
)
2274+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2275+
machine_spec=expected_machine_spec,
2276+
min_replica_count=1,
2277+
max_replica_count=1,
2278+
spot=False,
2279+
)
2280+
expected_deployed_model = gca_endpoint.DeployedModel(
2281+
dedicated_resources=expected_dedicated_resources,
2282+
model=test_model.resource_name,
2283+
display_name=None,
2284+
)
2285+
deploy_model_mock.assert_called_once_with(
2286+
endpoint=test_endpoint.resource_name,
2287+
deployed_model=expected_deployed_model,
2288+
traffic_split={"0": 100},
2289+
metadata=(),
2290+
timeout=None,
2291+
)
2292+
2293+
22422294
@pytest.mark.usefixtures(
22432295
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
22442296
)

0 commit comments

Comments
 (0)