Skip to content

Commit f4809bd

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add multihost_gpu_node_count to Vertex SDK.
PiperOrigin-RevId: 703635901
1 parent 4620e6f commit f4809bd

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

Diff for: google/cloud/aiplatform/models.py

+32
Original file line numberDiff line numberDiff line change
@@ -1327,6 +1327,7 @@ def deploy(
13271327
accelerator_type: Optional[str] = None,
13281328
accelerator_count: Optional[int] = None,
13291329
tpu_topology: Optional[str] = None,
1330+
multihost_gpu_node_count: Optional[int] = None,
13301331
service_account: Optional[str] = None,
13311332
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
13321333
explanation_parameters: Optional[
@@ -1399,6 +1400,9 @@ def deploy(
13991400
tpu_topology (str):
14001401
Optional. The TPU topology to use for the DeployedModel.
14011402
Required for CloudTPU multihost deployments.
1403+
multihost_gpu_node_count (int):
1404+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1405+
Required for multihost GPU deployments.
14021406
service_account (str):
14031407
The service account that the DeployedModel's container runs as. Specify the
14041408
email address of the service account. If this service account is not
@@ -1500,6 +1504,7 @@ def deploy(
15001504
accelerator_type=accelerator_type,
15011505
accelerator_count=accelerator_count,
15021506
tpu_topology=tpu_topology,
1507+
multihost_gpu_node_count=multihost_gpu_node_count,
15031508
reservation_affinity_type=reservation_affinity_type,
15041509
reservation_affinity_key=reservation_affinity_key,
15051510
reservation_affinity_values=reservation_affinity_values,
@@ -1532,6 +1537,7 @@ def _deploy(
15321537
accelerator_type: Optional[str] = None,
15331538
accelerator_count: Optional[int] = None,
15341539
tpu_topology: Optional[str] = None,
1540+
multihost_gpu_node_count: Optional[int] = None,
15351541
reservation_affinity_type: Optional[str] = None,
15361542
reservation_affinity_key: Optional[str] = None,
15371543
reservation_affinity_values: Optional[List[str]] = None,
@@ -1601,6 +1607,9 @@ def _deploy(
16011607
tpu_topology (str):
16021608
Optional. The TPU topology to use for the DeployedModel.
16031609
Required for CloudTPU multihost deployments.
1610+
multihost_gpu_node_count (int):
1611+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1612+
Required for multihost GPU deployments.
16041613
reservation_affinity_type (str):
16051614
Optional. The type of reservation affinity.
16061615
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1686,6 +1695,7 @@ def _deploy(
16861695
accelerator_type=accelerator_type,
16871696
accelerator_count=accelerator_count,
16881697
tpu_topology=tpu_topology,
1698+
multihost_gpu_node_count=multihost_gpu_node_count,
16891699
reservation_affinity_type=reservation_affinity_type,
16901700
reservation_affinity_key=reservation_affinity_key,
16911701
reservation_affinity_values=reservation_affinity_values,
@@ -1725,6 +1735,7 @@ def _deploy_call(
17251735
accelerator_type: Optional[str] = None,
17261736
accelerator_count: Optional[int] = None,
17271737
tpu_topology: Optional[str] = None,
1738+
multihost_gpu_node_count: Optional[int] = None,
17281739
reservation_affinity_type: Optional[str] = None,
17291740
reservation_affinity_key: Optional[str] = None,
17301741
reservation_affinity_values: Optional[List[str]] = None,
@@ -1803,6 +1814,9 @@ def _deploy_call(
18031814
tpu_topology (str):
18041815
Optional. The TPU topology to use for the DeployedModel.
18051816
Required for CloudTPU multihost deployments.
1817+
multihost_gpu_node_count (int):
1818+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1819+
Required for multihost GPU deployments.
18061820
reservation_affinity_type (str):
18071821
Optional. The type of reservation affinity.
18081822
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -2030,6 +2044,9 @@ def _deploy_call(
20302044
if tpu_topology is not None:
20312045
machine_spec.tpu_topology = tpu_topology
20322046

2047+
if multihost_gpu_node_count is not None:
2048+
machine_spec.multihost_gpu_node_count = multihost_gpu_node_count
2049+
20332050
dedicated_resources.machine_spec = machine_spec
20342051
deployed_model.dedicated_resources = dedicated_resources
20352052
if fast_tryout_enabled:
@@ -4012,6 +4029,7 @@ def deploy(
40124029
accelerator_type: Optional[str] = None,
40134030
accelerator_count: Optional[int] = None,
40144031
tpu_topology: Optional[str] = None,
4032+
multihost_gpu_node_count: Optional[int] = None,
40154033
service_account: Optional[str] = None,
40164034
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
40174035
explanation_parameters: Optional[
@@ -4089,6 +4107,9 @@ def deploy(
40894107
tpu_topology (str):
40904108
Optional. The TPU topology to use for the DeployedModel.
40914109
Required for CloudTPU multihost deployments.
4110+
multihost_gpu_node_count (int):
4111+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
4112+
Required for multihost GPU deployments.
40924113
service_account (str):
40934114
The service account that the DeployedModel's container runs as. Specify the
40944115
email address of the service account. If this service account is not
@@ -4190,6 +4211,7 @@ def deploy(
41904211
accelerator_type=accelerator_type,
41914212
accelerator_count=accelerator_count,
41924213
tpu_topology=tpu_topology,
4214+
multihost_gpu_node_count=multihost_gpu_node_count,
41934215
reservation_affinity_type=reservation_affinity_type,
41944216
reservation_affinity_key=reservation_affinity_key,
41954217
reservation_affinity_values=reservation_affinity_values,
@@ -5241,6 +5263,7 @@ def deploy(
52415263
accelerator_type: Optional[str] = None,
52425264
accelerator_count: Optional[int] = None,
52435265
tpu_topology: Optional[str] = None,
5266+
multihost_gpu_node_count: Optional[int] = None,
52445267
service_account: Optional[str] = None,
52455268
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
52465269
explanation_parameters: Optional[
@@ -5318,6 +5341,9 @@ def deploy(
53185341
tpu_topology (str):
53195342
Optional. The TPU topology to use for the DeployedModel.
53205343
Required for CloudTPU multihost deployments.
5344+
multihost_gpu_node_count (int):
5345+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5346+
Required for multihost GPU deployments.
53215347
service_account (str):
53225348
The service account that the DeployedModel's container runs as. Specify the
53235349
email address of the service account. If this service account is not
@@ -5462,6 +5488,7 @@ def deploy(
54625488
accelerator_type=accelerator_type,
54635489
accelerator_count=accelerator_count,
54645490
tpu_topology=tpu_topology,
5491+
multihost_gpu_node_count=multihost_gpu_node_count,
54655492
reservation_affinity_type=reservation_affinity_type,
54665493
reservation_affinity_key=reservation_affinity_key,
54675494
reservation_affinity_values=reservation_affinity_values,
@@ -5505,6 +5532,7 @@ def _deploy(
55055532
accelerator_type: Optional[str] = None,
55065533
accelerator_count: Optional[int] = None,
55075534
tpu_topology: Optional[str] = None,
5535+
multihost_gpu_node_count: Optional[int] = None,
55085536
reservation_affinity_type: Optional[str] = None,
55095537
reservation_affinity_key: Optional[str] = None,
55105538
reservation_affinity_values: Optional[List[str]] = None,
@@ -5579,6 +5607,9 @@ def _deploy(
55795607
tpu_topology (str):
55805608
Optional. The TPU topology to use for the DeployedModel.
55815609
Required for CloudTPU multihost deployments.
5610+
multihost_gpu_node_count (int):
5611+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5612+
Required for multihost GPU deployments.
55825613
reservation_affinity_type (str):
55835614
Optional. The type of reservation affinity.
55845615
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -5713,6 +5744,7 @@ def _deploy(
57135744
accelerator_type=accelerator_type,
57145745
accelerator_count=accelerator_count,
57155746
tpu_topology=tpu_topology,
5747+
multihost_gpu_node_count=multihost_gpu_node_count,
57165748
reservation_affinity_type=reservation_affinity_type,
57175749
reservation_affinity_key=reservation_affinity_key,
57185750
reservation_affinity_values=reservation_affinity_values,

Diff for: tests/unit/aiplatform/test_models.py

+52
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@
146146
_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
147147
_TEST_TPU_TOPOLOGY = "2x2"
148148

149+
_TEST_GPU_MACHINE_TYPE = "a3-highgpu-8g"
150+
_TEST_GPU_ACCELERATOR_TYPE = "NVIDIA_TESLA_A100"
151+
_TEST_GPU_ACCELERATOR_COUNT = 8
152+
_TEST_MULTIHOST_GPU_NODE_COUNT = 2
153+
149154
_TEST_BATCH_SIZE = 16
150155

151156
_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2239,6 +2244,53 @@ def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
22392244
timeout=None,
22402245
)
22412246

2247+
@pytest.mark.usefixtures(
2248+
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
2249+
)
2250+
@pytest.mark.parametrize("sync", [True, False])
2251+
def test_deploy_no_endpoint_with_multihost_gpu_node_count(self, deploy_model_mock, sync):
2252+
test_model = models.Model(_TEST_ID)
2253+
test_model._gca_resource.supported_deployment_resources_types.append(
2254+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2255+
)
2256+
test_endpoint = test_model.deploy(
2257+
machine_type=_TEST_GPU_MACHINE_TYPE,
2258+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2259+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2260+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2261+
sync=sync,
2262+
deploy_request_timeout=None,
2263+
)
2264+
2265+
if not sync:
2266+
test_endpoint.wait()
2267+
2268+
expected_machine_spec = gca_machine_resources.MachineSpec(
2269+
machine_type=_TEST_GPU_MACHINE_TYPE,
2270+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2271+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2272+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2273+
)
2274+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2275+
machine_spec=expected_machine_spec,
2276+
min_replica_count=1,
2277+
max_replica_count=1,
2278+
spot=False,
2279+
)
2280+
expected_deployed_model = gca_endpoint.DeployedModel(
2281+
dedicated_resources=expected_dedicated_resources,
2282+
model=test_model.resource_name,
2283+
display_name=None,
2284+
)
2285+
deploy_model_mock.assert_called_once_with(
2286+
endpoint=test_endpoint.resource_name,
2287+
deployed_model=expected_deployed_model,
2288+
traffic_split={"0": 100},
2289+
metadata=(),
2290+
timeout=None,
2291+
)
2292+
2293+
22422294
@pytest.mark.usefixtures(
22432295
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
22442296
)

0 commit comments

Comments
 (0)