Skip to content

Commit 0d35a6d

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add multihost_gpu_node_count to Vertex SDK.
PiperOrigin-RevId: 703635901
1 parent 1b2457f commit 0d35a6d

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

Diff for: google/cloud/aiplatform/models.py

+32
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,7 @@ def deploy(
12941294
accelerator_type: Optional[str] = None,
12951295
accelerator_count: Optional[int] = None,
12961296
tpu_topology: Optional[str] = None,
1297+
multihost_gpu_node_count: Optional[int] = None,
12971298
service_account: Optional[str] = None,
12981299
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
12991300
explanation_parameters: Optional[
@@ -1365,6 +1366,9 @@ def deploy(
13651366
tpu_topology (str):
13661367
Optional. The TPU topology to use for the DeployedModel.
13671368
Required for CloudTPU multihost deployments.
1369+
multihost_gpu_node_count (int):
1370+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1371+
Required for multihost GPU deployments.
13681372
service_account (str):
13691373
The service account that the DeployedModel's container runs as. Specify the
13701374
email address of the service account. If this service account is not
@@ -1457,6 +1461,7 @@ def deploy(
14571461
accelerator_type=accelerator_type,
14581462
accelerator_count=accelerator_count,
14591463
tpu_topology=tpu_topology,
1464+
multihost_gpu_node_count=multihost_gpu_node_count,
14601465
reservation_affinity_type=reservation_affinity_type,
14611466
reservation_affinity_key=reservation_affinity_key,
14621467
reservation_affinity_values=reservation_affinity_values,
@@ -1488,6 +1493,7 @@ def _deploy(
14881493
accelerator_type: Optional[str] = None,
14891494
accelerator_count: Optional[int] = None,
14901495
tpu_topology: Optional[str] = None,
1496+
multihost_gpu_node_count: Optional[int] = None,
14911497
reservation_affinity_type: Optional[str] = None,
14921498
reservation_affinity_key: Optional[str] = None,
14931499
reservation_affinity_values: Optional[List[str]] = None,
@@ -1556,6 +1562,9 @@ def _deploy(
15561562
tpu_topology (str):
15571563
Optional. The TPU topology to use for the DeployedModel.
15581564
Required for CloudTPU multihost deployments.
1565+
multihost_gpu_node_count (int):
1566+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1567+
Required for multihost GPU deployments.
15591568
reservation_affinity_type (str):
15601569
Optional. The type of reservation affinity.
15611570
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1633,6 +1642,7 @@ def _deploy(
16331642
accelerator_type=accelerator_type,
16341643
accelerator_count=accelerator_count,
16351644
tpu_topology=tpu_topology,
1645+
multihost_gpu_node_count=multihost_gpu_node_count,
16361646
reservation_affinity_type=reservation_affinity_type,
16371647
reservation_affinity_key=reservation_affinity_key,
16381648
reservation_affinity_values=reservation_affinity_values,
@@ -1671,6 +1681,7 @@ def _deploy_call(
16711681
accelerator_type: Optional[str] = None,
16721682
accelerator_count: Optional[int] = None,
16731683
tpu_topology: Optional[str] = None,
1684+
multihost_gpu_node_count: Optional[int] = None,
16741685
reservation_affinity_type: Optional[str] = None,
16751686
reservation_affinity_key: Optional[str] = None,
16761687
reservation_affinity_values: Optional[List[str]] = None,
@@ -1748,6 +1759,9 @@ def _deploy_call(
17481759
tpu_topology (str):
17491760
Optional. The TPU topology to use for the DeployedModel.
17501761
Required for CloudTPU multihost deployments.
1762+
multihost_gpu_node_count (int):
1763+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
1764+
Required for multihost GPU deployments.
17511765
reservation_affinity_type (str):
17521766
Optional. The type of reservation affinity.
17531767
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -1966,6 +1980,9 @@ def _deploy_call(
19661980
if tpu_topology is not None:
19671981
machine_spec.tpu_topology = tpu_topology
19681982

1983+
if multihost_gpu_node_count is not None:
1984+
machine_spec.multihost_gpu_node_count = multihost_gpu_node_count
1985+
19691986
dedicated_resources.machine_spec = machine_spec
19701987
deployed_model.dedicated_resources = dedicated_resources
19711988
if fast_tryout_enabled:
@@ -3948,6 +3965,7 @@ def deploy(
39483965
accelerator_type: Optional[str] = None,
39493966
accelerator_count: Optional[int] = None,
39503967
tpu_topology: Optional[str] = None,
3968+
multihost_gpu_node_count: Optional[int] = None,
39513969
service_account: Optional[str] = None,
39523970
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
39533971
explanation_parameters: Optional[
@@ -4024,6 +4042,9 @@ def deploy(
40244042
tpu_topology (str):
40254043
Optional. The TPU topology to use for the DeployedModel.
40264044
Required for CloudTPU multihost deployments.
4045+
multihost_gpu_node_count (int):
4046+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
4047+
Required for multihost GPU deployments.
40274048
service_account (str):
40284049
The service account that the DeployedModel's container runs as. Specify the
40294050
email address of the service account. If this service account is not
@@ -4116,6 +4137,7 @@ def deploy(
41164137
accelerator_type=accelerator_type,
41174138
accelerator_count=accelerator_count,
41184139
tpu_topology=tpu_topology,
4140+
multihost_gpu_node_count=multihost_gpu_node_count,
41194141
reservation_affinity_type=reservation_affinity_type,
41204142
reservation_affinity_key=reservation_affinity_key,
41214143
reservation_affinity_values=reservation_affinity_values,
@@ -5166,6 +5188,7 @@ def deploy(
51665188
accelerator_type: Optional[str] = None,
51675189
accelerator_count: Optional[int] = None,
51685190
tpu_topology: Optional[str] = None,
5191+
multihost_gpu_node_count: Optional[int] = None,
51695192
service_account: Optional[str] = None,
51705193
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
51715194
explanation_parameters: Optional[
@@ -5242,6 +5265,9 @@ def deploy(
52425265
tpu_topology (str):
52435266
Optional. The TPU topology to use for the DeployedModel.
52445267
Required for CloudTPU multihost deployments.
5268+
multihost_gpu_node_count (int):
5269+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5270+
Required for multihost GPU deployments.
52455271
service_account (str):
52465272
The service account that the DeployedModel's container runs as. Specify the
52475273
email address of the service account. If this service account is not
@@ -5377,6 +5403,7 @@ def deploy(
53775403
accelerator_type=accelerator_type,
53785404
accelerator_count=accelerator_count,
53795405
tpu_topology=tpu_topology,
5406+
multihost_gpu_node_count=multihost_gpu_node_count,
53805407
reservation_affinity_type=reservation_affinity_type,
53815408
reservation_affinity_key=reservation_affinity_key,
53825409
reservation_affinity_values=reservation_affinity_values,
@@ -5419,6 +5446,7 @@ def _deploy(
54195446
accelerator_type: Optional[str] = None,
54205447
accelerator_count: Optional[int] = None,
54215448
tpu_topology: Optional[str] = None,
5449+
multihost_gpu_node_count: Optional[int] = None,
54225450
reservation_affinity_type: Optional[str] = None,
54235451
reservation_affinity_key: Optional[str] = None,
54245452
reservation_affinity_values: Optional[List[str]] = None,
@@ -5492,6 +5520,9 @@ def _deploy(
54925520
tpu_topology (str):
54935521
Optional. The TPU topology to use for the DeployedModel.
54945522
Required for CloudTPU multihost deployments.
5523+
multihost_gpu_node_count (int):
5524+
Optional. The number of nodes per replica for multihost GPU DeployedModel.
5525+
Required for multihost GPU deployments.
54955526
reservation_affinity_type (str):
54965527
Optional. The type of reservation affinity.
54975528
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
@@ -5618,6 +5649,7 @@ def _deploy(
56185649
accelerator_type=accelerator_type,
56195650
accelerator_count=accelerator_count,
56205651
tpu_topology=tpu_topology,
5652+
multihost_gpu_node_count=multihost_gpu_node_count,
56215653
reservation_affinity_type=reservation_affinity_type,
56225654
reservation_affinity_key=reservation_affinity_key,
56235655
reservation_affinity_values=reservation_affinity_values,

Diff for: tests/unit/aiplatform/test_models.py

+52
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@
145145
_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
146146
_TEST_TPU_TOPOLOGY = "2x2"
147147

148+
_TEST_GPU_MACHINE_TYPE = "a3-highgpu-8g"
149+
_TEST_GPU_ACCELERATOR_TYPE = "NVIDIA_TESLA_A100"
150+
_TEST_GPU_ACCELERATOR_COUNT = 8
151+
_TEST_MULTIHOST_GPU_NODE_COUNT = 2
152+
148153
_TEST_BATCH_SIZE = 16
149154

150155
_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2234,6 +2239,53 @@ def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
22342239
timeout=None,
22352240
)
22362241

2242+
@pytest.mark.usefixtures(
2243+
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
2244+
)
2245+
@pytest.mark.parametrize("sync", [True, False])
2246+
def test_deploy_no_endpoint_with_multihost_gpu_node_count(self, deploy_model_mock, sync):
2247+
test_model = models.Model(_TEST_ID)
2248+
test_model._gca_resource.supported_deployment_resources_types.append(
2249+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2250+
)
2251+
test_endpoint = test_model.deploy(
2252+
machine_type=_TEST_GPU_MACHINE_TYPE,
2253+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2254+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2255+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2256+
sync=sync,
2257+
deploy_request_timeout=None,
2258+
)
2259+
2260+
if not sync:
2261+
test_endpoint.wait()
2262+
2263+
expected_machine_spec = gca_machine_resources.MachineSpec(
2264+
machine_type=_TEST_GPU_MACHINE_TYPE,
2265+
accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
2266+
accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
2267+
multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
2268+
)
2269+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2270+
machine_spec=expected_machine_spec,
2271+
min_replica_count=1,
2272+
max_replica_count=1,
2273+
spot=False,
2274+
)
2275+
expected_deployed_model = gca_endpoint.DeployedModel(
2276+
dedicated_resources=expected_dedicated_resources,
2277+
model=test_model.resource_name,
2278+
display_name=None,
2279+
)
2280+
deploy_model_mock.assert_called_once_with(
2281+
endpoint=test_endpoint.resource_name,
2282+
deployed_model=expected_deployed_model,
2283+
traffic_split={"0": 100},
2284+
metadata=(),
2285+
timeout=None,
2286+
)
2287+
2288+
22372289
@pytest.mark.usefixtures(
22382290
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
22392291
)

0 commit comments

Comments
 (0)