Skip to content

Commit f423974

Browse files
authored
Merge pull request #208 from Paperspace/PS-12571-Support_datasets_storage_configuration_options
Add support for experiment storage options. Removed some duplicated t…
2 parents fe8a3eb + 04a8c16 commit f423974

File tree

10 files changed

+111
-258
lines changed

10 files changed

+111
-258
lines changed

gradient/api_sdk/clients/experiment_client.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from .base_client import BaseClient
44
from .. import repositories, models, constants, utils
5-
from ..sdk_exceptions import InvalidParametersError
5+
from ..sdk_exceptions import ResourceCreatingDataError, InvalidParametersError
66
from ..validation_messages import EXPERIMENT_MODEL_PATH_VALIDATION_ERROR
77

88

@@ -72,6 +72,8 @@ def create_single_node(
7272
if not is_preemptible:
7373
is_preemptible = None
7474

75+
datasets = self._dataset_dicts_to_instances(datasets)
76+
7577
experiment = models.SingleNodeExperiment(
7678
experiment_type_id=constants.ExperimentType.SINGLE_NODE,
7779
name=name,
@@ -191,6 +193,8 @@ def create_multi_node(
191193
if not is_preemptible:
192194
is_preemptible = None
193195

196+
datasets = self._dataset_dicts_to_instances(datasets)
197+
194198
experiment = models.MultiNodeExperiment(
195199
name=name,
196200
project_id=project_id,
@@ -314,6 +318,7 @@ def create_mpi_multi_node(
314318
if not is_preemptible:
315319
is_preemptible = None
316320

321+
datasets = self._dataset_dicts_to_instances(datasets)
317322
experiment_type_id = constants.ExperimentType.MPI_MULTI_NODE
318323

319324
experiment = models.MpiMultiNodeExperiment(
@@ -421,6 +426,8 @@ def run_single_node(
421426
if not is_preemptible:
422427
is_preemptible = None
423428

429+
datasets = self._dataset_dicts_to_instances(datasets)
430+
424431
experiment = models.SingleNodeExperiment(
425432
experiment_type_id=constants.ExperimentType.SINGLE_NODE,
426433
name=name,
@@ -538,6 +545,8 @@ def run_multi_node(
538545
if not is_preemptible:
539546
is_preemptible = None
540547

548+
datasets = self._dataset_dicts_to_instances(datasets)
549+
541550
experiment = models.MultiNodeExperiment(
542551
name=name,
543552
project_id=project_id,
@@ -661,6 +670,8 @@ def run_mpi_multi_node(
661670
if not is_preemptible:
662671
is_preemptible = None
663672

673+
datasets = self._dataset_dicts_to_instances(datasets)
674+
664675
experiment_type_id = constants.ExperimentType.MPI_MULTI_NODE
665676

666677
experiment = models.MpiMultiNodeExperiment(
@@ -840,3 +851,23 @@ def stream_metrics(self, experiment_id, interval="30s", built_in_metrics=None):
840851
built_in_metrics=built_in_metrics,
841852
)
842853
return metrics
854+
855+
def _dataset_dicts_to_instances(self, datasets):
    """Convert dataset dict(s) into a list of ``models.Dataset`` instances.

    :param dict|list[dict]|None datasets: a single dataset dict or a list of
        them. Each dict must have a non-empty ``"uri"`` key. Optional
        ``"volume_kind"`` and ``"volume_size"`` keys are folded into a
        ``"volume_options"`` sub-dict (existing ``volume_options`` entries
        take precedence).
    :returns: list of ``models.Dataset`` instances, or ``None`` when
        *datasets* is empty/None
    :raises ResourceCreatingDataError: when any dataset is missing ``"uri"``
    """
    if not datasets:
        return None

    # Accept a bare dict for convenience; normalize to a list
    if isinstance(datasets, dict):
        datasets = [datasets]

    # Validate every dataset up front, before any conversion work
    for ds in datasets:
        if not ds.get("uri"):
            raise ResourceCreatingDataError("Error while creating experiment with dataset: "
                                            "\"uri\" key is required and it's value must be a valid S3 URI")

    instances = []
    for ds in datasets:
        # Work on shallow copies so the caller's dicts are not mutated
        # by the pop()/setdefault() calls below
        ds = dict(ds)
        volume_options = dict(ds.setdefault("volume_options", {}))
        # Fold the flat volume_* keys into volume_options without
        # overriding values the caller already put there
        volume_options.setdefault("kind", ds.pop("volume_kind", None))
        volume_options.setdefault("size", ds.pop("volume_size", None))
        ds["volume_options"] = volume_options
        instances.append(models.Dataset(**ds))

    return instances

gradient/api_sdk/constants.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,16 @@ class BuiltinMetrics:
136136
("gpuMemoryUtilization", BuiltinMetrics.gpu_memory_utilization),
137137
)
138138
)
139+
140+
141+
class DatasetVolumeKinds:
    """Allowed volume kinds for experiment dataset storage."""

    DYNAMIC = "dynamic"
    SHARED = "shared"


# Mapping of CLI choice name -> canonical volume kind value; used by the
# --datasetVolumeKind option's ChoiceType.
DATASET_VOLUME_KINDS = collections.OrderedDict(
    [
        ("dynamic", DatasetVolumeKinds.DYNAMIC),
        ("shared", DatasetVolumeKinds.SHARED),
    ]
)

gradient/api_sdk/models/dataset.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import attr
22

33

4+
@attr.s
class VolumeOptions(object):
    """Storage volume configuration attached to a dataset.

    Both fields default to None when the caller supplies no volume options.
    """

    kind = attr.ib(default=None, type=str)
    size = attr.ib(default=None, type=str)
8+
9+
410
@attr.s
511
class Dataset(object):
612
uri = attr.ib(type=str, default=None)
@@ -9,3 +15,4 @@ class Dataset(object):
915
etag = attr.ib(type=str, default=None)
1016
version_id = attr.ib(type=str, default=None)
1117
name = attr.ib(type=str, default=None)
18+
volume_options = attr.ib(type=VolumeOptions, factory=VolumeOptions)

gradient/api_sdk/serializers/dataset.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
from .. import models
66

77

8+
class DatasetVolumeOptionsSchema(BaseSchema):
    """Marshmallow schema for a dataset's storage volume options.

    NOTE(review): in marshmallow, ``required=True`` is enforced on load
    (deserialization) only; presumably dumping a dataset without volume
    options still succeeds -- confirm against BaseSchema's usage.
    """

    kind = ma.fields.Str(required=True)
    size = ma.fields.Str(required=True)
11+
12+
813
class DatasetSchema(BaseSchema):
914
MODEL = models.Dataset
1015

@@ -15,3 +20,4 @@ class DatasetSchema(BaseSchema):
1520
version_id = ma.fields.String(dump_to="versionId", load_from="versionId")
1621
name = ma.fields.String()
1722
tags = ma.fields.Nested(TagSchema, only="name", many=True, load_only=True)
23+
volume_options = ma.fields.Nested(DatasetVolumeOptionsSchema, dump_to="volumeOptions", load_from="volumeOptions")

gradient/cli/experiments.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,21 @@ def dataset_options(f):
225225
help="S3 dataset's ETag",
226226
cls=common.GradientDatasetOption,
227227
),
228+
click.option(
229+
"--datasetVolumeKind",
230+
"dataset_volume_kind_list",
231+
multiple=True,
232+
type=ChoiceType(constants.DATASET_VOLUME_KINDS, case_sensitive=False),
233+
help="S3 dataset's volume kind. If used, --datasetVolumeSize has to be set as well",
234+
cls=common.GradientDatasetOption,
235+
),
236+
click.option(
237+
"--datasetVolumeSize",
238+
"dataset_volume_size_list",
239+
multiple=True,
240+
help="S3 dataset's volume size",
241+
cls=common.GradientDatasetOption,
242+
),
228243
]
229244
return functools.reduce(lambda x, opt: opt(x), reversed(options), f)
230245

gradient/commands/experiments.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ def _handle_dataset_data(json_):
143143
json_.pop("dataset_secret_access_key_list", ()),
144144
json_.pop("dataset_version_id_list", ()),
145145
json_.pop("dataset_etag_list", ()),
146+
json_.pop("dataset_volume_kind_list", ()),
147+
json_.pop("dataset_volume_size_list", ()),
146148
]
147149

148150
if not any(datasets):
@@ -165,6 +167,8 @@ def _handle_dataset_data(json_):
165167
"aws_secret_access_key": dataset[3],
166168
"version_id": dataset[4],
167169
"etag": dataset[5],
170+
"volume_kind": dataset[6],
171+
"volume_size": dataset[7],
168172
} for dataset in datasets]
169173

170174
json_["datasets"] = datasets

tests/config_files/experiments_create_multi_node.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@ datasetAwsSecretAccessKey:
1414
-
1515
- some_other_secret
1616
datasetVersionId:
17+
- version1
18+
- version2
1719
datasetEtag:
1820
- "some etag"
1921
- "some other etag"
22+
datasetVolumeKind:
23+
- dynamic
24+
datasetVolumeSize:
25+
- 10Gi
2026
experimentEnv:
2127
key: val
2228
experimentType: MPI

tests/config_files/experiments_create_multi_node_ds_objs.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@ datasets:
66
name: "some dataset name"
77
awsAccessKeyId: none
88
awsSecretAccessKey: none
9-
versionId: version1
109
etag: "some etag"
10+
volumeKind: "dynamic"
11+
volumeSize: "10Gi"
12+
versionId: version1
1113
- uri: "s3://some.other.dataset/uri"
1214
name: null
1315
awsAccessKeyId: some_other_key_id
1416
awsSecretAccessKey: some_other_secret
15-
versionId: version2
1617
etag: "some other etag"
18+
versionId: version2
1719
experimentEnv:
1820
key: val
1921
experimentType: MPI

0 commit comments

Comments
 (0)