Skip to content

Commit 7b46520

Browse files
authored
[SDK] Support PyTorchJob as a Trial Worker (#2512)
* [SDK] Support PyTorchJob as Trial Worker
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Fix pod spec for Job
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Set default restart_policy to Never
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Fix primary_container_name for PyTorchJob
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Add unit tests for PyTorchJob as Trial
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Add e2e test for PyTorchJob as Trial
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Bump kubeflow-training SDK
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Deploy Training Operator with server side apply
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Decrease CPUs for E2E
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Install Training Operator for tune workflow
  Signed-off-by: Andrey Velichkevich <[email protected]>
* Fix comments
  Signed-off-by: Andrey Velichkevich <[email protected]>
---------
Signed-off-by: Andrey Velichkevich <[email protected]>
1 parent 6389cba commit 7b46520

File tree

15 files changed

+395
-194
lines changed

15 files changed

+395
-194
lines changed

.github/workflows/e2e-test-tune-api.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,12 @@ jobs:
2121
uses: ./.github/workflows/template-setup-e2e-test
2222
with:
2323
kubernetes-version: ${{ matrix.kubernetes-version }}
24-
24+
2525
- name: Run e2e test with tune API
2626
uses: ./.github/workflows/template-e2e-test
2727
with:
2828
tune-api: true
29+
training-operator: true
2930

3031
strategy:
3132
fail-fast: false

.gitignore

+3
Original file line number | Diff line number | Diff line change
@@ -78,3 +78,6 @@ $RECYCLE.BIN/
7878

7979
## Vendor dir
8080
vendor
81+
82+
# Jupyter Notebooks.
83+
**/.ipynb_checkpoints

hack/gen-python-sdk/post_gen.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,7 @@ def _rewrite_helper(input_file, output_file, rewrite_rules):
4242
lines.append("# Import Katib API client.\n")
4343
lines.append("from kubeflow.katib.api.katib_client import KatibClient\n")
4444
lines.append("# Import Katib TrainerResources class.\n")
45-
lines.append(
46-
"from kubeflow.katib.types.trainer_resources import TrainerResources\n"
47-
)
45+
lines.append("from kubeflow.katib.types.types import TrainerResources\n")
4846
lines.append("# Import Katib report metrics functions\n")
4947
lines.append("from kubeflow.katib.api.report_metrics import report_metrics\n")
5048
lines.append("# Import Katib helper functions.\n")

pkg/apis/v1beta1/swagger.json

+18-18
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@
132132
"$ref": "#/definitions/v1beta1.AlgorithmSetting"
133133
},
134134
"x-kubernetes-list-map-keys": [
135-
"Name"
135+
"name"
136136
],
137137
"x-kubernetes-list-type": "map"
138138
},
@@ -148,7 +148,7 @@
148148
"$ref": "#/definitions/.v1beta1.SuggestionCondition"
149149
},
150150
"x-kubernetes-list-map-keys": [
151-
"Type"
151+
"type"
152152
],
153153
"x-kubernetes-list-type": "map"
154154
},
@@ -173,7 +173,7 @@
173173
"$ref": "#/definitions/.v1beta1.TrialAssignment"
174174
},
175175
"x-kubernetes-list-map-keys": [
176-
"Name"
176+
"name"
177177
],
178178
"x-kubernetes-list-type": "map"
179179
}
@@ -217,7 +217,7 @@
217217
"$ref": "#/definitions/v1beta1.EarlyStoppingRule"
218218
},
219219
"x-kubernetes-list-map-keys": [
220-
"Name"
220+
"name"
221221
],
222222
"x-kubernetes-list-type": "map"
223223
},
@@ -241,7 +241,7 @@
241241
"$ref": "#/definitions/v1beta1.ParameterAssignment"
242242
},
243243
"x-kubernetes-list-map-keys": [
244-
"Name"
244+
"name"
245245
],
246246
"x-kubernetes-list-type": "map"
247247
}
@@ -323,7 +323,7 @@
323323
"$ref": "#/definitions/v1beta1.EarlyStoppingRule"
324324
},
325325
"x-kubernetes-list-map-keys": [
326-
"Name"
326+
"name"
327327
],
328328
"x-kubernetes-list-type": "map"
329329
},
@@ -356,7 +356,7 @@
356356
"$ref": "#/definitions/v1beta1.ParameterAssignment"
357357
},
358358
"x-kubernetes-list-map-keys": [
359-
"Name"
359+
"name"
360360
],
361361
"x-kubernetes-list-type": "map"
362362
},
@@ -402,7 +402,7 @@
402402
"$ref": "#/definitions/.v1beta1.TrialCondition"
403403
},
404404
"x-kubernetes-list-map-keys": [
405-
"Type"
405+
"type"
406406
],
407407
"x-kubernetes-list-type": "map"
408408
},
@@ -450,7 +450,7 @@
450450
"$ref": "#/definitions/v1beta1.AlgorithmSetting"
451451
},
452452
"x-kubernetes-list-map-keys": [
453-
"Name"
453+
"name"
454454
],
455455
"x-kubernetes-list-type": "map"
456456
}
@@ -539,7 +539,7 @@
539539
"$ref": "#/definitions/v1beta1.EarlyStoppingSetting"
540540
},
541541
"x-kubernetes-list-map-keys": [
542-
"Name"
542+
"name"
543543
],
544544
"x-kubernetes-list-type": "map"
545545
}
@@ -681,7 +681,7 @@
681681
"$ref": "#/definitions/v1beta1.ParameterSpec"
682682
},
683683
"x-kubernetes-list-map-keys": [
684-
"Name"
684+
"name"
685685
],
686686
"x-kubernetes-list-type": "map"
687687
},
@@ -711,7 +711,7 @@
711711
"$ref": "#/definitions/v1beta1.ExperimentCondition"
712712
},
713713
"x-kubernetes-list-map-keys": [
714-
"Type"
714+
"type"
715715
],
716716
"x-kubernetes-list-type": "map"
717717
},
@@ -968,7 +968,7 @@
968968
"$ref": "#/definitions/v1beta1.Operation"
969969
},
970970
"x-kubernetes-list-map-keys": [
971-
"OperationType"
971+
"operationType"
972972
],
973973
"x-kubernetes-list-type": "map"
974974
}
@@ -1000,7 +1000,7 @@
10001000
"$ref": "#/definitions/v1beta1.MetricStrategy"
10011001
},
10021002
"x-kubernetes-list-map-keys": [
1003-
"Name"
1003+
"name"
10041004
],
10051005
"x-kubernetes-list-type": "map"
10061006
},
@@ -1025,7 +1025,7 @@
10251025
"$ref": "#/definitions/v1beta1.Metric"
10261026
},
10271027
"x-kubernetes-list-map-keys": [
1028-
"Name"
1028+
"name"
10291029
],
10301030
"x-kubernetes-list-type": "map"
10311031
}
@@ -1045,7 +1045,7 @@
10451045
"$ref": "#/definitions/v1beta1.ParameterSpec"
10461046
},
10471047
"x-kubernetes-list-map-keys": [
1048-
"Name"
1048+
"name"
10491049
],
10501050
"x-kubernetes-list-type": "map"
10511051
}
@@ -1072,7 +1072,7 @@
10721072
"$ref": "#/definitions/v1beta1.ParameterAssignment"
10731073
},
10741074
"x-kubernetes-list-map-keys": [
1075-
"Name"
1075+
"name"
10761076
],
10771077
"x-kubernetes-list-type": "map"
10781078
}
@@ -1193,7 +1193,7 @@
11931193
"$ref": "#/definitions/v1beta1.TrialParameterSpec"
11941194
},
11951195
"x-kubernetes-list-map-keys": [
1196-
"Name"
1196+
"name"
11971197
],
11981198
"x-kubernetes-list-type": "map"
11991199
},

pkg/apis/v1beta1/zz_generated.openapi.go

+18-18
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/python/v1beta1/kubeflow/katib/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@
7272
# Import Katib API client.
7373
from kubeflow.katib.api.katib_client import KatibClient
7474
# Import Katib TrainerResources class.
75-
from kubeflow.katib.types.trainer_resources import TrainerResources
75+
from kubeflow.katib.types.types import TrainerResources
7676
# Import Katib report metrics functions
7777
from kubeflow.katib.api.report_metrics import report_metrics
7878
# Import Katib helper functions.

0 commit comments

Comments
 (0)