Skip to content

Commit 0e1bc95

Browse files
committed
Merge branch 'master' into update-transformers-integrations
2 parents f3def71 + 30576b0 commit 0e1bc95

File tree

114 files changed

+7406
-4906
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+7406
-4906
lines changed

.github/workflows/test-examples.yml

+3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
- integrations/model-evaluation/gradio/notebooks/Gradio_and_Comet.ipynb
2222
- integrations/model-evaluation/gradio/notebooks/Logging_Model_Inferences_with_Comet_and_Gradio.ipynb
2323
- integrations/model-optimization/ray-tune/notebooks/Comet_and_Ray.ipynb
24+
- integrations/model-training/composer/notebooks/comet_composer.ipynb
2425
- integrations/model-training/fastai/notebooks/fastai_hello_world.ipynb
2526
- integrations/model-training/hugging_face/notebooks/Comet_with_Hugging_Face_Trainer.ipynb
2627
- integrations/model-training/keras/notebooks/Comet_with_Keras.ipynb
@@ -38,6 +39,7 @@ jobs:
3839
- integrations/model-training/yolov5/notebooks/Comet_and_YOLOv5.ipynb
3940
- integrations/model-training/yolov8/notebooks/YOLOv8_and_Comet.ipynb
4041
- integrations/reinforcement-learning/gymnasium/notebooks/comet_gymnasium_example.ipynb
42+
- integrations/reinforcement-learning/rllib/notebooks/Comet_and_RLLib.ipynb
4143
- integrations/workflow-orchestration/metaflow/notebooks/metaflow_hello_world.ipynb
4244
env:
4345
NOTEBOOK_TO_TEST: ${{ matrix.notebooks }}
@@ -103,6 +105,7 @@ jobs:
103105
example:
104106
- {script: "integrations/model-evaluation/shap/shap-hello-world/shap-hello-world.py", arg: ""}
105107
- {script: "integrations/model-optimization/optuna/optuna-hello-world/optuna-hello-world.py", arg: ""}
108+
- {script: "integrations/model-training/composer/mosaicml-getting-started/mosaicml-getting-started.py", arg: ""}
106109
- {script: "integrations/model-training/fastai/fastai-hello-world/fastai_hello_world.py", arg: ""}
107110
- {script: "integrations/model-training/hugging_face/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py", arg: ""}
108111
- {script: "integrations/model-training/keras/keras-mnist-dnn/keras-mnist-dnn.py", arg: ""}

SageMaker/Linear_example.ipynb

+57-37
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@
3636
"metadata": {},
3737
"outputs": [],
3838
"source": [
39-
"bucket = 'NAME_YOUR_BUCKET'\n",
40-
"prefix = 'sagemaker/DEMO-linear-mnist'\n",
41-
" \n",
39+
"bucket = \"NAME_YOUR_BUCKET\"\n",
40+
"prefix = \"sagemaker/DEMO-linear-mnist\"\n",
41+
"\n",
4242
"# Define IAM role\n",
4343
"import boto3\n",
4444
"import re\n",
@@ -64,9 +64,11 @@
6464
"import pickle, gzip, numpy, urllib.request, json\n",
6565
"\n",
6666
"# Load the dataset\n",
67-
"urllib.request.urlretrieve(\"http://deeplearning.net/data/mnist/mnist.pkl.gz\", \"mnist.pkl.gz\")\n",
68-
"with gzip.open('mnist.pkl.gz', 'rb') as f:\n",
69-
" train_set, valid_set, test_set = pickle.load(f, encoding='latin1')"
67+
"urllib.request.urlretrieve(\n",
68+
" \"http://deeplearning.net/data/mnist/mnist.pkl.gz\", \"mnist.pkl.gz\"\n",
69+
")\n",
70+
"with gzip.open(\"mnist.pkl.gz\", \"rb\") as f:\n",
71+
" train_set, valid_set, test_set = pickle.load(f, encoding=\"latin1\")"
7072
]
7173
},
7274
{
@@ -84,18 +86,20 @@
8486
"source": [
8587
"%matplotlib inline\n",
8688
"import matplotlib.pyplot as plt\n",
87-
"plt.rcParams[\"figure.figsize\"] = (2,10)\n",
89+
"\n",
90+
"plt.rcParams[\"figure.figsize\"] = (2, 10)\n",
8891
"\n",
8992
"\n",
90-
"def show_digit(img, caption='', subplot=None):\n",
91-
" if subplot==None:\n",
92-
" _,(subplot)=plt.subplots(1,1)\n",
93-
" imgr=img.reshape((28,28))\n",
94-
" subplot.axis('off')\n",
95-
" subplot.imshow(imgr, cmap='gray')\n",
93+
"def show_digit(img, caption=\"\", subplot=None):\n",
94+
" if subplot == None:\n",
95+
" _, (subplot) = plt.subplots(1, 1)\n",
96+
" imgr = img.reshape((28, 28))\n",
97+
" subplot.axis(\"off\")\n",
98+
" subplot.imshow(imgr, cmap=\"gray\")\n",
9699
" plt.title(caption)\n",
97100
"\n",
98-
"show_digit(train_set[0][30], 'This is a {}'.format(train_set[1][30]))"
101+
"\n",
102+
"show_digit(train_set[0][30], \"This is a {}\".format(train_set[1][30]))"
99103
]
100104
},
101105
{
@@ -115,8 +119,10 @@
115119
"import numpy as np\n",
116120
"import sagemaker.amazon.common as smac\n",
117121
"\n",
118-
"vectors = np.array([t.tolist() for t in train_set[0]]).astype('float32')\n",
119-
"labels = np.where(np.array([t.tolist() for t in train_set[1]]) == 0, 1, 0).astype('float32')\n",
122+
"vectors = np.array([t.tolist() for t in train_set[0]]).astype(\"float32\")\n",
123+
"labels = np.where(np.array([t.tolist() for t in train_set[1]]) == 0, 1, 0).astype(\n",
124+
" \"float32\"\n",
125+
")\n",
120126
"\n",
121127
"buf = io.BytesIO()\n",
122128
"smac.write_numpy_to_dense_tensor(buf, vectors, labels)\n",
@@ -139,10 +145,12 @@
139145
"import boto3\n",
140146
"import os\n",
141147
"\n",
142-
"key = 'recordio-pb-data'\n",
143-
"boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)\n",
144-
"s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)\n",
145-
"print('uploaded training data location: {}'.format(s3_train_data))"
148+
"key = \"recordio-pb-data\"\n",
149+
"boto3.resource(\"s3\").Bucket(bucket).Object(\n",
150+
" os.path.join(prefix, \"train\", key)\n",
151+
").upload_fileobj(buf)\n",
152+
"s3_train_data = \"s3://{}/{}/train/{}\".format(bucket, prefix, key)\n",
153+
"print(\"uploaded training data location: {}\".format(s3_train_data))"
146154
]
147155
},
148156
{
@@ -158,8 +166,8 @@
158166
"metadata": {},
159167
"outputs": [],
160168
"source": [
161-
"output_location = 's3://{}/{}/output'.format(bucket, prefix)\n",
162-
"print('training artifacts will be uploaded to: {}'.format(output_location))"
169+
"output_location = \"s3://{}/{}/output\".format(bucket, prefix)\n",
170+
"print(\"training artifacts will be uploaded to: {}\".format(output_location))"
163171
]
164172
},
165173
{
@@ -176,7 +184,8 @@
176184
"outputs": [],
177185
"source": [
178186
"from sagemaker.amazon.amazon_estimator import get_image_uri\n",
179-
"container = get_image_uri(boto3.Session().region_name, 'linear-learner')"
187+
"\n",
188+
"container = get_image_uri(boto3.Session().region_name, \"linear-learner\")"
180189
]
181190
},
182191
{
@@ -190,17 +199,19 @@
190199
"\n",
191200
"sess = sagemaker.Session()\n",
192201
"\n",
193-
"linear = sagemaker.estimator.Estimator(container,\n",
194-
" role, \n",
195-
" train_instance_count=1, \n",
196-
" train_instance_type='ml.c4.xlarge',\n",
197-
" output_path=output_location,\n",
198-
" sagemaker_session=sess)\n",
199-
"linear.set_hyperparameters(feature_dim=784,\n",
200-
" predictor_type='binary_classifier',\n",
201-
" mini_batch_size=200)\n",
202+
"linear = sagemaker.estimator.Estimator(\n",
203+
" container,\n",
204+
" role,\n",
205+
" train_instance_count=1,\n",
206+
" train_instance_type=\"ml.c4.xlarge\",\n",
207+
" output_path=output_location,\n",
208+
" sagemaker_session=sess,\n",
209+
")\n",
210+
"linear.set_hyperparameters(\n",
211+
" feature_dim=784, predictor_type=\"binary_classifier\", mini_batch_size=200\n",
212+
")\n",
202213
"\n",
203-
"linear.fit({'train': s3_train_data})"
214+
"linear.fit({\"train\": s3_train_data})"
204215
]
205216
},
206217
{
@@ -214,7 +225,7 @@
214225
"cell_type": "markdown",
215226
"metadata": {},
216227
"source": [
217-
"Define your Comet [REST API](https://www.comet.ml/docs/rest-api/getting-started/) and your [workspace](https://www.comet.ml/docs/user-interface/#workspaces). See the [configuration documentation](http://docs.comet.ml/python-sdk/advanced/#python-configuration) for info on both specifications."
228+
"Define your Comet [REST API](https://www.comet.com/docs/rest-api/getting-started/) and your [workspace](https://www.comet.com/docs/user-interface/#workspaces). See the [configuration documentation](http://docs.comet.ml/python-sdk/advanced/#python-configuration) for info on both specifications."
218229
]
219230
},
220231
{
@@ -264,7 +275,9 @@
264275
"source": [
265276
"# .log_sagemaker_job(regressor/estimator object from Sagemaker SDK, Comet Rest API key (optional, can be taken from usual config source), workspace (comet), project (comet))\n",
266277
"# I have used the Sagemaker SDK to train a model. I have the estimator/regressor object. I want to log whatever I just trained\n",
267-
"experiment = comet_ml_sagemaker.log_sagemaker_job(linear, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
278+
"experiment = comet_ml_sagemaker.log_sagemaker_job(\n",
279+
" linear, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\"\n",
280+
")\n",
268281
"print(experiment.url)"
269282
]
270283
},
@@ -290,7 +303,12 @@
290303
"# I have the name of a completed training job I want to log\n",
291304
"# Same as .log_sagemaker_job, except instead of passing the regressor/estimator object, you pass the job name\n",
292305
"SAGEMAKER_TRAINING_JOB_NAME = \"SAGEMAKER_TRAINING_JOB_NAME\"\n",
293-
"experiment = comet_ml_sagemaker.log_sagemaker_job_by_name(SAGEMAKER_TRAINING_JOB_NAME, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
306+
"experiment = comet_ml_sagemaker.log_sagemaker_job_by_name(\n",
307+
" SAGEMAKER_TRAINING_JOB_NAME,\n",
308+
" api_key=COMET_REST_API,\n",
309+
" workspace=COMET_WORKSPACE,\n",
310+
" project_name=\"sagemaker\",\n",
311+
")\n",
294312
"print(experiment.url)"
295313
]
296314
},
@@ -313,7 +331,9 @@
313331
"outputs": [],
314332
"source": [
315333
"# Logs the last job for your current Amazon Region / S3\n",
316-
"experiment = comet_ml_sagemaker.log_last_sagemaker_job(api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
334+
"experiment = comet_ml_sagemaker.log_last_sagemaker_job(\n",
335+
" api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\"\n",
336+
")\n",
317337
"print(experiment.url)"
318338
]
319339
},

SageMaker/random_forest.ipynb

+36-23
Original file line numberDiff line numberDiff line change
@@ -41,26 +41,28 @@
4141
"import sys\n",
4242
"\n",
4343
"\n",
44-
"bucket = 'NAME_YOUR_BUCKET' # <--- specify a bucket you have access to\n",
45-
"prefix = 'sagemaker/rcf-benchmarks'\n",
44+
"bucket = \"NAME_YOUR_BUCKET\" # <--- specify a bucket you have access to\n",
45+
"prefix = \"sagemaker/rcf-benchmarks\"\n",
4646
"execution_role = sagemaker.get_execution_role()\n",
4747
"\n",
4848
"\n",
4949
"# check if the bucket exists\n",
5050
"try:\n",
51-
" boto3.Session().client('s3').head_bucket(Bucket=bucket)\n",
51+
" boto3.Session().client(\"s3\").head_bucket(Bucket=bucket)\n",
5252
"except botocore.exceptions.ParamValidationError as e:\n",
53-
" print('Hey! You either forgot to specify your S3 bucket'\n",
54-
" ' or you gave your bucket an invalid name!')\n",
53+
" print(\n",
54+
" \"Hey! You either forgot to specify your S3 bucket\"\n",
55+
" \" or you gave your bucket an invalid name!\"\n",
56+
" )\n",
5557
"except botocore.exceptions.ClientError as e:\n",
56-
" if e.response['Error']['Code'] == '403':\n",
58+
" if e.response[\"Error\"][\"Code\"] == \"403\":\n",
5759
" print(\"Hey! You don't have permission to access the bucket, {}.\".format(bucket))\n",
58-
" elif e.response['Error']['Code'] == '404':\n",
60+
" elif e.response[\"Error\"][\"Code\"] == \"404\":\n",
5961
" print(\"Hey! Your bucket, {}, doesn't exist!\".format(bucket))\n",
6062
" else:\n",
6163
" raise\n",
6264
"else:\n",
63-
" print('Training input/output will be stored in: s3://{}/{}'.format(bucket, prefix))"
65+
" print(\"Training input/output will be stored in: s3://{}/{}\".format(bucket, prefix))"
6466
]
6567
},
6668
{
@@ -81,11 +83,11 @@
8183
"import pandas as pd\n",
8284
"import urllib.request\n",
8385
"\n",
84-
"data_filename = 'nyc_taxi.csv'\n",
85-
"data_source = 'https://raw.githubusercontent.com/numenta/NAB/master/data/realKnownCause/nyc_taxi.csv'\n",
86+
"data_filename = \"nyc_taxi.csv\"\n",
87+
"data_source = \"https://raw.githubusercontent.com/numenta/NAB/master/data/realKnownCause/nyc_taxi.csv\"\n",
8688
"\n",
8789
"urllib.request.urlretrieve(data_source, data_filename)\n",
88-
"taxi_data = pd.read_csv(data_filename, delimiter=',')"
90+
"taxi_data = pd.read_csv(data_filename, delimiter=\",\")"
8991
]
9092
},
9193
{
@@ -108,16 +110,18 @@
108110
"session = sagemaker.Session()\n",
109111
"\n",
110112
"# specify general training job information\n",
111-
"rcf = RandomCutForest(role=execution_role,\n",
112-
" train_instance_count=1,\n",
113-
" train_instance_type='ml.m4.xlarge',\n",
114-
" data_location='s3://{}/{}/'.format(bucket, prefix),\n",
115-
" output_path='s3://{}/{}/output'.format(bucket, prefix),\n",
116-
" num_samples_per_tree=512,\n",
117-
" num_trees=50)\n",
113+
"rcf = RandomCutForest(\n",
114+
" role=execution_role,\n",
115+
" train_instance_count=1,\n",
116+
" train_instance_type=\"ml.m4.xlarge\",\n",
117+
" data_location=\"s3://{}/{}/\".format(bucket, prefix),\n",
118+
" output_path=\"s3://{}/{}/output\".format(bucket, prefix),\n",
119+
" num_samples_per_tree=512,\n",
120+
" num_trees=50,\n",
121+
")\n",
118122
"\n",
119123
"# automatically upload the training data to S3 and run the training job\n",
120-
"rcf.fit(rcf.record_set(taxi_data.value.as_matrix().reshape(-1,1)))"
124+
"rcf.fit(rcf.record_set(taxi_data.value.as_matrix().reshape(-1, 1)))"
121125
]
122126
},
123127
{
@@ -131,7 +135,7 @@
131135
"cell_type": "markdown",
132136
"metadata": {},
133137
"source": [
134-
"Define your Comet [REST API](https://www.comet.ml/docs/rest-api/getting-started/) and your [workspace](https://www.comet.ml/docs/user-interface/#workspaces). See the [configuration documentation](http://docs.comet.ml/python-sdk/advanced/#python-configuration) for info on both specifications."
138+
"Define your Comet [REST API](https://www.comet.com/docs/rest-api/getting-started/) and your [workspace](https://www.comet.com/docs/user-interface/#workspaces). See the [configuration documentation](http://docs.comet.ml/python-sdk/advanced/#python-configuration) for info on both specifications."
135139
]
136140
},
137141
{
@@ -181,7 +185,9 @@
181185
"source": [
182186
"# .log_sagemaker_job(regressor/estimator object from Sagemaker SDK, Comet Rest API key (optional, can be taken from usual config source), workspace (comet), project (comet))\n",
183187
"# I have used the Sagemaker SDK to train a model. I have the estimator/regressor object. I want to log whatever I just trained\n",
184-
"experiment = comet_ml_sagemaker.log_sagemaker_job(rcf, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
188+
"experiment = comet_ml_sagemaker.log_sagemaker_job(\n",
189+
" rcf, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\"\n",
190+
")\n",
185191
"print(experiment.url)\n",
186192
"experiment.add_tags([\"random_forest\"])"
187193
]
@@ -208,7 +214,12 @@
208214
"# I have the name of a completed training job I want to log\n",
209215
"# Same as .log_sagemaker_job, except instead of passing the regressor/estimator object, you pass the job name\n",
210216
"SAGEMAKER_TRAINING_JOB_NAME = \"SAGEMAKER_TRAINING_JOB_NAME\"\n",
211-
"experiment = comet_ml_sagemaker.log_sagemaker_job_by_name(SAGEMAKER_TRAINING_JOB_NAME, api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
217+
"experiment = comet_ml_sagemaker.log_sagemaker_job_by_name(\n",
218+
" SAGEMAKER_TRAINING_JOB_NAME,\n",
219+
" api_key=COMET_REST_API,\n",
220+
" workspace=COMET_WORKSPACE,\n",
221+
" project_name=\"sagemaker\",\n",
222+
")\n",
212223
"print(experiment.url)"
213224
]
214225
},
@@ -231,7 +242,9 @@
231242
"outputs": [],
232243
"source": [
233244
"# Logs the last job for your current Amazon Region / S3\n",
234-
"experiment = comet_ml_sagemaker.log_last_sagemaker_job(api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\")\n",
245+
"experiment = comet_ml_sagemaker.log_last_sagemaker_job(\n",
246+
" api_key=COMET_REST_API, workspace=COMET_WORKSPACE, project_name=\"sagemaker\"\n",
247+
")\n",
235248
"print(experiment.url)\n",
236249
"experiment.add_tags([\"random_forest\"])"
237250
]

0 commit comments

Comments
 (0)