|
1 | 1 | {
|
2 | 2 | "cells": [
|
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "<img src=\"https://cdn.comet.ml/img/notebook_logo.png\">" |
| 8 | + ] |
| 9 | + }, |
| 10 | + { |
| 11 | + "cell_type": "markdown", |
| 12 | + "metadata": {}, |
| 13 | + "source": [ |
| 14 | + "[Comet](https://www.comet.com/site/products/ml-experiment-tracking/) is an MLOps platform designed to help data scientists and teams build better models faster! Comet provides tooling to track, explain, manage, and monitor your models in a single place! It works with Jupyter Notebooks and scripts and, most importantly, it's 100% free to get started!\n", |
| 15 | + "\n", |
| 16 | + "[MLflow](https://github.com/mlflow/mlflow) is an open-source platform for the machine learning lifecycle.\n", |
| 17 | + "\n", |
| 18 | + "Instrument MLflow with Comet to start managing experiments, creating dataset versions, and tracking hyperparameters for faster and easier reproducibility and collaboration.\n", |
| 19 | + "\n", |
| 20 | + "[Find more information about our integration with MLflow](https://www.comet.com/docs/v2/integrations/ml-frameworks/mlflow/)\n", |
| 21 | + "\n", |
| 22 | + "Curious about how Comet can help you build better models, faster? Find out more about [Comet](https://www.comet.com/site/products/ml-experiment-tracking/) and our [other integrations](https://www.comet.com/docs/v2/integrations/overview/).\n", |
| 23 | + "\n", |
| 24 | + "Get a preview of what's to come. Check out a completed experiment created from this notebook [here](https://www.comet.com/examples/comet-example-mlflow-notebook/5b41a47a2f424209a48e38c96619bbcb)." |
| 25 | + ] |
| 26 | + }, |
3 | 27 | {
|
4 | 28 | "cell_type": "markdown",
|
5 | 29 | "metadata": {
|
|
17 | 41 | },
|
18 | 42 | "outputs": [],
|
19 | 43 | "source": [
|
20 |
| - "%pip install -U \"comet_ml>=3.44.0\" mlflow" |
| 44 | + "%pip install -U \"comet_ml>=3.44.0\" mlflow keras tensorflow" |
21 | 45 | ]
|
22 | 46 | },
|
23 | 47 | {
|
|
42 | 66 | "comet_ml.login(project_name=\"comet-example-mlflow-notebook\")"
|
43 | 67 | ]
|
44 | 68 | },
|
| 69 | + { |
| 70 | + "cell_type": "markdown", |
| 71 | + "metadata": {}, |
| 72 | + "source": [ |
| 73 | + "# Import dependencies" |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "cell_type": "code", |
| 78 | + "execution_count": null, |
| 79 | + "metadata": {}, |
| 80 | + "outputs": [], |
| 81 | + "source": [ |
| 82 | + "import os\n", |
| 83 | + "\n", |
| 84 | + "# You can use 'tensorflow', 'torch' or 'jax' as the Keras backend. Make sure to set the environment variable before importing keras.\n", |
| 85 | + "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"" |
| 86 | + ] |
| 87 | + }, |
| 88 | + { |
| 89 | + "cell_type": "code", |
| 90 | + "execution_count": null, |
| 91 | + "metadata": {}, |
| 92 | + "outputs": [], |
| 93 | + "source": [ |
| 94 | + "import keras\n", |
| 95 | + "import numpy as np\n", |
| 96 | + "\n", |
| 97 | + "import mlflow.keras" |
| 98 | + ] |
| 99 | + }, |
45 | 100 | {
|
46 | 101 | "cell_type": "markdown",
|
47 | 102 | "metadata": {
|
48 | 103 | "id": "1R-zIOmm2gJP"
|
49 | 104 | },
|
50 | 105 | "source": [
|
51 |
| - "# Run MLFlow" |
| 106 | + "# Load Dataset" |
| 107 | + ] |
| 108 | + }, |
| 109 | + { |
| 110 | + "cell_type": "code", |
| 111 | + "execution_count": null, |
| 112 | + "metadata": {}, |
| 113 | + "outputs": [], |
| 114 | + "source": [ |
| 115 | + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", |
| 116 | + "x_train = np.expand_dims(x_train, axis=3)\n", |
| 117 | + "x_test = np.expand_dims(x_test, axis=3)\n", |
| 118 | + "x_train[0].shape" |
52 | 119 | ]
|
53 | 120 | },
|
54 | 121 | {
|
55 | 122 | "cell_type": "markdown",
|
56 |
| - "metadata": { |
57 |
| - "id": "u-a86wIo3mfj" |
58 |
| - }, |
| 123 | + "metadata": {}, |
59 | 124 | "source": [
|
60 |
| - "Once Comet is imported at the top of your script, it will automatically log experiment data from your MLFlow runs" |
| 125 | + "# Build Model" |
61 | 126 | ]
|
62 | 127 | },
|
63 | 128 | {
|
64 | 129 | "cell_type": "code",
|
65 | 130 | "execution_count": null,
|
66 |
| - "metadata": { |
67 |
| - "id": "ljuZ8I_q2ZgX" |
68 |
| - }, |
| 131 | + "metadata": {}, |
69 | 132 | "outputs": [],
|
70 | 133 | "source": [
|
71 |
| - "import keras\n", |
72 |
| - "\n", |
73 |
| - "# The following import and function call are the only additions to code required\n", |
74 |
| - "# to automatically log metrics and parameters to MLflow.\n", |
75 |
| - "import mlflow\n", |
76 |
| - "import mlflow.keras\n", |
| 134 | + "NUM_CLASSES = 10\n", |
| 135 | + "INPUT_SHAPE = (28, 28, 1)\n", |
77 | 136 | "\n",
|
78 |
| - "import numpy as np\n", |
79 |
| - "from keras.datasets import reuters\n", |
80 |
| - "from keras.layers import Activation, Dense, Dropout\n", |
81 |
| - "from keras.models import Sequential\n", |
82 |
| - "from keras.preprocessing.text import Tokenizer\n", |
83 | 137 | "\n",
|
84 |
| - "# The sqlite store is needed for the model registry\n", |
85 |
| - "mlflow.set_tracking_uri(\"sqlite:///db.sqlite\")\n", |
| 138 | + "def initialize_model():\n", |
| 139 | + " return keras.Sequential(\n", |
| 140 | + " [\n", |
| 141 | + " keras.Input(shape=INPUT_SHAPE),\n", |
| 142 | + " keras.layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n", |
| 143 | + " keras.layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n", |
| 144 | + " keras.layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n", |
| 145 | + " keras.layers.GlobalAveragePooling2D(),\n", |
| 146 | + " keras.layers.Dense(NUM_CLASSES, activation=\"softmax\"),\n", |
| 147 | + " ]\n", |
| 148 | + " )\n", |
86 | 149 | "\n",
|
87 |
| - "# We need to create a run before calling keras or MLFlow will end the run by itself\n", |
88 |
| - "mlflow.start_run()\n", |
89 | 150 | "\n",
|
90 |
| - "mlflow.keras.autolog()\n", |
| 151 | + "model = initialize_model()\n", |
| 152 | + "model.summary()" |
| 153 | + ] |
| 154 | + }, |
| 155 | + { |
| 156 | + "cell_type": "markdown", |
| 157 | + "metadata": {}, |
| 158 | + "source": [ |
| 159 | + "# Train Model" |
| 160 | + ] |
| 161 | + }, |
| 162 | + { |
| 163 | + "cell_type": "code", |
| 164 | + "execution_count": null, |
| 165 | + "metadata": {}, |
| 166 | + "outputs": [], |
| 167 | + "source": [ |
| 168 | + "BATCH_SIZE = 64 # adjust this based on the memory of your machine\n", |
| 169 | + "EPOCHS = 3\n", |
91 | 170 | "\n",
|
92 |
| - "max_words = 1000\n", |
93 |
| - "batch_size = 32\n", |
94 |
| - "epochs = 5\n", |
| 171 | + "model = initialize_model()\n", |
95 | 172 | "\n",
|
96 |
| - "print(\"Loading data...\")\n", |
97 |
| - "(x_train, y_train), (x_test, y_test) = reuters.load_data(\n", |
98 |
| - " num_words=max_words, test_split=0.2\n", |
| 173 | + "model.compile(\n", |
| 174 | + " loss=keras.losses.SparseCategoricalCrossentropy(),\n", |
| 175 | + " optimizer=keras.optimizers.Adam(),\n", |
| 176 | + " metrics=[\"accuracy\"],\n", |
99 | 177 | ")\n",
|
100 | 178 | "\n",
|
101 |
| - "print(len(x_train), \"train sequences\")\n", |
102 |
| - "print(len(x_test), \"test sequences\")\n", |
103 |
| - "\n", |
104 |
| - "num_classes = np.max(y_train) + 1\n", |
105 |
| - "print(num_classes, \"classes\")\n", |
106 |
| - "\n", |
107 |
| - "print(\"Vectorizing sequence data...\")\n", |
108 |
| - "tokenizer = Tokenizer(num_words=max_words)\n", |
109 |
| - "x_train = tokenizer.sequences_to_matrix(x_train, mode=\"binary\")\n", |
110 |
| - "x_test = tokenizer.sequences_to_matrix(x_test, mode=\"binary\")\n", |
111 |
| - "print(\"x_train shape:\", x_train.shape)\n", |
112 |
| - "print(\"x_test shape:\", x_test.shape)\n", |
113 |
| - "\n", |
114 |
| - "print(\n", |
115 |
| - " \"Convert class vector to binary class matrix \"\n", |
116 |
| - " \"(for use with categorical_crossentropy)\"\n", |
117 |
| - ")\n", |
118 |
| - "y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)\n", |
119 |
| - "y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)\n", |
120 |
| - "print(\"y_train shape:\", y_train.shape)\n", |
121 |
| - "print(\"y_test shape:\", y_test.shape)\n", |
122 |
| - "\n", |
123 |
| - "print(\"Building model...\")\n", |
124 |
| - "model = Sequential()\n", |
125 |
| - "model.add(Dense(512, input_shape=(max_words,)))\n", |
126 |
| - "model.add(Activation(\"relu\"))\n", |
127 |
| - "model.add(Dropout(0.5))\n", |
128 |
| - "model.add(Dense(num_classes))\n", |
129 |
| - "model.add(Activation(\"softmax\"))\n", |
130 |
| - "\n", |
131 |
| - "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n", |
132 |
| - "\n", |
133 |
| - "history = model.fit(\n", |
| 179 | + "run = mlflow.start_run()\n", |
| 180 | + "model.fit(\n", |
134 | 181 | " x_train,\n",
|
135 | 182 | " y_train,\n",
|
136 |
| - " batch_size=batch_size,\n", |
137 |
| - " epochs=epochs,\n", |
138 |
| - " verbose=1,\n", |
| 183 | + " batch_size=BATCH_SIZE,\n", |
| 184 | + " epochs=EPOCHS,\n", |
139 | 185 | " validation_split=0.1,\n",
|
| 186 | + " callbacks=[mlflow.keras.MlflowCallback(run)],\n", |
140 | 187 | ")\n",
|
141 |
| - "score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)\n", |
142 |
| - "print(\"Test score:\", score[0])\n", |
143 |
| - "print(\"Test accuracy:\", score[1])\n", |
144 | 188 | "\n",
|
145 | 189 | "mlflow.keras.log_model(model, \"model\", registered_model_name=\"Test Model\")\n",
|
| 190 | + "\n", |
146 | 191 | "mlflow.end_run()"
|
147 | 192 | ]
|
148 |
| - }, |
149 |
| - { |
150 |
| - "cell_type": "code", |
151 |
| - "execution_count": null, |
152 |
| - "metadata": { |
153 |
| - "id": "sVz6748M6R8_" |
154 |
| - }, |
155 |
| - "outputs": [], |
156 |
| - "source": [] |
157 | 193 | }
|
158 | 194 | ],
|
159 | 195 | "metadata": {
|
160 | 196 | "colab": {
|
161 | 197 | "provenance": []
|
162 | 198 | },
|
163 | 199 | "kernelspec": {
|
164 |
| - "display_name": "Python 3", |
| 200 | + "display_name": "Python 3 (ipykernel)", |
| 201 | + "language": "python", |
165 | 202 | "name": "python3"
|
166 | 203 | },
|
167 | 204 | "language_info": {
|
168 |
| - "name": "python" |
| 205 | + "codemirror_mode": { |
| 206 | + "name": "ipython", |
| 207 | + "version": 3 |
| 208 | + }, |
| 209 | + "file_extension": ".py", |
| 210 | + "mimetype": "text/x-python", |
| 211 | + "name": "python", |
| 212 | + "nbconvert_exporter": "python", |
| 213 | + "pygments_lexer": "ipython3", |
| 214 | + "version": "3.10.12" |
169 | 215 | }
|
170 | 216 | },
|
171 | 217 | "nbformat": 4,
|
172 |
| - "nbformat_minor": 0 |
| 218 | + "nbformat_minor": 4 |
173 | 219 | }
|
0 commit comments