Merged
Changes from 19 commits
21 commits
ab2d6d9
changing folder name
danielrazavi Sep 12, 2025
c0e590a
Add environment setup guide and project configuration files
danielrazavi Sep 12, 2025
66c77b4
Update onboarding links and enhance setup instructions in README and …
danielrazavi Sep 12, 2025
e7a30f7
Updating the README.md file to include pyproject.toml
danielrazavi Sep 23, 2025
e4911ed
adding ipykernel
danielrazavi Oct 7, 2025
e3a1abe
env: add missing deps in pyproject.toml, refresh uv.lock, ignore larg…
Oct 18, 2025
d521b9d
lab_1: use explicit Input(shape=...) before Dense to avoid Keras Dens…
Oct 18, 2025
8ea05eb
lab_2: guard division-by-zero in training loop logging (skip i==0)
Oct 18, 2025
8e29816
lab_5: switch VOC URL to GitHub mirror; remove precomputed reps downl…
Oct 18, 2025
fad2aaa
lab_6: increase LogisticRegression max_iter=1000 and update markdown …
Oct 18, 2025
e8bcd42
assignments: minor updates while aligning with lab fixes (no function…
Oct 18, 2025
2d600f5
docs: update READMEs to reflect lab changes and environment setup
Oct 18, 2025
0955614
labs: remove deprecated labs 8–10 from main materials (migrated per c…
Oct 18, 2025
8185a5b
cohort_six: remove legacy lab copies and unused .keep
Oct 18, 2025
15bb19d
cohort_seven: add cohort directory and resources
Oct 18, 2025
e5eb217
Add h5py dependency for Lab 5 caching functionality
Oct 18, 2025
eea4f06
Fix exception type in Lab 5: use ValueError instead of NameError
Oct 18, 2025
edb7c8d
Merge pull request #81 from UofT-DSI/move_to_uv
alexwolson Oct 18, 2025
ac11e8c
Merge branch main into cohort_seven_updates
Oct 18, 2025
09f023a
Refactor code structure for improved readability and maintainability
danielrazavi Oct 20, 2025
f320e3f
Fix Slack channel reference in assignment documentation
danielrazavi Oct 20, 2025
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -19,3 +19,14 @@
02_activities/assignments/best_model/
02_activities/assignments/*.zip
02_activities/assignments/*.txt
test_notebooks/

# Large artifacts generated during labs
voc_representations.h5
VOCdevkit/
VOCtrainval_06-Nov-2007.tar.gz

# Python virtual environments
.venv
uv.lock
deep-learning-env
13 changes: 7 additions & 6 deletions 01_materials/labs/lab_1.ipynb
@@ -280,18 +280,19 @@
"outputs": [],
"source": [
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.layers import Input, Dense\n",
"\n",
"model = Sequential()\n",
"\n",
"# Input layer\n",
"model.add(Dense(64, activation='relu', input_shape=(64,))) # 64 neurons, ReLU activation, input shape of 64\n",
"model.add(Input(shape=(64,))) # Input tensor specifying the shape\n",
"model.add(Dense(64, activation='relu')) # 64 neurons, ReLU activation\n",
"\n",
"# Hidden layer\n",
"model.add(Dense(64, activation='relu')) # 64 neurons, ReLU activation\n",
"model.add(Dense(64, activation='relu')) # 64 neurons, ReLU activation\n",
"\n",
"# Output layer\n",
"model.add(Dense(10, activation='softmax')) # 10 neurons, softmax activation\n",
"model.add(Dense(10, activation='softmax')) # 10 neurons, softmax activation\n",
"\n",
"model.summary()"
]
@@ -812,7 +813,7 @@
"metadata": {
"file_extension": ".py",
"kernelspec": {
"display_name": "lab_1",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -826,7 +827,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
"version": "3.12.12"
},
"mimetype": "text/x-python",
"name": "python",
8 changes: 4 additions & 4 deletions 01_materials/labs/lab_2.ipynb
@@ -472,8 +472,8 @@
" lr.W -= learning_rate * grad_W\n",
" lr.b -= learning_rate * grad_b\n",
"\n",
" # Print the average negative log likelihood every 100 steps\n",
" if i % 100 == 0:\n",
" # Print the average negative log likelihood every 100 steps (avoid empty slice at i==0)\n",
" if i > 0 and i % 100 == 0:\n",
" avg_nll = lr.loss(X_train[max(0, i-100):i], y_train[max(0, i-100):i])\n",
" print(\"Average NLL over the last 100 samples at step %d: %0.f\" % (i, avg_nll))\n"
]
@@ -822,7 +822,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "lab_1",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -836,7 +836,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
"version": "3.12.12"
}
},
"nbformat": 4,
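The lab_2 fix above skips the logging branch at step 0, where the slice `X_train[max(0, i-100):i]` would be empty and the averaged loss undefined. The guard can be sketched standalone (the function name and step counts here are illustrative, not from the lab):

```python
def log_steps(n_steps, interval=100):
    """Collect (step, window_size) pairs the way the guarded training loop logs them."""
    logged = []
    for i in range(n_steps):
        # ... gradient update would happen here ...
        # Guard: at i == 0 the slice [max(0, i - interval):i] is empty, so skip it.
        if i > 0 and i % interval == 0:
            window_size = i - max(0, i - interval)
            logged.append((i, window_size))
    return logged

print(log_steps(250))  # step 0 is skipped; steps 100 and 200 each log a full window
```

Without the `i > 0` check, the very first iteration averages over zero samples, which is what the commit message's "division-by-zero in training loop logging" refers to.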
172 changes: 76 additions & 96 deletions 01_materials/labs/lab_5.ipynb
@@ -17,50 +17,6 @@
"We will use the Pascal VOC 2007 dataset, which contains 20 classes of objects. We will only use 5 classes: \"dog\", \"cat\", \"bus\", \"car\", \"aeroplane\". To get started, we will use a pre-trained ResNet50 model to precompute the convolutional representations of the images. We will then build a simple model to predict the class and the bounding box of the object in the image."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ELYR5msdWlk1"
},
"outputs": [],
"source": [
"# !pip install \"imageio[pyav]\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"id": "xbPrrHQBWlk2"
},
"source": [
"Before we start, it's important that if you're on Google Colab, you've enabled the GPU. To do this, go to `Runtime` > `Change runtime type` and select `GPU` from the `Hardware accelerator` dropdown.\n",
"\n",
"The following code cell will check if you have a GPU available. If you don't, you will still be able to run the notebook, but the code will take much longer to execute."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FsrE0uJEWlk2",
"outputId": "b1829dac-3f25-42ce-a941-45b1c6aecf25"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import sys\n",
"\n",
"if tf.test.gpu_device_name() == '':\n",
" print('You do not have a GPU available.')\n",
"else:\n",
" print('You have a GPU available.')"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -77,9 +33,9 @@
"import tarfile\n",
"from urllib.request import urlretrieve\n",
"\n",
"URL_VOC = (\"http://host.robots.ox.ac.uk/pascal/VOC/\"\n",
" \"voc2007/VOCtrainval_06-Nov-2007.tar\")\n",
"FILE_VOC = \"VOCtrainval_06-Nov-2007.tar\"\n",
"URL_VOC = (\"https://github.com/alexwolson/pascal-voc-2007-mirror/\"\n",
" \"raw/main/VOCtrainval_06-Nov-2007.tar.gz\")\n",
"FILE_VOC = \"VOCtrainval_06-Nov-2007.tar.gz\"\n",
"FOLDER_VOC = \"VOCdevkit\"\n",
"\n",
"if not op.exists(FILE_VOC):\n",
@@ -91,13 +47,7 @@
" with tarfile.open(FILE_VOC) as tar:\n",
" tar.extractall(filter='data')\n",
"\n",
"URL_REPRESENTATIONS = (\"https://github.com/m2dsupsdlclass/lectures-labs/\"\n",
" \"releases/download/0.2/voc_representations.h5\")\n",
"FILE_REPRESENTATIONS = \"voc_representations.h5\"\n",
"\n",
"if not op.exists(FILE_REPRESENTATIONS):\n",
" print(f'Downloading from {URL_REPRESENTATIONS} to {FILE_REPRESENTATIONS}...')\n",
" urlretrieve(URL_REPRESENTATIONS, './' + FILE_REPRESENTATIONS)"
"print(\"Done!\")"
]
},
{
@@ -433,9 +383,9 @@
"source": [
"### Compute representations on all images in our annotations\n",
"\n",
"Computing representations for all images may take some time (especially without a GPU), so it was pre-computed and save in `voc_representations.h5`\n",
"We will use the ResNet50 model to compute convolutional representations for all images in our filtered annotations.\n",
"\n",
"We will load the representations from the file `voc_representations.h5` and store them in a numpy array `reprs`."
"Computing representations for all images may take some time (especially without a GPU), so we'll cache them to disk in `voc_representations.h5`. If the file already exists, we'll load from it; otherwise, we'll compute the representations and save them for future use."
]
},
{
@@ -447,9 +397,40 @@
"outputs": [],
"source": [
"import h5py\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from skimage.io import imread\n",
"\n",
"CACHE_FILE = 'voc_representations.h5'\n",
"\n",
"with h5py.File('voc_representations.h5', 'r') as h5f:\n",
" reprs = h5f['reprs'][:]"
"if op.exists(CACHE_FILE):\n",
" print(f\"Loading cached representations from {CACHE_FILE}...\")\n",
" with h5py.File(CACHE_FILE, 'r') as h5f:\n",
" reprs = h5f['reprs'][:]\n",
" print(f\"Loaded {len(reprs)} cached representations\")\n",
"else:\n",
" print(f\"Computing representations for {len(annotations)} images...\")\n",
" print(\"This may take a few minutes...\")\n",
"\n",
" batch_size = 32\n",
" all_reprs = []\n",
"\n",
" for i in tqdm(range(0, len(annotations), batch_size)):\n",
" batch_annotations = annotations[i:i+batch_size]\n",
" img_paths = [\"VOCdevkit/VOC2007/JPEGImages/\" + ann['filename'] for ann in batch_annotations]\n",
"\n",
" batch_imgs = [imread(path) for path in img_paths]\n",
"\n",
" batch_reprs = predict_batch(headless_conv, batch_imgs, img_size=(224, 224))\n",
" all_reprs.append(batch_reprs)\n",
"\n",
" reprs = np.vstack(all_reprs)\n",
" print(f\"Computed {len(reprs)} representations with shape {reprs.shape}\")\n",
"\n",
" print(f\"Saving representations to {CACHE_FILE}...\")\n",
" with h5py.File(CACHE_FILE, 'w') as h5f:\n",
" h5f.create_dataset('reprs', data=reprs)\n",
" print(\"Saved! Next time this will load instantly.\")"
]
},
{
@@ -779,7 +760,7 @@
" \"\"\"bad model that averages all the spatial information\"\"\"\n",
"\n",
" model_input = Input(shape=(7, 7, 2048))\n",
" x = GlobalAveragePooling2D()(model_input) # We aren't doing any convolutional operation\n",
" x = GlobalAveragePooling2D()(model_input) # We aren't doing any convolutional operation\n",
"\n",
" # Now we build two separate heads for the model: one for classification and one for localization\n",
" # Each takes in the output of the global average pooling layer\n",
Expand All @@ -791,7 +772,7 @@
" model = Model(model_input, outputs=[class_prediction_head, box_prediction_head],\n",
" name=\"resnet_loc\")\n",
" model.compile(optimizer=\"adam\", loss=[categorical_crossentropy, \"mse\"],\n",
" loss_weights=[1., 0.0001])\n",
" loss_weights=[1., 10.])\n",
" return model"
]
},
@@ -1088,28 +1069,22 @@
},
"outputs": [],
"source": [
"# Compute the previous function on the whole train / test set\n",
"def compute_acc(model, train=True):\n",
" n_samples = len(annotations)\n",
" if train:\n",
" beg, end = 0, (9 * n_samples // 10)\n",
" split_name = \"train\"\n",
" else:\n",
" beg, end = (9 * n_samples) // 10, n_samples\n",
" split_name = \"test\"\n",
" res = model.predict(reprs[beg:end])\n",
"# Compute accuracy metrics for a given split (explicit API)\n",
"def compute_acc(model, reprs_split, annotations_split, split_name=\"eval\"):\n",
" if len(reprs_split) == 0:\n",
" print(f\"[{split_name}] Warning: Empty split, skipping evaluation\")\n",
" return\n",
"\n",
" res = model.predict(reprs_split, batch_size=32, verbose=0)\n",
" outputs = []\n",
" for index, (classes, boxes) in enumerate(zip(res[0], res[1])):\n",
" output = interpret_output(classes, boxes,\n",
" img_size=annotations[index][\"size\"])\n",
" img_size=annotations_split[index][\"size\"])\n",
" outputs.append(output)\n",
"\n",
" acc, iou, valid = accuracy_and_iou(outputs, annotations[beg:end],\n",
" threshold=0.5)\n",
"\n",
" print('[{}] class accuracy: {:0.3f}, mean IoU: {:0.3f},'\n",
" ' valid accuracy: {:0.3f}'.format(\n",
" split_name, acc, iou, valid) )"
" acc, iou, valid = accuracy_and_iou(outputs, annotations_split, threshold=0.5)\n",
" print('[{}] class accuracy: {:0.3f}, mean IoU: {:0.3f}, valid accuracy: {:0.3f}'.format(\n",
" split_name, acc, iou, valid))"
]
},
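The refactored `compute_acc` no longer hides the 90/10 split behind a `train=True/False` flag; the caller passes each split's representations and annotations explicitly. The split arithmetic the lab uses can be sketched as:

```python
def split_indices(n_samples):
    """Return (train, test) index ranges for the lab's 90/10 split."""
    train_end = (9 * n_samples) // 10   # integer arithmetic, matching the lab
    return (0, train_end), (train_end, n_samples)

print(split_indices(100))  # ((0, 90), (90, 100))
```

Making the split explicit keeps the evaluation function reusable for any slice and avoids the silent coupling to module-level `annotations` and `reprs` that the old version relied on.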
{
Expand All @@ -1124,8 +1099,11 @@
},
"outputs": [],
"source": [
"compute_acc(bad_model, train=True)\n",
"compute_acc(bad_model, train=False)"
"# For initial evaluation before train/test split variables exist\n",
"n_samples = len(annotations)\n",
"train_split_end = (9 * n_samples) // 10\n",
"compute_acc(bad_model, reprs[:train_split_end], annotations[:train_split_end], \"train\")\n",
"compute_acc(bad_model, reprs[train_split_end:], annotations[train_split_end:], \"test\")"
]
},
{
@@ -1205,8 +1183,8 @@
},
"outputs": [],
"source": [
"compute_acc(bad_model, train=True)\n",
"compute_acc(bad_model, train=False)"
"compute_acc(bad_model, reprs[:train_num], annotations[:train_num], \"train\")\n",
"compute_acc(bad_model, reprs[train_num:], annotations[train_num:], \"test\")"
]
},
{
@@ -1247,19 +1225,21 @@
" model_input = Input(shape=(7, 7, 2048))\n",
"\n",
" # TODO: Build a better model. Remember that you have two heads: one for classification and one for localization\n",
" # You can start from a pooled feature map as a baseline:\n",
" # features = GlobalAveragePooling2D()(model_input)\n",
" # class_prediction_head = Dense(num_classes, activation=\"softmax\", name=\"head_classes\")(features)\n",
" # box_prediction_head = Dense(4, name=\"head_boxes\")(features)\n",
"\n",
" # add some stuff that works directly on `model_input` here\n",
" # Placeholders for students to implement\n",
" class_prediction_head = None\n",
" box_prediction_head = None\n",
"\n",
" # then build the two separate heads\n",
" if class_prediction_head is None or box_prediction_head is None:\n",
" raise ValueError(\"Both class_prediction_head and box_prediction_head must be defined.\")\n",
"\n",
" class_prediction_head = None # TODO\n",
"\n",
" box_prediction_head = None # TODO\n",
"\n",
" if class_prediction_head and box_prediction_head:\n",
" model = Model(model_input, outputs=[class_prediction_head, box_prediction_head], name=\"resnet_loc\")\n",
" model.compile(optimizer=\"adam\", loss=[categorical_crossentropy, \"mse\"],\n",
" loss_weights=[1., 1 / 0.001])\n",
" model = Model(model_input, outputs=[class_prediction_head, box_prediction_head], name=\"resnet_loc\")\n",
" model.compile(optimizer=\"adam\", loss=[categorical_crossentropy, \"mse\"],\n",
" loss_weights=[1., 10.])\n",
" return model\n"
]
},
@@ -1285,7 +1265,7 @@
"    compute_acc(better_model, reprs[:train_num], annotations[:train_num], \"train\")\n",
"    compute_acc(better_model, reprs[train_num:], annotations[train_num:], \"test\")\n",
"\n",
"except NameError as e:\n",
"except ValueError as e:\n",
" print(str(e) + \" Possible issue: Complete the relevant section of the assignment to initialize it.\")\n",
"\n"
]
Expand All @@ -1304,7 +1284,7 @@
"source": [
"try:\n",
" display_prediction(better_model, 11)\n",
"except NameError as e:\n",
"except ValueError as e:\n",
" print(str(e) + \" Possible issue: Complete the relevant section of the assignment to initialize it.\")"
]
},
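The switch from `NameError` to `ValueError` matches how the placeholder model now fails: `build_better_model` raises `ValueError` explicitly when a head is left as `None`, and the calling cells catch exactly that type. A minimal sketch of the pattern (names are illustrative, not the lab's exact code):

```python
def build_model(class_head=None, box_head=None):
    """Stand-in for the lab's model builder with student-completed heads."""
    if class_head is None or box_head is None:
        raise ValueError("Both class_head and box_head must be defined.")
    return {"classes": class_head, "boxes": box_head}

try:
    build_model()
except ValueError as e:
    print(str(e) + " Possible issue: complete the relevant section to initialize it.")
```

Raising a specific exception with an explanatory message is sturdier than letting an incidental `NameError` escape: the catch site no longer depends on which variable happens to be undefined.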
@@ -1338,7 +1318,7 @@
"source": [
"try:\n",
" display_prediction(better_model, 52)\n",
"except NameError as e:\n",
"except ValueError as e:\n",
" print(str(e) + \" Possible issue: Complete the relevant section of the assignment to initialize it.\")"
]
},
Expand All @@ -1364,7 +1344,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "lab_1",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -1378,7 +1358,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
"version": "3.12.12"
}
},
"nbformat": 4,
4 changes: 2 additions & 2 deletions 01_materials/labs/lab_6.ipynb
@@ -206,7 +206,7 @@
"\n",
"text_classifier = make_pipeline(\n",
" CountVectorizer(max_features=2000),\n",
" LogisticRegression(),\n",
" LogisticRegression(max_iter=1000),\n",
")"
]
},
@@ -225,7 +225,7 @@
"collapsed": false
},
"source": [
"You may get a warning above that \"lbfgs failed to converge\". This means that the optimization algorithm did not reach the desired precision. This is not a big deal here, as we are not looking for the best possible accuracy, but just a baseline. We can check the accuracy of this model on the test set:"
"Let's check the accuracy of this baseline model on the test set:"
]
},
{
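Raising `max_iter` to 1000 gives the lbfgs solver more iterations to reach its convergence tolerance, which is why the warning-explaining markdown cell could be removed. The interplay between an iteration cap and a tolerance can be sketched with a toy gradient-descent solver (the function and constants are illustrative, not scikit-learn's internals):

```python
def minimize(max_iter, tol=1e-6, lr=0.1):
    """Minimize f(w) = (w - 3)^2; stop on small gradient or on the iteration cap."""
    w = 0.0
    for it in range(1, max_iter + 1):
        grad = 2.0 * (w - 3.0)
        if abs(grad) < tol:
            return w, it, True      # converged within tolerance
        w -= lr * grad
    return w, max_iter, False       # hit the cap first: the "failed to converge" case
```

With a cap of 5 this returns `converged=False`, the analogue of scikit-learn's "lbfgs failed to converge" warning; with 1000 it converges long before the cap, so the warning never fires.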