GokuMohandas · GokuMohandas · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@ stores/
 mlflow/
 results/
 workspaces/
+efs/
 
 # VSCode
 .vscode/

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer

diff --git a/Makefile b/Makefile
@@ -12,6 +12,7 @@ style:
 # Cleaning
 .PHONY: clean
 clean: style
+	python notebooks/clear_cell_nums.py
 	find . -type f -name "*.DS_Store" -ls -delete
 	find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
 	find . | grep -E ".pytest_cache" | xargs rm -rf

diff --git a/README.md b/README.md
@@ -83,7 +83,7 @@ We'll start by setting up our cluster with the environment and compute configura
   - Project: `madewithml`
   - Cluster environment name: `madewithml-cluster-env`
   # Toggle `Select from saved configurations`
-  - Compute config: `madewithml-cluster-compute`
+  - Compute config: `madewithml-cluster-compute-g5.4xlarge`
   ```
 
   > Alternatively, we can use the [CLI](https://docs.anyscale.com/reference/anyscale-cli) to create the workspace via `anyscale workspace create ...`
@@ -423,7 +423,7 @@ anyscale cluster-env build deploy/cluster_env.yaml --name $CLUSTER_ENV_NAME
 The compute configuration determines **what** resources our workloads will be executed on. This [compute configuration](./deploy/cluster_compute.yaml) has already been created for us, but this is how we can create it ourselves.
 
 ```bash
-export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute"
+export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute-g5.4xlarge"
 anyscale cluster-compute create deploy/cluster_compute.yaml --name $CLUSTER_COMPUTE_NAME
 ```
 

diff --git a/deploy/cluster_compute.yaml b/deploy/cluster_compute.yaml
@@ -1,12 +1,12 @@
-cloud: madewithml-us-east-2
-region: us-east2
+cloud: education-us-west-2
+region: us-west-2
 head_node_type:
   name: head_node_type
-  instance_type: m5.2xlarge  # 8 CPU, 0 GPU, 32 GB RAM
+  instance_type: g5.4xlarge
 worker_node_types:
 - name: gpu_worker
-  instance_type: g4dn.xlarge  # 4 CPU, 1 GPU, 16 GB RAM
-  min_workers: 0
+  instance_type: g5.4xlarge
+  min_workers: 1
   max_workers: 1
   use_spot: False
 aws:

diff --git a/deploy/cluster_env.yaml b/deploy/cluster_env.yaml
@@ -1,4 +1,4 @@
-base_image: anyscale/ray:2.6.0-py310-cu118
+base_image: anyscale/ray:2.7.0optimized-py310-cu118
 env_vars: {}
 debian_packages:
   - curl

diff --git a/deploy/jobs/workloads.sh b/deploy/jobs/workloads.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 export PYTHONPATH=$PYTHONPATH:$PWD
-export RAY_AIR_REENABLE_DEPRECATED_SYNC_TO_HEAD_NODE=1
 mkdir results
 
 # Test data

diff --git a/madewithml/config.py b/madewithml/config.py
@@ -11,6 +11,11 @@
 LOGS_DIR = Path(ROOT_DIR, "logs")
 LOGS_DIR.mkdir(parents=True, exist_ok=True)
 EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")
+try:
+    Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
+except OSError:
+    EFS_DIR = Path(ROOT_DIR, "efs")
+    Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
 
 # Config MLflow
 MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")

diff --git a/notebooks/benchmarks.ipynb b/notebooks/benchmarks.ipynb
@@ -58,7 +58,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "e2c96931-d511-4c6e-b582-87d24455a11e",
    "metadata": {
     "tags": []
@@ -79,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "953a577e-3cd0-4c6b-81f9-8bc32850214d",
    "metadata": {
     "tags": []
@@ -101,7 +101,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "1790e2f5-6b8b-425c-8842-a2b0ea8f3f07",
    "metadata": {
     "tags": []
@@ -113,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "6b9bfadb-ba49-4f5a-b216-4db14c8888ab",
    "metadata": {
     "tags": []
@@ -208,7 +208,7 @@
        "4  A PyTorch Implementation of \"Watch Your Step: ...            other  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -222,7 +222,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "aa5b95d5-d61e-48e4-9100-d9d2fc0d53fa",
    "metadata": {
     "tags": []
@@ -234,7 +234,7 @@
        "['computer-vision', 'other', 'natural-language-processing', 'mlops']"
       ]
      },
-     "execution_count": 5,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -247,7 +247,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "3c828129-8248-4e38-93a4-cabb097e7ba5",
    "metadata": {
     "tags": []
@@ -279,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "8e3c3f44-2c19-4c32-9bc5-e9a7a917d19d",
    "metadata": {},
    "outputs": [],
@@ -295,7 +295,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "4950bdb4",
    "metadata": {},
    "outputs": [
@@ -337,7 +337,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "b2aae14c-9870-4a27-b5ad-90f339686620",
    "metadata": {
     "tags": []
@@ -364,7 +364,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "03ee23e5",
    "metadata": {},
    "outputs": [
@@ -401,7 +401,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "71c43e8c",
    "metadata": {},
    "outputs": [
@@ -416,7 +416,7 @@
        "  'description': 'A PyTorch implementation of \"Capsule Graph Neural Network\" (ICLR 2019).'}]"
       ]
      },
-     "execution_count": 11,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -429,7 +429,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "c9359a91-ac19-48a4-babb-e65d53f39b42",
    "metadata": {
     "tags": []
@@ -462,7 +462,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "5fac795e",
    "metadata": {},
    "outputs": [
@@ -486,7 +486,7 @@
        "['other', 'computer-vision', 'computer-vision']"
       ]
      },
-     "execution_count": 13,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -507,7 +507,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "e4cb38a8-44cb-4cea-828c-590f223d4063",
    "metadata": {
     "tags": []
@@ -543,7 +543,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "de2d0416",
    "metadata": {},
    "outputs": [],
@@ -576,7 +576,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "ff3c37fb",
    "metadata": {},
    "outputs": [],
@@ -618,7 +618,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "972fee2f-86e2-445e-92d0-923f5690132a",
    "metadata": {},
    "outputs": [],
@@ -647,7 +647,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "id": "9ee4e745-ef56-4b76-8230-fcbe56ac46aa",
    "metadata": {
     "tags": []
@@ -663,7 +663,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "id": "73780054-afeb-4ce6-8255-51bf91f9f820",
    "metadata": {
     "tags": []
@@ -709,7 +709,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "24af6d04-d29e-4adb-a289-4c34c2cc7ec8",
    "metadata": {
     "tags": []
@@ -780,7 +780,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "id": "e22ed1e1-b34d-43d1-ae8b-32b1fd5be53d",
    "metadata": {
     "tags": []
@@ -815,7 +815,7 @@
        "  'tag': 'mlops'}]"
       ]
      },
-     "execution_count": 22,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -833,7 +833,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "id": "294548a5-9edf-4dea-ab8d-dc7464246810",
    "metadata": {
     "tags": []
@@ -864,7 +864,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "id": "29bca273-3ea8-4ce0-9fa9-fe19062b7c5b",
    "metadata": {
     "tags": []
@@ -917,7 +917,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "id": "3e59a3b9-69d9-4bb5-8b88-0569fcc72f0c",
    "metadata": {
     "tags": []
@@ -1001,7 +1001,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "id": "15ea136e",
    "metadata": {},
    "outputs": [],
@@ -1020,7 +1020,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": null,
    "id": "ec0b498a-97c1-488c-a6b9-dc63a8a9df4d",
    "metadata": {
     "tags": []
@@ -1065,7 +1065,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "id": "4cc80311",
    "metadata": {},
    "outputs": [],
@@ -1080,7 +1080,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "id": "6771b1d2",
    "metadata": {},
    "outputs": [