example: sine function model prediction with litdata & pytorch-lightning (#517)

deependujha · web-flow · commit 4e8335f6699d · 2025-03-25T12:59:20.000Z
* example: sine function model prediction with litdata & pytorch-lightning

* update

* update

* update

* update

* clear jupyter notebook output
diff --git a/.gitignore b/.gitignore
@@ -116,3 +116,6 @@ lightning_logs
 
 # status.json file
 status.json
+
+# directory name used by the examples for their optimized dataset output
+example_optimize_dataset
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -44,6 +44,10 @@ repos:
     hooks:
       - id: codespell
         additional_dependencies: [tomli]
+        exclude: >
+          (?x)^(
+              .*\.ipynb
+          )$
         #args: ["--write-changes"] # uncomment if you want to get automatic fixing
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
diff --git a/examples/sine_function_model_prediction/01-optimize.py b/examples/sine_function_model_prediction/01-optimize.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+import litdata as ld
+
+
+def sine_function(x: float) -> dict:
+    """Return one dataset sample pairing the input ``x`` with ``np.sin(x)``.
+
+    Any key:value pairs work, but types must not change between samples; lists must keep a fixed length and types.
+    """
+    return {"x": x, "sine": np.sin(x)}
+
+
+if __name__ == "__main__":
+    sample_points = list(np.linspace(-5, 5, 1000))  # 1000 evenly spaced inputs in [-5, 5]
+    ld.optimize(  # writes the samples in an optimized format
+        fn=sine_function,  # applied to each input
+        inputs=sample_points,  # the inputs to the function (here it's a list of numbers)
+        output_dir="example_optimize_dataset",  # where the optimized data is stored
+        num_workers=4,  # the number of workers on the same machine
+        chunk_size=50,  # items per chunk (1000/50 = 20 chunks should be made)
+        mode="overwrite",  # overwrite an optimized dataset that already exists in the dir
+    )
diff --git a/examples/sine_function_model_prediction/02-model_training.py b/examples/sine_function_model_prediction/02-model_training.py
@@ -0,0 +1,92 @@
+# ruff: noqa: RET504
+import lightning as L
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+import litdata as ld
+
+
+class SineModule(L.LightningModule):
+    """Fully-connected network (1 -> 32 -> 32 -> 8 -> 1) trained to approximate ``sin(x)``.
+
+    Each batch is a dict with the keys ``"x"`` and ``"sine"``; every step scores the prediction
+    for ``"x"`` against ``"sine"`` with the mean squared error.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(1, 32)
+        self.fc2 = nn.Linear(32, 32)
+        self.fc3 = nn.Linear(32, 8)
+        self.fc4 = nn.Linear(8, 1)
+
+    def forward(self, x):
+        """Run ``x`` (last dimension of size 1) through the four layers and return the tanh output."""
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = F.relu(self.fc3(x))
+        x = F.tanh(self.fc4(x))  # for output to be in -1 to 1
+        return x
+
+    def _mse_loss(self, batch):
+        """Return the MSE between the prediction for ``batch["x"]`` and the target ``batch["sine"]``."""
+        x, y = batch["x"], batch["sine"]
+        x = x.view(x.size(0), -1)  # keep the batch dim, flatten the rest into the feature dim fc1 consumes
+        # Drop only the trailing feature dim: a bare squeeze() would also drop a batch dim of size 1,
+        # leaving a 0-d prediction that no longer matches `y` (presumably of shape (batch,)).
+        prediction = self(x).squeeze(-1)
+        return F.mse_loss(prediction, y)
+
+    def training_step(self, batch, batch_idx):
+        # training_step defines the train loop.
+        return self._mse_loss(batch)
+
+    def test_step(self, batch, batch_idx):
+        # this is the test loop
+        self.log("test_loss", self._mse_loss(batch))
+
+    def validation_step(self, batch, batch_idx):
+        # this is the validation loop
+        self.log("val_loss", self._mse_loss(batch))
+
+    def configure_optimizers(self):
+        """Optimize all parameters with Adam at a learning rate of 1e-3."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        return optimizer
+
+
+class SineDataModule(L.LightningDataModule):
+    """Serve the optimized dataset stored in ``data_dir`` as train/val/test dataloaders.
+
+    Args:
+        data_dir: directory passed to ``ld.StreamingDataset``.
+        batch_size: batch size of every dataloader.
+        num_workers: worker count of every dataloader (defaults to the previously hard-coded 7).
+    """
+
+    def __init__(self, data_dir: str, batch_size: int = 4, num_workers: int = 7):
+        super().__init__()
+        self.data_dir = data_dir
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+
+    def setup(self, stage: str):
+        dataset = ld.StreamingDataset(self.data_dir)
+        # NOTE(review): the fractions sum to 0.9, so ~10% of the samples presumably stays unused -- confirm intent.
+        self.train_dataset, self.val_dataset, self.test_dataset = ld.train_test_split(dataset, splits=[0.7, 0.1, 0.1])
+
+    def _loader(self, dataset):
+        """Wrap ``dataset`` in a StreamingDataLoader with the settings shared by all three splits."""
+        return ld.StreamingDataLoader(
+            dataset, batch_size=self.batch_size, num_workers=self.num_workers, persistent_workers=True
+        )
+
+    def train_dataloader(self):
+        return self._loader(self.train_dataset)
+
+    def val_dataloader(self):
+        return self._loader(self.val_dataset)
+
+    def test_dataloader(self):
+        return self._loader(self.test_dataset)
+
+
+# ======================================================
+
+
+if __name__ == "__main__":
+    network = SineModule()
+    # Same directory name that 01-optimize.py uses as its output_dir.
+    datamodule = SineDataModule("example_optimize_dataset")
+
+    # Fit for 100 epochs on CPU with precision="64-true", then run the test loop on the same objects.
+    trainer = L.Trainer(max_epochs=100, accelerator="cpu", precision="64-true")
+    trainer.fit(network, datamodule)
+    trainer.test(network, datamodule)
diff --git a/examples/sine_function_model_prediction/README.md b/examples/sine_function_model_prediction/README.md
@@ -0,0 +1,18 @@
+# Sine function model prediction with `LitData` & `PyTorch Lightning`
+
+<a target="_blank" href="https://lightning.ai/deependu/studios/sine-function-model-prediction-with-litdata-and-pytorch-lightning"><img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open in Studio"/>
+</a>
+
+- Check out this example in [Lightning Studio](https://lightning.ai/deependu/studios/sine-function-model-prediction-with-litdata-and-pytorch-lightning)
+
+---
+
+## Steps
+
+- Prepare the optimized dataset. [Check the 01-optimize.py file](./01-optimize.py)
+
+- Train the model with LitData Streaming Dataset & Dataloader + PyTorch Lightning & Datamodule. [Check the model training code](./02-model_training.py)
+
+- Visualize the prediction. [Check the Jupyter notebook](./main.ipynb)
+
+![visualize prediction](https://storage.googleapis.com/lightning-avatars/litpages/01jphhqptdw8t8sbrdxgdbj3np/5e809ecf-6781-4089-9f48-654519db7c34.png)
diff --git a/examples/sine_function_model_prediction/main.ipynb b/examples/sine_function_model_prediction/main.ipynb
@@ -0,0 +1,160 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import lightning as L\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "from torch import nn\n",
+    "\n",
+    "\n",
+    "# ruff: noqa: RET504\n",
+    "class SineModule(L.LightningModule):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.fc1 = nn.Linear(1, 32)\n",
+    "        self.fc2 = nn.Linear(32, 32)\n",
+    "        self.fc3 = nn.Linear(32, 8)\n",
+    "        self.fc4 = nn.Linear(8, 1)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = F.relu(self.fc2(x))\n",
+    "        x = F.relu(self.fc3(x))\n",
+    "        x = F.tanh(self.fc4(x))  # for output to be in -1 to 1\n",
+    "        return x\n",
+    "\n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        # training_step defines the train loop.\n",
+    "        x, y = batch[\"x\"], batch[\"sine\"]\n",
+    "        x = x.view(x.size(0), -1)\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = F.relu(self.fc2(x))\n",
+    "        x = F.relu(self.fc3(x))\n",
+    "        x = F.tanh(self.fc4(x))  # for output to be in -1 to 1\n",
+    "\n",
+    "        loss = F.mse_loss(x.squeeze(), y)\n",
+    "        return loss\n",
+    "\n",
+    "    def test_step(self, batch, batch_idx):\n",
+    "        # this is the test loop\n",
+    "        x, y = batch[\"x\"], batch[\"sine\"]\n",
+    "        x = x.view(x.size(0), -1)\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = F.relu(self.fc2(x))\n",
+    "        x = F.relu(self.fc3(x))\n",
+    "        x = F.tanh(self.fc4(x))  # for output to be in -1 to 1\n",
+    "\n",
+    "        test_loss = F.mse_loss(x.squeeze(), y)\n",
+    "        self.log(\"test_loss\", test_loss)\n",
+    "\n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        # this is the validation loop\n",
+    "        x, y = batch[\"x\"], batch[\"sine\"]\n",
+    "        x = x.view(x.size(0), -1)\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = F.relu(self.fc2(x))\n",
+    "        x = F.relu(self.fc3(x))\n",
+    "        x = F.tanh(self.fc4(x))  # for output to be in -1 to 1\n",
+    "\n",
+    "        val_loss = F.mse_loss(x.squeeze(), y)\n",
+    "        self.log(\"val_loss\", val_loss)\n",
+    "\n",
+    "    def configure_optimizers(self):\n",
+    "        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)\n",
+    "        return optimizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = SineModule.load_from_checkpoint(\"lightning_logs/version_0/checkpoints/epoch=99-step=17500.ckpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "model.eval()\n",
+    "\n",
+    "x = np.linspace(-5, 5, 100)\n",
+    "original_sine = np.sin(x)\n",
+    "\n",
+    "y = []\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    for _x in x:\n",
+    "        _x = torch.Tensor([_x])\n",
+    "        y_hat = model(_x)\n",
+    "        y.append(y_hat)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(x, y, color=\"red\", label=\"predicted\")  # Red color for y\n",
+    "# Blue color for original_sine\n",
+    "plt.plot(x, original_sine, color=\"blue\", label=\"original sine\")\n",
+    "\n",
+    "plt.legend()  # Show labels in the plot\n",
+    "plt.xlabel(\"X-axis\")\n",
+    "plt.ylabel(\"Y-axis\")\n",
+    "plt.title(\"Comparison of y and original_sine\")\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "litdata",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}