261 changes: 207 additions & 54 deletions fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb
@@ -41,21 +41,6 @@
"fablib.show_config();"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cores_column_name = 'cores_available'\n",
"ram_column_name = 'ram_available'\n",
"disk_column_name = 'disk_available'\n",
"\n",
"core=16\n",
"ram=32\n",
"disk=100"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -96,24 +81,11 @@
"metadata": {},
"outputs": [],
"source": [
"# choices include\n",
"# GPU_RTX6000\n",
"# GPU_TeslaT4\n",
"# GPU_A30\n",
"# GPU_A40\n",
"GPU_CHOICE = 'GPU_A30' \n",
"\n",
"# don't edit - convert from GPU type to a resource column name\n",
"# to use in filter lambda function below\n",
"choice_to_column = {\n",
" \"GPU_RTX6000\": \"rtx6000_available\",\n",
" \"GPU_TeslaT4\": \"tesla_t4_available\",\n",
" \"GPU_A30\": \"a30_available\",\n",
" \"GPU_A40\": \"a40_available\"\n",
"}\n",
"\n",
"column_name = choice_to_column.get(GPU_CHOICE, \"Unknown\")\n",
"print(f'{column_name=}')"
"min_cores = 16\n",
"min_ram_gb = 32\n",
"min_disk_gb = 100\n",
"min_gpu_any = 0 # >0 means at least one GPU of any model for the initial filter\n",
"min_gpu_for_pick = 1 # >1 means at least two for the random pick"
]
},
{
@@ -122,23 +94,72 @@
"metadata": {},
"outputs": [],
"source": [
"# find a site with at least one available GPU of the selected type\n",
"site_override = None\n",
"\n",
"cores_column_name = 'cores_available'\n",
"ram_column_name = 'ram_available'\n",
"disk_column_name = 'disk_available'\n",
"\n",
"if site_override:\n",
" site1 = site_override\n",
"import random\n",
"import pandas as pd\n",
"\n",
"fields = ['name', 'cores_available', 'ram_available', 'disk_available']\n",
"gpu_models = [\"GPU_RTX6000\", \"GPU_Tesla_T4\", \"GPU_A30\", \"GPU_A40\"]\n",
"gpu_fields = [f\"{m.split('_', 1)[1].lower()}_available\" for m in gpu_models]\n",
"fields += [f for f in gpu_fields if f not in fields]\n",
"\n",
"# If empty -> do not filter by name\n",
"sites_like: list[str] = [] # e.g., ['BRIST', 'TOKY'] or [] to disable\n",
"\n",
"avoid_sites_like: list[str] = [\"GATECH\", \"GPN\"]\n",
"\n",
"def site_filter(row: dict) -> bool:\n",
" # Name filter: only apply if sites_like is non-empty\n",
" if sites_like:\n",
" name = (row.get('name') or '')\n",
" name_ok = any(tok.lower() in name.lower() for tok in sites_like)\n",
" else:\n",
" name_ok = True\n",
"\n",
" res_ok = (\n",
" row.get('cores_available', 0) > min_cores and\n",
" row.get('ram_available', 0) > min_ram_gb and\n",
" row.get('disk_available', 0) > min_disk_gb\n",
" )\n",
" any_gpu_ok = any(row.get(gf, 0) > min_gpu_any for gf in gpu_fields)\n",
"\n",
" return name_ok and res_ok and any_gpu_ok\n",
"\n",
"styled_or_df = fablib.list_sites(fields=fields, pretty_names=False, avoid=avoid_sites_like, filter_function=site_filter)\n",
"\n",
"# Normalize Styler/DataFrame/list-of-dicts -> DataFrame\n",
"if isinstance(styled_or_df, pd.io.formats.style.Styler):\n",
" df = styled_or_df.data\n",
"elif isinstance(styled_or_df, pd.DataFrame):\n",
" df = styled_or_df\n",
"else:\n",
" site1 = fablib.get_random_site(filter_function=lambda x: x[column_name] > 0 and \n",
" x[cores_column_name] > core and \n",
" x[ram_column_name] > ram and \n",
" x[disk_column_name] > disk,\n",
" avoid = [\"GATECH\", \"GPN\"])\n",
" \n",
"print(f'Preparing to create slice \"{ollama_slice_name}\" with node {ollama_node_name} in site {site1}')"
" df = pd.DataFrame(styled_or_df or [])\n",
"\n",
"if df.empty:\n",
" raise RuntimeError(\"No sites matched the filter criteria.\")\n",
"\n",
"# Random pick where any GPU count > 1\n",
"model_map = dict(zip(gpu_fields, gpu_models))\n",
"long = (\n",
" df.reset_index()[[\"index\"] + gpu_fields]\n",
" .melt(id_vars=\"index\", var_name=\"gpu_field\", value_name=\"count\")\n",
")\n",
"eligible = long[long[\"count\"] > min_gpu_for_pick]\n",
"if eligible.empty:\n",
" raise RuntimeError(\"No site has any GPU model with count > 1.\")\n",
"\n",
"pick = eligible.sample(1).iloc[0]\n",
"site_row = df.loc[pick[\"index\"]]\n",
"picked_gpu_model = model_map[pick[\"gpu_field\"]]\n",
"\n",
"print(\n",
" f\"Chosen site: {site_row.get('name', '<unknown>')} | \"\n",
" f\"GPU: {picked_gpu_model} | Available: {int(pick['count'])}\"\n",
")\n",
"\n",
"if \"GPU_Tesla_T4\" == picked_gpu_model:\n",
" picked_gpu_model = \"GPU_TeslaT4\"\n",
"\n",
"picked_site = site_row.get('name')"
]
},
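The random GPU pick in the cell above hinges on `pandas.melt`: the per-model availability columns are unpivoted into (site, GPU model, count) rows, the rows are filtered, and one row is sampled uniformly. A minimal, self-contained sketch of that pattern, with made-up site names and counts, is shown below.

```python
# Toy illustration of the melt-then-sample pick used above.
# Site names and GPU counts are invented for demonstration only.
import pandas as pd

df = pd.DataFrame({
    "name": ["SITE1", "SITE2", "SITE3"],   # hypothetical sites
    "a30_available": [0, 3, 1],
    "a40_available": [2, 0, 0],
})
gpu_fields = ["a30_available", "a40_available"]

# Unpivot: one row per (site index, GPU model) pair with its available count.
long = (
    df.reset_index()[["index"] + gpu_fields]
      .melt(id_vars="index", var_name="gpu_field", value_name="count")
)

# Keep only pairs with more than one GPU available, then pick one at random.
eligible = long[long["count"] > 1]
pick = eligible.sample(1).iloc[0]
print(df.loc[pick["index"], "name"], pick["gpu_field"], int(pick["count"]))
```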
{
@@ -174,10 +195,10 @@
"\n",
"net1 = ollama_slice.add_l3network(name=network_name)\n",
"\n",
"ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=core, ram=ram, \n",
" disk=disk, site=site1, image='default_ubuntu_22')\n",
"ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=min_cores, ram=min_ram_gb, \n",
" disk=min_disk_gb, site=picked_site, image='default_ubuntu_22')\n",
"\n",
"ollama_node.add_component(model=GPU_CHOICE, name='gpu1')\n",
"ollama_node.add_component(model=picked_gpu_model, name='gpu1')\n",
"\n",
"\n",
"iface1 = ollama_node.add_component(model=model_name, name=nic_name).get_interfaces()[0]\n",
@@ -308,6 +329,138 @@
"print(f\"Ollama is accessible from other slices at: {ollama_fabnet_ip_addr}\")"
]
},
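Because the FABnet address printed above is reachable from other slices attached to the same FABnet network, a peer slice can query the service directly. The sketch below assumes a hypothetical `peer_node` in another slice that has a copy of `ollama_tools/query.py` (or any HTTP client) installed; the IP address is a placeholder for the value printed above, and the model name matches the `.env` default in this PR.

```python
# Hedged sketch: query Ollama from a node in a peer slice over FABnet.
# 'peer_node' and the FABnet IP below are placeholders for your own topology.
ollama_fabnet_ip = "10.132.1.2"   # substitute the address printed above

stdout, stderr = peer_node.execute(
    f"python3 ollama_tools/query.py --model gemma3:270m "
    f"--prompt 'Hello from a peer slice' --host {ollama_fabnet_ip} --port 11434"
)
```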
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying Ollama\n",
"\n",
"Users can interact with the LLM through the REST API, the command-line interface, or the Open WebUI.\n",
"\n",
"### REST Examples\n",
"\n",
"The `query.py` script demonstrates how to query the LLM over the REST interface. Although Ollama can run on a remote host, the example below targets the local instance by passing `--host localhost`. Users may also specify a different `--host` and `--port` as needed.\n"
]
},
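Under the hood, `query.py` simply POSTs to Ollama's `/api/generate` endpoint. If you prefer to call the REST API directly, a minimal sketch with `requests` looks like the following; the model name assumes the `gemma3:270m` default set in this PR's `.env` file, and `localhost:11434` assumes the code runs on the Ollama node itself.

```python
# Minimal direct call to Ollama's /api/generate REST endpoint.
# Assumes Ollama is reachable on localhost:11434 and the model has been pulled.
import requests

payload = {
    "model": "gemma3:270m",   # example; use whatever default_llm_model resolves to
    "prompt": "Tell me about the National Science Foundation",
    "stream": False,          # return one JSON object instead of a stream of chunks
}
resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["response"])
```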
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about National Science Foundation' --host localhost --port 11434\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about NVIDIA BlueField DPUs' --host localhost --port 11434\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### CLI Examples\n",
"\n",
"SSH into the `ollama_node` using the command provided above.\n",
"To view available models, run:\n",
"\n",
"```bash\n",
"docker exec -it ollama ollama list\n",
"```\n",
"\n",
"To start a model and interact with it:\n",
"\n",
"```bash\n",
"docker exec -it ollama ollama run deepseek-r1:7b\n",
"```\n",
"\n",
"This will open an interactive prompt where you can type questions directly."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Open Web UI\n",
"\n",
"To access the Open Web UI from your laptop, you’ll need to create an SSH tunnel.\n",
"Follow the steps below to complete the setup.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Start the SSH Tunnel\n",
"\n",
"- Create SSH Tunnel Configuration `fabric_ssh_tunnel_tools.zip`\n",
"- Download your custom `fabric_ssh_tunnel_tools.zip` tarball from the `fabric_config` folder. \n",
"- Untar the tarball and put the resulting folder (`fabric_ssh_tunnel_tools`) somewhere you can access it from the command line.\n",
"- Open a terminal window. (Windows: use `powershell`) \n",
"- Use `cd` to navigate to the `fabric_ssh_tunnel_tools` folder.\n",
"- In your terminal, run the command that results from running the following cell (leave the terminal window open)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fablib.create_ssh_tunnel_config(overwrite=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Launch Open Web UI\n",
"\n",
"To access the Open Web UI running on the ollama node, create an SSH tunnel from your local machine using the command generated by the next cell:\n",
"\n",
"```bash\n",
"ssh -L 8080:<manager-ip>:8080 -i <private_key> -F <ssh_config> <your-username>@<manager-host>\n",
"```\n",
"\n",
"Replace `<manager-ip>` and `<manager-host>` with the actual IP address and hostname of the Ceph manager VM.\n",
"\n",
"Then, open your browser and navigate to:\n",
"\n",
"\n",
"http://localhost:8080\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Port on your local machine that you want to map the File Browser to.\n",
"local_port='8080'\n",
"# Local interface to map the File Browser to (can be `localhost`)\n",
"local_host='127.0.0.1'\n",
"\n",
"# Port on the node used by the File Browser Service\n",
"target_port='8080'\n",
"\n",
"# Username/node on FABRIC\n",
"target_host=f'{ollama_node.get_username()}@{ollama_node.get_management_ip()}'\n",
"\n",
"print(\"Use `cd` to navigate into the `fabric_ssh_tunnel_tools` folder.\")\n",
"print(\"In your terminal, run the SSH tunnel command\")\n",
"print()\n",
"print(f'ssh -L {local_host}:{local_port}:127.0.0.1:{target_port} -i {os.path.basename(fablib.get_default_slice_public_key_file())[:-4]} -F ssh_config {target_host}')\n",
"print()\n",
"print(\"After running the SSH command, open Open WebUI at http://localhost:8080. If prompted, create an account and start asking questions.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -325,8 +478,8 @@
},
"outputs": [],
"source": [
"ollama_node = fablib.get_slice(ollama_slice_name)\n",
"ollama_node.delete()"
"#ollama_node = fablib.get_slice(ollama_slice_name)\n",
"#ollama_node.delete()"
]
},
{
@@ -27,7 +27,7 @@ services:
ports:
- ${OPEN_WEBUI_PORT-3000}:8080
environment:
- 'OLLAMA_BASE_URL=http://ollama:11434'
- 'OLLAMA_BASE_URL=http://localhost:11434'
- 'WEBUI_SECRET_KEY='
network_mode: host
extra_hosts:
@@ -1,3 +1,3 @@
MODEL_NAME=deepseek-r1:7b
MODEL_NAME=gemma3:270m
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
@@ -2,8 +2,8 @@
import requests
import json

def query_deepseek(prompt, model, host, port, stream=False):
"""Sends a query to the DeepSeek model via Ollama API."""
def query_model(prompt, model, host, port, stream=False):
"""Sends a query to the model via Ollama API."""
api_url = f"http://{host}:{port}/api/generate"

payload = {
@@ -21,7 +21,7 @@ def query_deepseek(prompt, model, host, port, stream=False):
return f"Request error: {str(e)}"

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Query the DeepSeek model via Ollama API.")
parser = argparse.ArgumentParser(description="Query the model via Ollama API.")
parser.add_argument("--prompt", required=True, help="The prompt text to send to the model.")
parser.add_argument("--model", required=True, help="The model name to use.")
parser.add_argument("--host", required=False, default="127.0.0.1", help="The host where Ollama API is running.")
@@ -30,5 +30,5 @@ def query_deepseek(prompt, model, host, port, stream=False):

args = parser.parse_args()

response = query_deepseek(args.prompt, args.model, args.host, args.port, args.stream)
print("\nDeepSeek Response:\n", response)
response = query_model(args.prompt, args.model, args.host, args.port, args.stream)
print("Model Response:\n", response)