From 80982668a1fdd26032c6e87c16369d3b3d3169a2 Mon Sep 17 00:00:00 2001 From: Komal Thareja Date: Wed, 29 Oct 2025 11:24:20 -0400 Subject: [PATCH 1/3] update ollama example with better site selection and ssh tunnels for open webui --- .../collaborative_slices/ollama/ollama.ipynb | 195 +++++++++++++----- .../ollama/ollama_tools/docker-compose.yml | 2 +- .../ollama/ollama_tools/env.template | 2 +- 3 files changed, 143 insertions(+), 56 deletions(-) diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb index 70194b4..3c55f62 100644 --- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb +++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb @@ -41,21 +41,6 @@ "fablib.show_config();" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cores_column_name = 'cores_available'\n", - "ram_column_name = 'ram_available'\n", - "disk_column_name = 'disk_available'\n", - "\n", - "core=16\n", - "ram=32\n", - "disk=100" - ] - }, { "cell_type": "markdown", "metadata": { @@ -96,24 +81,11 @@ "metadata": {}, "outputs": [], "source": [ - "# choices include\n", - "# GPU_RTX6000\n", - "# GPU_TeslaT4\n", - "# GPU_A30\n", - "# GPU_A40\n", - "GPU_CHOICE = 'GPU_A30' \n", - "\n", - "# don't edit - convert from GPU type to a resource column name\n", - "# to use in filter lambda function below\n", - "choice_to_column = {\n", - " \"GPU_RTX6000\": \"rtx6000_available\",\n", - " \"GPU_TeslaT4\": \"tesla_t4_available\",\n", - " \"GPU_A30\": \"a30_available\",\n", - " \"GPU_A40\": \"a40_available\"\n", - "}\n", - "\n", - "column_name = choice_to_column.get(GPU_CHOICE, \"Unknown\")\n", - "print(f'{column_name=}')" + "min_cores = 16\n", + "min_ram_gb = 32\n", + "min_disk_gb = 100\n", + "min_gpu_any = 0 # >0 means at least one GPU of any model for the initial filter\n", + "min_gpu_for_pick = 1 # >1 means at least two for the random pick" ] }, { @@ -122,23 +94,72 @@ "metadata": {}, "outputs": [], "source": [ - "# find a site with at least one available GPU of the selected type\n", - "site_override = None\n", - "\n", - "cores_column_name = 'cores_available'\n", - "ram_column_name = 'ram_available'\n", - "disk_column_name = 'disk_available'\n", - "\n", - "if site_override:\n", - " site1 = site_override\n", + "import random\n", + "import pandas as pd\n", + "\n", + "fields = ['name', 'cores_available', 'ram_available', 'disk_available']\n", + "gpu_models = [\"GPU_RTX6000\", \"GPU_Tesla_T4\", \"GPU_A30\", \"GPU_A40\"]\n", + "gpu_fields = [f\"{m.split('_', 1)[1].lower()}_available\" for m in gpu_models]\n", + "fields += [f for f in gpu_fields if f not in fields]\n", + "\n", + "# If empty -> do not filter by name\n", + "sites_like: list[str] = [] # e.g., ['BRIST', 'TOKY'] or [] to disable\n", + "\n", + "avoid_sites_like: list[str] = [\"GATECH\", \"GPN\"]\n", + "\n", + "def site_filter(row: dict) -> bool:\n", + " # Name filter: only apply if sites_like is non-empty\n", + " if sites_like:\n", + " name = (row.get('name') or '')\n", + " name_ok = any(tok.lower() in name.lower() for tok in sites_like)\n", + " else:\n", + " name_ok = True\n", + "\n", + " res_ok = (\n", + " row.get('cores_available', 0) > min_cores and\n", + " row.get('ram_available', 0) > min_ram_gb and\n", + " row.get('disk_available', 0) > min_disk_gb\n", + " )\n", + " any_gpu_ok = any(row.get(gf, 0) > min_gpu_any for gf in gpu_fields)\n", + "\n", + " return 
name_ok and res_ok and any_gpu_ok\n", + "\n", + "styled_or_df = fablib.list_sites(fields=fields, pretty_names=False, avoid=avoid_sites_like, filter_function=site_filter)\n", + "\n", + "# Normalize Styler/DataFrame/list-of-dicts -> DataFrame\n", + "if isinstance(styled_or_df, pd.io.formats.style.Styler):\n", + " df = styled_or_df.data\n", + "elif isinstance(styled_or_df, pd.DataFrame):\n", + " df = styled_or_df\n", "else:\n", - " site1 = fablib.get_random_site(filter_function=lambda x: x[column_name] > 0 and \n", - " x[cores_column_name] > core and \n", - " x[ram_column_name] > ram and \n", - " x[disk_column_name] > disk,\n", - " avoid = [\"GATECH\", \"GPN\"])\n", - " \n", - "print(f'Preparing to create slice \"{ollama_slice_name}\" with node {ollama_node_name} in site {site1}')" + " df = pd.DataFrame(styled_or_df or [])\n", + "\n", + "if df.empty:\n", + " raise RuntimeError(\"No sites matched the filter criteria.\")\n", + "\n", + "# Random pick where any GPU count > 1\n", + "model_map = dict(zip(gpu_fields, gpu_models))\n", + "long = (\n", + " df.reset_index()[[\"index\"] + gpu_fields]\n", + " .melt(id_vars=\"index\", var_name=\"gpu_field\", value_name=\"count\")\n", + ")\n", + "eligible = long[long[\"count\"] > min_gpu_for_pick]\n", + "if eligible.empty:\n", + " raise RuntimeError(\"No site has any GPU model with count > 1.\")\n", + "\n", + "pick = eligible.sample(1).iloc[0]\n", + "site_row = df.loc[pick[\"index\"]]\n", + "picked_gpu_model = model_map[pick[\"gpu_field\"]]\n", + "\n", + "print(\n", + " f\"Chosen site: {site_row.get('name', '')} | \"\n", + " f\"GPU: {picked_gpu_model} | Available: {int(pick['count'])}\"\n", + ")\n", + "\n", + "if \"GPU_Tesla_T4\" == picked_gpu_model:\n", + " picked_gpu_model = \"GPU_TeslaT4\"\n", + "\n", + "picked_site = site_row.get('name')" ] }, { @@ -174,10 +195,10 @@ "\n", "net1 = ollama_slice.add_l3network(name=network_name)\n", "\n", - "ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=core, ram=ram, \n", - " disk=disk, site=site1, image='default_ubuntu_22')\n", + "ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=min_cores, ram=min_ram_gb, \n", + " disk=min_disk_gb, site=picked_site, image='default_ubuntu_22')\n", "\n", - "ollama_node.add_component(model=GPU_CHOICE, name='gpu1')\n", + "ollama_node.add_component(model=picked_gpu_model, name='gpu1')\n", "\n", "\n", "iface1 = ollama_node.add_component(model=model_name, name=nic_name).get_interfaces()[0]\n", @@ -308,6 +329,72 @@ "print(f\"Ollama is accessible from other slices at: {ollama_fabnet_ip_addr}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start the SSH Tunnel\n", + "\n", + "- Create SSH Tunnel Configuration `fabric_ssh_tunnel_tools.zip`\n", + "- Download your custom `fabric_ssh_tunnel_tools.zip` tarball from the `fabric_config` folder. \n", + "- Untar the tarball and put the resulting folder (`fabric_ssh_tunnel_tools`) somewhere you can access it from the command line.\n", + "- Open a terminal window. (Windows: use `powershell`) \n", + "- Use `cd` to navigate to the `fabric_ssh_tunnel_tools` folder.\n", + "- In your terminal, run the command that results from running the following cell (leave the terminal window open)." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fablib.create_ssh_tunnel_config(overwrite=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Open Web UI\n",
+    "\n",
+    "To access the Open Web UI running on the ollama node, create an SSH tunnel from your local machine using the command generated by the next cell:\n",
+    "\n",
+    "```bash\n",
+    "ssh -L 8080:127.0.0.1:8080 -i <slice_private_key> -F ssh_config <username>@<management_ip>\n",
+    "```\n",
+    "\n",
+    "Replace `<slice_private_key>`, `<username>`, and `<management_ip>` with your slice private key file and the username and management IP address of the ollama node; the next cell prints the exact command for this slice.\n",
+    "\n",
+    "Then, open your browser and navigate to:\n",
+    "\n",
+    "\n",
+    "http://localhost:8080\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# Port on your local machine that you want to map Open WebUI to.\n",
+    "local_port='8080'\n",
+    "# Local interface to map Open WebUI to (can be `localhost`)\n",
+    "local_host='127.0.0.1'\n",
+    "\n",
+    "# Port on the node used by the Open WebUI service\n",
+    "target_port='8080'\n",
+    "\n",
+    "# Username/node on FABRIC\n",
+    "target_host=f'{ollama_node.get_username()}@{ollama_node.get_management_ip()}'\n",
+    "\n",
+    "print(f'ssh -L {local_host}:{local_port}:127.0.0.1:{target_port} -i {os.path.basename(fablib.get_default_slice_public_key_file())[:-4]} -F ssh_config {target_host}')\n",
+    "print()\n",
+    "print(\"After running the SSH command, open Open WebUI at http://localhost:8080. If prompted, create an account and start asking questions.\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -325,8 +412,8 @@
    },
    "outputs": [],
    "source": [
-    "ollama_node = fablib.get_slice(ollama_slice_name)\n",
-    "ollama_node.delete()"
+    "#ollama_node = fablib.get_slice(ollama_slice_name)\n",
+    "#ollama_node.delete()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/docker-compose.yml b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/docker-compose.yml
index 6512bb4..e17a795 100644
--- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/docker-compose.yml
+++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/docker-compose.yml
@@ -27,7 +27,7 @@ services:
     ports:
       - ${OPEN_WEBUI_PORT-3000}:8080
     environment:
-      - 'OLLAMA_BASE_URL=http://ollama:11434'
+      - 'OLLAMA_BASE_URL=http://localhost:11434'
       - 'WEBUI_SECRET_KEY='
     network_mode: host
     extra_hosts:
diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/env.template b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/env.template
index 91b3bb4..e9986c3 100644
--- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/env.template
+++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/env.template
@@ -1,3 +1,3 @@
-MODEL_NAME=deepseek-r1:7b
+MODEL_NAME=gemma3:270m
 NVIDIA_VISIBLE_DEVICES=all
 NVIDIA_DRIVER_CAPABILITIES=compute,utility
\ No newline at end of file

From fc0d9d8163f927630e7e00e930de4d686eff2125 Mon Sep 17 00:00:00 2001
From: Komal Thareja
Date: Wed, 29 Oct 2025 11:58:20 -0400
Subject: [PATCH 2/3] update the notebook to include some examples querying ollama

---
 .../collaborative_slices/ollama/ollama.ipynb  | 70 ++++++++++++++++++-
 .../ollama/ollama_tools/query.py              | 10 +--
 2 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb 
b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb index 3c55f62..05bd672 100644 --- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb +++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb @@ -333,7 +333,70 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Start the SSH Tunnel\n", + "## Querying Ollama\n", + "\n", + "Users can interact with the LLM through the REST API, the command-line interface, or the Open WebUI.\n", + "\n", + "### REST Examples\n", + "\n", + "The `query.py` script demonstrates how to query the LLM over the REST interface. Although Ollama can run on a remote host, the example below targets the local instance by passing `--host localhost`. Users may also specify a different `--host` and `--port` as needed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about National Science Foundation' --host localhost --port 11434\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about NVIDIA BlueField DPUs' --host localhost --port 11434\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CLI Examples\n", + "\n", + "SSH into the `ollama_node` using the command provided above.\n", + "To view available models, run:\n", + "\n", + "```bash\n", + "docker exec -it ollama ollama list\n", + "```\n", + "\n", + "To start a model and interact with it:\n", + "\n", + "```bash\n", + "docker exec -it ollama ollama run deepseek-r1:7b\n", + "```\n", + "\n", + "This will open an interactive prompt where you can type questions directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Open Web UI\n", + "\n", + "To access the Open Web UI from your laptop, you’ll need to create an SSH tunnel.\n", + "Follow the steps below to complete the setup.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Start the SSH Tunnel\n", "\n", "- Create SSH Tunnel Configuration `fabric_ssh_tunnel_tools.zip`\n", "- Download your custom `fabric_ssh_tunnel_tools.zip` tarball from the `fabric_config` folder. \n", @@ -356,7 +419,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Open Web UI\n", + "#### Launch Open Web UI\n", "\n", "To access the Open Web UI running on the ollama node, create an SSH tunnel from your local machine using the command generated by the next cell:\n", "\n", @@ -390,6 +453,9 @@ "# Username/node on FABRIC\n", "target_host=f'{ollama_node.get_username()}@{ollama_node.get_management_ip()}'\n", "\n", + "print(\"Use `cd` to navigate into the `fabric_ssh_tunnel_tools` folder.\")\n", + "print(\"In your terminal, run the SSH tunnel command\")\n", + "print()\n", "print(f'ssh -L {local_host}:{local_port}:127.0.0.1:{target_port} -i {os.path.basename(fablib.get_default_slice_public_key_file())[:-4]} -F ssh_config {target_host}')\n", "print()\n", "print(\"After running the SSH command, open Open WebUI at http://localhost:8080. 
If prompted, create an account and start asking questions.\")" diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/query.py b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/query.py index 227fafc..1ba8940 100644 --- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/query.py +++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama_tools/query.py @@ -2,8 +2,8 @@ import requests import json -def query_deepseek(prompt, model, host, port, stream=False): - """Sends a query to the DeepSeek model via Ollama API.""" +def query_model(prompt, model, host, port, stream=False): + """Sends a query to the model via Ollama API.""" api_url = f"http://{host}:{port}/api/generate" payload = { @@ -21,7 +21,7 @@ def query_deepseek(prompt, model, host, port, stream=False): return f"Request error: {str(e)}" if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Query the DeepSeek model via Ollama API.") + parser = argparse.ArgumentParser(description="Query the model via Ollama API.") parser.add_argument("--prompt", required=True, help="The prompt text to send to the model.") parser.add_argument("--model", required=True, help="The model name to use.") parser.add_argument("--host", required=False, default="127.0.0.1", help="The host where Ollama API is running.") @@ -30,5 +30,5 @@ def query_deepseek(prompt, model, host, port, stream=False): args = parser.parse_args() - response = query_deepseek(args.prompt, args.model, args.host, args.port, args.stream) - print("\nDeepSeek Response:\n", response) \ No newline at end of file + response = query_model(args.prompt, args.model, args.host, args.port, args.stream) + print("Model Response:\n", response) \ No newline at end of file From b2cd02419fd9fd4d8c890d793c6ff18971ab3edb Mon Sep 17 00:00:00 2001 From: Komal Thareja Date: Wed, 29 Oct 2025 13:06:12 -0400 Subject: [PATCH 3/3] query hosts instead of sites to avoid disk/core/ram error --- .../collaborative_slices/ollama/ollama.ipynb | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb index 05bd672..d983c6b 100644 --- a/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb +++ b/fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb @@ -58,7 +58,7 @@ "metadata": {}, "outputs": [], "source": [ - "ollama_slice_name = 'Ollama-deep-seek'\n", + "ollama_slice_name = 'Ollama-slice'\n", "\n", "ollama_node_name ='ollama_node'\n", "\n", @@ -97,17 +97,21 @@ "import random\n", "import pandas as pd\n", "\n", - "fields = ['name', 'cores_available', 'ram_available', 'disk_available']\n", + "fields = ['name', 'state', 'cores_available', 'ram_available', 'disk_available']\n", "gpu_models = [\"GPU_RTX6000\", \"GPU_Tesla_T4\", \"GPU_A30\", \"GPU_A40\"]\n", "gpu_fields = [f\"{m.split('_', 1)[1].lower()}_available\" for m in gpu_models]\n", "fields += [f for f in gpu_fields if f not in fields]\n", "\n", "# If empty -> do not filter by name\n", "sites_like: list[str] = [] # e.g., ['BRIST', 'TOKY'] or [] to disable\n", + "avoid_like: list[str] = [\"TACC\", \"GATECH\", \"GPN\"] # e.g., ['BRIST', 'TOKY'] or [] to disable\n", + "min_cores = 4\n", + "min_ram_gb = 16\n", + "min_disk_gb = 200\n", + "min_gpu_any = 0 # >0 means at least one GPU of any model for the initial filter\n", + "min_gpu_for_pick = 1 # >1 means 
at least two for the random pick\n", "\n", - "avoid_sites_like: list[str] = [\"GATECH\", \"GPN\"]\n", - "\n", - "def site_filter(row: dict) -> bool:\n", + "def filter_function(row: dict) -> bool:\n", " # Name filter: only apply if sites_like is non-empty\n", " if sites_like:\n", " name = (row.get('name') or '')\n", @@ -118,13 +122,14 @@ " res_ok = (\n", " row.get('cores_available', 0) > min_cores and\n", " row.get('ram_available', 0) > min_ram_gb and\n", - " row.get('disk_available', 0) > min_disk_gb\n", + " row.get('disk_available', 0) > min_disk_gb and\n", + " row.get('state') == 'Active'\n", " )\n", " any_gpu_ok = any(row.get(gf, 0) > min_gpu_any for gf in gpu_fields)\n", "\n", " return name_ok and res_ok and any_gpu_ok\n", "\n", - "styled_or_df = fablib.list_sites(fields=fields, pretty_names=False, avoid=avoid_sites_like, filter_function=site_filter)\n", + "styled_or_df = fablib.list_hosts(fields=fields, pretty_names=False, avoid=avoid_like, filter_function=filter_function)\n", "\n", "# Normalize Styler/DataFrame/list-of-dicts -> DataFrame\n", "if isinstance(styled_or_df, pd.io.formats.style.Styler):\n", @@ -135,7 +140,7 @@ " df = pd.DataFrame(styled_or_df or [])\n", "\n", "if df.empty:\n", - " raise RuntimeError(\"No sites matched the filter criteria.\")\n", + " raise RuntimeError(\"No hosts matched the filter criteria.\")\n", "\n", "# Random pick where any GPU count > 1\n", "model_map = dict(zip(gpu_fields, gpu_models))\n", @@ -148,18 +153,19 @@ " raise RuntimeError(\"No site has any GPU model with count > 1.\")\n", "\n", "pick = eligible.sample(1).iloc[0]\n", - "site_row = df.loc[pick[\"index\"]]\n", + "host_row = df.loc[pick[\"index\"]]\n", "picked_gpu_model = model_map[pick[\"gpu_field\"]]\n", "\n", "print(\n", - " f\"Chosen site: {site_row.get('name', '')} | \"\n", + " f\"Chosen Host: {host_row.get('name', '')} | \"\n", " f\"GPU: {picked_gpu_model} | Available: {int(pick['count'])}\"\n", ")\n", "\n", "if \"GPU_Tesla_T4\" == picked_gpu_model:\n", " picked_gpu_model = \"GPU_TeslaT4\"\n", "\n", - "picked_site = site_row.get('name')" + "picked_host = host_row.get('name')\n", + "picked_site = picked_host.split('-', 1)[0].upper()" ] }, { @@ -195,7 +201,7 @@ "\n", "net1 = ollama_slice.add_l3network(name=network_name)\n", "\n", - "ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=min_cores, ram=min_ram_gb, \n", + "ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=min_cores, ram=min_ram_gb, host=picked_host,\n", " disk=min_disk_gb, site=picked_site, image='default_ubuntu_22')\n", "\n", "ollama_node.add_component(model=picked_gpu_model, name='gpu1')\n",