261 changes: 207 additions & 54 deletions fabric_examples/complex_recipes/collaborative_slices/ollama/ollama.ipynb
@@ -41,21 +41,6 @@
"fablib.show_config();"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cores_column_name = 'cores_available'\n",
"ram_column_name = 'ram_available'\n",
"disk_column_name = 'disk_available'\n",
"\n",
"core=16\n",
"ram=32\n",
"disk=100"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -96,24 +81,11 @@
"metadata": {},
"outputs": [],
"source": [
"# choices include\n",
"# GPU_RTX6000\n",
"# GPU_TeslaT4\n",
"# GPU_A30\n",
"# GPU_A40\n",
"GPU_CHOICE = 'GPU_A30' \n",
"\n",
"# don't edit - convert from GPU type to a resource column name\n",
"# to use in filter lambda function below\n",
"choice_to_column = {\n",
" \"GPU_RTX6000\": \"rtx6000_available\",\n",
" \"GPU_TeslaT4\": \"tesla_t4_available\",\n",
" \"GPU_A30\": \"a30_available\",\n",
" \"GPU_A40\": \"a40_available\"\n",
"}\n",
"\n",
"column_name = choice_to_column.get(GPU_CHOICE, \"Unknown\")\n",
"print(f'{column_name=}')"
"min_cores = 16\n",
"min_ram_gb = 32\n",
"min_disk_gb = 100\n",
"min_gpu_any = 0 # >0 means at least one GPU of any model for the initial filter\n",
"min_gpu_for_pick = 1 # >1 means at least two for the random pick"
]
},
{
@@ -122,23 +94,72 @@
"metadata": {},
"outputs": [],
"source": [
"# find a site with at least one available GPU of the selected type\n",
"site_override = None\n",
"\n",
"cores_column_name = 'cores_available'\n",
"ram_column_name = 'ram_available'\n",
"disk_column_name = 'disk_available'\n",
"\n",
"if site_override:\n",
" site1 = site_override\n",
"import random\n",
"import pandas as pd\n",
"\n",
"fields = ['name', 'cores_available', 'ram_available', 'disk_available']\n",
"gpu_models = [\"GPU_RTX6000\", \"GPU_Tesla_T4\", \"GPU_A30\", \"GPU_A40\"]\n",
"gpu_fields = [f\"{m.split('_', 1)[1].lower()}_available\" for m in gpu_models]\n",
"fields += [f for f in gpu_fields if f not in fields]\n",
"\n",
"# If empty -> do not filter by name\n",
"sites_like: list[str] = [] # e.g., ['BRIST', 'TOKY'] or [] to disable\n",
"\n",
"avoid_sites_like: list[str] = [\"GATECH\", \"GPN\"]\n",
"\n",
"def site_filter(row: dict) -> bool:\n",
" # Name filter: only apply if sites_like is non-empty\n",
" if sites_like:\n",
" name = (row.get('name') or '')\n",
" name_ok = any(tok.lower() in name.lower() for tok in sites_like)\n",
" else:\n",
" name_ok = True\n",
"\n",
" res_ok = (\n",
" row.get('cores_available', 0) > min_cores and\n",
" row.get('ram_available', 0) > min_ram_gb and\n",
" row.get('disk_available', 0) > min_disk_gb\n",
" )\n",
" any_gpu_ok = any(row.get(gf, 0) > min_gpu_any for gf in gpu_fields)\n",
"\n",
" return name_ok and res_ok and any_gpu_ok\n",
"\n",
"styled_or_df = fablib.list_sites(fields=fields, pretty_names=False, avoid=avoid_sites_like, filter_function=site_filter)\n",
"\n",
"# Normalize Styler/DataFrame/list-of-dicts -> DataFrame\n",
"if isinstance(styled_or_df, pd.io.formats.style.Styler):\n",
" df = styled_or_df.data\n",
"elif isinstance(styled_or_df, pd.DataFrame):\n",
" df = styled_or_df\n",
"else:\n",
" site1 = fablib.get_random_site(filter_function=lambda x: x[column_name] > 0 and \n",
" x[cores_column_name] > core and \n",
" x[ram_column_name] > ram and \n",
" x[disk_column_name] > disk,\n",
" avoid = [\"GATECH\", \"GPN\"])\n",
" \n",
"print(f'Preparing to create slice \"{ollama_slice_name}\" with node {ollama_node_name} in site {site1}')"
" df = pd.DataFrame(styled_or_df or [])\n",
"\n",
"if df.empty:\n",
" raise RuntimeError(\"No sites matched the filter criteria.\")\n",
"\n",
"# Random pick where any GPU count > 1\n",
"model_map = dict(zip(gpu_fields, gpu_models))\n",
"long = (\n",
" df.reset_index()[[\"index\"] + gpu_fields]\n",
" .melt(id_vars=\"index\", var_name=\"gpu_field\", value_name=\"count\")\n",
")\n",
"eligible = long[long[\"count\"] > min_gpu_for_pick]\n",
"if eligible.empty:\n",
" raise RuntimeError(\"No site has any GPU model with count > 1.\")\n",
"\n",
"pick = eligible.sample(1).iloc[0]\n",
"site_row = df.loc[pick[\"index\"]]\n",
"picked_gpu_model = model_map[pick[\"gpu_field\"]]\n",
"\n",
"print(\n",
" f\"Chosen site: {site_row.get('name', '<unknown>')} | \"\n",
" f\"GPU: {picked_gpu_model} | Available: {int(pick['count'])}\"\n",
")\n",
"\n",
"if \"GPU_Tesla_T4\" == picked_gpu_model:\n",
" picked_gpu_model = \"GPU_TeslaT4\"\n",
"\n",
"picked_site = site_row.get('name')"
]
},
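The random GPU pick in the cell above hinges on `pandas.melt`: the per-model availability columns are unpivoted into (site, GPU model, count) rows, the rows are filtered, and one row is sampled uniformly. A minimal, self-contained sketch of that pattern, with made-up site names and counts, is shown below.

```python
# Toy illustration of the melt-then-sample pick used above.
# Site names and GPU counts are invented for demonstration only.
import pandas as pd

df = pd.DataFrame({
    "name": ["SITE1", "SITE2", "SITE3"],   # hypothetical sites
    "a30_available": [0, 3, 1],
    "a40_available": [2, 0, 0],
})
gpu_fields = ["a30_available", "a40_available"]

# Unpivot: one row per (site index, GPU model) pair with its available count.
long = (
    df.reset_index()[["index"] + gpu_fields]
      .melt(id_vars="index", var_name="gpu_field", value_name="count")
)

# Keep only pairs with more than one GPU available, then pick one at random.
eligible = long[long["count"] > 1]
pick = eligible.sample(1).iloc[0]
print(df.loc[pick["index"], "name"], pick["gpu_field"], int(pick["count"]))
```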
{
@@ -174,10 +195,10 @@
"\n",
"net1 = ollama_slice.add_l3network(name=network_name)\n",
"\n",
"ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=core, ram=ram, \n",
" disk=disk, site=site1, image='default_ubuntu_22')\n",
"ollama_node = ollama_slice.add_node(name=ollama_node_name, cores=min_cores, ram=min_ram_gb, \n",
" disk=min_disk_gb, site=picked_site, image='default_ubuntu_22')\n",
"\n",
"ollama_node.add_component(model=GPU_CHOICE, name='gpu1')\n",
"ollama_node.add_component(model=picked_gpu_model, name='gpu1')\n",
"\n",
"\n",
"iface1 = ollama_node.add_component(model=model_name, name=nic_name).get_interfaces()[0]\n",
@@ -308,6 +329,138 @@
"print(f\"Ollama is accessible from other slices at: {ollama_fabnet_ip_addr}\")"
]
},
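Because the FABnet address printed above is reachable from other slices attached to the same FABnet network, a peer slice can query the service directly. The sketch below assumes a hypothetical `peer_node` in another slice that has a copy of `ollama_tools/query.py` (or any HTTP client) installed; the IP address is a placeholder for the value printed above, and the model name matches the `.env` default in this PR.

```python
# Hedged sketch: query Ollama from a node in a peer slice over FABnet.
# 'peer_node' and the FABnet IP below are placeholders for your own topology.
ollama_fabnet_ip = "10.132.1.2"   # substitute the address printed above

stdout, stderr = peer_node.execute(
    f"python3 ollama_tools/query.py --model gemma3:270m "
    f"--prompt 'Hello from a peer slice' --host {ollama_fabnet_ip} --port 11434"
)
```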
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying Ollama\n",
"\n",
"Users can interact with the LLM through the REST API, the command-line interface, or the Open WebUI.\n",
"\n",
"### REST Examples\n",
"\n",
"The `query.py` script demonstrates how to query the LLM over the REST interface. Although Ollama can run on a remote host, the example below targets the local instance by passing `--host localhost`. Users may also specify a different `--host` and `--port` as needed.\n"
]
},
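Under the hood, `query.py` simply POSTs to Ollama's `/api/generate` endpoint. If you prefer to call the REST API directly, a minimal sketch with `requests` looks like the following; the model name assumes the `gemma3:270m` default set in this PR's `.env` file, and `localhost:11434` assumes the code runs on the Ollama node itself.

```python
# Minimal direct call to Ollama's /api/generate REST endpoint.
# Assumes Ollama is reachable on localhost:11434 and the model has been pulled.
import requests

payload = {
    "model": "gemma3:270m",   # example; use whatever default_llm_model resolves to
    "prompt": "Tell me about the National Science Foundation",
    "stream": False,          # return one JSON object instead of a stream of chunks
}
resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["response"])
```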
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about National Science Foundation' --host localhost --port 11434\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stdout, stderr = ollama_node.execute(f\"python3 ollama_tools/query.py --model {default_llm_model} --prompt 'Tell me about NVIDIA BlueField DPUs' --host localhost --port 11434\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### CLI Examples\n",
"\n",
"SSH into the `ollama_node` using the command provided above.\n",
"To view available models, run:\n",
"\n",
"```bash\n",
"docker exec -it ollama ollama list\n",
"```\n",
"\n",
"To start a model and interact with it:\n",
"\n",
"```bash\n",
"docker exec -it ollama ollama run deepseek-r1:7b\n",
"```\n",
"\n",
"This will open an interactive prompt where you can type questions directly."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Open Web UI\n",
"\n",
"To access the Open Web UI from your laptop, you’ll need to create an SSH tunnel.\n",
"Follow the steps below to complete the setup.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Start the SSH Tunnel\n",
"\n",
"- Create SSH Tunnel Configuration `fabric_ssh_tunnel_tools.zip`\n",
"- Download your custom `fabric_ssh_tunnel_tools.zip` tarball from the `fabric_config` folder. \n",
"- Untar the tarball and put the resulting folder (`fabric_ssh_tunnel_tools`) somewhere you can access it from the command line.\n",
"- Open a terminal window. (Windows: use `powershell`) \n",
"- Use `cd` to navigate to the `fabric_ssh_tunnel_tools` folder.\n",
"- In your terminal, run the command that results from running the following cell (leave the terminal window open)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fablib.create_ssh_tunnel_config(overwrite=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Launch Open Web UI\n",
"\n",
"To access the Open Web UI running on the ollama node, create an SSH tunnel from your local machine using the command generated by the next cell:\n",
"\n",
"```bash\n",
"ssh -L 8080:<manager-ip>:8080 -i <private_key> -F <ssh_config> <your-username>@<manager-host>\n",
"```\n",
"\n",
"Replace `<manager-ip>` and `<manager-host>` with the actual IP address and hostname of the Ceph manager VM.\n",
"\n",
"Then, open your browser and navigate to:\n",
"\n",
"\n",
"http://localhost:8080\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Port on your local machine that you want to map the File Browser to.\n",
"local_port='8080'\n",
"# Local interface to map the File Browser to (can be `localhost`)\n",
"local_host='127.0.0.1'\n",
"\n",
"# Port on the node used by the File Browser Service\n",
"target_port='8080'\n",
"\n",
"# Username/node on FABRIC\n",
"target_host=f'{ollama_node.get_username()}@{ollama_node.get_management_ip()}'\n",
"\n",
"print(\"Use `cd` to navigate into the `fabric_ssh_tunnel_tools` folder.\")\n",
"print(\"In your terminal, run the SSH tunnel command\")\n",
"print()\n",
"print(f'ssh -L {local_host}:{local_port}:127.0.0.1:{target_port} -i {os.path.basename(fablib.get_default_slice_public_key_file())[:-4]} -F ssh_config {target_host}')\n",
"print()\n",
"print(\"After running the SSH command, open Open WebUI at http://localhost:8080. If prompted, create an account and start asking questions.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -325,8 +478,8 @@
},
"outputs": [],
"source": [
"ollama_node = fablib.get_slice(ollama_slice_name)\n",
"ollama_node.delete()"
"#ollama_node = fablib.get_slice(ollama_slice_name)\n",
"#ollama_node.delete()"
]
},
{
@@ -27,7 +27,7 @@ services:
ports:
- ${OPEN_WEBUI_PORT-3000}:8080
environment:
- 'OLLAMA_BASE_URL=http://ollama:11434'
- 'OLLAMA_BASE_URL=http://localhost:11434'
- 'WEBUI_SECRET_KEY='
network_mode: host
extra_hosts:
@@ -1,3 +1,3 @@
MODEL_NAME=deepseek-r1:7b
MODEL_NAME=gemma3:270m
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
@@ -2,8 +2,8 @@
import requests
import json

def query_deepseek(prompt, model, host, port, stream=False):
"""Sends a query to the DeepSeek model via Ollama API."""
def query_model(prompt, model, host, port, stream=False):
"""Sends a query to the model via Ollama API."""
api_url = f"http://{host}:{port}/api/generate"

payload = {
@@ -21,7 +21,7 @@ def query_deepseek(prompt, model, host, port, stream=False):
return f"Request error: {str(e)}"

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Query the DeepSeek model via Ollama API.")
parser = argparse.ArgumentParser(description="Query the model via Ollama API.")
parser.add_argument("--prompt", required=True, help="The prompt text to send to the model.")
parser.add_argument("--model", required=True, help="The model name to use.")
parser.add_argument("--host", required=False, default="127.0.0.1", help="The host where Ollama API is running.")
@@ -30,5 +30,5 @@ def query_deepseek(prompt, model, host, port, stream=False):

args = parser.parse_args()

response = query_deepseek(args.prompt, args.model, args.host, args.port, args.stream)
print("\nDeepSeek Response:\n", response)
response = query_model(args.prompt, args.model, args.host, args.port, args.stream)
print("Model Response:\n", response)