Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 53 additions & 51 deletions api/llm_client.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,44 @@
"""LLM client wrapper for API chat functionality using Google Gemini."""

import os
import time
from typing import Any, Dict, List
import requests
import json
from typing import Dict, List
from dotenv import load_dotenv

try:
import google.generativeai as genai
except ModuleNotFoundError:
genai = None
# System prompts selectable by name.
# Kept above LLMClient because generate_response uses SYSTEM_PROMPTS["default"]
# as a default argument, and Python evaluates default arguments when the
# function is defined, so the name must already exist at that point.
SYSTEM_PROMPTS = {
"default": """You are BAIO, an expert bioinformatics assistant specialized in DNA sequence classification and pathogen detection.

You help researchers:
- Understand classification results (Virus vs Host predictions)
- Interpret confidence scores and risk levels
- Explain k-mer analysis and model predictions
- Provide guidance on next steps for validation

Be concise, helpful, and scientific in your responses. Use emojis sparingly.""",
"analysis_helper": "You are analyzing metagenomic sequencing data with BAIO. Help interpret the classification results and suggest next steps.",
"technical_expert": "You are a technical expert on BAIO's architecture, focusing on RandomForest models, k-mer tokenization, and TF-IDF features.",
}


class LLMClient:
"""Wrapper class for Google Gemini LLM API calls."""

def __init__(self, provider: str = "google", model: str = "gemini-1.5-flash"):
self.provider = provider
def __init__(self, model: str = "liquid/lfm-2.5-1.2b-instruct:free"):
load_dotenv()
self.model = model
self.api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
self.client: Any = None

if self.api_key and genai is not None:
try:
genai.configure(api_key=self.api_key)
self.client = genai.GenerativeModel(model)
except Exception as e:
print(f"Failed to initialize Gemini: {e}", flush=True)
elif self.api_key and genai is None:
self.api_key = os.getenv("OPENROUTER_API_KEY")
if self.api_key is None:
print(
"google-generativeai is not installed; falling back to mock responses.",
"OpenRouter api key not found; falling back to mock responses.",
flush=True,
)

def generate_response(
self, messages: List[Dict[str, str]], system_prompt: str
self,
messages: List[Dict[str, str]],
system_prompt: str = SYSTEM_PROMPTS["default"],
) -> str:
"""
Generate response from Gemini LLM.
Generate response from the LLM or fallback to mock if API key missing/error.

Args:
messages: List of conversation messages
Expand All @@ -44,25 +47,39 @@ def generate_response(
Returns:
Generated response text
"""
try:
if self.client is None:
return self._mock_response(messages)

history = []
for msg in messages[:-1]:
role = "user" if msg["role"] == "user" else "model"
history.append({"role": role, "parts": [msg["content"]]})
if self.api_key is None:
return "OpenRouter api key not found; falling back to mock responses."

chat = self.client.start_chat(history=history)
# Build the payload for OpenRouter API
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": system_prompt},
*messages,
],
}

last_message = messages[-1]["content"] if messages else ""
full_prompt = f"{system_prompt}\n\nUser: {last_message}"
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}

response = chat.send_message(full_prompt)
return response.text
try:
response = requests.post(
url="https://openrouter.ai/api/v1/chat/completions",
headers=headers,
data=json.dumps(payload),
timeout=10, # seconds
)
response.raise_for_status()
data = response.json()

# Extract the assistant's reply
return data["choices"][0]["message"]["content"]

except Exception as e:
print(f"Gemini API error: {str(e)}")
print(f"API error: {e}. Falling back to mock response.", flush=True)
return self._mock_response(messages)

def _mock_response(self, messages: List[Dict[str, str]]) -> str:
Expand Down Expand Up @@ -90,18 +107,3 @@ def _mock_response(self, messages: List[Dict[str, str]]) -> str:
"virus/host detection, confidence scores, and the analysis pipeline. "
"What would you like to know?"
)


SYSTEM_PROMPTS = {
"default": """You are BAIO, an expert bioinformatics assistant specialized in DNA sequence classification and pathogen detection.

You help researchers:
- Understand classification results (Virus vs Host predictions)
- Interpret confidence scores and risk levels
- Explain k-mer analysis and model predictions
- Provide guidance on next steps for validation

Be concise, helpful, and scientific in your responses. Use emojis sparingly.""",
"analysis_helper": "You are analyzing metagenomic sequencing data with BAIO. Help interpret the classification results and suggest next steps.",
"technical_expert": "You are a technical expert on BAIO's architecture, focusing on RandomForest models, k-mer tokenization, and TF-IDF features.",
}
74 changes: 42 additions & 32 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,77 @@ name: baio
channels:
- conda-forge
- bioconda
- defaults

dependencies:
# Core
# === Core ===
- python>=3.12,<3.13
- uvicorn
- requests
- python-dotenv>=1.0
- click
- rich
- jsonschema
- python-json-logger>=3.0
- pyyaml>=6.0

# Numeric stack compatible with Py3.12
# === Numeric / Scientific stack ===
- numpy=2.2
- scipy=1.14
- numba=0.61
- llvmlite=0.44

# ML / DS
- scikit-learn=1.5
- pandas=2.2
- matplotlib=3.9
- seaborn=0.13
- plotly
- joblib>=1.3

# Torch (CPU/MPS on Apple Silicon)
# === ML / AI ===
- scikit-learn=1.5
- pytorch=2.5.1

# I/O & storage
- pyarrow=17
# === Bioinformatics / Clustering ===
- biopython>=1.85
- hdbscan=0.8.39
- umap-learn=0.5.7

# === I/O / Storage ===
- pyarrow>=17.0
- h5py
- zarr

# App / tooling
- tqdm
- click
- python-dotenv
- pyyaml
- jsonschema
- rich
- requests

# Dev / QA
- pytest
- pytest-cov
- black
- flake8
- isort
- mypy

# Jupyter
# === Jupyter / Notebook ===
- jupyter
- jupyterlab
- ipywidgets
- ipykernel>=6.0

# Install without conflicts
- hdbscan=0.8.39
- umap-learn==0.5.7
# === Dev / QA ===
- pytest>=8.0
- pytest-cov>=4.1
- pytest-mock>=3.12
- pytest-asyncio>=0.23
- black>=24.0
- flake8>=7.0
- isort>=5.13
- mypy>=1.10
- watchdog>=3.0

# Install via pip
# === Pip-only packages ===
- pip
- pip:
# runtime / HuggingFace
- transformers==4.56.1
- tokenizers==0.22.0
- accelerate>=0.30
- datasets>=2.19
- types-requests>=2.31.0
- fastapi>=0.115.0
- pandas-stubs==2.3.2.250926

# dev / profiling / type stubs
- line-profiler>=4.1
- memory-profiler>=0.61
- pandas-stubs==2.3.2.250926
- types-requests>=2.31.0
- types-PyYAML>=6.0
- types-jsonschema
- pre-commit>=3.3
6 changes: 3 additions & 3 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 12 additions & 7 deletions frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Header from './components/Header'
import SequenceInput from './components/SequenceInput'
import ConfigPanel from './components/ConfigPanel'
import ResultsDashboard from './components/ResultsDashboard'
import ChatWidget from './components/ChatWidget'
import type {
ChatMessage,
ClassificationResponse,
Expand Down Expand Up @@ -73,10 +74,11 @@ function App() {

const [chatMessages, setChatMessages] = useState<ChatMessage[]>([
{
role: 'assistant',
content:
"role": 'assistant',
"content":
'Hi! Paste FASTA sequences, run classification, and ask questions here.',
},

])
const [chatInput, setChatInput] = useState('')
const [chatLoading, setChatLoading] = useState(false)
Expand Down Expand Up @@ -167,11 +169,6 @@ function App() {
healthOk={healthOk}
darkMode={darkMode}
toggleDarkMode={() => setDarkMode(!darkMode)}
chatMessages={chatMessages}
chatInput={chatInput}
onChatInputChange={setChatInput}
onChatSend={handleChatSend}
chatLoading={chatLoading}
/>

{error && (
Expand Down Expand Up @@ -261,6 +258,14 @@ function App() {
</div>
</div>
</div>

<ChatWidget
messages={chatMessages}
input={chatInput}
onInputChange={setChatInput}
onSend={handleChatSend}
isLoading={chatLoading}
/>
</div>
)
}
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export type ClassificationResponse = {
}

export type ChatMessage = {
role: 'user' | 'assistant'
role: 'user' | 'system' | 'assistant'
content: string
}

Expand Down
2 changes: 1 addition & 1 deletion metaseq/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List, Tuple, Optional, Dict, Any
import os
import json
import yaml # type: ignore[import-untyped]
import yaml
import pandas as pd
from sklearn.model_selection import train_test_split # type: ignore[import-untyped]
from sklearn.metrics import classification_report # type: ignore[import-untyped]
Expand Down
Loading
Loading