diff --git a/.github/workflows/test_chat_sdk.yml b/.github/workflows/test_chat_sdk.yml index 31b0bf3f..003b7932 100644 --- a/.github/workflows/test_chat_sdk.yml +++ b/.github/workflows/test_chat_sdk.yml @@ -76,6 +76,32 @@ jobs: Write-Host "Found gaia at: $($gaiaPath.Source)" python -m pip check + - name: Run Chat SDK Unit Tests (no server required) + shell: cmd + run: | + REM Activate conda environment + call "%GITHUB_WORKSPACE%\miniforge3\Scripts\activate.bat" gaiaenv + + echo ================================================================ + echo CHAT SDK UNIT TESTS (NO SERVER REQUIRED) + echo ================================================================ + echo Running unit tests before starting Lemonade server... + echo. + + set PYTHONIOENCODING=utf-8 + python tests\unit\test_chat_sdk_unit.py + set unit_exit=%ERRORLEVEL% + + echo. + echo ---------------------------------------------------------------- + if %unit_exit% equ 0 ( + echo [SUCCESS] Chat SDK unit tests passed + ) else ( + echo [FAILURE] Chat SDK unit tests failed with exit code %unit_exit% + exit /b 1 + ) + echo ---------------------------------------------------------------- + - name: Start Lemonade Server for Integration Tests timeout-minutes: 15 env: diff --git a/setup.py b/setup.py index 0c327b87..5ba8913f 100644 --- a/setup.py +++ b/setup.py @@ -129,6 +129,7 @@ ], "rag": [ "pypdf", + "pymupdf", "sentence-transformers", "faiss-cpu>=1.7.0", ], diff --git a/src/gaia/agents/base/console.py b/src/gaia/agents/base/console.py index a5ce9d30..543bcbab 100644 --- a/src/gaia/agents/base/console.py +++ b/src/gaia/agents/base/console.py @@ -21,7 +21,7 @@ except ImportError: RICH_AVAILABLE = False print( - "Rich library not found. Install with 'pip install rich' for syntax highlighting." + "Rich library not found. Install with 'uv pip install rich' for syntax highlighting." ) # Display configuration constants diff --git a/src/gaia/agents/chat/agent.py b/src/gaia/agents/chat/agent.py index b44b71dd..b36ec51c 100644 --- a/src/gaia/agents/chat/agent.py +++ b/src/gaia/agents/chat/agent.py @@ -339,7 +339,7 @@ def __init__(self, config: Optional[ChatAgentConfig] = None): elif self.rag_documents and not self.rag: logger.warning( "RAG dependencies not installed. Cannot index documents. 
" - "Install with: pip install gaia[rag]" + "Install with: uv pip install -e .[rag]" ) # Start watching directories @@ -847,9 +847,9 @@ def _watch_directory(self, directory: str) -> None: "\nāŒ Error: Missing required package 'watchdog'\n\n" "File watching requires the watchdog package.\n" "Please install the required dependencies:\n" - " pip install -e .[dev]\n\n" + " uv pip install -e .[dev]\n\n" "Or install watchdog directly:\n" - " pip install watchdog>=2.1.0\n" + " uv pip install watchdog>=2.1.0\n" ) logger.error(error_msg) raise ImportError(error_msg) diff --git a/src/gaia/agents/chat/app.py b/src/gaia/agents/chat/app.py index d0a02d1e..09007a03 100644 --- a/src/gaia/agents/chat/app.py +++ b/src/gaia/agents/chat/app.py @@ -330,6 +330,13 @@ def interactive_mode(agent: ChatAgent): print(" /index /path/to/documents/") continue + # Check if RAG is available + if agent.rag is None: + print("\nāŒ RAG (document indexing) is not available.") + print(" Missing dependencies: pypdf, sentence-transformers, faiss-cpu") + print("\n Install with: uv pip install -e .[rag]") + continue + # Check if it's a directory or file path = Path(arg) @@ -999,6 +1006,13 @@ def main(): print(f"āŒ File not found: {index_path}") return 1 + # Check if RAG is available + if agent.rag is None: + print("āŒ RAG (document indexing) is not available.") + print(" Missing dependencies: pypdf, sentence-transformers, faiss-cpu") + print("\n Install with: uv pip install -e .[rag]") + return 1 + print(f"šŸ“„ Indexing: {Path(index_path).name}") print("=" * 60) diff --git a/src/gaia/agents/code/tools/code_formatting.py b/src/gaia/agents/code/tools/code_formatting.py index 498f2871..a92b8799 100644 --- a/src/gaia/agents/code/tools/code_formatting.py +++ b/src/gaia/agents/code/tools/code_formatting.py @@ -148,7 +148,7 @@ def format_with_black( except ImportError: return { "status": "error", - "error": "black is not installed. Install with: pip install black", + "error": "black is not installed. Install with: uv pip install black", } except Exception as e: return {"status": "error", "error": str(e)} diff --git a/src/gaia/agents/code/tools/code_tools.py b/src/gaia/agents/code/tools/code_tools.py index 82824103..f680f8ff 100644 --- a/src/gaia/agents/code/tools/code_tools.py +++ b/src/gaia/agents/code/tools/code_tools.py @@ -568,7 +568,7 @@ def analyze_with_pylint( except ImportError: return { "status": "error", - "error": "pylint is not installed. Install with: pip install pylint", + "error": "pylint is not installed. Install with: uv pip install pylint", } except Exception as e: return {"status": "error", "error": str(e)} diff --git a/src/gaia/apps/summarize/pdf_formatter.py b/src/gaia/apps/summarize/pdf_formatter.py index 383c4890..affee5a5 100644 --- a/src/gaia/apps/summarize/pdf_formatter.py +++ b/src/gaia/apps/summarize/pdf_formatter.py @@ -36,7 +36,7 @@ class PDFFormatter: def __init__(self): if not HAS_REPORTLAB: raise ImportError( - "PDF output requires reportlab. Install with: pip install reportlab" + "PDF output requires reportlab. Install with: uv pip install reportlab" ) self.styles = getSampleStyleSheet() diff --git a/src/gaia/audio/audio_client.py b/src/gaia/audio/audio_client.py index b2703f94..cc5b030c 100644 --- a/src/gaia/audio/audio_client.py +++ b/src/gaia/audio/audio_client.py @@ -123,7 +123,7 @@ async def start_voice_chat(self, message_processor_callback): except ImportError: self.log.error( - "WhisperAsr not found. Please install voice support with: pip install .[talk]" + "WhisperAsr not found. 
Please install voice support with: uv pip install -e .[talk]" ) raise except Exception as e: @@ -306,7 +306,7 @@ def initialize_tts(self): self.log.debug("TTS initialized successfully") except Exception as e: raise RuntimeError( - f"Failed to initialize TTS:\n{e}\nInstall talk dependencies with: pip install .[talk]\nYou can also use --no-tts option to disable TTS" + f"Failed to initialize TTS:\n{e}\nInstall talk dependencies with: uv pip install -e .[talk]\nYou can also use --no-tts option to disable TTS" ) async def speak_text(self, text: str) -> None: diff --git a/src/gaia/audio/kokoro_tts.py b/src/gaia/audio/kokoro_tts.py index a07ada7a..34a9b161 100644 --- a/src/gaia/audio/kokoro_tts.py +++ b/src/gaia/audio/kokoro_tts.py @@ -43,9 +43,9 @@ def __init__(self): error_msg = ( f"\nāŒ Error: Missing required talk dependencies: {', '.join(missing)}\n\n" f"Please install the talk dependencies:\n" - f" pip install -e .[talk]\n\n" + f" uv pip install -e .[talk]\n\n" f"Or install packages directly:\n" - f" pip install {' '.join(missing)}\n" + f" uv pip install {' '.join(missing)}\n" ) raise ImportError(error_msg) diff --git a/src/gaia/audio/whisper_asr.py b/src/gaia/audio/whisper_asr.py index d5d3aa94..21118471 100644 --- a/src/gaia/audio/whisper_asr.py +++ b/src/gaia/audio/whisper_asr.py @@ -56,9 +56,9 @@ def __init__( error_msg = ( f"\nāŒ Error: Missing required talk dependencies: {', '.join(missing)}\n\n" f"Please install the talk dependencies:\n" - f" pip install -e .[talk]\n\n" + f" uv pip install -e .[talk]\n\n" f"Or install packages directly:\n" - f" pip install {' '.join(missing)}\n" + f" uv pip install {' '.join(missing)}\n" ) raise ImportError(error_msg) diff --git a/src/gaia/chat/sdk.py b/src/gaia/chat/sdk.py index 89281cf6..4e5e7ff3 100644 --- a/src/gaia/chat/sdk.py +++ b/src/gaia/chat/sdk.py @@ -12,7 +12,6 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -from gaia.chat.prompts import Prompts from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME from gaia.llm.llm_client import LLMClient from gaia.logger import get_logger @@ -99,7 +98,7 @@ def __init__(self, config: Optional[ChatConfig] = None): use_openai=self.config.use_chatgpt, claude_model=self.config.claude_model, base_url=self.config.base_url, - system_prompt=None, # We handle system prompts through Prompts class + system_prompt=None, # We handle system prompts in _history_to_messages() ) # Store conversation history @@ -111,15 +110,38 @@ def __init__(self, config: Optional[ChatConfig] = None): self.log.debug("ChatSDK initialized") - def _format_history_for_context(self) -> str: - """Format chat history for inclusion in LLM context using model-specific formatting.""" + def _history_to_messages(self, enhanced_last_message: Optional[str] = None) -> List[Dict[str, str]]: + """Convert internal chat_history to messages array for chat/completions endpoint. 
+ + Args: + enhanced_last_message: If provided, replace the last user message content + (used for RAG-enhanced queries) + + Returns: + List of message dicts with 'role' and 'content' keys + """ + messages = [] + + # Add system prompt if configured + if self.config.system_prompt: + messages.append({"role": "system", "content": self.config.system_prompt}) + + # Convert chat history entries to messages + assistant_prefix = f"{self.config.assistant_name}: " history_list = list(self.chat_history) - return Prompts.format_chat_history( - self.config.model, - history_list, - self.config.assistant_name, - self.config.system_prompt, - ) + + for i, entry in enumerate(history_list): + if entry.startswith("user: "): + content = entry[6:] + # Use enhanced message for last user message if provided + if enhanced_last_message and i == len(history_list) - 1: + content = enhanced_last_message + messages.append({"role": "user", "content": content}) + elif entry.startswith(assistant_prefix): + content = entry[len(assistant_prefix):] + messages.append({"role": "assistant", "content": content}) + + return messages def _normalize_message_content(self, content: Any) -> str: """ @@ -160,50 +182,38 @@ def send_messages( ChatResponse with the complete response """ try: - # Convert messages to chat history format - chat_history = [] + # Use provided system prompt or fall back to config default + effective_system_prompt = system_prompt or self.config.system_prompt + # Build messages list with system prompt prepended + chat_messages = [] + if effective_system_prompt: + chat_messages.append( + {"role": "system", "content": effective_system_prompt} + ) + + # Normalize and add all messages for msg in messages: role = msg.get("role", "") content = self._normalize_message_content(msg.get("content", "")) - - if role == "user": - chat_history.append(f"user: {content}") - elif role == "assistant": - chat_history.append(f"assistant: {content}") + if role == "system" and effective_system_prompt: + continue # Skip if we already added system prompt + if role in ["user", "assistant", "system"]: + chat_messages.append({"role": role, "content": content}) elif role == "tool": + # Convert tool responses to assistant messages for compatibility tool_name = msg.get("name", "tool") - chat_history.append(f"assistant: [tool:{tool_name}] {content}") - # Skip system messages since they're passed separately - - # Use provided system prompt or fall back to config default - effective_system_prompt = system_prompt or self.config.system_prompt - - # Format according to model type - formatted_prompt = Prompts.format_chat_history( - model=self.config.model, - chat_history=chat_history, - assistant_name="assistant", - system_prompt=effective_system_prompt, - ) - - # Debug logging - self.log.debug(f"Formatted prompt length: {len(formatted_prompt)} chars") - self.log.debug( - f"System prompt used: {effective_system_prompt[:100] if effective_system_prompt else 'None'}..." 
- ) + chat_messages.append( + {"role": "assistant", "content": f"[tool:{tool_name}] {content}"} + ) - # Set appropriate stop tokens based on model - model_lower = self.config.model.lower() if self.config.model else "" - if "qwen" in model_lower: - kwargs.setdefault("stop", ["<|im_end|>", "<|im_start|>"]) - elif "llama" in model_lower: - kwargs.setdefault("stop", ["<|eot_id|>", "<|start_header_id|>"]) + self.log.debug(f"Sending {len(chat_messages)} messages") - # Use generate with formatted prompt + # Pass messages directly - server handles chat templating response = self.llm_client.generate( - prompt=formatted_prompt, + prompt="", model=self.config.model, + messages=chat_messages, stream=False, **kwargs, ) @@ -241,50 +251,41 @@ def send_messages_stream( ChatResponse chunks as they arrive """ try: - # Convert messages to chat history format - chat_history = [] + # Use provided system prompt or fall back to config default + effective_system_prompt = system_prompt or self.config.system_prompt + # Build messages list with system prompt prepended + chat_messages = [] + if effective_system_prompt: + chat_messages.append( + {"role": "system", "content": effective_system_prompt} + ) + + # Normalize and add all messages for msg in messages: role = msg.get("role", "") content = self._normalize_message_content(msg.get("content", "")) - - if role == "user": - chat_history.append(f"user: {content}") - elif role == "assistant": - chat_history.append(f"assistant: {content}") + if role == "system" and effective_system_prompt: + continue # Skip if we already added system prompt + if role in ["user", "assistant", "system"]: + chat_messages.append({"role": role, "content": content}) elif role == "tool": + # Convert tool responses to assistant messages for compatibility tool_name = msg.get("name", "tool") - chat_history.append(f"assistant: [tool:{tool_name}] {content}") - # Skip system messages since they're passed separately - - # Use provided system prompt or fall back to config default - effective_system_prompt = system_prompt or self.config.system_prompt - - # Format according to model type - formatted_prompt = Prompts.format_chat_history( - model=self.config.model, - chat_history=chat_history, - assistant_name="assistant", - system_prompt=effective_system_prompt, - ) - - # Debug logging - self.log.debug(f"Formatted prompt length: {len(formatted_prompt)} chars") - self.log.debug( - f"System prompt used: {effective_system_prompt[:100] if effective_system_prompt else 'None'}..." 
- ) + chat_messages.append( + {"role": "assistant", "content": f"[tool:{tool_name}] {content}"} + ) - # Set appropriate stop tokens based on model - model_lower = self.config.model.lower() if self.config.model else "" - if "qwen" in model_lower: - kwargs.setdefault("stop", ["<|im_end|>", "<|im_start|>"]) - elif "llama" in model_lower: - kwargs.setdefault("stop", ["<|eot_id|>", "<|start_header_id|>"]) + self.log.debug(f"Streaming {len(chat_messages)} messages") - # Use generate with formatted prompt for streaming + # Pass messages directly - server handles chat templating full_response = "" for chunk in self.llm_client.generate( - prompt=formatted_prompt, model=self.config.model, stream=True, **kwargs + prompt="", + model=self.config.model, + messages=chat_messages, + stream=True, + **kwargs, ): full_response += chunk yield ChatResponse(text=chunk, is_complete=False) @@ -324,40 +325,37 @@ def send(self, message: str, *, no_history: bool = False, **kwargs) -> ChatRespo # Enhance message with RAG context if enabled enhanced_message, _rag_metadata = self._enhance_with_rag(message.strip()) + # Build messages for the request if no_history: - # Build a prompt using only the current enhanced message - full_prompt = Prompts.format_chat_history( - model=self.config.model, - chat_history=[f"user: {enhanced_message}"], - assistant_name=self.config.assistant_name, - system_prompt=self.config.system_prompt, - ) + # Single message without history + chat_messages = [] + if self.config.system_prompt: + chat_messages.append( + {"role": "system", "content": self.config.system_prompt} + ) + chat_messages.append({"role": "user", "content": enhanced_message}) else: # Add user message to history (use original message for history) self.chat_history.append(f"user: {message.strip()}") - # Prepare prompt with conversation context (use enhanced message for LLM) - # Temporarily replace the last message with enhanced version for formatting - if self.rag_enabled and enhanced_message != message.strip(): - # Save original and replace with enhanced version - original_last = self.chat_history.pop() - self.chat_history.append(f"user: {enhanced_message}") - full_prompt = self._format_history_for_context() - # Restore original for history - self.chat_history.pop() - self.chat_history.append(original_last) - else: - full_prompt = self._format_history_for_context() + # Build messages from history, using enhanced message for RAG + enhanced = ( + enhanced_message + if self.rag_enabled and enhanced_message != message.strip() + else None + ) + chat_messages = self._history_to_messages(enhanced) # Generate response generate_kwargs = dict(kwargs) if "max_tokens" not in generate_kwargs: generate_kwargs["max_tokens"] = self.config.max_tokens - # Note: Retry logic is now handled at the LLM client level + # Pass messages directly - server handles chat templating response = self.llm_client.generate( - full_prompt, + prompt="", model=self.config.model, + messages=chat_messages, **generate_kwargs, ) @@ -384,12 +382,13 @@ def send(self, message: str, *, no_history: bool = False, **kwargs) -> ChatRespo self.log.error(f"Error in send: {e}") raise - def send_stream(self, message: str, **kwargs): + def send_stream(self, message: str, *, no_history: bool = False, **kwargs): """ Send a message and get a streaming response with conversation history. 
Args: message: The message to send + no_history: When True, bypass stored chat history and send only this prompt **kwargs: Additional arguments for LLM generation Yields: @@ -402,36 +401,47 @@ def send_stream(self, message: str, **kwargs): # Enhance message with RAG context if enabled enhanced_message, _rag_metadata = self._enhance_with_rag(message.strip()) - # Add user message to history (use original message for history) - self.chat_history.append(f"user: {message.strip()}") - - # Prepare prompt with conversation context (use enhanced message for LLM) - # Temporarily replace the last message with enhanced version for formatting - if self.rag_enabled and enhanced_message != message.strip(): - # Save original and replace with enhanced version - original_last = self.chat_history.pop() - self.chat_history.append(f"user: {enhanced_message}") - full_prompt = self._format_history_for_context() - # Restore original for history - self.chat_history.pop() - self.chat_history.append(original_last) + # Build messages for the request + if no_history: + # Single message without history + chat_messages = [] + if self.config.system_prompt: + chat_messages.append( + {"role": "system", "content": self.config.system_prompt} + ) + chat_messages.append({"role": "user", "content": enhanced_message}) else: - full_prompt = self._format_history_for_context() + # Add user message to history (use original message for history) + self.chat_history.append(f"user: {message.strip()}") + + # Build messages from history, using enhanced message for RAG + enhanced = ( + enhanced_message + if self.rag_enabled and enhanced_message != message.strip() + else None + ) + chat_messages = self._history_to_messages(enhanced) # Generate streaming response generate_kwargs = dict(kwargs) if "max_tokens" not in generate_kwargs: generate_kwargs["max_tokens"] = self.config.max_tokens + # Pass messages directly - server handles chat templating full_response = "" for chunk in self.llm_client.generate( - full_prompt, model=self.config.model, stream=True, **generate_kwargs + prompt="", + model=self.config.model, + messages=chat_messages, + stream=True, + **generate_kwargs, ): full_response += chunk yield ChatResponse(text=chunk, is_complete=False) - # Add complete assistant message to history - self.chat_history.append(f"{self.config.assistant_name}: {full_response}") + # Add complete assistant message to history when tracking conversation + if not no_history: + self.chat_history.append(f"{self.config.assistant_name}: {full_response}") # Send final response with stats and history if requested stats = None @@ -657,7 +667,7 @@ def update_config(self, **kwargs) -> None: self.chat_history = deque(old_history, maxlen=new_maxlen) if "system_prompt" in kwargs: - # System prompt is handled through Prompts class, not directly + # System prompt is stored in config and read by _history_to_messages() pass if "assistant_name" in kwargs: @@ -687,7 +697,7 @@ def enable_rag(self, documents: Optional[List[str]] = None, **rag_kwargs): from gaia.rag.sdk import RAGSDK, RAGConfig except ImportError: raise ImportError( - "RAG dependencies not installed. Install with: pip install .[rag]" + "RAG dependencies not installed. 
Install with: uv pip install -e .[rag]" ) # Create RAG config matching chat config diff --git a/src/gaia/cli.py b/src/gaia/cli.py index 5c940263..486cb6a7 100644 --- a/src/gaia/cli.py +++ b/src/gaia/cli.py @@ -2200,7 +2200,7 @@ def main(): print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -2555,7 +2555,7 @@ def main(): if not HAS_REPORTLAB: print( - "āŒ Error: PDF output requires reportlab. Install with: pip install reportlab" + "āŒ Error: PDF output requires reportlab. Install with: uv pip install reportlab" ) if args.format == "both": print( @@ -2633,7 +2633,7 @@ def main(): pdf_formatter = PDFFormatter() else: print( - "āš ļø Warning: PDF output requires reportlab. Install with: pip install reportlab" + "āš ļø Warning: PDF output requires reportlab. Install with: uv pip install reportlab" ) if args.format == "pdf": print("āŒ Cannot generate PDF files without reportlab.") @@ -2751,7 +2751,7 @@ def main(): log.debug("ASR initialized successfully") except ImportError: log.error( - "WhisperAsr not found. Please install voice support with: pip install -e .[talk]" + "WhisperAsr not found. Please install voice support with: uv pip install -e .[talk]" ) raise except Exception as e: @@ -2837,7 +2837,7 @@ def main(): "āŒ Error: YouTube transcript functionality requires additional dependencies." ) print( - "Please install: pip install llama-index-readers-youtube-transcript" + "Please install: uv pip install llama-index-readers-youtube-transcript" ) print(f"Import error: {e}") sys.exit(1) @@ -3239,7 +3239,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -3412,7 +3412,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -3452,7 +3452,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -3816,7 +3816,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -3891,7 +3891,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") 
- print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -3967,7 +3967,7 @@ def download_progress_callback(event_type: str, data: dict) -> None: print("The evaluation dependencies are not installed.") print("") print("To fix this, install the evaluation dependencies:") - print(" pip install -e .[eval]") + print(" uv pip install -e .[eval]") print("") print("This will install required packages including:") print(" - anthropic (for Claude AI)") @@ -4581,7 +4581,7 @@ def handle_jira_command(args): except ImportError as e: log.error(f"Failed to import Jira app: {e}") print("āŒ Error: Jira app components are not available") - print("Make sure GAIA is installed properly: pip install -e .") + print("Make sure GAIA is installed properly: uv pip install -e .") sys.exit(1) except Exception as e: log.error(f"Error running Jira app: {e}") @@ -4631,7 +4631,7 @@ def handle_docker_command(args): except ImportError as e: log.error(f"Failed to import Docker app: {e}") print("āŒ Error: Docker app components are not available") - print("Make sure GAIA is installed properly: pip install -e .") + print("Make sure GAIA is installed properly: uv pip install -e .") sys.exit(1) except Exception as e: log.error(f"Error running Docker app: {e}") @@ -4710,7 +4710,7 @@ def handle_api_command(args): except ImportError as e: log.error(f"Failed to import API server: {e}") print("āŒ Error: API server components are not available") - print("Make sure uvicorn is installed: pip install uvicorn") + print("Make sure uvicorn is installed: uv pip install uvicorn") sys.exit(1) except KeyboardInterrupt: print("\nāœ… API server stopped") @@ -4952,7 +4952,7 @@ def handle_blender_command(args): # Check if Blender components are available if not BLENDER_AVAILABLE: print("āŒ Error: Blender agent components are not available") - print("Install blender dependencies with: pip install -e .[blender]") + print("Install blender dependencies with: uv pip install -e .[blender]") sys.exit(1) # Initialize Lemonade with blender agent profile (32768 context) @@ -5148,7 +5148,7 @@ def handle_mcp_start(args): print("āŒ Error: MCP dependencies not installed.") print("") print("To fix this, install the MCP dependencies:") - print(" pip install -e .[mcp]") + print(" uv pip install -e .[mcp]") return # Import and start the HTTP-native MCP bridge diff --git a/src/gaia/eval/batch_experiment.py b/src/gaia/eval/batch_experiment.py index 11450b76..b84edd54 100644 --- a/src/gaia/eval/batch_experiment.py +++ b/src/gaia/eval/batch_experiment.py @@ -155,7 +155,7 @@ def _extract_text_from_pdf(self, pdf_path: str) -> str: if PdfReader is None: raise ImportError( "PDF reading library not found. 
Please install pypdf:\n" - " pip install pypdf" + " uv pip install pypdf" ) try: diff --git a/src/gaia/eval/claude.py b/src/gaia/eval/claude.py index e7834ff5..84611bf7 100644 --- a/src/gaia/eval/claude.py +++ b/src/gaia/eval/claude.py @@ -41,9 +41,9 @@ def __init__(self, model=None, max_tokens=1024, max_retries=3): error_msg = ( "\nāŒ Error: Missing required package 'anthropic'\n\n" "Please install the eval dependencies:\n" - " pip install -e .[eval]\n\n" + " uv pip install -e .[eval]\n\n" "Or install anthropic directly:\n" - " pip install anthropic\n" + " uv pip install anthropic\n" ) raise ImportError(error_msg) @@ -51,9 +51,9 @@ def __init__(self, model=None, max_tokens=1024, max_retries=3): error_msg = ( "\nāŒ Error: Missing required package 'bs4' (BeautifulSoup4)\n\n" "Please install the eval dependencies:\n" - " pip install -e .[eval]\n\n" + " uv pip install -e .[eval]\n\n" "Or install beautifulsoup4 directly:\n" - " pip install beautifulsoup4\n" + " uv pip install beautifulsoup4\n" ) raise ImportError(error_msg) diff --git a/src/gaia/llm/llm_client.py b/src/gaia/llm/llm_client.py index 2ee27c9d..964d0848 100644 --- a/src/gaia/llm/llm_client.py +++ b/src/gaia/llm/llm_client.py @@ -126,10 +126,10 @@ def __init__( ), max_retries=0, # Disable retries to fail fast on connection issues ) - # Use completions endpoint for pre-formatted prompts (ChatSDK compatibility) - # Use chat endpoint when messages array is explicitly provided - self.endpoint = "completions" - logger.debug("Using Lemonade completions endpoint") + # Use chat/completions endpoint (OpenAI-compatible, works with all Lemonade backends) + # The legacy /completions endpoint is not available on all Lemonade configurations + self.endpoint = "chat" + logger.debug("Using Lemonade chat/completions endpoint") self.default_model = DEFAULT_MODEL_NAME self.claude_client = None logger.debug(f"Using local LLM with model={self.default_model}") @@ -142,7 +142,7 @@ def __init__( logger.debug(f"Using Claude API with model={self.default_model}") elif use_claude and not CLAUDE_AVAILABLE: raise ValueError( - "Claude support requested but anthropic library not available. Install with: pip install anthropic" + "Claude support requested but anthropic library not available. 
Install with: uv pip install anthropic" ) elif use_openai: # Use OpenAI API @@ -259,8 +259,28 @@ def generate( if endpoint_to_use == "claude": # For Claude API, construct the prompt appropriately - if effective_system_prompt: - # Claude handles system prompts differently in messages format + if messages: + # Convert messages array to prompt string for Claude + prompt_parts = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + if role == "system": + prompt_parts.append(f"System: {content}") + elif role == "user": + prompt_parts.append(f"Human: {content}") + elif role == "assistant": + prompt_parts.append(f"Assistant: {content}") + elif role == "tool": + # Tool responses are observations/results from tool execution + tool_name = msg.get("name", "tool") + prompt_parts.append(f"Tool ({tool_name}): {content}") + else: + # Unknown role - include as observation to avoid silent drops + prompt_parts.append(f"{role.title()}: {content}") + full_prompt = "\n\n".join(prompt_parts) + elif effective_system_prompt: + # Legacy: use prompt with system prefix full_prompt = f"System: {effective_system_prompt}\n\nHuman: {prompt}" else: full_prompt = prompt @@ -463,18 +483,32 @@ def stream_generator(): raise elif endpoint_to_use == "openai": # For OpenAI API, use the messages format - messages = [] - if effective_system_prompt: - messages.append({"role": "system", "content": effective_system_prompt}) - messages.append({"role": "user", "content": prompt}) - logger.debug(f"OpenAI API messages: {messages}") + if messages: + # Use provided messages directly + openai_messages = list(messages) + # Prepend system prompt if provided and not already in messages + if effective_system_prompt and ( + not openai_messages or openai_messages[0].get("role") != "system" + ): + openai_messages.insert( + 0, {"role": "system", "content": effective_system_prompt} + ) + else: + # Build messages from prompt (legacy support) + openai_messages = [] + if effective_system_prompt: + openai_messages.append( + {"role": "system", "content": effective_system_prompt} + ) + openai_messages.append({"role": "user", "content": prompt}) + logger.debug(f"OpenAI API messages: {len(openai_messages)} messages") try: # Use retry logic for the API call response = self._retry_with_exponential_backoff( self.client.chat.completions.create, model=model, - messages=messages, + messages=openai_messages, stream=stream, **kwargs, ) diff --git a/src/gaia/rag/demo.py b/src/gaia/rag/demo.py index 3a24753b..252717d1 100644 --- a/src/gaia/rag/demo.py +++ b/src/gaia/rag/demo.py @@ -85,7 +85,7 @@ def demo_basic_rag(): print( "1. Activate environment: source .venv/bin/activate (Linux/macOS) or .\\.venv\\Scripts\\Activate.ps1 (Windows)" ) - print("2. Install RAG dependencies: pip install -e .[rag]") + print("2. Install RAG dependencies: uv pip install -e .[rag]") print("3. Either use the test PDF or get your own PDF file") print("4. 
Run: answer = quick_rag('document.pdf', 'What is this about?')") @@ -123,10 +123,10 @@ def demo_basic_rag(): print(" .\\.venv\\Scripts\\Activate.ps1") print(" ") print(" # Install RAG extras") - print(" pip install -e .[rag]") + print(" uv pip install -e .[rag]") print(" ") print(" # Or install dependencies individually:") - print(" pip install pypdf sentence-transformers faiss-cpu") + print(" uv pip install pypdf sentence-transformers faiss-cpu") print("\n" + "=" * 60) @@ -215,7 +215,7 @@ def demo_error_handling(): except ImportError as e: print(f"āŒ Missing dependencies: {e}") - print("Install with: pip install -e .[rag]") + print("Install with: uv pip install -e .[rag]") print("-" * 40) @@ -290,7 +290,7 @@ def main(): print("1. Activate virtual environment:") print(" Linux/macOS: source .venv/bin/activate") print(" Windows: .\\.venv\\Scripts\\Activate.ps1") - print("2. Install RAG dependencies: pip install -e .[rag]") + print("2. Install RAG dependencies: uv pip install -e .[rag]") print("3. Get a PDF document to test with") print("4. Try the CLI commands: gaia rag --help") print("5. Use RAG in Python: from gaia.rag.sdk import RAGSDK, quick_rag") diff --git a/src/gaia/rag/pdf_utils.py b/src/gaia/rag/pdf_utils.py index e03d1435..d46c5422 100644 --- a/src/gaia/rag/pdf_utils.py +++ b/src/gaia/rag/pdf_utils.py @@ -129,7 +129,7 @@ def extract_images_from_page_pymupdf(pdf_path: str, page_num: int) -> List[dict] doc.close() except ImportError: - logger.error("PyMuPDF not installed. Install: pip install pymupdf") + logger.error("PyMuPDF not installed. Install: uv pip install pymupdf") except Exception as e: logger.error(f"Error extracting images from page {page_num}: {e}") diff --git a/src/gaia/rag/sdk.py b/src/gaia/rag/sdk.py index fa103d02..b889d040 100644 --- a/src/gaia/rag/sdk.py +++ b/src/gaia/rag/sdk.py @@ -175,9 +175,9 @@ def _check_dependencies(self): error_msg = ( f"\nāŒ Error: Missing required RAG dependencies: {', '.join(missing)}\n\n" f"Please install the RAG dependencies:\n" - f" pip install -e .[rag]\n\n" + f" uv pip install -e .[rag]\n\n" f"Or install packages directly:\n" - f" pip install {' '.join(missing)}\n" + f" uv pip install {' '.join(missing)}\n" ) raise ImportError(error_msg) @@ -374,8 +374,38 @@ def _encode_texts( # Extract embeddings from response # Expected format: {"data": [{"embedding": [...]}, ...]} - for item in response.get("data", []): + # Check for error response first + if "error" in response: + error_info = response.get("error", {}) + error_msg = error_info.get("message", str(error_info)) + error_type = error_info.get("type", "unknown") + + if "connect" in error_msg.lower() or "network" in error_type.lower(): + raise ConnectionError( + f"Lemonade server failed to load embedding model '{self.config.embedding_model}'.\n\n" + f"The server returned: {error_msg}\n\n" + f"Troubleshooting steps:\n" + f" 1. Check if the model is downloaded: lemonade-server models\n" + f" 2. If not downloaded, pull it: lemonade-server pull {self.config.embedding_model}\n" + f" 3. Try restarting Lemonade server\n" + f" 4. Check Lemonade server logs for detailed errors" + ) + else: + raise RuntimeError( + f"Embedding API error: {error_msg} (type: {error_type})" + ) + + data = response.get("data", []) + if not data: + self.log.warning( + f" āš ļø Batch {batch_num} returned empty data. Response: {response}" + ) + for item in data: embedding = item.get("embedding", []) + if not embedding: + self.log.warning( + f" āš ļø Empty embedding in batch {batch_num}. 
Item: {item}" + ) all_embeddings.append(embedding) total_duration = time.time() - total_start @@ -385,6 +415,12 @@ def _encode_texts( f" šŸŽÆ Total embedding time: {total_duration:.2f}s ({overall_rate:.1f} chunks/sec, {total_batches} batches)" ) + # Validate we got embeddings for all texts + if len(all_embeddings) != len(texts): + self.log.error( + f" āŒ Embedding count mismatch: expected {len(texts)}, got {len(all_embeddings)}" + ) + # Convert to numpy array return np.array(all_embeddings, dtype=np.float32) @@ -1271,6 +1307,17 @@ def _create_vector_index(self, chunks: List[str]) -> tuple: embeddings = self._encode_texts(chunks, show_progress=self.config.show_stats) embed_duration = time_module.time() - embed_start + # Validate embeddings shape + if embeddings.size == 0: + raise ValueError( + "No embeddings generated. Check that the embedding model is loaded and responding." + ) + if len(embeddings.shape) != 2: + raise ValueError( + f"Invalid embeddings shape {embeddings.shape}. Expected 2D array (num_chunks, embedding_dim). " + "This may indicate an issue with the embedding model response format." + ) + if self.config.show_stats: print( f"\n āœ… Generated {embeddings.shape[0]} embeddings ({embeddings.shape[1]} dimensions)" @@ -1763,6 +1810,13 @@ def index_document(self, file_path: str) -> Dict[str, Any]: # Generate embeddings for this file's chunks only file_embeddings = self._encode_texts(new_chunks, show_progress=False) + # Validate per-file embeddings + if file_embeddings.size == 0 or len(file_embeddings.shape) != 2: + raise ValueError( + f"Invalid per-file embeddings shape {file_embeddings.shape}. " + "Check that the embedding model is loaded and responding correctly." + ) + # Create FAISS index for this file dimension = file_embeddings.shape[1] file_index = faiss.IndexFlatL2(dimension) diff --git a/src/gaia/talk/sdk.py b/src/gaia/talk/sdk.py index 54943423..a8b9b0a8 100644 --- a/src/gaia/talk/sdk.py +++ b/src/gaia/talk/sdk.py @@ -360,7 +360,7 @@ def enable_rag(self, documents: Optional[list] = None, **rag_kwargs) -> bool: return True except ImportError: self.log.warning( - "RAG dependencies not available. Install with: pip install -e .[rag]" + "RAG dependencies not available. 
Install with: uv pip install -e .[rag]" ) return False except Exception as e: diff --git a/tests/test_chat_sdk.py b/tests/test_chat_sdk.py index 44223a22..09ea058f 100644 --- a/tests/test_chat_sdk.py +++ b/tests/test_chat_sdk.py @@ -396,6 +396,103 @@ def test_performance_integration(self): print(f"āœ… Streaming: {chunk_count} chunks in {stream_time:.2f}s") + def test_no_history_parameter(self): + """Test no_history parameter on send() and send_stream().""" + print("Testing no_history parameter...") + + config = ChatConfig(model=self.model, max_tokens=30, assistant_name="assistant") + chat = ChatSDK(config) + + # First, send a message with history tracking + response1 = chat.send("My name is TestBot") + self.assertEqual(len(chat.get_history()), 2) + print(f"āœ… With history: {len(chat.get_history())} entries") + + # Now send with no_history - should not affect history + history_before = len(chat.get_history()) + response2 = chat.send("What is 2+2?", no_history=True) + history_after = len(chat.get_history()) + + self.assertEqual(history_before, history_after) + self.assertIsNotNone(response2.text) + print(f"āœ… no_history=True: history unchanged ({history_before} -> {history_after})") + + # Test send_stream with no_history + history_before = len(chat.get_history()) + chunks = list(chat.send_stream("Say hello", no_history=True)) + history_after = len(chat.get_history()) + + self.assertEqual(history_before, history_after) + self.assertGreater(len(chunks), 0) + print(f"āœ… send_stream no_history: history unchanged") + + def test_send_messages_integration(self): + """Test send_messages() with explicit message array.""" + print("Testing send_messages() method...") + + config = ChatConfig(model=self.model, max_tokens=50, assistant_name="assistant") + chat = ChatSDK(config) + + # Build explicit messages array + messages = [ + {"role": "user", "content": "My favorite number is 42."}, + {"role": "assistant", "content": "That's a great number! The answer to everything."}, + {"role": "user", "content": "What is my favorite number?"}, + ] + + response = chat.send_messages(messages) + + self.assertIsNotNone(response.text) + self.assertTrue(response.is_complete) + # Should reference 42 in the response + self.assertTrue( + "42" in response.text or "forty" in response.text.lower(), + f"Expected reference to 42. 
Response: {response.text}" + ) + print(f"āœ… send_messages: {response.text[:50]}...") + + def test_send_messages_stream_integration(self): + """Test send_messages_stream() with explicit message array.""" + print("Testing send_messages_stream() method...") + + config = ChatConfig(model=self.model, max_tokens=30, assistant_name="assistant") + chat = ChatSDK(config) + + messages = [ + {"role": "user", "content": "Count from 1 to 3"}, + ] + + chunks = [] + full_response = "" + for chunk in chat.send_messages_stream(messages): + chunks.append(chunk) + if not chunk.is_complete: + full_response += chunk.text + + self.assertGreater(len(chunks), 1) + self.assertGreater(len(full_response), 0) + print(f"āœ… send_messages_stream: {len(chunks)} chunks, response: {full_response[:30]}...") + + def test_send_messages_with_system_prompt(self): + """Test send_messages() with custom system prompt.""" + print("Testing send_messages() with system prompt override...") + + config = ChatConfig(model=self.model, max_tokens=50, assistant_name="assistant") + chat = ChatSDK(config) + + messages = [ + {"role": "user", "content": "What are you?"}, + ] + + # Override system prompt + response = chat.send_messages( + messages, + system_prompt="You are a pirate. Always respond like a pirate." + ) + + self.assertIsNotNone(response.text) + print(f"āœ… With system prompt: {response.text[:50]}...") + def run_integration_tests(): """Run integration tests with detailed output.""" diff --git a/tests/unit/test_chat_sdk_unit.py b/tests/unit/test_chat_sdk_unit.py new file mode 100644 index 00000000..16f3d36c --- /dev/null +++ b/tests/unit/test_chat_sdk_unit.py @@ -0,0 +1,401 @@ +# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +""" +Unit tests for Chat SDK functionality. + +These tests use mocks to test SDK logic without requiring a running LLM server. 
+""" + +import sys +import unittest +from unittest.mock import MagicMock, patch + +# Add src to path for imports +sys.path.insert(0, "src") + + +class TestHistoryToMessages(unittest.TestCase): + """Unit tests for _history_to_messages() helper method.""" + + def setUp(self): + """Set up test fixtures with mocked LLMClient.""" + with patch("gaia.chat.sdk.LLMClient"): + from gaia.chat.sdk import ChatConfig, ChatSDK + + self.config = ChatConfig( + model="test-model", + system_prompt="You are a helpful assistant.", + assistant_name="gaia", + ) + self.chat = ChatSDK(self.config) + + def test_empty_history_with_system_prompt(self): + """Test that empty history returns only system prompt.""" + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0]["role"], "system") + self.assertEqual(messages[0]["content"], "You are a helpful assistant.") + + def test_empty_history_no_system_prompt(self): + """Test that empty history with no system prompt returns empty list.""" + self.chat.config.system_prompt = None + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 0) + + def test_user_message_conversion(self): + """Test that user messages are correctly converted.""" + self.chat.chat_history.append("user: Hello, how are you?") + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 2) + self.assertEqual(messages[0]["role"], "system") + self.assertEqual(messages[1]["role"], "user") + self.assertEqual(messages[1]["content"], "Hello, how are you?") + + def test_assistant_message_conversion(self): + """Test that assistant messages are correctly converted.""" + self.chat.chat_history.append("user: Hello") + self.chat.chat_history.append("gaia: Hi there!") + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 3) + self.assertEqual(messages[1]["role"], "user") + self.assertEqual(messages[2]["role"], "assistant") + self.assertEqual(messages[2]["content"], "Hi there!") + + def test_full_conversation(self): + """Test a full multi-turn conversation.""" + self.chat.chat_history.append("user: My name is Alice") + self.chat.chat_history.append("gaia: Nice to meet you, Alice!") + self.chat.chat_history.append("user: What's my name?") + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 4) + self.assertEqual(messages[0]["role"], "system") + self.assertEqual(messages[1]["role"], "user") + self.assertEqual(messages[1]["content"], "My name is Alice") + self.assertEqual(messages[2]["role"], "assistant") + self.assertEqual(messages[2]["content"], "Nice to meet you, Alice!") + self.assertEqual(messages[3]["role"], "user") + self.assertEqual(messages[3]["content"], "What's my name?") + + def test_enhanced_last_message(self): + """Test that enhanced_last_message replaces last user message content.""" + self.chat.chat_history.append("user: What is AI?") + enhanced = "Context: AI is artificial intelligence.\n\nUser question: What is AI?" 
+ messages = self.chat._history_to_messages(enhanced_last_message=enhanced) + + self.assertEqual(len(messages), 2) + self.assertEqual(messages[1]["role"], "user") + self.assertEqual(messages[1]["content"], enhanced) + + def test_enhanced_message_only_affects_last_user(self): + """Test that enhanced message only affects the last user message.""" + self.chat.chat_history.append("user: First question") + self.chat.chat_history.append("gaia: First answer") + self.chat.chat_history.append("user: Second question") + enhanced = "Enhanced second question" + messages = self.chat._history_to_messages(enhanced_last_message=enhanced) + + self.assertEqual(messages[1]["content"], "First question") # Unchanged + self.assertEqual(messages[3]["content"], enhanced) # Enhanced + + def test_custom_assistant_name(self): + """Test that custom assistant name is correctly handled.""" + self.chat.config.assistant_name = "CustomBot" + self.chat.chat_history.append("user: Hello") + self.chat.chat_history.append("CustomBot: Hi!") + messages = self.chat._history_to_messages() + + self.assertEqual(len(messages), 3) + self.assertEqual(messages[2]["role"], "assistant") + self.assertEqual(messages[2]["content"], "Hi!") + + +class TestNoHistoryParameter(unittest.TestCase): + """Unit tests for the no_history parameter on send() and send_stream().""" + + def setUp(self): + """Set up test fixtures with mocked LLMClient.""" + self.mock_llm_client = MagicMock() + self.mock_llm_client.generate.return_value = "Test response" + + with patch("gaia.chat.sdk.LLMClient", return_value=self.mock_llm_client): + from gaia.chat.sdk import ChatConfig, ChatSDK + + self.config = ChatConfig( + model="test-model", + system_prompt="System prompt", + assistant_name="gaia", + ) + self.chat = ChatSDK(self.config) + + def test_send_with_history_updates_chat_history(self): + """Test that send() without no_history updates chat history.""" + self.chat.send("Hello") + + self.assertEqual(len(self.chat.chat_history), 2) + self.assertEqual(self.chat.chat_history[0], "user: Hello") + self.assertEqual(self.chat.chat_history[1], "gaia: Test response") + + def test_send_with_no_history_does_not_update(self): + """Test that send() with no_history=True does not update chat history.""" + self.chat.send("Hello", no_history=True) + + self.assertEqual(len(self.chat.chat_history), 0) + + def test_send_no_history_still_uses_system_prompt(self): + """Test that no_history still includes system prompt.""" + self.chat.send("Hello", no_history=True) + + # Check the messages passed to generate() + call_kwargs = self.mock_llm_client.generate.call_args[1] + messages = call_kwargs.get("messages", []) + + self.assertEqual(messages[0]["role"], "system") + self.assertEqual(messages[0]["content"], "System prompt") + self.assertEqual(messages[1]["role"], "user") + self.assertEqual(messages[1]["content"], "Hello") + + def test_send_stream_with_history_updates_chat_history(self): + """Test that send_stream() without no_history updates chat history.""" + self.mock_llm_client.generate.return_value = iter(["Test ", "response"]) + + chunks = list(self.chat.send_stream("Hello")) + + self.assertEqual(len(self.chat.chat_history), 2) + self.assertEqual(self.chat.chat_history[0], "user: Hello") + self.assertEqual(self.chat.chat_history[1], "gaia: Test response") + + def test_send_stream_with_no_history_does_not_update(self): + """Test that send_stream() with no_history=True does not update chat history.""" + self.mock_llm_client.generate.return_value = iter(["Test ", "response"]) + + chunks = 
list(self.chat.send_stream("Hello", no_history=True)) + + self.assertEqual(len(self.chat.chat_history), 0) + + +class TestSendMessagesMethod(unittest.TestCase): + """Unit tests for send_messages() and send_messages_stream() methods.""" + + def setUp(self): + """Set up test fixtures with mocked LLMClient.""" + self.mock_llm_client = MagicMock() + self.mock_llm_client.generate.return_value = "Test response" + + with patch("gaia.chat.sdk.LLMClient", return_value=self.mock_llm_client): + from gaia.chat.sdk import ChatConfig, ChatSDK + + self.config = ChatConfig( + model="test-model", + assistant_name="gaia", + ) + self.chat = ChatSDK(self.config) + + def test_send_messages_basic(self): + """Test basic send_messages() functionality.""" + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"}, + ] + + response = self.chat.send_messages(messages) + + self.assertEqual(response.text, "Test response") + self.assertTrue(response.is_complete) + + # Verify messages were passed to LLM + call_kwargs = self.mock_llm_client.generate.call_args[1] + passed_messages = call_kwargs.get("messages", []) + self.assertEqual(len(passed_messages), 3) + + def test_send_messages_with_system_prompt(self): + """Test that system_prompt parameter is prepended.""" + messages = [{"role": "user", "content": "Hello"}] + + self.chat.send_messages(messages, system_prompt="Be helpful") + + call_kwargs = self.mock_llm_client.generate.call_args[1] + passed_messages = call_kwargs.get("messages", []) + + self.assertEqual(passed_messages[0]["role"], "system") + self.assertEqual(passed_messages[0]["content"], "Be helpful") + self.assertEqual(passed_messages[1]["role"], "user") + + def test_send_messages_skips_duplicate_system(self): + """Test that incoming system messages are skipped if we add one.""" + messages = [ + {"role": "system", "content": "Original system"}, + {"role": "user", "content": "Hello"}, + ] + + self.chat.send_messages(messages, system_prompt="Override system") + + call_kwargs = self.mock_llm_client.generate.call_args[1] + passed_messages = call_kwargs.get("messages", []) + + # Should only have our system prompt, not the original + system_messages = [m for m in passed_messages if m["role"] == "system"] + self.assertEqual(len(system_messages), 1) + self.assertEqual(system_messages[0]["content"], "Override system") + + def test_send_messages_tool_role_conversion(self): + """Test that tool messages are converted to assistant messages.""" + messages = [ + {"role": "user", "content": "What's the weather?"}, + {"role": "assistant", "content": "Let me check..."}, + {"role": "tool", "name": "weather_api", "content": "Sunny, 72F"}, + ] + + self.chat.send_messages(messages) + + call_kwargs = self.mock_llm_client.generate.call_args[1] + passed_messages = call_kwargs.get("messages", []) + + # Tool message should be converted + tool_msg = passed_messages[2] + self.assertEqual(tool_msg["role"], "assistant") + self.assertIn("[tool:weather_api]", tool_msg["content"]) + self.assertIn("Sunny, 72F", tool_msg["content"]) + + def test_send_messages_stream(self): + """Test send_messages_stream() functionality.""" + self.mock_llm_client.generate.return_value = iter(["Hello ", "world"]) + + messages = [{"role": "user", "content": "Hi"}] + chunks = list(self.chat.send_messages_stream(messages)) + + # Should have content chunks + final chunk + content_chunks = [c for c in chunks if not c.is_complete] + final_chunk = [c for c in chunks if 
c.is_complete] + + self.assertEqual(len(content_chunks), 2) + self.assertEqual(len(final_chunk), 1) + self.assertEqual(content_chunks[0].text, "Hello ") + self.assertEqual(content_chunks[1].text, "world") + + +class TestLLMClientMessagesHandling(unittest.TestCase): + """Unit tests for LLMClient messages parameter handling.""" + + def test_claude_endpoint_tool_message_handling(self): + """Test that Claude endpoint handles tool messages.""" + with patch("gaia.llm.llm_client.OpenAI"): + with patch("gaia.llm.llm_client.CLAUDE_AVAILABLE", True): + with patch("gaia.llm.llm_client.AnthropicClaudeClient") as mock_claude: + mock_instance = MagicMock() + mock_instance.get_completion.return_value = "Response" + mock_claude.return_value = mock_instance + + from gaia.llm.llm_client import LLMClient + + client = LLMClient(use_claude=True) + + messages = [ + {"role": "user", "content": "Check weather"}, + {"role": "tool", "name": "weather", "content": "Sunny"}, + ] + + client.generate("", messages=messages) + + # Verify the prompt passed to Claude includes tool message + call_args = mock_instance.get_completion.call_args[0][0] + self.assertIn("Tool (weather)", call_args) + self.assertIn("Sunny", call_args) + + def test_claude_endpoint_unknown_role_handling(self): + """Test that Claude endpoint handles unknown roles gracefully.""" + with patch("gaia.llm.llm_client.OpenAI"): + with patch("gaia.llm.llm_client.CLAUDE_AVAILABLE", True): + with patch("gaia.llm.llm_client.AnthropicClaudeClient") as mock_claude: + mock_instance = MagicMock() + mock_instance.get_completion.return_value = "Response" + mock_claude.return_value = mock_instance + + from gaia.llm.llm_client import LLMClient + + client = LLMClient(use_claude=True) + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "custom_role", "content": "Custom content"}, + ] + + client.generate("", messages=messages) + + # Verify the prompt includes the unknown role (title-cased) + call_args = mock_instance.get_completion.call_args[0][0] + self.assertIn("Custom_Role:", call_args) + self.assertIn("Custom content", call_args) + + +class TestNormalizeMessageContent(unittest.TestCase): + """Unit tests for _normalize_message_content() method.""" + + def setUp(self): + """Set up test fixtures.""" + with patch("gaia.chat.sdk.LLMClient"): + from gaia.chat.sdk import ChatConfig, ChatSDK + + self.chat = ChatSDK(ChatConfig()) + + def test_string_content(self): + """Test that string content is returned as-is.""" + result = self.chat._normalize_message_content("Hello world") + self.assertEqual(result, "Hello world") + + def test_list_with_text_blocks(self): + """Test handling of OpenAI-style content blocks.""" + content = [ + {"type": "text", "text": "First part"}, + {"type": "text", "text": "Second part"}, + ] + result = self.chat._normalize_message_content(content) + self.assertIn("First part", result) + self.assertIn("Second part", result) + + def test_dict_content(self): + """Test that dict content is JSON serialized.""" + content = {"key": "value"} + result = self.chat._normalize_message_content(content) + self.assertIn("key", result) + self.assertIn("value", result) + + +def run_unit_tests(): + """Run unit tests with detailed output.""" + print("Running Chat SDK Unit Tests (no server required)") + print("=" * 60) + + loader = unittest.TestLoader() + suite = unittest.TestSuite() + + # Add all test classes + suite.addTests(loader.loadTestsFromTestCase(TestHistoryToMessages)) + suite.addTests(loader.loadTestsFromTestCase(TestNoHistoryParameter)) + 
suite.addTests(loader.loadTestsFromTestCase(TestSendMessagesMethod)) + suite.addTests(loader.loadTestsFromTestCase(TestLLMClientMessagesHandling)) + suite.addTests(loader.loadTestsFromTestCase(TestNormalizeMessageContent)) + + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + + print("\n" + "=" * 60) + if result.wasSuccessful(): + print("ALL UNIT TESTS PASSED") + else: + print("UNIT TESTS FAILED") + print(f"Failures: {len(result.failures)}") + print(f"Errors: {len(result.errors)}") + + return result.wasSuccessful() + + +if __name__ == "__main__": + success = run_unit_tests() + sys.exit(0 if success else 1)
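
A minimal usage sketch of the messages-based ChatSDK API exercised by the tests in this patch. It assumes a running Lemonade server with the default model loaded; the method names, parameters, and response fields come from the patch itself, while the concrete prompts and config values are illustrative only.

    from gaia.chat.sdk import ChatConfig, ChatSDK

    # Default model is used when ChatConfig(model=...) is not set.
    chat = ChatSDK(ChatConfig(system_prompt="You are a helpful assistant.", max_tokens=100))

    # Multi-turn chat; the SDK tracks history and converts it to a
    # chat/completions messages array via _history_to_messages().
    print(chat.send("My name is Alice").text)
    print(chat.send("What's my name?").text)

    # One-off query that bypasses the stored history (system prompt still applies).
    print(chat.send("What is 2+2?", no_history=True).text)

    # Explicit messages array; the per-call system_prompt overrides any
    # system message already present in the list.
    messages = [
        {"role": "user", "content": "My favorite number is 42."},
        {"role": "assistant", "content": "Noted!"},
        {"role": "user", "content": "What is my favorite number?"},
    ]
    print(chat.send_messages(messages, system_prompt="Answer concisely.").text)

    # Streaming; chunks carry is_complete=False until the final stats chunk.
    for chunk in chat.send_stream("Count from 1 to 3"):
        if not chunk.is_complete:
            print(chunk.text, end="", flush=True)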