diff --git a/README.md b/README.md index 21cf58f88..10996a82e 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,15 @@ Or fork & run on Vercel [//]: # (big-AGI is an open book; see the **[ready-to-ship and future ideas](https://github.com/users/enricoros/projects/4/views/2)** in our open roadmap) -### What's New in 1.16.1 · May 13, 2024 (minor release, models support) +### What's New in 1.16.2 · Jun 7, 2024 (minor release) + +- Improve web downloads, as text, markdown, or HTML +- Proper support for Gemini models +- Added the latest Mistral model +- Tokenizer support for gpt-4o +- Updates to Beam + +### What's New in 1.16.1 · May 13, 2024 (minor release) - Support for the new OpenAI GPT-4o 2024-05-13 model diff --git a/docs/changelog.md b/docs/changelog.md index 148e322ae..ba3a2a3ef 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,7 +10,15 @@ by release. - milestone: [1.17.0](https://github.com/enricoros/big-agi/milestone/17) - work in progress: [big-AGI open roadmap](https://github.com/users/enricoros/projects/4/views/2), [help here](https://github.com/users/enricoros/projects/4/views/4) -### What's New in 1.16.1 · May 13, 2024 (minor release, models support) +### What's New in 1.16.2 · Jun 7, 2024 (minor release) + +- Improve web downloads, as text, markdown, or HTML +- Proper support for Gemini models +- Added the latest Mistral model +- Tokenizer support for gpt-4o +- Updates to Beam + +### What's New in 1.16.1 · May 13, 2024 (minor release) - Support for the new OpenAI GPT-4o 2024-05-13 model diff --git a/src/apps/call/Telephone.tsx b/src/apps/call/Telephone.tsx index 741a33ee5..5f333de2c 100644 --- a/src/apps/call/Telephone.tsx +++ b/src/apps/call/Telephone.tsx @@ -1,5 +1,5 @@ import * as React from 'react'; -import { shallow } from 'zustand/shallow'; +import { useShallow } from 'zustand/react/shallow'; import { Box, Card, ListDivider, ListItemDecorator, MenuItem, Switch, Typography } from '@mui/joy'; import ArrowBackIcon from '@mui/icons-material/ArrowBack'; @@ -99,7 +99,7 @@ export function Telephone(props: { // external state const { chatLLMId, chatLLMDropdown } = useChatLLMDropdown(); - const { chatTitle, reMessages } = useChatStore(state => { + const { chatTitle, reMessages } = useChatStore(useShallow(state => { const conversation = props.callIntent.conversationId ? state.conversations.find(conversation => conversation.id === props.callIntent.conversationId) ?? null : null; @@ -107,7 +107,7 @@ export function Telephone(props: { chatTitle: conversation ? conversationTitle(conversation) : null, reMessages: conversation ? conversation.messages : null, }; - }, shallow); + })); const persona = SystemPurposes[props.callIntent.personaId as SystemPurposeId] ?? undefined; const personaCallStarters = persona?.call?.starters ?? undefined; const personaVoiceId = overridePersonaVoice ? undefined : (persona?.voices?.elevenLabs?.voiceId ?? 
undefined); @@ -225,7 +225,7 @@ export function Telephone(props: { let finalText = ''; let error: any | null = null; setPersonaTextInterim('💭...'); - llmStreamingChatGenerate(chatLLMId, callPrompt, null, null, responseAbortController.current.signal, ({ textSoFar }) => { + llmStreamingChatGenerate(chatLLMId, callPrompt, 'call', callMessages[0].id, null, null, responseAbortController.current.signal, ({ textSoFar }) => { const text = textSoFar?.trim(); if (text) { finalText = text; diff --git a/src/apps/chat/editors/chat-stream.ts b/src/apps/chat/editors/chat-stream.ts index 5b19bc961..f51290c59 100644 --- a/src/apps/chat/editors/chat-stream.ts +++ b/src/apps/chat/editors/chat-stream.ts @@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms'; import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient'; import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions'; import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle'; -import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client'; +import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client'; import { speakText } from '~/modules/elevenlabs/elevenlabs.client'; import type { DMessage } from '~/common/state/store-chats'; @@ -34,6 +34,8 @@ export async function runAssistantUpdatingState(conversationId: string, history: const messageStatus = await streamAssistantMessage( assistantLlmId, history.map((m): VChatMessageIn => ({ role: m.role, content: m.text })), + 'conversation', + conversationId, parallelViewCount, autoSpeak, (update) => cHandler.messageEdit(assistantMessageId, update, false), @@ -61,6 +63,8 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin export async function streamAssistantMessage( llmId: DLLMId, messagesHistory: VChatMessageIn[], + contextName: VChatContextName, + contextRef: VChatContextRef, throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root autoSpeak: ChatAutoSpeakType, editMessage: (update: Partial) => void, @@ -92,7 +96,7 @@ export async function streamAssistantMessage( const incrementalAnswer: Partial = { text: '' }; try { - await llmStreamingChatGenerate(llmId, messagesHistory, null, null, abortSignal, (update: StreamingClientUpdate) => { + await llmStreamingChatGenerate(llmId, messagesHistory, contextName, contextRef, null, null, abortSignal, (update: StreamingClientUpdate) => { const textSoFar = update.textSoFar; // grow the incremental message diff --git a/src/apps/news/news.data.tsx b/src/apps/news/news.data.tsx index 30f5f6fba..0fe6eee40 100644 --- a/src/apps/news/news.data.tsx +++ b/src/apps/news/news.data.tsx @@ -61,9 +61,10 @@ export const NewsItems: NewsItem[] = [ ] }*/ { - versionCode: '1.16.1', + versionCode: '1.16.2', versionName: 'Crystal Clear', - versionDate: new Date('2024-05-13T19:00:00Z'), + versionDate: new Date('2024-06-07T05:00:00Z'), + // versionDate: new Date('2024-05-13T19:00:00Z'), // versionDate: new Date('2024-05-09T00:00:00Z'), versionCoverImage: coverV116, items: [ @@ -77,6 +78,7 @@ export const NewsItems: NewsItem[] = [ { text: <>Updated Anthropic*, Groq, Ollama, OpenAI*, OpenRouter*, and Perplexity }, { text: <>Developers: update LLMs data structures, dev: true }, { text: <>1.16.1: Support for OpenAI GPT-4o (refresh your OpenAI models) }, + { text: <>1.16.2: Proper Gemini support, HTML/Markdown downloads, and latest Mistral }, 
], }, { diff --git a/src/apps/news/news.version.tsx b/src/apps/news/news.version.tsx index 29d6f97e3..a4915d342 100644 --- a/src/apps/news/news.version.tsx +++ b/src/apps/news/news.version.tsx @@ -7,7 +7,7 @@ import { useAppStateStore } from '~/common/state/store-appstate'; // update this variable every time you want to broadcast a new version to clients -export const incrementalNewsVersion: number = 16.1; +export const incrementalNewsVersion: number = 16.1; // not notifying for 16.2 interface NewsState { diff --git a/src/apps/personas/creator/Creator.tsx b/src/apps/personas/creator/Creator.tsx index 7bc70d7b0..23a4e81c4 100644 --- a/src/apps/personas/creator/Creator.tsx +++ b/src/apps/personas/creator/Creator.tsx @@ -1,4 +1,5 @@ import * as React from 'react'; +import { v4 as uuidv4 } from 'uuid'; import { Alert, Box, Button, Card, CardContent, CircularProgress, Divider, FormLabel, Grid, IconButton, LinearProgress, Tab, tabClasses, TabList, TabPanel, Tabs, Typography } from '@mui/joy'; import AddIcon from '@mui/icons-material/Add'; @@ -102,8 +103,11 @@ export function Creator(props: { display: boolean }) { strings: editedInstructions, stringEditors: instructionEditors, } = useFormEditTextArray(Prompts, PromptTitles); - const creationChainSteps = React.useMemo(() => { - return createChain(editedInstructions, PromptTitles); + const { steps: creationChainSteps, id: chainId } = React.useMemo(() => { + return { + steps: createChain(editedInstructions, PromptTitles), + id: uuidv4(), + }; }, [editedInstructions]); const llmLabel = personaLlm?.label || undefined; @@ -122,7 +126,7 @@ export function Creator(props: { display: boolean }) { chainError, userCancelChain, restartChain, - } = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? undefined, savePersona); + } = useLLMChain(creationChainSteps, personaLlm?.id, chainInputText ?? 
undefined, savePersona, 'persona-extract', chainId); // Reset the relevant state when the selected tab changes diff --git a/src/modules/aifn/digrams/DiagramsModal.tsx b/src/modules/aifn/digrams/DiagramsModal.tsx index 55e092f5f..c50a3a295 100644 --- a/src/modules/aifn/digrams/DiagramsModal.tsx +++ b/src/modules/aifn/digrams/DiagramsModal.tsx @@ -68,7 +68,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi const [diagramLlm, llmComponent] = useFormRadioLlmType('Generator', 'chat'); // derived state - const { conversationId, text: subject } = props.config; + const { conversationId, messageId, text: subject } = props.config; const diagramLlmId = diagramLlm?.id; @@ -98,7 +98,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi const diagramPrompt = bigDiagramPrompt(diagramType, diagramLanguage, systemMessage.text, subject, customInstruction); try { - await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, null, null, stepAbortController.signal, + await llmStreamingChatGenerate(diagramLlm.id, diagramPrompt, 'ai-diagram', messageId, null, null, stepAbortController.signal, ({ textSoFar }) => textSoFar && setDiagramCode(diagramCode = textSoFar), ); } catch (error: any) { @@ -109,7 +109,7 @@ export function DiagramsModal(props: { config: DiagramConfig, onClose: () => voi setAbortController(null); } - }, [abortController, conversationId, diagramLanguage, diagramLlm, diagramType, subject, customInstruction]); + }, [abortController, conversationId, customInstruction, diagramLanguage, diagramLlm, diagramType, messageId, subject]); // [Effect] Auto-abort on unmount diff --git a/src/modules/aifn/flatten/FlattenerModal.tsx b/src/modules/aifn/flatten/FlattenerModal.tsx index 088498689..8a3723852 100644 --- a/src/modules/aifn/flatten/FlattenerModal.tsx +++ b/src/modules/aifn/flatten/FlattenerModal.tsx @@ -117,7 +117,7 @@ export function FlattenerModal(props: { await startStreaming(llm.id, [ { role: 'system', content: flattenProfile.systemPrompt }, { role: 'user', content: encodeConversationAsUserMessage(flattenProfile.userPrompt, messages) }, - ]); + ], 'ai-flattener', messages[0].id); }, [llm, props.conversationId, startStreaming]); diff --git a/src/modules/aifn/useLLMChain.ts b/src/modules/aifn/useLLMChain.ts index 091ca03fd..1e7a654ee 100644 --- a/src/modules/aifn/useLLMChain.ts +++ b/src/modules/aifn/useLLMChain.ts @@ -1,7 +1,7 @@ import * as React from 'react'; import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms'; -import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client'; +import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client'; // set to true to log to the console @@ -20,7 +20,7 @@ export interface LLMChainStep { /** * React hook to manage a chain of LLM transformations. 
*/ -export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess?: (output: string, input: string) => void) { +export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) { // state const [chain, setChain] = React.useState(null); @@ -114,7 +114,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch setChainStepInterimText(null); // LLM call (streaming, cancelable) - llmStreamingChatGenerate(llmId, llmChatInput, null, null, stepAbortController.signal, + llmStreamingChatGenerate(llmId, llmChatInput, contextName, contextRef, null, null, stepAbortController.signal, ({ textSoFar }) => { textSoFar && setChainStepInterimText(interimText = textSoFar); }) @@ -141,7 +141,7 @@ export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, ch stepAbortController.abort('step aborted'); _chainAbortController.signal.removeEventListener('abort', globalToStepListener); }; - }, [chain, llmId, onSuccess]); + }, [chain, contextRef, contextName, llmId, onSuccess]); return { diff --git a/src/modules/aifn/useStreamChatText.ts b/src/modules/aifn/useStreamChatText.ts index b7158e8dc..11b856f7e 100644 --- a/src/modules/aifn/useStreamChatText.ts +++ b/src/modules/aifn/useStreamChatText.ts @@ -1,7 +1,7 @@ import * as React from 'react'; import type { DLLMId } from '~/modules/llms/store-llms'; -import { llmStreamingChatGenerate, VChatMessageIn } from '~/modules/llms/llm.client'; +import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client'; export function useStreamChatText() { @@ -13,7 +13,7 @@ export function useStreamChatText() { const abortControllerRef = React.useRef(null); - const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[]) => { + const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => { setStreamError(null); setPartialText(null); setText(null); @@ -24,7 +24,7 @@ export function useStreamChatText() { try { let lastText = ''; - await llmStreamingChatGenerate(llmId, prompt, null, null, abortControllerRef.current.signal, ({ textSoFar }) => { + await llmStreamingChatGenerate(llmId, prompt, contextName, contextRef, null, null, abortControllerRef.current.signal, ({ textSoFar }) => { if (textSoFar) { lastText = textSoFar; setPartialText(lastText); diff --git a/src/modules/beam/gather/instructions/ChatGenerateInstruction.tsx b/src/modules/beam/gather/instructions/ChatGenerateInstruction.tsx index 1f8452729..a00fd8384 100644 --- a/src/modules/beam/gather/instructions/ChatGenerateInstruction.tsx +++ b/src/modules/beam/gather/instructions/ChatGenerateInstruction.tsx @@ -96,7 +96,7 @@ export async function executeChatGenerate(_i: ChatGenerateInstruction, inputs: E }; // LLM Streaming generation - return streamAssistantMessage(inputs.llmId, history, getUXLabsHighPerformance() ? 0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal) + return streamAssistantMessage(inputs.llmId, history, 'beam-gather', inputs.contextRef, getUXLabsHighPerformance() ? 
0 : 1, 'off', onMessageUpdate, inputs.chainAbortController.signal) .then((status) => { // re-throw errors, as streamAssistantMessage catches internally if (status.outcome === 'aborted') { diff --git a/src/modules/beam/gather/instructions/beam.gather.execution.tsx b/src/modules/beam/gather/instructions/beam.gather.execution.tsx index 1836f9cf0..9419e6cc6 100644 --- a/src/modules/beam/gather/instructions/beam.gather.execution.tsx +++ b/src/modules/beam/gather/instructions/beam.gather.execution.tsx @@ -23,6 +23,7 @@ export interface ExecutionInputState { readonly chatMessages: DMessage[]; readonly rayMessages: DMessage[]; readonly llmId: DLLMId; + readonly contextRef: string; // not useful // interaction readonly chainAbortController: AbortController; readonly updateProgressComponent: (component: React.ReactNode) => void; @@ -67,6 +68,7 @@ export function gatherStartFusion( chatMessages: chatMessages, rayMessages: rayMessages, llmId: initialFusion.llmId, + contextRef: initialFusion.fusionId, // interaction chainAbortController: new AbortController(), updateProgressComponent: (component: React.ReactNode) => onUpdateBFusion({ fusingProgressComponent: component }), diff --git a/src/modules/beam/scatter/beam.scatter.ts b/src/modules/beam/scatter/beam.scatter.ts index d229a6d30..0b9cb51ac 100644 --- a/src/modules/beam/scatter/beam.scatter.ts +++ b/src/modules/beam/scatter/beam.scatter.ts @@ -67,7 +67,7 @@ function rayScatterStart(ray: BRay, llmId: DLLMId | null, inputHistory: DMessage // stream the assistant's messages const messagesHistory: VChatMessageIn[] = inputHistory.map(({ role, text }) => ({ role, content: text })); - streamAssistantMessage(llmId, messagesHistory, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal) + streamAssistantMessage(llmId, messagesHistory, 'beam-scatter', ray.rayId, getUXLabsHighPerformance() ? 0 : rays.length, 'off', updateMessage, abortController.signal) .then((status) => { _rayUpdate(ray.rayId, { status: (status.outcome === 'success') ? 
'success' diff --git a/src/modules/llms/llm.client.ts b/src/modules/llms/llm.client.ts index 55b70dd09..3a6bf5804 100644 --- a/src/modules/llms/llm.client.ts +++ b/src/modules/llms/llm.client.ts @@ -21,6 +21,16 @@ export interface VChatMessageIn { export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef; +export type VChatContextName = + | 'conversation' + | 'ai-diagram' + | 'ai-flattener' + | 'beam-scatter' + | 'beam-gather' + | 'call' + | 'persona-extract'; +export type VChatContextRef = string; + export interface VChatMessageOut { role: 'assistant' | 'system' | 'user'; content: string; @@ -139,6 +149,8 @@ export async function llmChatGenerateOrThrow( llmId: DLLMId, messages: VChatMessageIn[], + contextName: VChatContextName, + contextRef: VChatContextRef, functions: VChatFunctionIn[] | null, forceFunctionName: string | null, abortSignal: AbortSignal, @@ -161,5 +173,5 @@ export async function llmStreamingChatGenerate setTimeout(resolve, delay)); // execute via the vendor - return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, functions, forceFunctionName, abortSignal, onUpdate); + return await vendor.streamingChatGenerateOrThrow(access, llmId, llmOptions, messages, contextName, contextRef, functions, forceFunctionName, abortSignal, onUpdate); } diff --git a/src/modules/llms/server/llm.server.streaming.ts b/src/modules/llms/server/llm.server.streaming.ts index 88b7b4e2e..03c788782 100644 --- a/src/modules/llms/server/llm.server.streaming.ts +++ b/src/modules/llms/server/llm.server.streaming.ts @@ -19,7 +19,7 @@ import { OLLAMA_PATH_CHAT, ollamaAccess, ollamaAccessSchema, ollamaChatCompletio // OpenAI server imports import type { OpenAIWire } from './openai/openai.wiretypes'; -import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from './openai/openai.router'; +import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router'; // configuration @@ -46,11 +46,17 @@ type MuxingFormat = 'sse' | 'json-nl'; */ type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean }; +const streamingContextSchema = z.object({ + method: z.literal('chat-stream'), + name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']), + ref: z.string(), +}); const chatStreamingInputSchema = z.object({ access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]), model: openAIModelSchema, history: openAIHistorySchema, + context: streamingContextSchema, }); export type ChatStreamingInputSchema = z.infer; @@ -72,14 +78,15 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise; try { - requestData = _prepareRequestData(access, model, history); + requestData = _prepareRequestData(_chatStreamingInput); } catch (error: any) { console.error(`[POST] /api/llms/stream: ${prettyDialect}: prepareRequestData issue:`, safeErrorString(error)); return new NextResponse(`**[Service Issue] ${prettyDialect}**: ${safeErrorString(error) || 'Unknown streaming error'}`, { @@ -103,7 +110,7 @@ export async function llmStreamingRelayHandler(req: NextRequest): Promise> { @@ -53,6 +53,7 @@ export interface IModelVendor void, diff --git a/src/modules/llms/vendors/unifiedStreamingClient.ts b/src/modules/llms/vendors/unifiedStreamingClient.ts index 07094baa6..c8aea7577 100644 --- 
a/src/modules/llms/vendors/unifiedStreamingClient.ts +++ b/src/modules/llms/vendors/unifiedStreamingClient.ts @@ -3,7 +3,7 @@ import { frontendSideFetch } from '~/common/util/clientFetchers'; import type { ChatStreamingInputSchema, ChatStreamingPreambleModelSchema, ChatStreamingPreambleStartSchema } from '../server/llm.server.streaming'; import type { DLLMId } from '../store-llms'; -import type { VChatFunctionIn, VChatMessageIn } from '../llm.client'; +import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn } from '../llm.client'; import type { OpenAIAccessSchema } from '../server/openai/openai.router'; import type { OpenAIWire } from '../server/openai/openai.wiretypes'; @@ -29,6 +29,7 @@ export async function unifiedStreamingClient void, @@ -55,6 +56,11 @@ export async function unifiedStreamingClient
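
Taken together, the hunks above thread a `contextName` / `contextRef` pair through every streaming call, from the UI call sites down to the `/api/llms/stream` relay, which now validates it with `streamingContextSchema`. Below is a minimal sketch, not part of the diff, of what a caller looks like after this patch; it assumes only the argument order and module paths visible in the call sites above (Telephone.tsx, chat-stream.ts, DiagramsModal.tsx) and simplifies error handling.

```ts
// Illustrative sketch only — assumes the updated signature shown in the diff:
//   llmStreamingChatGenerate(llmId, messages, contextName, contextRef,
//                            functions, forceFunctionName, abortSignal, onUpdate)
import type { DLLMId } from '~/modules/llms/store-llms';
import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';

async function streamWithContext(
  llmId: DLLMId,
  messages: VChatMessageIn[],
  contextName: VChatContextName, // e.g. 'conversation', 'call', 'ai-diagram', 'beam-scatter', ...
  contextRef: VChatContextRef,   // the id that originated the request, e.g. a conversationId or messageId
  abortSignal: AbortSignal,
): Promise<string> {
  let finalText = '';

  // The context pair now rides along with every streaming request, so the
  // server-side relay can validate it and know where the generation came from.
  await llmStreamingChatGenerate(
    llmId, messages,
    contextName, contextRef,
    null, null,                  // functions / forceFunctionName: unchanged, still nullable
    abortSignal,
    ({ textSoFar }) => {
      if (textSoFar)
        finalText = textSoFar;
    },
  );

  return finalText;
}
```

This mirrors the design choice visible in the diff: callers that already have a natural anchor pass it as the ref (the conversation id in chat-stream.ts, the message id in DiagramsModal.tsx and FlattenerModal.tsx, the ray or fusion id in Beam), while the context name is a closed enum so the relay can reject unknown origins.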