Skip to content

VinF Hybrid Inference #4: ChromeAdapter in stream methods #8918

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 19 additions & 31 deletions e2e/sample-apps/modular.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,7 @@ import {
onValue,
off
} from 'firebase/database';
import {
getGenerativeModel,
getVertexAI,
InferenceMode,
VertexAI
} from 'firebase/vertexai';
import { getGenerativeModel, getVertexAI, VertexAI } from 'firebase/vertexai';
import { getDataConnect, DataConnect } from 'firebase/data-connect';

/**
Expand Down Expand Up @@ -318,8 +313,13 @@ function callPerformance(app) {
/**
 * Smoke test for Vertex AI: obtains a generative model for the given app,
 * runs a request against it and logs the outcome to the console.
 *
 * NOTE(review): this span appears to show both the removed countTokens-based
 * lines and the added streaming lines as rendered by the diff view, which is
 * why `model` and `result` are each declared twice below.
 */
async function callVertexAI(app) {
console.log('[VERTEXAI] start');
const vertexAI = getVertexAI(app);
const model = getGenerativeModel(vertexAI, { model: 'gemini-1.5-flash' });
const result = await model.countTokens('abcdefg');
const model = getGenerativeModel(vertexAI, {
mode: 'prefer_in_cloud'
});
const result = await model.generateContentStream("What is Roko's Basalisk?");
// Log each streamed chunk's text as it arrives.
for await (const chunk of result.stream) {
console.log(chunk.text());
}
// NOTE(review): `result` now comes from generateContentStream rather than
// countTokens; it presumably has no `totalTokens` field, so this line would
// log "undefined" — confirm and update or remove.
console.log(`[VERTEXAI] counted tokens: ${result.totalTokens}`);
}

Expand All @@ -337,17 +337,6 @@ function callDataConnect(app) {
console.log('[DATACONNECT] initialized');
}

/**
 * Smoke test for Vertex AI with on-device inference preferred: requests a
 * single (non-streaming) completion and logs the response text, bracketed by
 * start/initialized log lines.
 */
async function callVertex(app) {
  console.log('[VERTEX] start');
  const generativeModel = getGenerativeModel(getVertexAI(app), {
    mode: InferenceMode.PREFER_ON_DEVICE
  });
  const { response } = await generativeModel.generateContent(
    "What is Roko's Basalisk?"
  );
  console.log(response.text());
  console.log('[VERTEX] initialized');
}

/**
* Run smoke tests for all products.
* Comment out any products you want to ignore.
Expand All @@ -357,19 +346,18 @@ async function main() {
const app = initializeApp(config);
setLogLevel('warn');

callAppCheck(app);
await authLogin(app);
await callStorage(app);
await callFirestore(app);
await callDatabase(app);
await callMessaging(app);
callAnalytics(app);
callPerformance(app);
await callFunctions(app);
// callAppCheck(app);
// await authLogin(app);
// await callStorage(app);
// await callFirestore(app);
// await callDatabase(app);
// await callMessaging(app);
// callAnalytics(app);
// callPerformance(app);
// await callFunctions(app);
await callVertexAI(app);
callDataConnect(app);
await authLogout(app);
await callVertex(app);
// callDataConnect(app);
// await authLogout(app);
console.log('DONE');
}

Expand Down
1 change: 1 addition & 0 deletions packages/vertexai/src/methods/chat-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ export class ChatSession {
this._apiSettings,
this.model,
generateContentRequest,
this.chromeAdapter,
this.requestOptions
);

Expand Down
63 changes: 51 additions & 12 deletions packages/vertexai/src/methods/chrome-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,20 @@ export class ChromeAdapter {
const result = await session.prompt(prompt.content);
return ChromeAdapter.toResponse(result);
}
private static toResponse(text: string): Response {
return {
json: async () => ({
candidates: [
{
content: {
parts: [{ text }]
}
}
]
})
} as Response;
/**
 * Generates content with the on-device model and returns it as a streaming
 * Response.
 *
 * The last entry extracted from `request.contents` is sent as the prompt;
 * all earlier entries are supplied to the session as initial prompts, after
 * any initial prompts configured in `onDeviceParams`.
 *
 * @param request - The generate-content request whose contents drive the
 *   on-device session.
 * @returns A Response-shaped object whose body streams the model output.
 */
async generateContentStreamOnDevice(
  request: GenerateContentRequest
): Promise<Response> {
  const extractedInitialPrompts = ChromeAdapter.toInitialPrompts(
    request.contents
  );
  // Assumes validation asserted there is at least one initial prompt.
  const prompt = extractedInitialPrompts.pop()!;
  // Build a fresh options object for this call. Mutating
  // `this.onDeviceParams` in place would make every call append its own
  // history to the shared configuration, leaking earlier conversations into
  // later sessions.
  const createOptions = {
    ...this.onDeviceParams,
    initialPrompts: [
      ...(this.onDeviceParams?.initialPrompts ?? []),
      ...extractedInitialPrompts
    ]
  };
  const session = await this.session(createOptions);
  const stream = await session.promptStreaming(prompt.content);
  return ChromeAdapter.toStreamResponse(stream);
}
private static isOnDeviceRequest(request: GenerateContentRequest): boolean {
// Returns false if the prompt is empty.
Expand Down Expand Up @@ -157,4 +159,41 @@ export class ChromeAdapter {
this.oldSession = newSession;
return newSession;
}
/**
 * Wraps a complete text result in a minimal Response-shaped object.
 *
 * Only `json()` is provided; each call resolves to a freshly built payload
 * holding a single candidate whose content has one text part.
 */
private static toResponse(text: string): Response {
  const onDeviceResponse = {
    async json() {
      const parts = [{ text }];
      return { candidates: [{ content: { parts } }] };
    }
  };
  return onDeviceResponse as Response;
}
/**
 * Wraps a stream of text chunks in a minimal Response-shaped object.
 *
 * Only `body` is provided. Every incoming chunk is converted to a
 * single-candidate JSON payload (role 'model', one text part), framed as
 * `data: <json>` followed by a blank line, and emitted as encoded bytes.
 */
private static toStreamResponse(
  stream: ReadableStream<string>
): Response {
  const encoder = new TextEncoder();

  // Serializes one text chunk into an encoded `data:` frame.
  const toFrame = (chunk: string) => {
    const payload = JSON.stringify({
      candidates: [{ content: { role: 'model', parts: [{ text: chunk }] } }]
    });
    return encoder.encode(`data: ${payload}\n\n`);
  };

  const framer = new TransformStream({
    transform: (chunk, controller) => {
      controller.enqueue(toFrame(chunk));
    }
  });

  const body = stream.pipeThrough(framer);
  return { body } as Response;
}
}
26 changes: 23 additions & 3 deletions packages/vertexai/src/methods/generate-content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,40 @@ import { processStream } from '../requests/stream-reader';
import { ApiSettings } from '../types/internal';
import { ChromeAdapter } from './chrome-adapter';

/**
 * Issues the streaming generate-content request through makeRequest
 * (Task.STREAM_GENERATE_CONTENT with the stream flag set), sending the
 * JSON-serialized params, and hands back what makeRequest returns.
 *
 * NOTE(review): this span appears to interleave the removed
 * generateContentStream signature lines with the added
 * generateContentStreamOnCloud ones, as rendered by the diff view.
 */
export async function generateContentStream(
async function generateContentStreamOnCloud(
apiSettings: ApiSettings,
model: string,
params: GenerateContentRequest,
requestOptions?: RequestOptions
): Promise<GenerateContentStreamResult> {
const response = await makeRequest(
): Promise<Response> {
return makeRequest(
model,
Task.STREAM_GENERATE_CONTENT,
apiSettings,
/* stream */ true,
JSON.stringify(params),
requestOptions
);
}

/**
 * Produces a streaming generate-content result, choosing the backend per
 * request.
 *
 * When the Chrome adapter reports itself available for `params`, the request
 * is served by the adapter's on-device streaming method; otherwise it is sent
 * to the cloud endpoint. Either way the resulting Response is handed to
 * processStream.
 *
 * @param apiSettings - API settings forwarded to the cloud request.
 * @param model - Model name forwarded to the cloud request.
 * @param params - The generate-content request.
 * @param chromeAdapter - Adapter consulted for, and used by, on-device
 *   inference.
 * @param requestOptions - Optional request options forwarded to the cloud
 *   request.
 * @returns The processed stream result.
 */
export async function generateContentStream(
  apiSettings: ApiSettings,
  model: string,
  params: GenerateContentRequest,
  chromeAdapter: ChromeAdapter,
  requestOptions?: RequestOptions
): Promise<GenerateContentStreamResult> {
  const useOnDevice = await chromeAdapter.isAvailable(params);
  const response = useOnDevice
    ? await chromeAdapter.generateContentStreamOnDevice(params)
    : await generateContentStreamOnCloud(
        apiSettings,
        model,
        params,
        requestOptions
      );
  return processStream(response);
}

Expand Down
1 change: 1 addition & 0 deletions packages/vertexai/src/models/generative-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ export class GenerativeModel extends VertexAIModel {
systemInstruction: this.systemInstruction,
...formattedParams
},
this.chromeAdapter,
this.requestOptions
);
}
Expand Down
Loading