Skip to content

use strongly typed attributes #457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: dev-1.0
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions agents/src/attributes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// This file was generated from JSON Schema using quicktype, do not modify it directly.
// The code generation lives at https://github.com/livekit/attribute-definitions
//
// To parse this data:
//
// import { Convert, AgentAttributes, TranscriptionAttributes } from "./file";
//
// const agentAttributes = Convert.toAgentAttributes(json);
// const transcriptionAttributes = Convert.toTranscriptionAttributes(json);

/**
* Typed view of the agent-related attribute keys (`lk.agent.*` and
* `lk.publish_on_behalf`). Every known key is optional.
*/
export interface AgentAttributes {
/**
* List of `AgentInput` values
*/
'lk.agent.inputs'?: AgentInput[];
/**
* List of `AgentOutput` values
*/
'lk.agent.outputs'?: AgentOutput[];
/**
* One of the `AgentState` values
*/
'lk.agent.state'?: AgentState;
/**
* String value stored under the `lk.publish_on_behalf` key
*/
'lk.publish_on_behalf'?: string;
// Index signature: any other string key is accepted, and its value is unchecked (`any`).
[property: string]: any;
}

/**
* Allowed element values of the `lk.agent.inputs` attribute
*/
export type AgentInput = 'audio' | 'video' | 'text';

/**
* Allowed element values of the `lk.agent.outputs` attribute
*/
export type AgentOutput = 'transcription' | 'audio';

/**
* Allowed values of the `lk.agent.state` attribute
*/
export type AgentState = 'idle' | 'initializing' | 'listening' | 'thinking' | 'speaking';

/**
* Schema for transcription-related attributes.
* Every known key is optional.
*/
export interface TranscriptionAttributes {
/**
* The segment id of the transcription
*/
'lk.segment_id'?: string;
/**
* The associated track id of the transcription
*/
'lk.transcribed_track_id'?: string;
/**
* Whether the transcription is final
*/
'lk.transcription_final'?: boolean;
// Index signature: any other string key is accepted, and its value is unchecked (`any`).
[property: string]: any;
}

/**
 * Converts between raw attribute maps (`Record<string, string>`) and the typed
 * attribute interfaces declared in this file.
 *
 * Encoding rules:
 * - Keys whose schema type is a plain string (see `stringKeys`) are carried
 *   verbatim in both directions. A value such as `listening` or a numeric-looking
 *   identity such as `1234` is therefore never quoted on the way out and never
 *   re-interpreted as JSON on the way in.
 * - Every other value is JSON: booleans, arrays, numbers and objects are
 *   `JSON.stringify`-ed when written and `JSON.parse`-d when read.
 * - A raw value that is not valid JSON is returned as the raw string instead of
 *   throwing, so maps written by producers that do not JSON-encode can be read.
 * - Entries whose value has no JSON representation (`undefined`, functions,
 *   symbols) are omitted from the raw map, keeping it a true string map.
 *
 * No validation against the union types is performed: e.g. an unexpected
 * `lk.agent.state` string is passed through unchanged.
 */
export class Convert {
  /** Attribute keys declared as `string` (or a string-literal union) in the interfaces of this file. */
  private static readonly stringKeys: ReadonlySet<string> = new Set([
    'lk.agent.state',
    'lk.publish_on_behalf',
    'lk.segment_id',
    'lk.transcribed_track_id',
  ]);

  /**
   * Builds typed agent attributes from a raw attribute map.
   *
   * @param attributes - raw string-to-string attribute map
   * @returns a new object holding the decoded value for every own key of `attributes`
   */
  public static toAgentAttributes(attributes: Record<string, string>): AgentAttributes {
    const decoded: AgentAttributes = {};
    for (const [key, raw] of Object.entries(attributes)) {
      if (raw !== undefined) {
        decoded[key] = Convert.decodeValue(key, raw);
      }
    }
    return decoded;
  }

  /**
   * Serializes typed agent attributes into a raw attribute map.
   *
   * @param attributes - typed agent attributes
   * @returns a new string-to-string map; entries without a string encoding are skipped
   */
  public static agentAttributesToRaw(attributes: AgentAttributes): Record<string, string> {
    return Convert.encodeAll(attributes);
  }

  /**
   * Builds typed transcription attributes from a raw attribute map.
   *
   * @param attributes - raw string-to-string attribute map
   * @returns a new object holding the decoded value for every own key of `attributes`
   */
  public static toTranscriptionAttributes(
    attributes: Record<string, string>,
  ): TranscriptionAttributes {
    const decoded: TranscriptionAttributes = {};
    for (const [key, raw] of Object.entries(attributes)) {
      if (raw !== undefined) {
        decoded[key] = Convert.decodeValue(key, raw);
      }
    }
    return decoded;
  }

  /**
   * Serializes typed transcription attributes into a raw attribute map.
   *
   * @param attributes - typed transcription attributes
   * @returns a new string-to-string map; entries without a string encoding are skipped
   */
  public static transcriptionAttributesToRaw(
    attributes: TranscriptionAttributes,
  ): Record<string, string> {
    return Convert.encodeAll(attributes);
  }

  /** Encodes every own enumerable entry, dropping those that have no string encoding. */
  private static encodeAll(attributes: Record<string, unknown>): Record<string, string> {
    const rawAttributes: Record<string, string> = {};
    for (const [key, value] of Object.entries(attributes)) {
      const encoded = Convert.encodeValue(key, value);
      if (encoded !== undefined) {
        rawAttributes[key] = encoded;
      }
    }
    return rawAttributes;
  }

  /** Decodes one raw value: verbatim for string-typed keys, JSON otherwise, raw string on invalid JSON. */
  private static decodeValue(key: string, raw: string): unknown {
    if (Convert.stringKeys.has(key)) {
      return raw;
    }
    const parsed = Convert.tryParseJson(raw);
    return parsed.ok ? parsed.value : raw;
  }

  /** Encodes one value to its raw string form, or `undefined` when it cannot be represented. */
  private static encodeValue(key: string, value: unknown): string | undefined {
    if (typeof value !== 'string') {
      // At runtime JSON.stringify yields undefined for undefined/functions/symbols,
      // even though its declared return type is `string`.
      return JSON.stringify(value);
    }
    if (Convert.stringKeys.has(key)) {
      return value;
    }
    // For keys outside the schema, a string that happens to be valid JSON (e.g. 'true')
    // is quoted so that decoding gives back the same string rather than a boolean/number.
    return Convert.tryParseJson(value).ok ? JSON.stringify(value) : value;
  }

  /** JSON.parse that reports failure instead of throwing. */
  private static tryParseJson(raw: string): { ok: true; value: unknown } | { ok: false } {
    try {
      return { ok: true, value: JSON.parse(raw) };
    } catch {
      return { ok: false };
    }
  }
}
24 changes: 18 additions & 6 deletions agents/src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
export const ATTRIBUTE_TRANSCRIPTION_TRACK_ID = 'lk.transcribed_track_id';
export const ATTRIBUTE_TRANSCRIPTION_FINAL = 'lk.transcription_final';
export const TOPIC_TRANSCRIPTION = 'lk.transcription';
export const ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID = 'lk.segment_id';
export const ATTRIBUTE_PUBLISH_ON_BEHALF = 'lk.publish_on_behalf';
export const TOPIC_CHAT = 'lk.chat';
import type { AgentAttributes, TranscriptionAttributes } from './attributes.js';

// Agent attributes
export const AGENT_STATE_ATTRIBUTE = 'lk.agent.state' as const satisfies keyof AgentAttributes;
export const ATTRIBUTE_PUBLISH_ON_BEHALF =
'lk.publish_on_behalf' as const satisfies keyof AgentAttributes;

// Transcription attributes
export const ATTRIBUTE_TRANSCRIPTION_TRACK_ID =
'lk.transcribed_track_id' as const satisfies keyof TranscriptionAttributes;
export const ATTRIBUTE_TRANSCRIPTION_FINAL =
'lk.transcription_final' as const satisfies keyof TranscriptionAttributes;
export const ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID =
'lk.segment_id' as const satisfies keyof TranscriptionAttributes;

// Topics
export const TOPIC_TRANSCRIPTION = 'lk.transcription' as const;
export const TOPIC_CHAT = 'lk.chat' as const;
18 changes: 11 additions & 7 deletions agents/src/multimodal/multimodal_agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@ import {
TrackSource,
} from '@livekit/rtc-node';
import { EventEmitter } from 'node:events';
import {
type AgentState,
Convert as ConvertAttributes,
type TranscriptionAttributes,
} from '../attributes.js';
import { AudioByteStream } from '../audio.js';
import {
AGENT_STATE_ATTRIBUTE,
ATTRIBUTE_TRANSCRIPTION_FINAL,
ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
TOPIC_TRANSCRIPTION,
Expand Down Expand Up @@ -56,9 +62,6 @@ export abstract class RealtimeModel {
abstract outFrameSize: number;
}

export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
export const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';

/** @beta */
export class MultimodalAgent extends EventEmitter {
model: RealtimeModel;
Expand Down Expand Up @@ -508,10 +511,10 @@ export class MultimodalAgent extends EventEmitter {
const stream = await this.room.localParticipant.streamText({
topic: TOPIC_TRANSCRIPTION,
senderIdentity: participantIdentity,
attributes: {
attributes: ConvertAttributes.transcriptionAttributesToRaw({
[ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
[ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),
},
[ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal,
} satisfies TranscriptionAttributes),
});
await stream.write(text);
await stream.close();
Expand All @@ -532,7 +535,8 @@ export class MultimodalAgent extends EventEmitter {

#setState(state: AgentState) {
if (this.room?.isConnected && this.room.localParticipant) {
const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];
const attributes = ConvertAttributes.toAgentAttributes(this.room.localParticipant.attributes);
const currentState = attributes[AGENT_STATE_ATTRIBUTE];
if (currentState !== state) {
this.room.localParticipant.setAttributes({
[AGENT_STATE_ATTRIBUTE]: state,
Expand Down
2 changes: 1 addition & 1 deletion agents/src/voice/agent_session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type { AudioFrame, Room } from '@livekit/rtc-node';
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
import { EventEmitter } from 'node:events';
import type { ReadableStream } from 'node:stream/web';
import type { AgentState } from '../attributes.js';
import type { ChatMessage } from '../llm/chat_context.js';
import { ChatContext } from '../llm/chat_context.js';
import type { LLM } from '../llm/index.js';
Expand All @@ -20,7 +21,6 @@ import type { AudioOutput, TextOutput } from './io.js';
import { RoomIO } from './room_io/index.js';
import type { UnknownUserData } from './run_context.js';

export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
export interface VoiceOptions {
allowInterruptions: boolean;
discardAudioIfUninterruptible: boolean;
Expand Down
1 change: 0 additions & 1 deletion agents/src/voice/events.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
export type UserState = 'idle' | 'thinking' | 'listening' | 'speaking';
17 changes: 11 additions & 6 deletions agents/src/voice/room_io/_output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@ import {
TrackPublishOptions,
TrackSource,
} from '@livekit/rtc-node';
import {
Convert as ConvertAttributes,
type TranscriptionAttributes,
} from 'agents/src/attributes.js';
import { randomUUID } from 'node:crypto';
import {
ATTRIBUTE_TRANSCRIPTION_FINAL,
ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID,
ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
TOPIC_TRANSCRIPTION,
Expand Down Expand Up @@ -165,7 +170,7 @@ export class ParticipantTranscriptionOutput extends BaseParticipantTranscription
this.flushTask = Task.from((controller) => this.flushTaskImpl(currWriter, controller.signal));
}

private async createTextWriter(attributes?: Record<string, string>): Promise<TextStreamWriter> {
private async createTextWriter(attributes?: TranscriptionAttributes): Promise<TextStreamWriter> {
if (!this.participantIdentity) {
throw new Error('participantIdentity not found');
}
Expand All @@ -176,24 +181,24 @@ export class ParticipantTranscriptionOutput extends BaseParticipantTranscription

if (!attributes) {
attributes = {
ATTRIBUTE_TRANSCRIPTION_FINAL: 'false',
[ATTRIBUTE_TRANSCRIPTION_FINAL]: false,
[ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID]: this.currentId,
};
if (this.trackId) {
attributes[ATTRIBUTE_TRANSCRIPTION_TRACK_ID] = this.trackId;
}
attributes[ATTRIBUTE_TRANSCRIPTION_SEGMENT_ID] = this.currentId;
}

return await this.room.localParticipant.streamText({
topic: TOPIC_TRANSCRIPTION,
senderIdentity: this.participantIdentity,
attributes,
attributes: ConvertAttributes.transcriptionAttributesToRaw(attributes),
});
}

private async flushTaskImpl(writer: TextStreamWriter | null, signal: AbortSignal): Promise<void> {
const attributes: Record<string, string> = {
ATTRIBUTE_TRANSCRIPTION_FINAL: 'true',
const attributes: TranscriptionAttributes = {
[ATTRIBUTE_TRANSCRIPTION_FINAL]: true,
};
if (this.trackId) {
attributes[ATTRIBUTE_TRANSCRIPTION_TRACK_ID] = this.trackId;
Expand Down