diff --git a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
index b3648a488..82660bed5 100644
--- a/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
+++ b/src/modules/aix/server/dispatch/wiretypes/openai.wiretypes.ts
@@ -3,7 +3,8 @@ import { z } from 'zod';
 
 //
 // Implementation notes (see https://platform.openai.com/docs/changelog for upstream changes):
-// - 2024-11-05: "Predicted Outputs" - not fully added yet - TBA
+// - 2024-12-17: "Reasoning Effort" - added reasoning_effort and the 'developer' message role
+// - 2024-11-05: "Predicted Outputs"
 // - 2024-10-17: "gpt-4o-audio-preview" - not fully added: "Audio inputs and outputs are now available in the Chat Completions API" - TBA
 // - 2024-10-01: "DevDay" - added prompt_tokens_details, audio_tokens, and refusal messages
 // - 2024-09-12: "o1" - max_tokens is deprecated in favor of max_completion_tokens, added completion_tokens_details
@@ -34,9 +35,21 @@ export namespace OpenAIWire_ContentParts {
     }),
   });
 
+  const OpenAI_AudioContentPart_schema = z.object({
+    // [OpenAI, 2024-10-17] input content: audio
+    type: z.literal('input_audio'),
+    input_audio: z.object({
+      // Base64 encoded audio data.
+      data: z.string(),
+      // The format of the encoded audio data. Currently supports "wav" and "mp3".
+      format: z.enum(['wav', 'mp3']),
+    }),
+  });
+
   export const ContentPart_schema = z.discriminatedUnion('type', [
     TextContentPart_schema,
     ImageContentPart_schema,
+    OpenAI_AudioContentPart_schema,
   ]);
 
   export function TextContentPart(text: string): z.infer<typeof TextContentPart_schema> {
@@ -104,7 +117,13 @@ export namespace OpenAIWire_Messages {
   const AssistantMessage_schema = z.object({
     role: z.literal('assistant'),
 
-    /** The contents of the assistant message. Required unless tool_calls or function_call is specified. */
+    /**
+     * The contents of the assistant message. Required unless tool_calls or function_call is specified.
+     *
+     * NOTE: upstream, the assistant message content may now also be an array, but as of 2024-12-24 it's not
+     * important enough to warrant array support here. The upstream documentation of the array behavior is:
+     * "An array of content parts with a defined type. Can be one or more of type text, or exactly one of type refusal."
+     */
     content: z.string().nullable(),
     /**
      * The tool calls generated by the model, such as function calls.
@@ -115,12 +134,36 @@ export namespace OpenAIWire_Messages {
      * [OpenAI, 2024-10-01] The refusal message generated by the model.
      */
     refusal: z.string().nullable().optional(),
+    /**
+     * [OpenAI, 2024-10-17] Data about a previous audio response from the model. Usage depends on the context:
+     * - request (this schema): has an id, if present
+     * - non-streaming response: has the generated audio and some metadata
+     * - streaming response: NO audio fields
+     */
+    audio: z.object({
+      id: z.string(),
+    }).nullable().optional(),
     // name: _optionalParticipantName,
   });
 
   export const AssistantMessage_NS_schema = AssistantMessage_schema.extend({
+    //
+    // IMPORTANT - this message *extends* AssistantMessage_schema, inheriting all of its fields before applying any further changes
+    //
+
     // .optional: when parsing a non-streaming message with just a FC, the content can be missing
-    content: z.string().optional().nullable(),
+    content: z.string().nullable().optional(),
+
+    /**
+     * [OpenAI, 2024-10-17] Audio output (non-streaming only)
+     * If the audio output modality is requested, this object contains data about the audio response from the model
+     */
+    audio: z.object({
+      id: z.string(),
+      data: z.string(), // Base64 encoded audio data
+      expires_at: z.number(), // Unix timestamp
+      transcript: z.string().optional(),
+    }).nullable().optional(),
   });
 
   const ToolMessage_schema = z.object({
@@ -227,6 +270,16 @@ export namespace OpenAIWire_API_Chat_Completions {
     temperature: z.number().min(0).max(2).optional(),
     top_p: z.number().min(0).max(1).optional(),
 
+    // new output modalities
+    modalities: z.array(z.enum(['text', 'audio'])).optional(), // defaults to ['text']
+    audio: z.object({ // Parameters for audio output. Required when audio output is requested with `modalities: ["audio"]`
+      voice: z.enum([
+        'ash', 'ballad', 'coral', 'sage', 'verse', // recommended
+        'alloy', 'echo', 'shimmer', // discouraged
+      ]),
+      format: z.enum(['wav', 'mp3', 'flac', 'opus', 'pcm16']),
+    }).optional(),
+
     // API configuration
     n: z.number().int().positive().optional(), // Defaults to 1, as the derived-ecosystem does not support it
     stream: z.boolean().optional(), // If set, partial message deltas will be sent, with the stream terminated by a `data: [DONE]` message.
@@ -234,6 +287,10 @@ export namespace OpenAIWire_API_Chat_Completions {
       include_usage: z.boolean().optional(), // If set, an additional chunk will be streamed with a 'usage' field on the entire request.
     }).optional(),
     reasoning_effort: z.enum(['low', 'medium', 'high']).optional(), // [OpenAI, 2024-12-17] reasoning effort, o1 models only for now
+    prediction: z.object({ // [OpenAI, 2024-11-05] Predicted Outputs - for regenerating a file with only minor changes to most of the content.
+      type: z.literal('content'),
+      content: z.union([z.string(), z.array(OpenAIWire_ContentParts.ContentPart_schema)]),
+    }).optional(),
     response_format: z.discriminatedUnion('type', [
       z.object({
         type: z.literal('text'), // Default
@@ -272,16 +329,16 @@ export namespace OpenAIWire_API_Chat_Completions {
     stop: z.array(z.string()).optional(), // Up to 4 sequences where the API will stop generating further tokens.
     user: z.string().optional(),
 
-    // (deprecated upstream, we decide to omit this): function_call and functions
+    // (deprecated upstream, OMITTED BY CHOICE): function_call and functions
 
-    // (disabled) advanced model configuration
+    // (OMITTED BY CHOICE) advanced model configuration
     // frequency_penalty: z.number().min(-2).max(2).optional(), // Defaults to 0
     // presence_penalty: z.number().min(-2).max(2).optional(), // Defaults to 0
     // logit_bias: z.record(z.number()).optional(),
-    // logprobs: z.boolean().optional(),  // Defaults to false
+    // logprobs: z.boolean().optional(), // Defaults to false
     // top_logprobs: z.number().int().min(0).max(20).optional(),
 
-    // (disabled) advanced API configuration
+    // (OMITTED BY CHOICE) advanced API configuration
     // store: z.boolean().optional(), // Defaults to false. Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
     // metadata: z.record(z.any()).optional(), // Developer-defined tags and values used for filtering completions in [the dashboard](https://platform.openai.com/completions)
     // service_tier: z.string().optional(),
@@ -296,7 +353,7 @@ export namespace OpenAIWire_API_Chat_Completions {
     'tool_calls', // the model called a tool
     'content_filter', // upstream content was omitted due to a flag from content filters
 
-    // Disabling Function Call, we decide to not support this obsoleted api
+    // Disabling Function Call, OMITTED BY CHOICE
     // 'function_call', // (deprecated) the model called a function
     // Extensions
     // disabled: we now use a string union to accept any value without breaking
@@ -339,7 +396,7 @@ export namespace OpenAIWire_API_Chat_Completions {
     finish_reason: z.union([FinishReason_Enum, z.string()])
       .nullable(),
 
-    // (we decide to omit this) We will not support logprobs for now, so it's disabled here and in the request
+    // (OMITTED BY CHOICE) We will not support logprobs for now, so it's disabled here and in the request
     // logprobs: z.any().nullable().optional() // Log probability information for the choice.
   });
 
@@ -358,7 +415,7 @@ export namespace OpenAIWire_API_Chat_Completions {
     created: z.number(), // The Unix timestamp (in seconds) of when the chat completion was created.
     system_fingerprint: z.string().optional() // The backend configuration that the model runs with.
       .nullable(), // [Groq, undocumented OpenAI] fingerprint is null on some OpenAI examples too
-    // service_tier: z.string().optional().nullable(),
+    // service_tier: z.string().optional().nullable(), // OMITTED BY CHOICE
 
     // undocumented messages that are not part of the official schema, but can be found when the server sends and error
     error: z.any().optional(),
@@ -416,7 +473,9 @@ export namespace OpenAIWire_API_Chat_Completions {
   const ChunkDelta_schema = z.object({
     role: z.literal('assistant').optional()
       .nullable(), // [Deepseek] added .nullable()
+    // delta-text content
     content: z.string().nullable().optional(),
+    // delta-tool-calls content
     tool_calls: z.array(ChunkDeltaToolCalls_schema).optional()
       .nullable(), // [TogetherAI] added .nullable(), see https://github.com/togethercomputer/together-python/issues/160
     refusal: z.string().nullable().optional(), // [OpenAI, 2024-10-01] refusal message
@@ -433,7 +492,7 @@ export namespace OpenAIWire_API_Chat_Completions {
       .nullable() // very common, e.g. Azure
      .optional(), // [OpenRouter] added .optional() which only has the delta field in the whole chunk choice
 
-    // (we decide to omit this) We will not support logprobs for now, so it's disabled here and in the request
+    // (OMITTED BY CHOICE) We will not support logprobs for now, so it's disabled here and in the request
    // logprobs: z.any().nullable().optional() // Log probability information for the choice.
  });
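
// --- Illustrative sketch (not part of the patch) ---
// A rough example of a request payload that exercises the fields added above: an 'input_audio'
// content part, the output 'modalities' + 'audio' parameters, and 'prediction' (Predicted Outputs).
// Assumptions not shown in this diff: the request object schema is exported from this file as
// OpenAIWire_API_Chat_Completions.Request_schema, and the user message content accepts an array of
// OpenAIWire_ContentParts.ContentPart_schema parts. All values are made up.

import { OpenAIWire_API_Chat_Completions } from './openai.wiretypes';

const maybeRequest = {
  model: 'gpt-4o-audio-preview',
  messages: [{
    role: 'user',
    content: [
      { type: 'text', text: 'Please listen to this clip and summarize it.' },
      // [OpenAI, 2024-10-17] audio input part: base64 data, 'wav' or 'mp3' only
      { type: 'input_audio', input_audio: { data: '<base64 wav bytes>', format: 'wav' } },
    ],
  }],
  // request text + audio output; the 'audio' parameters are required when modalities includes 'audio'
  modalities: ['text', 'audio'],
  audio: { voice: 'ash', format: 'wav' },
  // [OpenAI, 2024-11-05] Predicted Outputs: hint the model with the previous version of the content
  prediction: { type: 'content', content: '...previous revision of the file...' },
};

// safeParse surfaces schema violations as a value instead of throwing, which is usually
// preferable at a wire/API boundary.
const parsed = OpenAIWire_API_Chat_Completions.Request_schema.safeParse(maybeRequest);
if (!parsed.success)
  console.error(parsed.error.issues);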