From 34477c19ea75969fb554bd82b3b65e3b203b3d9f Mon Sep 17 00:00:00 2001 From: Kevin Jain Date: Sat, 18 Jan 2025 12:43:18 -0600 Subject: [PATCH] Updated types for Workers AI (#3278) * Updated types for Workers AI We are moving types for supported models under a new NPM package (https://www.npmjs.com/package/@cloudflare/ai-types). This PR removes all the model specific types and converts AI.run() into a generic function. * added generated snapshot files * Reverted breaking changes, added new models in the list added whisper, whisper-tiny-en, whisper-large-v3-turbo, llama-3.2-11b-vision-instruct to the list * added flux-1-schnell to the model catalog --- types/defines/ai.d.ts | 446 +++++++++++++++++- .../generated-snapshot/2021-11-03/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2021-11-03/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2022-01-31/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2022-01-31/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2022-03-21/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2022-03-21/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2022-08-04/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2022-08-04/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2022-10-31/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2022-10-31/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2022-11-30/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2022-11-30/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2023-03-01/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2023-03-01/index.ts | 442 ++++++++++++++++- .../generated-snapshot/2023-07-01/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/2023-07-01/index.ts | 442 ++++++++++++++++- .../experimental/index.d.ts | 440 ++++++++++++++++- .../generated-snapshot/experimental/index.ts | 442 ++++++++++++++++- types/generated-snapshot/oldest/index.d.ts | 440 ++++++++++++++++- types/generated-snapshot/oldest/index.ts | 442 ++++++++++++++++- 21 files changed, 9028 insertions(+), 238 deletions(-) diff --git a/types/defines/ai.d.ts b/types/defines/ai.d.ts index c989ea3b412..58e8a78c88c 100644 --- a/types/defines/ai.d.ts +++ b/types/defines/ai.d.ts @@ -100,12 +100,7 @@ export declare abstract class BaseAiTextEmbeddings { postProcessedOutputs: AiTextEmbeddingsOutput; } export type RoleScopedChatInput = { - role: - | "user" - | "assistant" - | "system" - | "tool" - | (string & NonNullable); + role: "user" | "assistant" | "system" | "tool" | (string & NonNullable); content: string; name?: string; }; @@ -157,7 +152,7 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[] | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -216,12 +211,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -278,8 +678,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string, hide_experimental?: boolean @@ -307,13 +716,14 @@ export type AiModelsSearchObject = { } export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], - options?: AiOptions - ): Promise; + inputs: AiModelList[Name]["inputs"], + options?: AiOptions, + ): Promise; public models(params?: AiModelsSearchParams): Promise; } diff --git a/types/generated-snapshot/2021-11-03/index.d.ts b/types/generated-snapshot/2021-11-03/index.d.ts index bda796b40f1..9c38d2761e5 100755 --- a/types/generated-snapshot/2021-11-03/index.d.ts +++ b/types/generated-snapshot/2021-11-03/index.d.ts @@ -3560,7 +3560,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3619,12 +3622,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3681,8 +4089,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3710,14 +4127,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2021-11-03/index.ts b/types/generated-snapshot/2021-11-03/index.ts index dd0afeca7d5..4857c896083 100755 --- a/types/generated-snapshot/2021-11-03/index.ts +++ b/types/generated-snapshot/2021-11-03/index.ts @@ -3572,7 +3572,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3631,12 +3634,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3693,8 +4101,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3722,14 +4139,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2022-01-31/index.d.ts b/types/generated-snapshot/2022-01-31/index.d.ts index 93285e309e9..2bff6ff09ad 100755 --- a/types/generated-snapshot/2022-01-31/index.d.ts +++ b/types/generated-snapshot/2022-01-31/index.d.ts @@ -3586,7 +3586,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3645,12 +3648,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3707,8 +4115,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3736,14 +4153,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2022-01-31/index.ts b/types/generated-snapshot/2022-01-31/index.ts index 18c39606f83..851fca0131f 100755 --- a/types/generated-snapshot/2022-01-31/index.ts +++ b/types/generated-snapshot/2022-01-31/index.ts @@ -3598,7 +3598,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3657,12 +3660,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3719,8 +4127,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3748,14 +4165,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2022-03-21/index.d.ts b/types/generated-snapshot/2022-03-21/index.d.ts index 048ffbdbf1c..8df865efc1f 100755 --- a/types/generated-snapshot/2022-03-21/index.d.ts +++ b/types/generated-snapshot/2022-03-21/index.d.ts @@ -3611,7 +3611,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3670,12 +3673,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3732,8 +4140,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3761,14 +4178,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2022-03-21/index.ts b/types/generated-snapshot/2022-03-21/index.ts index 2879c25e8f7..61543c3255a 100755 --- a/types/generated-snapshot/2022-03-21/index.ts +++ b/types/generated-snapshot/2022-03-21/index.ts @@ -3623,7 +3623,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3682,12 +3685,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3744,8 +4152,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3773,14 +4190,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2022-08-04/index.d.ts b/types/generated-snapshot/2022-08-04/index.d.ts index 25077047016..0906f7112c2 100755 --- a/types/generated-snapshot/2022-08-04/index.d.ts +++ b/types/generated-snapshot/2022-08-04/index.d.ts @@ -3612,7 +3612,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3671,12 +3674,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3733,8 +4141,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3762,14 +4179,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2022-08-04/index.ts b/types/generated-snapshot/2022-08-04/index.ts index 62d6d3a621f..ff31605122c 100755 --- a/types/generated-snapshot/2022-08-04/index.ts +++ b/types/generated-snapshot/2022-08-04/index.ts @@ -3624,7 +3624,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3683,12 +3686,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3745,8 +4153,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3774,14 +4191,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2022-10-31/index.d.ts b/types/generated-snapshot/2022-10-31/index.d.ts index 6de3f1835f3..2d931165003 100755 --- a/types/generated-snapshot/2022-10-31/index.d.ts +++ b/types/generated-snapshot/2022-10-31/index.d.ts @@ -3615,7 +3615,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3674,12 +3677,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3736,8 +4144,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3765,14 +4182,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2022-10-31/index.ts b/types/generated-snapshot/2022-10-31/index.ts index c15275cd98a..c1cc3360b5d 100755 --- a/types/generated-snapshot/2022-10-31/index.ts +++ b/types/generated-snapshot/2022-10-31/index.ts @@ -3627,7 +3627,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3686,12 +3689,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3748,8 +4156,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3777,14 +4194,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2022-11-30/index.d.ts b/types/generated-snapshot/2022-11-30/index.d.ts index e32f7da7001..3cca2fe9aaa 100755 --- a/types/generated-snapshot/2022-11-30/index.d.ts +++ b/types/generated-snapshot/2022-11-30/index.d.ts @@ -3620,7 +3620,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3679,12 +3682,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3741,8 +4149,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3770,14 +4187,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2022-11-30/index.ts b/types/generated-snapshot/2022-11-30/index.ts index 8fc9786fe57..6bd3b20b5e0 100755 --- a/types/generated-snapshot/2022-11-30/index.ts +++ b/types/generated-snapshot/2022-11-30/index.ts @@ -3632,7 +3632,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3691,12 +3694,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3753,8 +4161,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3782,14 +4199,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2023-03-01/index.d.ts b/types/generated-snapshot/2023-03-01/index.d.ts index 9f0d576887a..91405f291de 100755 --- a/types/generated-snapshot/2023-03-01/index.d.ts +++ b/types/generated-snapshot/2023-03-01/index.d.ts @@ -3622,7 +3622,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3681,12 +3684,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3743,8 +4151,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3772,14 +4189,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2023-03-01/index.ts b/types/generated-snapshot/2023-03-01/index.ts index 773256074f6..efdccb36c69 100755 --- a/types/generated-snapshot/2023-03-01/index.ts +++ b/types/generated-snapshot/2023-03-01/index.ts @@ -3634,7 +3634,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3693,12 +3696,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3755,8 +4163,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3784,14 +4201,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/2023-07-01/index.d.ts b/types/generated-snapshot/2023-07-01/index.d.ts index 45de60d6bd6..3c08c5aec86 100755 --- a/types/generated-snapshot/2023-07-01/index.d.ts +++ b/types/generated-snapshot/2023-07-01/index.d.ts @@ -3622,7 +3622,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3681,12 +3684,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3743,8 +4151,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3772,14 +4189,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/2023-07-01/index.ts b/types/generated-snapshot/2023-07-01/index.ts index 6cf1b3319f6..e7eb5d811c8 100755 --- a/types/generated-snapshot/2023-07-01/index.ts +++ b/types/generated-snapshot/2023-07-01/index.ts @@ -3634,7 +3634,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3693,12 +3696,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3755,8 +4163,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3784,14 +4201,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/experimental/index.d.ts b/types/generated-snapshot/experimental/index.d.ts index 17ab6707377..95af5a4cf95 100755 --- a/types/generated-snapshot/experimental/index.d.ts +++ b/types/generated-snapshot/experimental/index.d.ts @@ -3701,7 +3701,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3760,12 +3763,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3822,8 +4230,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3851,14 +4268,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/experimental/index.ts b/types/generated-snapshot/experimental/index.ts index f517d4590a5..44f0ff5b1e0 100755 --- a/types/generated-snapshot/experimental/index.ts +++ b/types/generated-snapshot/experimental/index.ts @@ -3713,7 +3713,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3772,12 +3775,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3834,8 +4242,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3863,14 +4280,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = { diff --git a/types/generated-snapshot/oldest/index.d.ts b/types/generated-snapshot/oldest/index.d.ts index 0873101fd61..1f41008aac4 100755 --- a/types/generated-snapshot/oldest/index.d.ts +++ b/types/generated-snapshot/oldest/index.d.ts @@ -3560,7 +3560,10 @@ type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; type AiTextGenerationOutput = @@ -3619,12 +3622,417 @@ declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -type AiOptions = { - gateway?: GatewayOptions; +type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -type ModelType = AiModels[Name]; +} +interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3681,8 +4089,17 @@ interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -type ModelListType = Record; +type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3710,14 +4127,15 @@ type AiModelsSearchObject = { }; interface InferenceUpstreamError extends Error {} interface AiInternalError extends Error {} -declare abstract class Ai { +type AiModelListType = Record; +declare abstract class Ai { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } type GatewayOptions = { diff --git a/types/generated-snapshot/oldest/index.ts b/types/generated-snapshot/oldest/index.ts index 95245ad7177..c79961dfb08 100755 --- a/types/generated-snapshot/oldest/index.ts +++ b/types/generated-snapshot/oldest/index.ts @@ -3572,7 +3572,10 @@ export type AiTextGenerationInput = { frequency_penalty?: number; presence_penalty?: number; messages?: RoleScopedChatInput[]; - tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[]; + tools?: + | AiTextGenerationToolInput[] + | AiTextGenerationToolLegacyInput[] + | (object & NonNullable); functions?: AiTextGenerationFunctionsInput[]; }; export type AiTextGenerationOutput = @@ -3631,12 +3634,417 @@ export declare abstract class BaseAiTranslation { inputs: AiTranslationInput; postProcessedOutputs: AiTranslationOutput; } -export type AiOptions = { - gateway?: GatewayOptions; +export type Ai_Cf_Openai_Whisper_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper { + inputs: Ai_Cf_Openai_Whisper_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Output; +} +export type Ai_Cf_Openai_Whisper_Tiny_En_Input = + | string + | { + /** + * An array of integers that represent the audio data constrained to 8-bit unsigned integer values + */ + audio: number[]; + }; +export interface Ai_Cf_Openai_Whisper_Tiny_En_Output { + /** + * The transcription + */ + text: string; + word_count?: number; + words?: { + word?: string; + /** + * The second this word begins in the recording + */ + start?: number; + /** + * The ending second when the word completes + */ + end?: number; + }[]; + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En { + inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input { + /** + * Base64 encoded value of the audio data. + */ + audio: string; + /** + * Supported tasks are 'translate' or 'transcribe'. + */ + task?: string; + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * Preprocess the audio with a voice activity detection model. + */ + vad_filter?: string; + /** + * A text prompt to help provide context to the model on the contents of the audio. + */ + initial_prompt?: string; + /** + * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result. + */ prefix?: string; - extraHeaders?: object; -}; -export type ModelType = AiModels[Name]; +} +export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { + transcription_info?: { + /** + * The language of the audio being transcribed or translated. + */ + language?: string; + /** + * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1. + */ + language_probability?: number; + /** + * The total duration of the original audio file, in seconds. + */ + duration?: number; + /** + * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds. + */ + duration_after_vad?: number; + }; + /** + * The complete transcription of the audio. + */ + text: string; + /** + * The total number of words in the transcription. + */ + word_count?: number; + segments?: { + /** + * The starting time of the segment within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the segment within the audio, in seconds. + */ + end?: number; + /** + * The transcription of the segment. + */ + text?: string; + /** + * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs. + */ + temperature?: number; + /** + * The average log probability of the predictions for the words in this segment, indicating overall confidence. + */ + avg_logprob?: number; + /** + * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process. + */ + compression_ratio?: number; + /** + * The probability that the segment contains no speech, represented as a decimal between 0 and 1. + */ + no_speech_prob?: number; + words?: { + /** + * The individual word transcribed from the audio. + */ + word?: string; + /** + * The starting time of the word within the audio, in seconds. + */ + start?: number; + /** + * The ending time of the word within the audio, in seconds. + */ + end?: number; + }[]; + }; + /** + * The transcription in WebVTT format, which includes timing and text information for use in subtitles. + */ + vtt?: string; +} +export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { + inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input; + postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { + /** + * A text description of the image you want to generate. + */ + prompt: string; + /** + * The number of diffusion steps; higher values can improve quality but take longer. + */ + steps?: number; +} +export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { + /** + * The generated image in Base64 format. + */ + image?: string; +} +export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { + inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input; + postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages; +export interface Prompt { + /** + * The input text prompt for the model to generate a response. + */ + prompt: string; + image?: number[] | (string & NonNullable); + /** + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. + */ + raw?: boolean; + /** + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; + /** + * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model. + */ + lora?: string; +} +export interface Messages { + /** + * An array of message objects representing the conversation history. + */ + messages: { + /** + * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool'). + */ + role: string; + /** + * The content of the message as a string. + */ + content: string; + }[]; + image?: number[] | string; + functions?: { + name: string; + code: string; + }[]; + /** + * A list of tools available for the assistant to use. + */ + tools?: ( + | { + /** + * The name of the tool. More descriptive the better. + */ + name: string; + /** + * A brief description of what the tool does. + */ + description: string; + /** + * Schema defining the parameters accepted by the tool. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + } + | { + /** + * Specifies the type of tool (e.g., 'function'). + */ + type: string; + /** + * Details of the function tool. + */ + function: { + /** + * The name of the function. + */ + name: string; + /** + * A brief description of what the function does. + */ + description: string; + /** + * Schema defining the parameters accepted by the function. + */ + parameters: { + /** + * The type of the parameters object (usually 'object'). + */ + type: string; + /** + * List of required parameter names. + */ + required?: string[]; + /** + * Definitions of each parameter. + */ + properties: { + [k: string]: { + /** + * The data type of the parameter. + */ + type: string; + /** + * A description of the expected parameter. + */ + description: string; + }; + }; + }; + }; + } + )[]; + /** + * If true, the response will be streamed back incrementally. + */ + stream?: boolean; + /** + * The maximum number of tokens to generate in the response. + */ + max_tokens?: number; + /** + * Controls the randomness of the output; higher values produce more random results. + */ + temperature?: number; + /** + * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses. + */ + top_p?: number; + /** + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises. + */ + top_k?: number; + /** + * Random seed for reproducibility of the generation. + */ + seed?: number; + /** + * Penalty for repeated tokens; higher values discourage repetition. + */ + repetition_penalty?: number; + /** + * Decreases the likelihood of the model repeating the same lines verbatim. + */ + frequency_penalty?: number; + /** + * Increases the likelihood of the model introducing new topics. + */ + presence_penalty?: number; +} +export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = + | { + /** + * The generated text response from the model + */ + response?: string; + /** + * An array of tool calls requests made during the response generation + */ + tool_calls?: { + /** + * The arguments passed to be passed to the tool call request + */ + arguments?: object; + /** + * The name of the tool to be called + */ + name?: string; + }[]; + } + | ReadableStream; +export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { + inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input; + postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output; +} export interface AiModels { "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification; "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage; @@ -3693,8 +4101,17 @@ export interface AiModels { "@cf/facebook/bart-large-cnn": BaseAiSummarization; "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText; "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText; + "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper; + "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En; + "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo; + "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell; + "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct; } -export type ModelListType = Record; +export type AiOptions = { + gateway?: GatewayOptions; + prefix?: string; + extraHeaders?: object; +}; export type AiModelsSearchParams = { author?: string; hide_experimental?: boolean; @@ -3722,14 +4139,17 @@ export type AiModelsSearchObject = { }; export interface InferenceUpstreamError extends Error {} export interface AiInternalError extends Error {} -export declare abstract class Ai { +export type AiModelListType = Record; +export declare abstract class Ai< + AiModelList extends AiModelListType = AiModels, +> { aiGatewayLogId: string | null; gateway(gatewayId: string): AiGateway; - run( + run( model: Name, - inputs: ModelList[Name]["inputs"], + inputs: AiModelList[Name]["inputs"], options?: AiOptions, - ): Promise; + ): Promise; public models(params?: AiModelsSearchParams): Promise; } export type GatewayOptions = {