From 34477c19ea75969fb554bd82b3b65e3b203b3d9f Mon Sep 17 00:00:00 2001
From: Kevin Jain <kevinvikas@cloudflare.com>
Date: Sat, 18 Jan 2025 12:43:18 -0600
Subject: [PATCH] Updated types for Workers AI (#3278)

* Updated types for Workers AI

We are moving types for supported models under a new NPM package (https://www.npmjs.com/package/@cloudflare/ai-types).

This PR removes all the model-specific types and converts Ai.run() into a generic function.

* Added generated snapshot files

* Reverted breaking changes and added new models to the list

Added whisper, whisper-tiny-en, whisper-large-v3-turbo, and llama-3.2-11b-vision-instruct to the list.

* Added flux-1-schnell to the model catalog
---
 types/defines/ai.d.ts                         | 446 +++++++++++++++++-
 .../generated-snapshot/2021-11-03/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2021-11-03/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2022-01-31/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2022-01-31/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2022-03-21/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2022-03-21/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2022-08-04/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2022-08-04/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2022-10-31/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2022-10-31/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2022-11-30/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2022-11-30/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2023-03-01/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2023-03-01/index.ts  | 442 ++++++++++++++++-
 .../generated-snapshot/2023-07-01/index.d.ts  | 440 ++++++++++++++++-
 types/generated-snapshot/2023-07-01/index.ts  | 442 ++++++++++++++++-
 .../experimental/index.d.ts                   | 440 ++++++++++++++++-
 .../generated-snapshot/experimental/index.ts  | 442 ++++++++++++++++-
 types/generated-snapshot/oldest/index.d.ts    | 440 ++++++++++++++++-
 types/generated-snapshot/oldest/index.ts      | 442 ++++++++++++++++-
 21 files changed, 9028 insertions(+), 238 deletions(-)
diff --git a/types/defines/ai.d.ts b/types/defines/ai.d.ts
index c989ea3b412..58e8a78c88c 100644
--- a/types/defines/ai.d.ts
+++ b/types/defines/ai.d.ts
@@ -100,12 +100,7 @@ export declare abstract class BaseAiTextEmbeddings {
   postProcessedOutputs: AiTextEmbeddingsOutput;
 }
 export type RoleScopedChatInput = {
-  role:
-    | "user"
-    | "assistant"
-    | "system"
-    | "tool"
-    | (string & NonNullable<unknown>);
+  role: "user" | "assistant" | "system" | "tool" | (string & NonNullable<unknown>);
   content: string;
   name?: string;
 };
@@ -157,7 +152,7 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[] | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -216,12 +211,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool calls requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments passed to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -278,8 +678,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string,
   hide_experimental?: boolean
@@ -307,13 +716,14 @@ export type AiModelsSearchObject = {
 }
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
-    options?: AiOptions
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+    inputs: AiModelList[Name]["inputs"],
+    options?: AiOptions,
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
diff --git a/types/generated-snapshot/2021-11-03/index.d.ts b/types/generated-snapshot/2021-11-03/index.d.ts
index bda796b40f1..9c38d2761e5 100755
--- a/types/generated-snapshot/2021-11-03/index.d.ts
+++ b/types/generated-snapshot/2021-11-03/index.d.ts
@@ -3560,7 +3560,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3619,12 +3622,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix it appended the the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool calls requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments passed to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3681,8 +4089,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3710,14 +4127,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2021-11-03/index.ts b/types/generated-snapshot/2021-11-03/index.ts
index dd0afeca7d5..4857c896083 100755
--- a/types/generated-snapshot/2021-11-03/index.ts
+++ b/types/generated-snapshot/2021-11-03/index.ts
@@ -3572,7 +3572,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3631,12 +3634,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3693,8 +4101,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3722,14 +4139,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-01-31/index.d.ts b/types/generated-snapshot/2022-01-31/index.d.ts
index 93285e309e9..2bff6ff09ad 100755
--- a/types/generated-snapshot/2022-01-31/index.d.ts
+++ b/types/generated-snapshot/2022-01-31/index.d.ts
@@ -3586,7 +3586,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3645,12 +3648,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3707,8 +4115,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3736,14 +4153,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-01-31/index.ts b/types/generated-snapshot/2022-01-31/index.ts
index 18c39606f83..851fca0131f 100755
--- a/types/generated-snapshot/2022-01-31/index.ts
+++ b/types/generated-snapshot/2022-01-31/index.ts
@@ -3598,7 +3598,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3657,12 +3660,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3719,8 +4127,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3748,14 +4165,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-03-21/index.d.ts b/types/generated-snapshot/2022-03-21/index.d.ts
index 048ffbdbf1c..8df865efc1f 100755
--- a/types/generated-snapshot/2022-03-21/index.d.ts
+++ b/types/generated-snapshot/2022-03-21/index.d.ts
@@ -3611,7 +3611,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3670,12 +3673,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3732,8 +4140,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3761,14 +4178,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-03-21/index.ts b/types/generated-snapshot/2022-03-21/index.ts
index 2879c25e8f7..61543c3255a 100755
--- a/types/generated-snapshot/2022-03-21/index.ts
+++ b/types/generated-snapshot/2022-03-21/index.ts
@@ -3623,7 +3623,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3682,12 +3685,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3744,8 +4152,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3773,14 +4190,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-08-04/index.d.ts b/types/generated-snapshot/2022-08-04/index.d.ts
index 25077047016..0906f7112c2 100755
--- a/types/generated-snapshot/2022-08-04/index.d.ts
+++ b/types/generated-snapshot/2022-08-04/index.d.ts
@@ -3612,7 +3612,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3671,12 +3674,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3733,8 +4141,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3762,14 +4179,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-08-04/index.ts b/types/generated-snapshot/2022-08-04/index.ts
index 62d6d3a621f..ff31605122c 100755
--- a/types/generated-snapshot/2022-08-04/index.ts
+++ b/types/generated-snapshot/2022-08-04/index.ts
@@ -3624,7 +3624,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3683,12 +3686,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3745,8 +4153,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3774,14 +4191,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-10-31/index.d.ts b/types/generated-snapshot/2022-10-31/index.d.ts
index 6de3f1835f3..2d931165003 100755
--- a/types/generated-snapshot/2022-10-31/index.d.ts
+++ b/types/generated-snapshot/2022-10-31/index.d.ts
@@ -3615,7 +3615,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3674,12 +3677,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3736,8 +4144,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3765,14 +4182,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-10-31/index.ts b/types/generated-snapshot/2022-10-31/index.ts
index c15275cd98a..c1cc3360b5d 100755
--- a/types/generated-snapshot/2022-10-31/index.ts
+++ b/types/generated-snapshot/2022-10-31/index.ts
@@ -3627,7 +3627,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3686,12 +3689,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3748,8 +4156,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3777,14 +4194,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-11-30/index.d.ts b/types/generated-snapshot/2022-11-30/index.d.ts
index e32f7da7001..3cca2fe9aaa 100755
--- a/types/generated-snapshot/2022-11-30/index.d.ts
+++ b/types/generated-snapshot/2022-11-30/index.d.ts
@@ -3620,7 +3620,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3679,12 +3682,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3741,8 +4149,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3770,14 +4187,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2022-11-30/index.ts b/types/generated-snapshot/2022-11-30/index.ts
index 8fc9786fe57..6bd3b20b5e0 100755
--- a/types/generated-snapshot/2022-11-30/index.ts
+++ b/types/generated-snapshot/2022-11-30/index.ts
@@ -3632,7 +3632,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3691,12 +3694,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3753,8 +4161,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3782,14 +4199,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2023-03-01/index.d.ts b/types/generated-snapshot/2023-03-01/index.d.ts
index 9f0d576887a..91405f291de 100755
--- a/types/generated-snapshot/2023-03-01/index.d.ts
+++ b/types/generated-snapshot/2023-03-01/index.d.ts
@@ -3622,7 +3622,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3681,12 +3684,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3743,8 +4151,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3772,14 +4189,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2023-03-01/index.ts b/types/generated-snapshot/2023-03-01/index.ts
index 773256074f6..efdccb36c69 100755
--- a/types/generated-snapshot/2023-03-01/index.ts
+++ b/types/generated-snapshot/2023-03-01/index.ts
@@ -3634,7 +3634,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3693,12 +3696,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3755,8 +4163,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3784,14 +4201,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/2023-07-01/index.d.ts b/types/generated-snapshot/2023-07-01/index.d.ts
index 45de60d6bd6..3c08c5aec86 100755
--- a/types/generated-snapshot/2023-07-01/index.d.ts
+++ b/types/generated-snapshot/2023-07-01/index.d.ts
@@ -3622,7 +3622,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3681,12 +3684,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3743,8 +4151,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3772,14 +4189,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/2023-07-01/index.ts b/types/generated-snapshot/2023-07-01/index.ts
index 6cf1b3319f6..e7eb5d811c8 100755
--- a/types/generated-snapshot/2023-07-01/index.ts
+++ b/types/generated-snapshot/2023-07-01/index.ts
@@ -3634,7 +3634,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3693,12 +3696,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3755,8 +4163,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3784,14 +4201,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/experimental/index.d.ts b/types/generated-snapshot/experimental/index.d.ts
index 17ab6707377..95af5a4cf95 100755
--- a/types/generated-snapshot/experimental/index.d.ts
+++ b/types/generated-snapshot/experimental/index.d.ts
@@ -3701,7 +3701,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3760,12 +3763,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3822,8 +4230,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3851,14 +4268,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/experimental/index.ts b/types/generated-snapshot/experimental/index.ts
index f517d4590a5..44f0ff5b1e0 100755
--- a/types/generated-snapshot/experimental/index.ts
+++ b/types/generated-snapshot/experimental/index.ts
@@ -3713,7 +3713,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3772,12 +3775,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3834,8 +4242,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3863,14 +4280,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {
diff --git a/types/generated-snapshot/oldest/index.d.ts b/types/generated-snapshot/oldest/index.d.ts
index 0873101fd61..1f41008aac4 100755
--- a/types/generated-snapshot/oldest/index.d.ts
+++ b/types/generated-snapshot/oldest/index.d.ts
@@ -3560,7 +3560,10 @@ type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 type AiTextGenerationOutput =
@@ -3619,12 +3622,417 @@ declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-type AiOptions = {
-  gateway?: GatewayOptions;
+type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3681,8 +4089,17 @@ interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-type ModelListType = Record<string, any>;
+type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3710,14 +4127,15 @@ type AiModelsSearchObject = {
 };
 interface InferenceUpstreamError extends Error {}
 interface AiInternalError extends Error {}
-declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+type AiModelListType = Record<string, any>;
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 type GatewayOptions = {
diff --git a/types/generated-snapshot/oldest/index.ts b/types/generated-snapshot/oldest/index.ts
index 95245ad7177..c79961dfb08 100755
--- a/types/generated-snapshot/oldest/index.ts
+++ b/types/generated-snapshot/oldest/index.ts
@@ -3572,7 +3572,10 @@ export type AiTextGenerationInput = {
   frequency_penalty?: number;
   presence_penalty?: number;
   messages?: RoleScopedChatInput[];
-  tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[];
+  tools?:
+    | AiTextGenerationToolInput[]
+    | AiTextGenerationToolLegacyInput[]
+    | (object & NonNullable<unknown>);
   functions?: AiTextGenerationFunctionsInput[];
 };
 export type AiTextGenerationOutput =
@@ -3631,12 +3634,417 @@ export declare abstract class BaseAiTranslation {
   inputs: AiTranslationInput;
   postProcessedOutputs: AiTranslationOutput;
 }
-export type AiOptions = {
-  gateway?: GatewayOptions;
+export type Ai_Cf_Openai_Whisper_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper {
+  inputs: Ai_Cf_Openai_Whisper_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+export type Ai_Cf_Openai_Whisper_Tiny_En_Input =
+  | string
+  | {
+      /**
+       * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+       */
+      audio: number[];
+    };
+export interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+  /**
+   * The transcription
+   */
+  text: string;
+  word_count?: number;
+  words?: {
+    word?: string;
+    /**
+     * The second this word begins in the recording
+     */
+    start?: number;
+    /**
+     * The ending second when the word completes
+     */
+    end?: number;
+  }[];
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+  inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+  /**
+   * Base64 encoded value of the audio data.
+   */
+  audio: string;
+  /**
+   * Supported tasks are 'translate' or 'transcribe'.
+   */
+  task?: string;
+  /**
+   * The language of the audio being transcribed or translated.
+   */
+  language?: string;
+  /**
+   * Preprocess the audio with a voice activity detection model.
+   */
+  vad_filter?: string;
+  /**
+   * A text prompt to help provide context to the model on the contents of the audio.
+   */
+  initial_prompt?: string;
+  /**
+   * The prefix is appended to the beginning of the output of the transcription and can guide the transcription result.
+   */
   prefix?: string;
-  extraHeaders?: object;
-};
-export type ModelType<Name extends keyof AiModels> = AiModels[Name];
+}
+export interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output {
+  transcription_info?: {
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+     */
+    language_probability?: number;
+    /**
+     * The total duration of the original audio file, in seconds.
+     */
+    duration?: number;
+    /**
+     * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+     */
+    duration_after_vad?: number;
+  };
+  /**
+   * The complete transcription of the audio.
+   */
+  text: string;
+  /**
+   * The total number of words in the transcription.
+   */
+  word_count?: number;
+  segments?: {
+    /**
+     * The starting time of the segment within the audio, in seconds.
+     */
+    start?: number;
+    /**
+     * The ending time of the segment within the audio, in seconds.
+     */
+    end?: number;
+    /**
+     * The transcription of the segment.
+     */
+    text?: string;
+    /**
+     * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+     */
+    temperature?: number;
+    /**
+     * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+     */
+    avg_logprob?: number;
+    /**
+     * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+     */
+    compression_ratio?: number;
+    /**
+     * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+     */
+    no_speech_prob?: number;
+    words?: {
+      /**
+       * The individual word transcribed from the audio.
+       */
+      word?: string;
+      /**
+       * The starting time of the word within the audio, in seconds.
+       */
+      start?: number;
+      /**
+       * The ending time of the word within the audio, in seconds.
+       */
+      end?: number;
+    }[];
+  };
+  /**
+   * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+   */
+  vtt?: string;
+}
+export declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo {
+  inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+  postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input {
+  /**
+   * A text description of the image you want to generate.
+   */
+  prompt: string;
+  /**
+   * The number of diffusion steps; higher values can improve quality but take longer.
+   */
+  steps?: number;
+}
+export interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output {
+  /**
+   * The generated image in Base64 format.
+   */
+  image?: string;
+}
+export declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell {
+  inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+  postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Prompt | Messages;
+export interface Prompt {
+  /**
+   * The input text prompt for the model to generate a response.
+   */
+  prompt: string;
+  image?: number[] | (string & NonNullable<unknown>);
+  /**
+   * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+   */
+  raw?: boolean;
+  /**
+   * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+  /**
+   * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+   */
+  lora?: string;
+}
+export interface Messages {
+  /**
+   * An array of message objects representing the conversation history.
+   */
+  messages: {
+    /**
+     * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+     */
+    role: string;
+    /**
+     * The content of the message as a string.
+     */
+    content: string;
+  }[];
+  image?: number[] | string;
+  functions?: {
+    name: string;
+    code: string;
+  }[];
+  /**
+   * A list of tools available for the assistant to use.
+   */
+  tools?: (
+    | {
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+          /**
+           * The type of the parameters object (usually 'object').
+           */
+          type: string;
+          /**
+           * List of required parameter names.
+           */
+          required?: string[];
+          /**
+           * Definitions of each parameter.
+           */
+          properties: {
+            [k: string]: {
+              /**
+               * The data type of the parameter.
+               */
+              type: string;
+              /**
+               * A description of the expected parameter.
+               */
+              description: string;
+            };
+          };
+        };
+      }
+    | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+          /**
+           * The name of the function.
+           */
+          name: string;
+          /**
+           * A brief description of what the function does.
+           */
+          description: string;
+          /**
+           * Schema defining the parameters accepted by the function.
+           */
+          parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+              [k: string]: {
+                /**
+                 * The data type of the parameter.
+                 */
+                type: string;
+                /**
+                 * A description of the expected parameter.
+                 */
+                description: string;
+              };
+            };
+          };
+        };
+      }
+  )[];
+  /**
+   * If true, the response will be streamed back incrementally.
+   */
+  stream?: boolean;
+  /**
+   * The maximum number of tokens to generate in the response.
+   */
+  max_tokens?: number;
+  /**
+   * Controls the randomness of the output; higher values produce more random results.
+   */
+  temperature?: number;
+  /**
+   * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+   */
+  top_p?: number;
+  /**
+   * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+   */
+  top_k?: number;
+  /**
+   * Random seed for reproducibility of the generation.
+   */
+  seed?: number;
+  /**
+   * Penalty for repeated tokens; higher values discourage repetition.
+   */
+  repetition_penalty?: number;
+  /**
+   * Decreases the likelihood of the model repeating the same lines verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * Increases the likelihood of the model introducing new topics.
+   */
+  presence_penalty?: number;
+}
+export type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output =
+  | {
+      /**
+       * The generated text response from the model
+       */
+      response?: string;
+      /**
+       * An array of tool call requests made during the response generation
+       */
+      tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called
+         */
+        name?: string;
+      }[];
+    }
+  | ReadableStream;
+export declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct {
+  inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+  postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
 export interface AiModels {
   "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
   "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
@@ -3693,8 +4101,17 @@ export interface AiModels {
   "@cf/facebook/bart-large-cnn": BaseAiSummarization;
   "@cf/unum/uform-gen2-qwen-500m": BaseAiImageToText;
   "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+  "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+  "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+  "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+  "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+  "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
 }
-export type ModelListType = Record<string, any>;
+export type AiOptions = {
+  gateway?: GatewayOptions;
+  prefix?: string;
+  extraHeaders?: object;
+};
 export type AiModelsSearchParams = {
   author?: string;
   hide_experimental?: boolean;
@@ -3722,14 +4139,17 @@ export type AiModelsSearchObject = {
 };
 export interface InferenceUpstreamError extends Error {}
 export interface AiInternalError extends Error {}
-export declare abstract class Ai<ModelList extends ModelListType = AiModels> {
+export type AiModelListType = Record<string, any>;
+export declare abstract class Ai<
+  AiModelList extends AiModelListType = AiModels,
+> {
   aiGatewayLogId: string | null;
   gateway(gatewayId: string): AiGateway;
-  run<Name extends keyof ModelList>(
+  run<Name extends keyof AiModelList>(
     model: Name,
-    inputs: ModelList[Name]["inputs"],
+    inputs: AiModelList[Name]["inputs"],
     options?: AiOptions,
-  ): Promise<ModelList[Name]["postProcessedOutputs"]>;
+  ): Promise<AiModelList[Name]["postProcessedOutputs"]>;
   public models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
 }
 export type GatewayOptions = {