diff --git a/library/models/anthropic/claude-3-5-haiku-20241022.yaml b/library/models/anthropic/claude-3-5-haiku-20241022.yaml deleted file mode 100644 index 4141278..0000000 --- a/library/models/anthropic/claude-3-5-haiku-20241022.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# This model is for Claude Code -claude-3-5-haiku-20241022: - name: Claude 3.5 Haiku (2024-10-22) - type: language - icon: https://claude.ai/favicon.ico - author: anthropic - description: Anthropic's fastest, most cost-effective model. - website: https://docs.anthropic.com/en/docs/about-claude/models/overview - providers: - - provider: anthropic - model: claude-3-5-haiku-20241022 - context_window: 200000 - max_tokens: 8192 - pricing: - type: fixed - input: 0.8 - output: 4 - web_search: 0.01 - input_cache_reads: 0.08 - input_cache_writes: 1 diff --git a/library/models/anthropic/claude-3.5-haiku.yaml b/library/models/anthropic/claude-3.5-haiku.yaml deleted file mode 100644 index 5811637..0000000 --- a/library/models/anthropic/claude-3.5-haiku.yaml +++ /dev/null @@ -1,19 +0,0 @@ -anthropic/claude-3.5-haiku: - name: Claude 3.5 Haiku - type: language - icon: https://claude.ai/favicon.ico - author: anthropic - description: Anthropic's fastest, most cost-effective model. 
- website: https://docs.anthropic.com/en/docs/about-claude/models/overview - providers: - - provider: anthropic - model: claude-3-5-haiku-latest - context_window: 200000 - max_tokens: 8192 - pricing: - type: fixed - input: 0.8 - output: 4 - web_search: 0.01 - input_cache_reads: 0.08 - input_cache_writes: 1 diff --git a/library/models/anthropic/claude-opus-4-5-20251124.yaml b/library/models/anthropic/claude-opus-4-5-20251124.yaml new file mode 100644 index 0000000..8418ebc --- /dev/null +++ b/library/models/anthropic/claude-opus-4-5-20251124.yaml @@ -0,0 +1,19 @@ +claude-opus-4-5-20251124: + name: Claude Opus 4.5 20251124 + type: language + icon: https://claude.ai/favicon.ico + author: anthropic + description: Claude Opus 4.5 is Anthropic's most powerful model, released on November 24, 2025. It is the leading model for coding, agents, and computer use, with state-of-the-art benchmark results. + website: https://docs.anthropic.com/en/docs/about-claude/models/overview + providers: + - provider: anthropic + model: claude-opus-4-5-20251124 + context_window: 200000 + max_tokens: 64000 + pricing: + type: fixed + input: 5 + output: 25 + web_search: 0.01 + input_cache_reads: 0.5 + input_cache_writes: 6.25 diff --git a/library/models/anthropic/claude-opus-4.1.yaml b/library/models/anthropic/claude-opus-4.1.yaml deleted file mode 100644 index 35472f8..0000000 --- a/library/models/anthropic/claude-opus-4.1.yaml +++ /dev/null @@ -1,19 +0,0 @@ -anthropic/claude-opus-4.1: - name: Claude Opus 4.1 - type: language - icon: https://claude.ai/favicon.ico - author: anthropic - description: Anthropic's most capable and intelligent model yet. Claude Opus 4.1 sets new standards in complex reasoning and advanced coding. 
- website: https://docs.anthropic.com/en/docs/about-claude/models/overview - providers: - - provider: anthropic - model: claude-opus-4-1 - context_window: 200000 - max_tokens: 32000 - pricing: - type: fixed - input: 15 - output: 75 - web_search: 0.01 - input_cache_reads: 1.5 - input_cache_writes: 18.75 diff --git a/library/models/anthropic/claude-opus-4.5.yaml b/library/models/anthropic/claude-opus-4.5.yaml new file mode 100644 index 0000000..da52d08 --- /dev/null +++ b/library/models/anthropic/claude-opus-4.5.yaml @@ -0,0 +1,19 @@ +anthropic/claude-opus-4.5: + name: Claude Opus 4.5 + type: language + icon: https://claude.ai/favicon.ico + author: anthropic + description: Claude Opus 4.5 is Anthropic's most powerful model, released on November 24, 2025. It is the leading model for coding, agents, and computer use, with state-of-the-art benchmark results and significantly improved pricing. + website: https://docs.anthropic.com/en/docs/about-claude/models/overview + providers: + - provider: anthropic + model: claude-opus-4-5 + context_window: 200000 + max_tokens: 64000 + pricing: + type: fixed + input: 5 + output: 25 + web_search: 0.01 + input_cache_reads: 0.5 + input_cache_writes: 6.25 diff --git a/library/models/anthropic/claude-sonnet-4-20250514.yaml b/library/models/anthropic/claude-sonnet-4-20250514.yaml deleted file mode 100644 index 06428d7..0000000 --- a/library/models/anthropic/claude-sonnet-4-20250514.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# This model is for Claude Code -claude-sonnet-4-20250514: - name: Claude Sonnet 4 (2025-05-14) - type: language - icon: https://claude.ai/favicon.ico - author: anthropic - description: Anthropic's high-performance model with exceptional reasoning and efficiency. 
- website: https://docs.anthropic.com/en/docs/about-claude/models/overview - providers: - - provider: anthropic - model: claude-sonnet-4-20250514 - context_window: 1000000 - max_tokens: 64000 - pricing: - type: tiered - tiers: - - predicate: { "<=": [{ "var": "input" }, 200000] } - pricing: - type: fixed - input: 3 - output: 15 - web_search: 0.01 - input_cache_reads: 0.3 - input_cache_writes: 3.75 - - pricing: - type: fixed - input: 6 - output: 22.5 - web_search: 0.01 - input_cache_reads: 0.6 - input_cache_writes: 7.5 diff --git a/library/models/anthropic/claude-sonnet-4.yaml b/library/models/anthropic/claude-sonnet-4.yaml deleted file mode 100644 index 571a5be..0000000 --- a/library/models/anthropic/claude-sonnet-4.yaml +++ /dev/null @@ -1,30 +0,0 @@ -anthropic/claude-sonnet-4: - name: Claude Sonnet 4 - type: language - icon: https://claude.ai/favicon.ico - author: anthropic - description: Anthropic's high-performance model with exceptional reasoning and efficiency. - website: https://docs.anthropic.com/en/docs/about-claude/models/overview - providers: - - provider: anthropic - model: claude-sonnet-4-0 - context_window: 1000000 - max_tokens: 64000 - pricing: - type: tiered - tiers: - - predicate: { "<=": [{ "var": "input" }, 200000] } - pricing: - type: fixed - input: 3 - output: 15 - web_search: 0.01 - input_cache_reads: 0.3 - input_cache_writes: 3.75 - - pricing: - type: fixed - input: 6 - output: 22.5 - web_search: 0.01 - input_cache_reads: 0.6 - input_cache_writes: 7.5 diff --git a/library/models/deepseek/deepseek-r1-0528.yaml b/library/models/deepseek/deepseek-r1-0528.yaml deleted file mode 100644 index 06c1338..0000000 --- a/library/models/deepseek/deepseek-r1-0528.yaml +++ /dev/null @@ -1,15 +0,0 @@ -deepseek/deepseek-r1-0528: - name: DeepSeek R1 0528 - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: May 28th update to the original DeepSeek R1 Performance on par with OpenAI o1, but open-sourced and with fully 
open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass. Fully open-source model. - website: https://api-docs.deepseek.com/news/news250528 - providers: - - provider: fireworks - model: accounts/fireworks/models/deepseek-r1-0528 - context_window: 160000 - pricing: - type: fixed - input: 3 - output: 8 diff --git a/library/models/deepseek/deepseek-r1.yaml b/library/models/deepseek/deepseek-r1.yaml deleted file mode 100644 index e6e8a47..0000000 --- a/library/models/deepseek/deepseek-r1.yaml +++ /dev/null @@ -1,15 +0,0 @@ -deepseek/deepseek-r1: - name: DeepSeek R1 - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: "DeepSeek R1 is here: Performance on par with OpenAI o1, but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass." - website: https://api-docs.deepseek.com/news/news250120 - providers: - - provider: fireworks - model: accounts/fireworks/models/deepseek-r1 - context_window: 160000 - pricing: - type: fixed - input: 3 - output: 8 diff --git a/library/models/deepseek/deepseek-v3-0324.yaml b/library/models/deepseek/deepseek-v3-0324.yaml deleted file mode 100644 index 9b6a420..0000000 --- a/library/models/deepseek/deepseek-v3-0324.yaml +++ /dev/null @@ -1,15 +0,0 @@ -deepseek/deepseek-v3-0324: - name: DeepSeek V3 0324 - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek V3 0324, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the DeepSeek V3 model and performs really well on a variety of tasks. 
- website: https://api-docs.deepseek.com/news/news250325 - providers: - - provider: fireworks - model: accounts/fireworks/models/deepseek-v3-0324 - context_window: 160000 - pricing: - type: fixed - input: 0.9 - output: 0.9 diff --git a/library/models/deepseek/deepseek-v3.1-terminus-thinking.yaml b/library/models/deepseek/deepseek-v3.1-terminus-thinking.yaml deleted file mode 100644 index 32f6a58..0000000 --- a/library/models/deepseek/deepseek-v3.1-terminus-thinking.yaml +++ /dev/null @@ -1,17 +0,0 @@ -deepseek/deepseek-v3.1-terminus-thinking: - name: DeepSeek V3.1 Terminus Thinking - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3.1 Terminus is an update to DeepSeek V3.1 that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean. The model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
- website: https://api-docs.deepseek.com/news/news250922 - providers: - - provider: deepseek-v3.1_terminus_expires_on_20251015 - model: deepseek-reasoner - context_window: 128000 - max_tokens: 64000 - pricing: - type: fixed - input: 0.56 - output: 1.68 - input_cache_reads: 0.07 diff --git a/library/models/deepseek/deepseek-v3.1-terminus.yaml b/library/models/deepseek/deepseek-v3.1-terminus.yaml deleted file mode 100644 index aaabe8a..0000000 --- a/library/models/deepseek/deepseek-v3.1-terminus.yaml +++ /dev/null @@ -1,24 +0,0 @@ -deepseek/deepseek-v3.1-terminus: - name: DeepSeek V3.1 Terminus - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3.1 Terminus is an update to DeepSeek V3.1 that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean. The model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
- website: https://api-docs.deepseek.com/news/news250922 - providers: - - provider: atlascloud - model: deepseek-ai/DeepSeek-V3.1-Terminus - context_window: 131072 - pricing: - type: fixed - input: 0.45 - output: 1.5 - - provider: deepseek-v3.1_terminus_expires_on_20251015 - model: deepseek-chat - context_window: 128000 - max_tokens: 8000 - pricing: - type: fixed - input: 0.56 - output: 1.68 - input_cache_reads: 0.07 diff --git a/library/models/deepseek/deepseek-v3.1.yaml b/library/models/deepseek/deepseek-v3.1.yaml deleted file mode 100644 index 8a75b6b..0000000 --- a/library/models/deepseek/deepseek-v3.1.yaml +++ /dev/null @@ -1,15 +0,0 @@ -deepseek/deepseek-v3.1: - name: DeepSeek V3.1 - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the reasoning enabled boolean. The model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
- website: https://api-docs.deepseek.com/news/news250821 - providers: - - provider: fireworks - model: accounts/fireworks/models/deepseek-v3p1 - context_window: 160000 - pricing: - type: fixed - input: 0.56 - output: 1.68 diff --git a/library/models/deepseek/deepseek-v3.2-exp-thinking.yaml b/library/models/deepseek/deepseek-v3.2-exp-thinking.yaml deleted file mode 100644 index e31a06b..0000000 --- a/library/models/deepseek/deepseek-v3.2-exp-thinking.yaml +++ /dev/null @@ -1,17 +0,0 @@ -deepseek/deepseek-v3.2-exp-thinking: - name: DeepSeek V3.2 Exp Thinking - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the reasoning enabled boolean. The model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs. 
- website: https://api-docs.deepseek.com/news/news250929 - providers: - - provider: deepseek - model: deepseek-reasoner - context_window: 128000 - max_tokens: 64000 - pricing: - type: fixed - input: 0.28 - output: 0.42 - input_cache_reads: 0.028 diff --git a/library/models/deepseek/deepseek-v3.2-exp.yaml b/library/models/deepseek/deepseek-v3.2-exp.yaml deleted file mode 100644 index 963990b..0000000 --- a/library/models/deepseek/deepseek-v3.2-exp.yaml +++ /dev/null @@ -1,24 +0,0 @@ -deepseek/deepseek-v3.2-exp: - name: DeepSeek V3.2 Exp - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the reasoning enabled boolean. The model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs. 
- website: https://api-docs.deepseek.com/news/news250929 - providers: - - provider: atlascloud - model: deepseek-ai/DeepSeek-V3.2-Exp - context_window: 163840 - pricing: - type: fixed - input: 0.28 - output: 0.4 - - provider: deepseek - model: deepseek-chat - context_window: 128000 - max_tokens: 8000 - pricing: - type: fixed - input: 0.28 - output: 0.42 - input_cache_reads: 0.028 diff --git a/library/models/deepseek/deepseek-v3.2-thinking.yaml b/library/models/deepseek/deepseek-v3.2-thinking.yaml new file mode 100644 index 0000000..0521bc6 --- /dev/null +++ b/library/models/deepseek/deepseek-v3.2-thinking.yaml @@ -0,0 +1,17 @@ +deepseek/deepseek-v3.2-thinking: + name: DeepSeek V3.2 Thinking + type: language + icon: https://www.deepseek.com/favicon.ico + author: deepseek + description: DeepSeek-V3.2 Thinking is the reasoning variant of DeepSeek-V3.2, officially released on December 1, 2025. It uses extended reasoning chains and has a larger max output token limit (64K) for complex problems requiring step-by-step analysis. + website: https://api-docs.deepseek.com/news/news251201 + providers: + - provider: deepseek + model: deepseek-reasoner + context_window: 128000 + max_tokens: 64000 + pricing: + type: fixed + input: 0.28 + output: 0.42 + input_cache_reads: 0.028 diff --git a/library/models/deepseek/deepseek-v3.2.yaml b/library/models/deepseek/deepseek-v3.2.yaml new file mode 100644 index 0000000..277c166 --- /dev/null +++ b/library/models/deepseek/deepseek-v3.2.yaml @@ -0,0 +1,32 @@ +deepseek/deepseek-v3.2: + name: DeepSeek V3.2 + type: language + icon: https://www.deepseek.com/favicon.ico + author: deepseek + description: DeepSeek-V3.2 is a large language model officially released on December 1, 2025. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. 
The model supports up to 164K tokens context window and excels at reasoning, coding, and agentic tool-use tasks. + website: https://api-docs.deepseek.com/news/news251201 + providers: + - provider: atlascloud + model: deepseek-ai/DeepSeek-V3.2 + context_window: 163840 + pricing: + type: fixed + input: 0.26 + output: 0.38 + - provider: deepseek + model: deepseek-chat + context_window: 128000 + max_tokens: 8000 + pricing: + type: fixed + input: 0.28 + output: 0.42 + input_cache_reads: 0.028 + - provider: fireworks + model: accounts/fireworks/models/deepseek-v3p2 + context_window: 163840 + pricing: + type: fixed + input: 0.56 + output: 1.68 + input_cache_reads: 0.28 diff --git a/library/models/deepseek/deepseek-v3.yaml b/library/models/deepseek/deepseek-v3.yaml deleted file mode 100644 index 0935470..0000000 --- a/library/models/deepseek/deepseek-v3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -deepseek/deepseek-v3: - name: DeepSeek V3 - type: language - icon: https://www.deepseek.com/favicon.ico - author: deepseek - description: DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models. 
- website: https://api-docs.deepseek.com/news/news1226 - providers: - - provider: fireworks - model: accounts/fireworks/models/deepseek-v3 - context_window: 128000 - pricing: - type: fixed - input: 0.9 - output: 0.9 diff --git a/library/models/google/gemini-2.5-flash-image-preview.yaml b/library/models/google/gemini-2.5-flash-image-preview.yaml deleted file mode 100644 index 55fad56..0000000 --- a/library/models/google/gemini-2.5-flash-image-preview.yaml +++ /dev/null @@ -1,16 +0,0 @@ -google/gemini-2.5-flash-image-preview: - name: Gemini 2.5 Flash Image Preview - type: image - icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg - author: google - description: Gemini 2.5 Flash Image Preview is Google's latest, fastest, and most efficient natively multimodal model that lets you generate and edit images conversationally. - website: https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-image-preview - providers: - - provider: google - model: gemini-2.5-flash-image-preview - context_window: 65536 - max_tokens: 32768 - pricing: - type: fixed - input: 0.3 - output: 30 diff --git a/library/models/google/gemini-2.5-flash-image.yaml b/library/models/google/gemini-2.5-flash-image.yaml new file mode 100644 index 0000000..6310205 --- /dev/null +++ b/library/models/google/gemini-2.5-flash-image.yaml @@ -0,0 +1,16 @@ +google/gemini-2.5-flash-image: + name: Gemini 2.5 Flash Image (Nano Banana) + type: image + icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg + author: google + description: State-of-the-art image generation and editing model, known as Nano Banana. It generates and edits images conversationally with high quality and efficiency. 
+ website: https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-image + providers: + - provider: google + model: gemini-2.5-flash-image + context_window: 65536 + max_tokens: 32768 + pricing: + type: fixed + input: 0.3 + output: 0.039 diff --git a/library/models/google/gemini-2.5-flash.yaml b/library/models/google/gemini-2.5-flash.yaml index 5aae53a..b64cbe6 100644 --- a/library/models/google/gemini-2.5-flash.yaml +++ b/library/models/google/gemini-2.5-flash.yaml @@ -15,5 +15,5 @@ google/gemini-2.5-flash: input: 0.3 output: 2.5 web_search: 0.035 - input_cache_reads: 0.075 - input_cache_writes: 0.383 + input_cache_reads: 0.03 + input_cache_writes: 1.0 diff --git a/library/models/google/gemini-2.5-pro.yaml b/library/models/google/gemini-2.5-pro.yaml index 691c349..0ee48f8 100644 --- a/library/models/google/gemini-2.5-pro.yaml +++ b/library/models/google/gemini-2.5-pro.yaml @@ -19,12 +19,12 @@ google/gemini-2.5-pro: input: 1.25 output: 10 web_search: 0.035 - input_cache_reads: 0.31 - input_cache_writes: 1.625 + input_cache_reads: 0.125 + input_cache_writes: 4.5 - pricing: type: fixed input: 2.5 output: 15 web_search: 0.035 - input_cache_reads: 0.625 - input_cache_writes: 2.875 + input_cache_reads: 0.25 + input_cache_writes: 4.5 diff --git a/library/models/google/gemini-3-flash-preview.yaml b/library/models/google/gemini-3-flash-preview.yaml new file mode 100644 index 0000000..456cf53 --- /dev/null +++ b/library/models/google/gemini-3-flash-preview.yaml @@ -0,0 +1,19 @@ +google/gemini-3-flash-preview: + name: Gemini 3 Flash + type: language + icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg + author: google + description: Google's frontier intelligence built for speed at a fraction of the cost. It offers high efficiency, low latency, and strong multimodal capabilities, making it ideal for high-volume tasks and agentic use cases. 
+ website: https://ai.google.dev/gemini-api/docs/models#gemini-3-flash + providers: + - provider: google + model: gemini-3-flash-preview + context_window: 1048576 + max_tokens: 65536 + pricing: + type: fixed + input: 0.5 + output: 3.0 + web_search: 0.035 + input_cache_reads: 0.05 + input_cache_writes: 1.0 diff --git a/library/models/google/gemini-3-pro-image-preview.yaml b/library/models/google/gemini-3-pro-image-preview.yaml new file mode 100644 index 0000000..0623022 --- /dev/null +++ b/library/models/google/gemini-3-pro-image-preview.yaml @@ -0,0 +1,17 @@ +google/gemini-3-pro-image-preview: + name: Gemini 3 Pro Image (NanoBanana Pro) + type: image + icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg + author: google + description: Google's most intelligent image generation model, known as NanoBanana Pro in the community. It produces studio-quality visuals with photographic realism, supports native 4K resolution, and accurately renders text within images. + website: https://ai.google.dev/gemini-api/docs/models#gemini-3-pro-image + providers: + - provider: google + model: gemini-3-pro-image-preview + context_window: 65536 + max_tokens: 32768 + pricing: + type: fixed + input: 2.0 + output: 12.0 + image_output: 120.0 diff --git a/library/models/google/gemini-3-pro-preview.yaml b/library/models/google/gemini-3-pro-preview.yaml new file mode 100644 index 0000000..539bf4d --- /dev/null +++ b/library/models/google/gemini-3-pro-preview.yaml @@ -0,0 +1,30 @@ +google/gemini-3-pro-preview: + name: Gemini 3 Pro + type: language + icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg + author: google + description: Gemini 3 Pro is Google's state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets and multimodal inputs. 
+ website: https://ai.google.dev/gemini-api/docs/models#gemini-3-pro + providers: + - provider: google + model: gemini-3-pro-preview + context_window: 1048576 + max_tokens: 65536 + pricing: + type: tiered + tiers: + - predicate: { "<=": [{ "var": "input" }, 200000] } + pricing: + type: fixed + input: 2.0 + output: 12.0 + web_search: 0.035 + input_cache_reads: 0.2 + input_cache_writes: 4.5 + - pricing: + type: fixed + input: 4.0 + output: 18.0 + web_search: 0.035 + input_cache_reads: 0.4 + input_cache_writes: 4.5 diff --git a/library/models/google/imagen-4.0-fast-generate-001.yaml b/library/models/google/imagen-4.0-fast-generate-001.yaml deleted file mode 100644 index 17107b9..0000000 --- a/library/models/google/imagen-4.0-fast-generate-001.yaml +++ /dev/null @@ -1,14 +0,0 @@ -google/imagen-4.0-fast-generate-001: - name: Imagen 4 Fast - type: image - icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg - author: google - description: Imagen is Google's high-fidelity image generation model, capable of generating realistic and high quality images from text prompts. All generated images include a SynthID watermark. - website: https://ai.google.dev/gemini-api/docs/imagen#imagen-4 - providers: - - provider: google - model: imagen-4.0-fast-generate-001 - context_window: 480 - pricing: - type: fixed - image: 0.02 diff --git a/library/models/google/imagen-4.0-generate-001.yaml b/library/models/google/imagen-4.0-generate-001.yaml deleted file mode 100644 index 77ce807..0000000 --- a/library/models/google/imagen-4.0-generate-001.yaml +++ /dev/null @@ -1,14 +0,0 @@ -google/imagen-4.0-generate-001: - name: Imagen 4 Standard - type: image - icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg - author: google - description: Imagen is Google's high-fidelity image generation model, capable of generating realistic and high quality images from text prompts. All generated images include a SynthID watermark. 
- website: https://ai.google.dev/gemini-api/docs/imagen#imagen-4 - providers: - - provider: google - model: imagen-4.0-generate-001 - context_window: 480 - pricing: - type: fixed - image: 0.04 diff --git a/library/models/google/imagen-4.0-ultra-generate-001.yaml b/library/models/google/imagen-4.0-ultra-generate-001.yaml deleted file mode 100644 index 5b42929..0000000 --- a/library/models/google/imagen-4.0-ultra-generate-001.yaml +++ /dev/null @@ -1,14 +0,0 @@ -google/imagen-4.0-ultra-generate-001: - name: Imagen 4 Ultra - type: image - icon: https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg - author: google - description: Imagen is Google's high-fidelity image generation model, capable of generating realistic and high quality images from text prompts. All generated images include a SynthID watermark. - website: https://ai.google.dev/gemini-api/docs/imagen#imagen-4 - providers: - - provider: google - model: imagen-4.0-ultra-generate-001 - context_window: 480 - pricing: - type: fixed - image: 0.06 diff --git a/library/models/moonshotai/kimi-k2-instruct.yaml b/library/models/moonshotai/kimi-k2-instruct.yaml index 40db8e7..ec30f9e 100644 --- a/library/models/moonshotai/kimi-k2-instruct.yaml +++ b/library/models/moonshotai/kimi-k2-instruct.yaml @@ -13,3 +13,10 @@ moonshotai/kimi-k2-instruct: type: fixed input: 0.6 output: 2.5 + - provider: atlascloud + model: moonshotai/Kimi-K2-Instruct + context_window: 128000 + pricing: + type: fixed + input: 0.6 + output: 2.5 diff --git a/library/models/moonshotai/kimi-k2-thinking.yaml b/library/models/moonshotai/kimi-k2-thinking.yaml new file mode 100644 index 0000000..f5d6404 --- /dev/null +++ b/library/models/moonshotai/kimi-k2-thinking.yaml @@ -0,0 +1,23 @@ +moonshotai/kimi-k2-thinking: + name: Kimi K2 Thinking + type: language + icon: https://www.kimi.com/favicon.ico + author: moonshotai + description: Kimi K2 Thinking is Moonshot AI's specialized thinking agent released in November 2025. 
It features 1 trillion total parameters with 32 billion active per forward pass, using a Mixture-of-Experts (MoE) architecture. It excels at complex, multi-step reasoning and dynamic tool invocation, capable of maintaining stable behavior across 200-300 consecutive tool calls. + website: https://moonshotai.github.io/Kimi-K2/ + providers: + - provider: atlascloud + model: moonshotai/Kimi-K2-Thinking + context_window: 256000 + pricing: + type: fixed + input: 0.6 + output: 2.5 + - provider: fireworks + model: fireworks/kimi-k2-thinking + context_window: 256000 + pricing: + type: fixed + input: 0.6 + output: 2.5 + input_cache_reads: 0.3 diff --git a/library/models/openai/chatgpt-image-latest.yaml b/library/models/openai/chatgpt-image-latest.yaml new file mode 100644 index 0000000..6ff9c68 --- /dev/null +++ b/library/models/openai/chatgpt-image-latest.yaml @@ -0,0 +1,81 @@ +openai/chatgpt-image-latest: + name: chatgpt-image-latest + type: image + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-image-1.png + author: openai + description: GPT Image Latest points to the image snapshot currently used in ChatGPT. 
+ website: https://platform.openai.com/docs/models/chatgpt-image-latest + providers: + - provider: openai + model: chatgpt-image-latest + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_quality" }, "low"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.009 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.013 + - predicate: { "==": [{ "var": "image_quality" }, "medium"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.034 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.05 + - pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.133 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.2 diff --git a/library/models/openai/codex-mini-latest.yaml b/library/models/openai/codex-mini-latest.yaml new file mode 100644 index 0000000..43f1710 --- /dev/null +++ b/library/models/openai/codex-mini-latest.yaml @@ -0,0 +1,17 @@ +openai/codex-mini-latest: + name: codex-mini-latest + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: Codex Mini Latest is the latest version of OpenAI's 
smaller Codex model for coding tasks. + website: https://platform.openai.com/docs/models/codex-mini-latest + providers: + - provider: openai + model: codex-mini-latest + context_window: 200000 + max_tokens: 100000 + pricing: + type: fixed + input: 1.5 + output: 6 + input_cache_reads: 0.375 diff --git a/library/models/openai/computer-use-preview.yaml b/library/models/openai/computer-use-preview.yaml new file mode 100644 index 0000000..ed364a0 --- /dev/null +++ b/library/models/openai/computer-use-preview.yaml @@ -0,0 +1,16 @@ +openai/computer-use-preview: + name: computer-use-preview + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/computer-use-preview.png + author: openai + description: Computer Use Preview is OpenAI's model designed for computer control and automation tasks. + website: https://platform.openai.com/docs/models/computer-use-preview + providers: + - provider: openai + model: computer-use-preview + context_window: 8192 + max_tokens: 1024 + pricing: + type: fixed + input: 3 + output: 12 diff --git a/library/models/openai/gpt-4.1 mini.yaml b/library/models/openai/gpt-4.1 mini.yaml deleted file mode 100644 index 074d412..0000000 --- a/library/models/openai/gpt-4.1 mini.yaml +++ /dev/null @@ -1,26 +0,0 @@ -openai/gpt-4.1-mini: - name: GPT-4.1 mini - type: language - icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-4.1-mini.png - author: openai - description: GPT-4.1 mini excels at instruction following and tool calling. It features a 1M token context window, and low latency without a reasoning step. 
- website: https://platform.openai.com/docs/models/gpt-4.1-mini - providers: - - provider: openai - model: gpt-4.1-mini - context_window: 1047576 - max_tokens: 32768 - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "service_tier" }, "default"] } - pricing: - type: fixed - input: 0.4 - output: 1.6 - input_cache_reads: 0.1 - - pricing: - type: fixed - input: 0.7 - output: 2.8 - input_cache_reads: 0.175 diff --git a/library/models/openai/gpt-4.1 nano.yaml b/library/models/openai/gpt-4.1 nano.yaml deleted file mode 100644 index 3a5d767..0000000 --- a/library/models/openai/gpt-4.1 nano.yaml +++ /dev/null @@ -1,26 +0,0 @@ -openai/gpt-4.1-nano: - name: GPT-4.1 nano - type: language - icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-4.1-nano.png - author: openai - description: GPT-4.1 nano excels at instruction following and tool calling. It features a 1M token context window, and low latency without a reasoning step. - website: https://platform.openai.com/docs/models/gpt-4.1-nano - providers: - - provider: openai - model: gpt-4.1-nano - context_window: 1047576 - max_tokens: 32768 - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "service_tier" }, "default"] } - pricing: - type: fixed - input: 0.1 - output: 0.4 - input_cache_reads: 0.025 - - pricing: - type: fixed - input: 0.2 - output: 0.8 - input_cache_reads: 0.05 diff --git a/library/models/openai/gpt-4.1.yaml b/library/models/openai/gpt-4.1.yaml deleted file mode 100644 index 87faaa5..0000000 --- a/library/models/openai/gpt-4.1.yaml +++ /dev/null @@ -1,26 +0,0 @@ -openai/gpt-4.1: - name: GPT-4.1 - type: language - icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-4.1.png - author: openai - description: GPT-4.1 excels at instruction following and tool calling, with broad knowledge across domains. It features a 1M token context window, and low latency without a reasoning step. 
- website: https://platform.openai.com/docs/models/gpt-4.1 - providers: - - provider: openai - model: gpt-4.1 - context_window: 1047576 - max_tokens: 32768 - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "service_tier" }, "default"] } - pricing: - type: fixed - input: 2 - output: 8 - input_cache_reads: 0.5 - - pricing: - type: fixed - input: 3.5 - output: 14 - input_cache_reads: 0.875 diff --git a/library/models/openai/gpt-4o-realtime-preview.yaml b/library/models/openai/gpt-4o-realtime-preview.yaml new file mode 100644 index 0000000..2c54f8a --- /dev/null +++ b/library/models/openai/gpt-4o-realtime-preview.yaml @@ -0,0 +1,20 @@ +openai/gpt-4o-realtime-preview: + name: gpt-4o-realtime-preview + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-4o-realtime-preview.png + author: openai + description: GPT-4o Realtime Preview is a preview of OpenAI's real-time conversational GPT-4o model. + website: https://platform.openai.com/docs/models/gpt-4o-realtime-preview + providers: + - provider: openai + model: gpt-4o-realtime-preview + context_window: 32000 + max_tokens: 4096 + pricing: + type: fixed + input: 5 + input_audio: 40 + output: 20 + output_audio: 80 + input_cache_reads: 2.5 + input_audio_cache_reads: 2.5 diff --git a/library/models/openai/gpt-5-chat.yaml b/library/models/openai/gpt-5-chat-latest.yaml similarity index 60% rename from library/models/openai/gpt-5-chat.yaml rename to library/models/openai/gpt-5-chat-latest.yaml index 23b5bd9..2018961 100644 --- a/library/models/openai/gpt-5-chat.yaml +++ b/library/models/openai/gpt-5-chat-latest.yaml @@ -1,9 +1,9 @@ -openai/gpt-5-chat: - name: GPT-5 Chat +openai/gpt-5-chat-latest: + name: gpt-5-chat-latest type: language icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5-chat-latest.png author: openai - description: GPT-5 Chat points to the GPT-5 snapshot currently used in ChatGPT. 
OpenAI recommends GPT-5 for most API usage, but feel free to use this GPT-5 Chat model to test OpenAI's latest improvements for chat use cases. + description: GPT-5 Chat Latest points to the GPT-5 snapshot currently used in ChatGPT. OpenAI recommends GPT-5 for most API usage, but feel free to use this model to test OpenAI's latest improvements for chat use cases. website: https://platform.openai.com/docs/models/gpt-5-chat-latest providers: - provider: openai diff --git a/library/models/openai/gpt-5-codex.yaml b/library/models/openai/gpt-5-codex.yaml new file mode 100644 index 0000000..b6977b3 --- /dev/null +++ b/library/models/openai/gpt-5-codex.yaml @@ -0,0 +1,26 @@ +openai/gpt-5-codex: + name: gpt-5-codex + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5 Codex is OpenAI's agentic coding model optimized for software development tasks. + website: https://platform.openai.com/docs/models/gpt-5-codex + providers: + - provider: openai + model: gpt-5-codex + context_window: 400000 + max_tokens: 128000 + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "service_tier" }, "default"] } + pricing: + type: fixed + input: 1.25 + output: 10 + input_cache_reads: 0.125 + - pricing: + type: fixed + input: 2.5 + output: 20 + input_cache_reads: 0.25 diff --git a/library/models/openai/gpt-5-pro.yaml b/library/models/openai/gpt-5-pro.yaml new file mode 100644 index 0000000..b7a0caa --- /dev/null +++ b/library/models/openai/gpt-5-pro.yaml @@ -0,0 +1,16 @@ +openai/gpt-5-pro: + name: gpt-5-pro + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5 Pro is OpenAI's powerful reasoning model, offering improved clarity, relevance, and structure in responses, particularly for writing, data science, and business questions. 
+ website: https://platform.openai.com/docs/models/gpt-5-pro + providers: + - provider: openai + model: gpt-5-pro + context_window: 400000 + max_tokens: 272000 + pricing: + type: fixed + input: 15 + output: 120 diff --git a/library/models/openai/gpt-5.1-chat-latest.yaml b/library/models/openai/gpt-5.1-chat-latest.yaml new file mode 100644 index 0000000..db36ee2 --- /dev/null +++ b/library/models/openai/gpt-5.1-chat-latest.yaml @@ -0,0 +1,17 @@ +openai/gpt-5.1-chat-latest: + name: gpt-5.1-chat-latest + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5-chat-latest.png + author: openai + description: GPT-5.1 Chat Latest points to the GPT-5.1 snapshot currently used in ChatGPT. OpenAI recommends GPT-5.1 for most API usage, but feel free to use this model to test OpenAI's latest improvements for chat use cases. + website: https://platform.openai.com/docs/models/gpt-5.1-chat-latest + providers: + - provider: openai + model: gpt-5.1-chat-latest + context_window: 128000 + max_tokens: 16384 + pricing: + type: fixed + input: 1.25 + output: 10 + input_cache_reads: 0.125 diff --git a/library/models/openai/gpt-5.1-codex-max.yaml b/library/models/openai/gpt-5.1-codex-max.yaml new file mode 100644 index 0000000..742ade0 --- /dev/null +++ b/library/models/openai/gpt-5.1-codex-max.yaml @@ -0,0 +1,26 @@ +openai/gpt-5.1-codex-max: + name: gpt-5.1-codex-max + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5.1-Codex-Max is OpenAI's advanced agentic coding model released on November 19, 2025. It is capable of performing long-running tasks using "compaction" for continuous work within large contexts. It includes tools like apply_patch for reliable code editing and a shell tool for running commands.
+ website: https://platform.openai.com/docs/models/gpt-5.1-codex-max + providers: + - provider: openai + model: gpt-5.1-codex-max + context_window: 400000 + max_tokens: 128000 + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "service_tier" }, "default"] } + pricing: + type: fixed + input: 1.25 + output: 10 + input_cache_reads: 0.125 + - pricing: + type: fixed + input: 2.5 + output: 20 + input_cache_reads: 0.25 diff --git a/library/models/openai/gpt-5.1-codex-mini.yaml b/library/models/openai/gpt-5.1-codex-mini.yaml new file mode 100644 index 0000000..bb96636 --- /dev/null +++ b/library/models/openai/gpt-5.1-codex-mini.yaml @@ -0,0 +1,17 @@ +openai/gpt-5.1-codex-mini: + name: gpt-5.1-codex-mini + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5.1 Codex Mini is a smaller, faster version of the Codex model for coding tasks. + website: https://platform.openai.com/docs/models/gpt-5.1-codex-mini + providers: + - provider: openai + model: gpt-5.1-codex-mini + context_window: 400000 + max_tokens: 128000 + pricing: + type: fixed + input: 0.25 + output: 2 + input_cache_reads: 0.025 diff --git a/library/models/openai/gpt-5.1-codex.yaml b/library/models/openai/gpt-5.1-codex.yaml new file mode 100644 index 0000000..65acb36 --- /dev/null +++ b/library/models/openai/gpt-5.1-codex.yaml @@ -0,0 +1,26 @@ +openai/gpt-5.1-codex: + name: gpt-5.1-codex + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5.1 Codex is OpenAI's agentic coding model optimized for software development tasks. 
+ website: https://platform.openai.com/docs/models/gpt-5.1-codex + providers: + - provider: openai + model: gpt-5.1-codex + context_window: 400000 + max_tokens: 128000 + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "service_tier" }, "default"] } + pricing: + type: fixed + input: 1.25 + output: 10 + input_cache_reads: 0.125 + - pricing: + type: fixed + input: 2.5 + output: 20 + input_cache_reads: 0.25 diff --git a/library/models/openai/gpt-5.2-chat-latest.yaml b/library/models/openai/gpt-5.2-chat-latest.yaml new file mode 100644 index 0000000..79fbe68 --- /dev/null +++ b/library/models/openai/gpt-5.2-chat-latest.yaml @@ -0,0 +1,17 @@ +openai/gpt-5.2-chat-latest: + name: gpt-5.2-chat-latest + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5-chat-latest.png + author: openai + description: GPT-5.2 Chat Latest points to the GPT-5.2 snapshot currently used in ChatGPT. OpenAI recommends GPT-5.2 for most API usage, but feel free to use this model to test OpenAI's latest improvements for chat use cases. + website: https://platform.openai.com/docs/models/gpt-5.2-chat-latest + providers: + - provider: openai + model: gpt-5.2-chat-latest + context_window: 128000 + max_tokens: 16384 + pricing: + type: fixed + input: 1.75 + output: 14 + input_cache_reads: 0.175 diff --git a/library/models/openai/gpt-5.2-pro.yaml b/library/models/openai/gpt-5.2-pro.yaml new file mode 100644 index 0000000..9382486 --- /dev/null +++ b/library/models/openai/gpt-5.2-pro.yaml @@ -0,0 +1,16 @@ +openai/gpt-5.2-pro: + name: gpt-5.2-pro + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5.2 Pro is OpenAI's most powerful reasoning model, an upgrade to GPT-5 Pro with improved clarity, relevance, and structure in responses. 
+ website: https://platform.openai.com/docs/models/gpt-5.2-pro + providers: + - provider: openai + model: gpt-5.2-pro + context_window: 400000 + max_tokens: 128000 + pricing: + type: fixed + input: 21 + output: 168 diff --git a/library/models/openai/gpt-5.2.yaml b/library/models/openai/gpt-5.2.yaml new file mode 100644 index 0000000..0b445f7 --- /dev/null +++ b/library/models/openai/gpt-5.2.yaml @@ -0,0 +1,32 @@ +openai/gpt-5.2: + name: gpt-5.2 + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.png + author: openai + description: GPT-5.2 is OpenAI's most advanced flagship model for coding, reasoning, and agentic tasks across domains. + website: https://platform.openai.com/docs/models/gpt-5.2 + providers: + - provider: openai + model: gpt-5.2 + context_window: 400000 + max_tokens: 128000 + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "service_tier" }, "flex"] } + pricing: + type: fixed + input: 0.875 + output: 7 + input_cache_reads: 0.0875 + - predicate: { "==": [{ "var": "service_tier" }, "default"] } + pricing: + type: fixed + input: 1.75 + output: 14 + input_cache_reads: 0.175 + - pricing: + type: fixed + input: 3.5 + output: 28 + input_cache_reads: 0.35 diff --git a/library/models/openai/gpt-audio-mini.yaml b/library/models/openai/gpt-audio-mini.yaml new file mode 100644 index 0000000..df7e527 --- /dev/null +++ b/library/models/openai/gpt-audio-mini.yaml @@ -0,0 +1,18 @@ +openai/gpt-audio-mini: + name: gpt-audio-mini + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-audio.png + author: openai + description: GPT Audio Mini is a smaller, faster audio model for audio input and output tasks. 
+ website: https://platform.openai.com/docs/models/gpt-audio-mini + providers: + - provider: openai + model: gpt-audio-mini + context_window: 128000 + max_tokens: 16384 + pricing: + type: fixed + input: 0.6 + input_audio: 10 + output: 2.4 + output_audio: 20 diff --git a/library/models/openai/gpt-audio.yaml b/library/models/openai/gpt-audio.yaml index 7459519..2c7b004 100644 --- a/library/models/openai/gpt-audio.yaml +++ b/library/models/openai/gpt-audio.yaml @@ -13,6 +13,6 @@ openai/gpt-audio: pricing: type: fixed input: 2.5 - input_audio: 40 + input_audio: 32 output: 10 - output_audio: 80 + output_audio: 64 diff --git a/library/models/openai/gpt-image-1-mini.yaml b/library/models/openai/gpt-image-1-mini.yaml new file mode 100644 index 0000000..116fffd --- /dev/null +++ b/library/models/openai/gpt-image-1-mini.yaml @@ -0,0 +1,75 @@ +openai/gpt-image-1-mini: + name: gpt-image-1-mini + type: image + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-image-1.png + author: openai + description: GPT Image 1 Mini is a smaller, faster image generation model optimized for cost efficiency. 
+ website: https://platform.openai.com/docs/models/gpt-image-1-mini + providers: + - provider: openai + model: gpt-image-1-mini + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_quality" }, "low"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.005 + - pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.006 + - predicate: { "==": [{ "var": "image_quality" }, "medium"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.011 + - pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.015 + - pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.036 + - pricing: + type: fixed + input: 2 + input_image: 2.5 + output_image: 8 + input_cache_reads: 0.2 + input_image_cache_reads: 0.25 + image: 0.052 diff --git a/library/models/openai/gpt-image-1.5.yaml b/library/models/openai/gpt-image-1.5.yaml new file mode 100644 index 0000000..7ad62b3 --- /dev/null +++ b/library/models/openai/gpt-image-1.5.yaml @@ -0,0 +1,81 @@ +openai/gpt-image-1.5: + name: gpt-image-1.5 + type: image + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-image-1.png + author: openai + description: GPT Image 1.5 is OpenAI's latest image generation model with improved quality, text rendering, and reasoning capabilities. 
+ website: https://platform.openai.com/docs/models/gpt-image-1.5 + providers: + - provider: openai + model: gpt-image-1.5 + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_quality" }, "low"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.009 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.013 + - predicate: { "==": [{ "var": "image_quality" }, "medium"] } + pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.034 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.05 + - pricing: + type: tiered + tiers: + - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } + pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.133 + - pricing: + type: fixed + input: 5 + input_image: 8 + output: 10 + output_image: 32 + input_cache_reads: 1.25 + input_image_cache_reads: 2 + image: 0.2 diff --git a/library/models/openai/gpt-image-1.yaml b/library/models/openai/gpt-image-1.yaml deleted file mode 100644 index 278a1b4..0000000 --- a/library/models/openai/gpt-image-1.yaml +++ /dev/null @@ -1,63 +0,0 @@ -openai/gpt-image-1: - name: GPT Image 1 - type: image - icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-image-1.png - author: openai - description: GPT Image 1 is OpenAI's new state-of-the-art image generation model. 
It is a natively multimodal language model that accepts both text and image inputs, and produces image outputs. - website: https://platform.openai.com/docs/models/gpt-image-1 - providers: - - provider: openai - model: gpt-image-1 - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "image_quality" }, "low"] } - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.011 - - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.016 - - predicate: { "==": [{ "var": "image_quality" }, "medium"] } - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.042 - - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.063 - - pricing: - type: tiered - tiers: - - predicate: { "==": [{ "var": "image_size" }, "1024x1024"] } - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.167 - - pricing: - type: fixed - input: 5 - input_image: 10 - output: 40 - image: 0.25 diff --git a/library/models/openai/o3-deep-research.yaml b/library/models/openai/o3-deep-research.yaml new file mode 100644 index 0000000..3253209 --- /dev/null +++ b/library/models/openai/o3-deep-research.yaml @@ -0,0 +1,17 @@ +openai/o3-deep-research: + name: o3-deep-research + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/o3.png + author: openai + description: o3 Deep Research is optimized for in-depth research and analysis tasks. 
+ website: https://platform.openai.com/docs/models/o3-deep-research + providers: + - provider: openai + model: o3-deep-research + context_window: 200000 + max_tokens: 100000 + pricing: + type: fixed + input: 10 + output: 40 + input_cache_reads: 2.5 diff --git a/library/models/openai/o3-mini.yaml b/library/models/openai/o3-mini.yaml new file mode 100644 index 0000000..76e4c7f --- /dev/null +++ b/library/models/openai/o3-mini.yaml @@ -0,0 +1,17 @@ +openai/o3-mini: + name: o3-mini + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/o3.png + author: openai + description: o3 Mini is a smaller, faster version of o3 optimized for reasoning tasks. + website: https://platform.openai.com/docs/models/o3-mini + providers: + - provider: openai + model: o3-mini + context_window: 200000 + max_tokens: 100000 + pricing: + type: fixed + input: 1.1 + output: 4.4 + input_cache_reads: 0.55 diff --git a/library/models/openai/o3-pro.yaml b/library/models/openai/o3-pro.yaml new file mode 100644 index 0000000..06353c1 --- /dev/null +++ b/library/models/openai/o3-pro.yaml @@ -0,0 +1,16 @@ +openai/o3-pro: + name: o3-pro + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/o3.png + author: openai + description: o3 Pro is OpenAI's most powerful o3 model with maximum compute for the hardest problems. 
+ website: https://platform.openai.com/docs/models/o3-pro + providers: + - provider: openai + model: o3-pro + context_window: 200000 + max_tokens: 100000 + pricing: + type: fixed + input: 20 + output: 80 diff --git a/library/models/openai/o4-mini-deep-research.yaml b/library/models/openai/o4-mini-deep-research.yaml new file mode 100644 index 0000000..1c42ee2 --- /dev/null +++ b/library/models/openai/o4-mini-deep-research.yaml @@ -0,0 +1,17 @@ +openai/o4-mini-deep-research: + name: o4-mini-deep-research + type: language + icon: https://cdn.openai.com/API/docs/images/model-page/model-icons/o4-mini.png + author: openai + description: o4 Mini Deep Research is optimized for in-depth research and analysis tasks with a smaller model. + website: https://platform.openai.com/docs/models/o4-mini-deep-research + providers: + - provider: openai + model: o4-mini-deep-research + context_window: 200000 + max_tokens: 100000 + pricing: + type: fixed + input: 2 + output: 8 + input_cache_reads: 0.5 diff --git a/library/models/xai/grok-4-fast-non-reasoning.yaml b/library/models/xai/grok-4.1-fast-non-reasoning.yaml similarity index 54% rename from library/models/xai/grok-4-fast-non-reasoning.yaml rename to library/models/xai/grok-4.1-fast-non-reasoning.yaml index eb1c5aa..5a7d862 100644 --- a/library/models/xai/grok-4-fast-non-reasoning.yaml +++ b/library/models/xai/grok-4.1-fast-non-reasoning.yaml @@ -1,13 +1,13 @@ -xai/grok-4-fast-non-reasoning: - name: Grok 4 Fast (Non-Reasoning) +xai/grok-4.1-fast-non-reasoning: + name: Grok 4.1 Fast (Non-Reasoning) type: language icon: https://grok.com/favicon.ico author: xai - description: xAI's excited to release grok-4-fast, their latest advancement in cost-efficient reasoning models. - website: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning + description: Grok 4.1 Fast (Non-Reasoning) is xAI's ultra-fast deterministic model for instant text-to-text generation. 
Released on November 20, 2025, it features a 2-million-token context window and prioritizes speed for large-scale content workflows. + website: https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning providers: - provider: xai - model: grok-4-fast-non-reasoning + model: grok-4-1-fast-non-reasoning context_window: 2000000 pricing: type: tiered @@ -22,6 +22,6 @@ xai/grok-4-fast-non-reasoning: - pricing: type: fixed input: 0.4 - output: 1 + output: 1 web_search: 0.025 input_cache_reads: 0.05 diff --git a/library/models/xai/grok-4-fast.yaml b/library/models/xai/grok-4.1-fast-reasoning.yaml similarity index 54% rename from library/models/xai/grok-4-fast.yaml rename to library/models/xai/grok-4.1-fast-reasoning.yaml index 097054e..68afc06 100644 --- a/library/models/xai/grok-4-fast.yaml +++ b/library/models/xai/grok-4.1-fast-reasoning.yaml @@ -1,13 +1,13 @@ -xai/grok-4-fast: - name: Grok 4 Fast +xai/grok-4.1-fast-reasoning: + name: Grok 4.1 Fast (Reasoning) type: language icon: https://grok.com/favicon.ico author: xai - description: xAI's excited to release grok-4-fast, their latest advancement in cost-efficient reasoning models. - website: https://docs.x.ai/docs/models/grok-4-fast + description: Grok 4.1 Fast (Reasoning) is xAI's reasoning model optimized for maximal intelligence and complex agentic tasks. Released on November 20, 2025, it features a 2-million-token context window and excels at accurate tool-calling and multi-step problem-solving.
+ website: https://docs.x.ai/docs/models/grok-4-1-fast-reasoning providers: - provider: xai - model: grok-4-fast + model: grok-4-1-fast-reasoning context_window: 2000000 pricing: type: tiered @@ -22,6 +22,6 @@ xai/grok-4-fast: - pricing: type: fixed input: 0.4 - output: 1 + output: 1 web_search: 0.025 input_cache_reads: 0.05 diff --git a/library/models/z-ai/glm-4.6.yaml b/library/models/z-ai/glm-4.6.yaml index c278aa0..4fa61fa 100644 --- a/library/models/z-ai/glm-4.6.yaml +++ b/library/models/z-ai/glm-4.6.yaml @@ -7,9 +7,17 @@ z-ai/glm-4.6: website: https://docs.z.ai/guides/llm/glm-4.6 providers: - provider: atlascloud + model: zai-org/GLM-4.6 + context_window: 202752 + pricing: + type: fixed + input: 0.42 + output: 1.74 + input_cache_reads: 0.08 + - provider: together model: zai-org/GLM-4.6 context_window: 202752 pricing: type: fixed input: 0.6 - output: 2 + output: 2.2