Skip to content
This repository was archived by the owner on Dec 11, 2025. It is now read-only.

Commit 019b9d7

Browse files
authored
Merge pull request #111 from Not-Diamond/ENG-3998_support-kimi-k2-thinking
feat: add support for Moonshot AI's Kimi K2 Thinking model
2 parents 124bd75 + 1fad140 commit 019b9d7

10 files changed

Lines changed: 1968 additions & 4 deletions

notdiamond/llms/client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,19 @@ def _llm_from_config(
16531653
replicate_api_key=provider.api_key,
16541654
**passed_kwargs,
16551655
)
1656+
if provider.provider == "moonshotai":
1657+
ChatOpenAI = _module_check(
1658+
"langchain_openai.chat_models",
1659+
"ChatOpenAI",
1660+
provider.provider,
1661+
)
1662+
return ChatOpenAI(
1663+
openai_api_key=provider.api_key,
1664+
model_name=provider.model,
1665+
base_url="https://api.moonshot.ai/v1",
1666+
callbacks=callbacks,
1667+
**passed_kwargs,
1668+
)
16561669
raise ValueError(f"Unsupported provider: {provider.provider}")
16571670

16581671
def verify_against_response_model(self) -> bool:

notdiamond/llms/providers.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ class NDLLMProviders(Enum):
9090
model served via Replicate
9191
9292
SONAR (NDLLMProvider): refers to "sonar" model by Perplexity
93+
94+
KIMI_K2_THINKING (NDLLMProvider): refers to "kimi-k2-thinking" model by Moonshot AI
9395
"""
9496

9597
GPT_3_5_TURBO = ("openai", "gpt-3.5-turbo")
@@ -207,6 +209,8 @@ class NDLLMProviders(Enum):
207209
"meta-llama-3.1-405b-instruct",
208210
)
209211

212+
KIMI_K2_THINKING = ("moonshotai", "kimi-k2-thinking")
213+
210214
def __new__(cls, provider, model):
211215
return LLMConfig(provider=provider, model=model)
212216

notdiamond/settings.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", default="")
1717
PPLX_API_KEY = os.getenv("PPLX_API_KEY", default="")
1818
REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="")
19+
MOONSHOT_API_KEY = os.getenv("MOONSHOT_API_KEY", default="")
1920

2021

2122
NOTDIAMOND_API_URL = os.getenv(
@@ -434,6 +435,24 @@
434435
"meta-llama-3.1-405b-instruct": {"input": 9.5, "output": 9.5},
435436
},
436437
},
438+
"moonshotai": {
439+
"models": [
440+
"kimi-k2-thinking",
441+
],
442+
"api_key": MOONSHOT_API_KEY,
443+
"support_tools": [
444+
"kimi-k2-thinking",
445+
],
446+
"support_response_model": [
447+
"kimi-k2-thinking",
448+
],
449+
"openrouter_identifier": {
450+
"kimi-k2-thinking": "moonshotai/kimi-k2-thinking",
451+
},
452+
"price": {
453+
"kimi-k2-thinking": {"input": 0.6, "output": 2.5},
454+
},
455+
},
437456
}
438457

439458

tests/helpers.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,26 @@
1-
def stream_chunks(ichunk, max_chunks=5):
    """Consume a streamed response and assert it looks like an AI message stream.

    Args:
        ichunk: Iterable of streamed chunks. Every chunk must expose ``type``
            and ``content`` attributes.
        max_chunks: Stop after this many chunks have been checked. Pass
            ``None`` to consume the whole stream.

    Raises:
        AssertionError: If a chunk's ``type`` is not ``"AIMessageChunk"``,
            if its ``content`` is not a ``str``, or if every inspected chunk
            had empty content (which includes a stream that yielded nothing).
    """
    n_chunks = 0
    all_empty = True
    for chunk in ichunk:
        assert (
            chunk.type == "AIMessageChunk"
        ), f"chunk {n_chunks}: unexpected type {chunk.type!r}"
        assert isinstance(
            chunk.content, str
        ), f"chunk {n_chunks}: content is {type(chunk.content).__name__}, not str"
        all_empty &= len(chunk.content) == 0
        n_chunks += 1
        # If max_chunks is None, consume all chunks
        if max_chunks is not None and n_chunks >= max_chunks:
            break
    # Fails both for an empty stream and for one whose inspected prefix
    # carried no text at all.
    assert not all_empty, f"all {n_chunks} inspected chunk(s) had empty content"
1213

1314

14-
async def astream_chunks(async_ichunk, max_chunks=5):
    """Asynchronously consume a streamed response and assert it looks like an AI message stream.

    Args:
        async_ichunk: Async iterable of streamed chunks. Every chunk must
            expose ``type`` and ``content`` attributes.
        max_chunks: Stop after this many chunks have been checked. Pass
            ``None`` to consume the whole stream.

    Raises:
        AssertionError: If a chunk's ``type`` is not ``"AIMessageChunk"``,
            if its ``content`` is not a ``str``, or if every inspected chunk
            had empty content (which includes a stream that yielded nothing).
    """
    n_chunks = 0
    all_empty = True
    async for chunk in async_ichunk:
        assert (
            chunk.type == "AIMessageChunk"
        ), f"chunk {n_chunks}: unexpected type {chunk.type!r}"
        assert isinstance(
            chunk.content, str
        ), f"chunk {n_chunks}: content is {type(chunk.content).__name__}, not str"
        all_empty &= len(chunk.content) == 0
        n_chunks += 1
        # If max_chunks is None, consume all chunks
        if max_chunks is not None and n_chunks >= max_chunks:
            break
    # Fails both for an empty stream and for one whose inspected prefix
    # carried no text at all.
    assert not all_empty, f"all {n_chunks} inspected chunk(s) had empty content"
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
interactions:
2+
- request:
3+
body: '{"messages": [{"role": "user", "content": "Tell me a joke"}], "llm_providers":
4+
[{"provider": "moonshotai", "model": "kimi-k2-thinking", "is_custom": false,
5+
"context_length": null, "input_price": null, "output_price": null, "latency":
6+
null}], "metric": "accuracy", "max_model_depth": 1, "hash_content": true}'
7+
headers:
8+
Accept:
9+
- '*/*'
10+
Accept-Encoding:
11+
- gzip, deflate, zstd
12+
Connection:
13+
- keep-alive
14+
Content-Length:
15+
- '307'
16+
User-Agent:
17+
- Python-SDK/0.4.6
18+
content-type:
19+
- application/json
20+
method: POST
21+
uri: https://staging-api.notdiamond.ai/v2/modelRouter/modelSelect
22+
response:
23+
body:
24+
string: '{"providers":[{"provider":"moonshotai","model":"kimi-k2-thinking"}],"session_id":"a9f29142-49ef-419f-a62b-275a07f67875"}'
25+
headers:
26+
CF-RAY:
27+
- 9a0a3d193801e432-OTP
28+
Connection:
29+
- keep-alive
30+
Content-Type:
31+
- application/json
32+
Date:
33+
- Tue, 18 Nov 2025 20:32:43 GMT
34+
Server:
35+
- cloudflare
36+
Transfer-Encoding:
37+
- chunked
38+
alt-svc:
39+
- h3=":443"; ma=86400
40+
cf-cache-status:
41+
- DYNAMIC
42+
content-length:
43+
- '120'
44+
rndr-id:
45+
- 7c647cb2-d389-48f2
46+
vary:
47+
- Accept-Encoding
48+
x-render-origin-server:
49+
- uvicorn
50+
x-request-id:
51+
- 7c647cb2-d389-48f2
52+
status:
53+
code: 200
54+
message: OK
55+
- request:
56+
body: '{"messages":[{"content":"The output should be formatted as a JSON instance
57+
that conforms to the JSON schema below.\n\nAs an example, for the schema {\"properties\":
58+
{\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\":
59+
\"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe
60+
object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema.
61+
The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere
62+
is the output schema:\n```\n{\"properties\": {\"setup\": {\"description\": \"question
63+
to set up a joke\", \"title\": \"Setup\", \"type\": \"string\"}, \"punchline\":
64+
{\"description\": \"answer to resolve the joke\", \"title\": \"Punchline\",
65+
\"type\": \"string\"}}, \"required\": [\"setup\", \"punchline\"]}\n```\nTell
66+
me a joke","role":"user"}],"model":"kimi-k2-thinking","max_completion_tokens":2000,"stream":false}'
67+
headers:
68+
accept:
69+
- application/json
70+
accept-encoding:
71+
- gzip, deflate, zstd
72+
connection:
73+
- keep-alive
74+
content-length:
75+
- '912'
76+
content-type:
77+
- application/json
78+
host:
79+
- api.moonshot.ai
80+
user-agent:
81+
- OpenAI/Python 1.99.9
82+
x-stainless-arch:
83+
- arm64
84+
x-stainless-async:
85+
- 'false'
86+
x-stainless-lang:
87+
- python
88+
x-stainless-os:
89+
- MacOS
90+
x-stainless-package-version:
91+
- 1.99.9
92+
x-stainless-read-timeout:
93+
- '120.0'
94+
x-stainless-retry-count:
95+
- '0'
96+
x-stainless-runtime:
97+
- CPython
98+
x-stainless-runtime-version:
99+
- 3.11.12
100+
method: POST
101+
uri: https://api.moonshot.ai/v1/chat/completions
102+
response:
103+
body:
104+
string: '{"id":"chatcmpl-691cd7ec3e228ab73cf751df","object":"chat.completion","created":1763497964,"model":"kimi-k2-thinking","choices":[{"index":0,"message":{"role":"assistant","content":"{\"setup\":
105+
\"Why don''t scientists trust atoms?\", \"punchline\": \"Because they make
106+
up everything!\"}","reasoning_content":"The user wants me to:\n1. Tell a joke\n2.
107+
Format the output as a JSON instance that conforms to the provided schema\n3.
108+
The schema has two required properties: \"setup\" (string) and \"punchline\"
109+
(string)\n\nI need to create a JSON object with \"setup\" and \"punchline\"
110+
keys. The setup should be the question part of the joke, and the punchline
111+
should be the answer.\n\nLet me think of a good joke. I''ll use a classic
112+
one:\nSetup: \"Why don''t scientists trust atoms?\"\nPunchline: \"Because
113+
they make up everything!\"\n\nNow I need to format this as JSON:\n{\n \"setup\":
114+
\"Why don''t scientists trust atoms?\",\n \"punchline\": \"Because they make
115+
up everything!\"\n}\n\nThis matches the required schema."},"finish_reason":"stop"}],"usage":{"prompt_tokens":201,"completion_tokens":193,"total_tokens":394,"cached_tokens":201}}'
116+
headers:
117+
Connection:
118+
- keep-alive
119+
Content-Type:
120+
- application/json; charset=utf-8
121+
Date:
122+
- Tue, 18 Nov 2025 20:32:54 GMT
123+
Msh-Context-Cache-Token-Saved:
124+
- '201'
125+
Msh-Gid:
126+
- free
127+
Msh-Org-Id:
128+
- org-67752d4c14124a4fb97b1cb607c5492a
129+
Msh-Project-Id:
130+
- proj-afa6516f69f142258a1acf013213e49c
131+
Msh-Request-Id:
132+
- bcaa050a-c4bd-11f0-8ed1-00163e49fa2f
133+
Msh-Trace-Mode:
134+
- 'on'
135+
Msh-Uid:
136+
- d4a871m6s4t4vqocb090
137+
Strict-Transport-Security:
138+
- max-age=31536000; includeSubDomains
139+
Transfer-Encoding:
140+
- chunked
141+
Vary:
142+
- Accept-Encoding
143+
- Origin
144+
X-Msh-Trace-Id:
145+
- 58c39ec1c7a5f240826722ace064ce99
146+
content-length:
147+
- '1136'
148+
status:
149+
code: 200
150+
message: OK
151+
version: 1

0 commit comments

Comments
 (0)