Not-Diamond
diff --git a/notdiamond/llms/client.py b/notdiamond/llms/client.py
Lines changed: 13 additions & 0 deletions
diff --git a/notdiamond/llms/providers.py b/notdiamond/llms/providers.py
Lines changed: 4 additions & 0 deletions
diff --git a/notdiamond/settings.py b/notdiamond/settings.py
Lines changed: 19 additions & 0 deletions
diff --git a/tests/helpers.py b/tests/helpers.py
Lines changed: 6 additions & 4 deletions
diff --git a/tests/test_llm_calls/cassettes/test_moonshotai/Test_Moonshotai_LLMs.test_kimi_k2_thinking_response_model.yaml b/tests/test_llm_calls/cassettes/test_moonshotai/Test_Moonshotai_LLMs.test_kimi_k2_thinking_response_model.yaml
Lines changed: 151 additions & 0 deletions
@@ -1653,6 +1653,19 @@ def _llm_from_config(
                     replicate_api_key=provider.api_key,
                     **passed_kwargs,
                 )
+            if provider.provider == "moonshotai":
+                ChatOpenAI = _module_check(
+                    "langchain_openai.chat_models",
+                    "ChatOpenAI",
+                    provider.provider,
+                )
+                return ChatOpenAI(
+                    openai_api_key=provider.api_key,
+                    model_name=provider.model,
+                    base_url="https://api.moonshot.ai/v1",
+                    callbacks=callbacks,
+                    **passed_kwargs,
+                )
             raise ValueError(f"Unsupported provider: {provider.provider}")
 
         def verify_against_response_model(self) -> bool:
 
@@ -90,6 +90,8 @@ class NDLLMProviders(Enum):
             model served via Replicate
 
         SONAR (NDLLMProvider): refers to "sonar" model by Perplexity
+
+        KIMI_K2_THINKING (NDLLMProvider): refers to "kimi-k2-thinking" model by Moonshot AI
     """
 
     GPT_3_5_TURBO = ("openai", "gpt-3.5-turbo")
@@ -207,6 +209,8 @@ class NDLLMProviders(Enum):
         "meta-llama-3.1-405b-instruct",
     )
 
+    KIMI_K2_THINKING = ("moonshotai", "kimi-k2-thinking")
+
     def __new__(cls, provider, model):
         return LLMConfig(provider=provider, model=model)
 
 
@@ -16,6 +16,7 @@
 TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", default="")
 PPLX_API_KEY = os.getenv("PPLX_API_KEY", default="")
 REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="")
+MOONSHOT_API_KEY = os.getenv("MOONSHOT_API_KEY", default="")
 
 
 NOTDIAMOND_API_URL = os.getenv(
@@ -434,6 +435,24 @@
             "meta-llama-3.1-405b-instruct": {"input": 9.5, "output": 9.5},
         },
     },
+    "moonshotai": {
+        "models": [
+            "kimi-k2-thinking",
+        ],
+        "api_key": MOONSHOT_API_KEY,
+        "support_tools": [
+            "kimi-k2-thinking",
+        ],
+        "support_response_model": [
+            "kimi-k2-thinking",
+        ],
+        "openrouter_identifier": {
+            "kimi-k2-thinking": "moonshotai/kimi-k2-thinking",
+        },
+        "price": {
+            "kimi-k2-thinking": {"input": 0.6, "output": 2.5},
+        },
+    },
 }
 
 
 
@@ -1,24 +1,26 @@
-def stream_chunks(ichunk):
+def stream_chunks(ichunk, max_chunks=5):
     n_chunks = 0
     all_empty = True
     for chunk in ichunk:
         assert chunk.type == "AIMessageChunk"
         assert isinstance(chunk.content, str)
         all_empty &= len(chunk.content) == 0
         n_chunks += 1
-        if n_chunks >= 5:
+        # If max_chunks is None, consume all chunks
+        if max_chunks is not None and n_chunks >= max_chunks:
             break
     assert not all_empty
 
 
-async def astream_chunks(async_ichunk):
+async def astream_chunks(async_ichunk, max_chunks=5):
     n_chunks = 0
     all_empty = True
     async for chunk in async_ichunk:
         assert chunk.type == "AIMessageChunk"
         assert isinstance(chunk.content, str)
         all_empty &= len(chunk.content) == 0
         n_chunks += 1
-        if n_chunks >= 5:
+        # If max_chunks is None, consume all chunks
+        if max_chunks is not None and n_chunks >= max_chunks:
             break
     assert not all_empty
@@ -0,0 +1,151 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": "Tell me a joke"}], "llm_providers":
+      [{"provider": "moonshotai", "model": "kimi-k2-thinking", "is_custom": false,
+      "context_length": null, "input_price": null, "output_price": null, "latency":
+      null}], "metric": "accuracy", "max_model_depth": 1, "hash_content": true}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '307'
+      User-Agent:
+      - Python-SDK/0.4.6
+      content-type:
+      - application/json
+    method: POST
+    uri: https://staging-api.notdiamond.ai/v2/modelRouter/modelSelect
+  response:
+    body:
+      string: '{"providers":[{"provider":"moonshotai","model":"kimi-k2-thinking"}],"session_id":"a9f29142-49ef-419f-a62b-275a07f67875"}'
+    headers:
+      CF-RAY:
+      - 9a0a3d193801e432-OTP
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 18 Nov 2025 20:32:43 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      content-length:
+      - '120'
+      rndr-id:
+      - 7c647cb2-d389-48f2
+      vary:
+      - Accept-Encoding
+      x-render-origin-server:
+      - uvicorn
+      x-request-id:
+      - 7c647cb2-d389-48f2
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"The output should be formatted as a JSON instance
+      that conforms to the JSON schema below.\n\nAs an example, for the schema {\"properties\":
+      {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\":
+      \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe
+      object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema.
+      The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere
+      is the output schema:\n```\n{\"properties\": {\"setup\": {\"description\": \"question
+      to set up a joke\", \"title\": \"Setup\", \"type\": \"string\"}, \"punchline\":
+      {\"description\": \"answer to resolve the joke\", \"title\": \"Punchline\",
+      \"type\": \"string\"}}, \"required\": [\"setup\", \"punchline\"]}\n```\nTell
+      me a joke","role":"user"}],"model":"kimi-k2-thinking","max_completion_tokens":2000,"stream":false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '912'
+      content-type:
+      - application/json
+      host:
+      - api.moonshot.ai
+      user-agent:
+      - OpenAI/Python 1.99.9
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.99.9
+      x-stainless-read-timeout:
+      - '120.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.moonshot.ai/v1/chat/completions
+  response:
+    body:
+      string: '{"id":"chatcmpl-691cd7ec3e228ab73cf751df","object":"chat.completion","created":1763497964,"model":"kimi-k2-thinking","choices":[{"index":0,"message":{"role":"assistant","content":"{\"setup\":
+        \"Why don''t scientists trust atoms?\", \"punchline\": \"Because they make
+        up everything!\"}","reasoning_content":"The user wants me to:\n1. Tell a joke\n2.
+        Format the output as a JSON instance that conforms to the provided schema\n3.
+        The schema has two required properties: \"setup\" (string) and \"punchline\"
+        (string)\n\nI need to create a JSON object with \"setup\" and \"punchline\"
+        keys. The setup should be the question part of the joke, and the punchline
+        should be the answer.\n\nLet me think of a good joke. I''ll use a classic
+        one:\nSetup: \"Why don''t scientists trust atoms?\"\nPunchline: \"Because
+        they make up everything!\"\n\nNow I need to format this as JSON:\n{\n  \"setup\":
+        \"Why don''t scientists trust atoms?\",\n  \"punchline\": \"Because they make
+        up everything!\"\n}\n\nThis matches the required schema."},"finish_reason":"stop"}],"usage":{"prompt_tokens":201,"completion_tokens":193,"total_tokens":394,"cached_tokens":201}}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Tue, 18 Nov 2025 20:32:54 GMT
+      Msh-Context-Cache-Token-Saved:
+      - '201'
+      Msh-Gid:
+      - free
+      Msh-Org-Id:
+      - org-67752d4c14124a4fb97b1cb607c5492a
+      Msh-Project-Id:
+      - proj-afa6516f69f142258a1acf013213e49c
+      Msh-Request-Id:
+      - bcaa050a-c4bd-11f0-8ed1-00163e49fa2f
+      Msh-Trace-Mode:
+      - 'on'
+      Msh-Uid:
+      - d4a871m6s4t4vqocb090
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Accept-Encoding
+      - Origin
+      X-Msh-Trace-Id:
+      - 58c39ec1c7a5f240826722ace064ce99
+      content-length:
+      - '1136'
+    status:
+      code: 200
+      message: OK
+version: 1