[Inference Providers] Add CentML inference providers support #2959

Open · wants to merge 5 commits into main
2 changes: 1 addition & 1 deletion src/huggingface_hub/inference/_client.py
@@ -133,7 +133,7 @@ class InferenceClient:
path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
provider (`str`, *optional*):
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"centml"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
defaults to hf-inference (Hugging Face Serverless Inference API).
If model is a URL or `base_url` is passed, then `provider` is not used.
token (`str`, *optional*):
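For context, a minimal usage sketch of the new `provider` value documented above. The API key is a placeholder, and the model ID is the one used in this PR's test mapping; this is illustrative, not part of the diff:

```python
from huggingface_hub import InferenceClient

# "<CENTML_API_KEY>" is a placeholder for a real CentML API key.
client = InferenceClient(provider="centml", api_key="<CENTML_API_KEY>")

response = client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
)
print(response.choices[0].message.content)
```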
7 changes: 6 additions & 1 deletion src/huggingface_hub/inference/_providers/__init__.py
@@ -19,10 +19,11 @@
from .replicate import ReplicateTask, ReplicateTextToSpeechTask
from .sambanova import SambanovaConversationalTask
from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask

from .centml import CentmlConversationalTask, CentmlTextGenerationTask

PROVIDER_T = Literal[
    "black-forest-labs",
    "centml",
    "cerebras",
    "cohere",
    "fal-ai",
@@ -41,6 +42,10 @@
"black-forest-labs": {
"text-to-image": BlackForestLabsTextToImageTask(),
},
"centml": {
"conversational": CentmlConversationalTask(),
"text-generation": CentmlTextGenerationTask(),
},
"cerebras": {
"conversational": CerebrasConversationalTask(),
},
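As a sanity check on the wiring above, a small sketch of how the new entries can be resolved. It assumes the mapping shown in this hunk is the module-level `PROVIDERS` dict that `InferenceClient` consults; the registry name is not visible in the hunk itself:

```python
from huggingface_hub.inference._providers import PROVIDERS  # assumed registry name

# Look up the helper objects registered for the "centml" provider.
conversational_helper = PROVIDERS["centml"]["conversational"]
text_generation_helper = PROVIDERS["centml"]["text-generation"]

print(type(conversational_helper).__name__)   # CentmlConversationalTask
print(type(text_generation_helper).__name__)  # CentmlTextGenerationTask
```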
50 changes: 50 additions & 0 deletions src/huggingface_hub/inference/_providers/centml.py
@@ -0,0 +1,50 @@
from typing import Optional

from huggingface_hub.inference._providers._common import (
    BaseConversationalTask,
    BaseTextGenerationTask,
)


class CentmlConversationalTask(BaseConversationalTask):
    """
    Provider helper for CentML conversational (chat completions) tasks.
    This helper builds requests in the OpenAI API format.
    """

    def __init__(self):
        # Set the provider name to "centml" and use the CentML serverless endpoint URL.
        super().__init__(provider="centml", base_url="https://api.centml.com/openai")

    def _prepare_api_key(self, api_key: Optional[str]) -> str:
        if api_key is None:
            raise ValueError("An API key must be provided to use the centml provider.")
        return api_key

    def _prepare_mapped_model(self, model: Optional[str]) -> str:
        if model is None:
            raise ValueError("Please provide a centml model ID.")
        return model


class CentmlTextGenerationTask(BaseTextGenerationTask):
    """
    Provider helper for CentML text generation (completions) tasks.
    This helper builds requests in the OpenAI API format.
    """

    def __init__(self):
        super().__init__(provider="centml", base_url="https://api.centml.com/openai")

    def _prepare_api_key(self, api_key: Optional[str]) -> str:
        if api_key is None:
            raise ValueError("An API key must be provided to use the centml provider.")
        return api_key

    def _prepare_mapped_model(self, model: Optional[str]) -> str:
        if model is None:
            raise ValueError("Please provide a centml model ID.")
        return model
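A quick sketch of the behavior these helpers add, based only on the methods defined in this file: both classes pass model IDs through unchanged and reject a missing API key early rather than falling back to a default:

```python
from huggingface_hub.inference._providers.centml import CentmlConversationalTask

helper = CentmlConversationalTask()

# The model ID is returned as-is.
print(helper._prepare_mapped_model("meta-llama/Llama-3.3-70B-Instruct"))

# A missing API key raises a clear error.
try:
    helper._prepare_api_key(None)
except ValueError as err:
    print(err)  # An API key must be provided to use the centml provider.
```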

@@ -0,0 +1,75 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct",
"stream": false}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '195'
Content-Type:
- application/json
X-Amzn-Trace-Id:
- 9f95510c-8aae-4df7-820e-eafbc8ad396f
method: POST
uri: https://api.centml.com/openai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-de1b282d4615cdcf51313490db81295a","object":"chat.completion","created":1742933815,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":null,"content":"**Deep
Learning: An Overview**\n=====================================\n\nDeep learning
is a subset of machine learning that involves the use of artificial neural
networks to analyze and interpret data. These neural networks are designed
to mimic the structure and function of the human brain, with multiple layers
of interconnected nodes (neurons) that process and transmit information.\n\n**Key
Characteristics:**\n\n1. **Artificial Neural Networks**: Deep learning models
are based on artificial neural networks, which are composed of multiple layers
of nodes (neurons) that process and transmit information.\n2. **Multiple Layers**:
Deep learning models have multiple layers, each of which performs a specific
function, such as feature extraction, feature transformation, or classification.\n3.
**Hierarchical Representation**: Deep learning models learn hierarchical representations
of data, with early layers learning low-level features and later layers learning
higher-level features.\n4. **Large Amounts of Data**: Deep learning models
require large amounts of data to train, as they need to learn complex patterns
and relationships in the data.\n\n**Types of Deep Learning Models:**\n\n1.
**Convolutional Neural Networks (CNNs)**: Used for image and video processing,
CNNs are designed to extract features from spatially structured data.\n2.
**Recurrent Neural Networks (RNNs)**: Used for sequential data, such as speech
or text, RNNs are designed to model temporal relationships in data.\n3. **Autoencoders**:
Used for dimensionality reduction and generative modeling, autoencoders are
designed to learn compact representations of data.\n\n**Applications:**\n\n1.
**Computer Vision**: Deep learning models are widely used in computer vision
applications, such as image classification, object detection, and segmentation.\n2.
**Natural Language Processing**: Deep learning models are used in NLP applications,
such as language modeling, text classification, and machine translation.\n3.
**Speech Recognition**: Deep learning models are used in speech recognition
applications, such as speech-to-text and voice recognition.\n\n**Advantages:**\n\n1.
**High Accuracy**: Deep learning models can achieve high accuracy in complex
tasks, such as image recognition and speech recognition.\n2. **Flexibility**:
Deep learning models can be used in a wide range of applications, from computer
vision to NLP.\n3. **Scalability**: Deep learning models can be trained on
large datasets and can scale to large applications.\n\n**Challenges:**\n\n1.
**Computational Requirements**: Deep learning models require significant computational
resources to train and deploy.\n2. **Data Requirements**: Deep learning models
require large amounts of data to train, which can be difficult to obtain.\n3.
**Interpretability**: Deep learning models can be difficult to interpret,
making it challenging to understand why a particular decision was made.","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":46,"total_tokens":595,"completion_tokens":549,"prompt_tokens_details":null},"prompt_logprobs":null}'
headers:
content-type:
- application/json
date:
- Tue, 25 Mar 2025 20:16:54 GMT
server:
- istio-envoy
transfer-encoding:
- chunked
x-envoy-upstream-service-time:
- '3844'
status:
code: 200
message: OK
version: 1
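The recorded interaction above is a plain OpenAI-style chat completion call; a rough re-creation with `requests` is sketched below. The endpoint and payload are taken from the cassette, while the `Authorization` header is an assumption, since the cassette omits credentials:

```python
import requests

payload = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
    "stream": False,
}

# "<CENTML_API_KEY>" is a placeholder; the URL is the one recorded in the cassette.
resp = requests.post(
    "https://api.centml.com/openai/v1/chat/completions",
    headers={"Authorization": "Bearer <CENTML_API_KEY>"},
    json=payload,
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```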
@@ -0,0 +1,70 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct",
"max_tokens": 20, "stream": true}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '212'
Content-Type:
- application/json
X-Amzn-Trace-Id:
- ad425e9f-bc1f-48df-b84d-a5ebad74cd66
method: POST
uri: https://api.centml.com/openai/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"Deep"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
Learning Overview"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**\n=========================\n\nDeep
learning is a"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
subset"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
of machine learning that involves"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
the use"},"logprobs":null,"finish_reason":"length","stop_reason":null}]}


data: [DONE]


'
headers:
content-type:
- text/event-stream; charset=utf-8; charset=utf-8
date:
- Tue, 25 Mar 2025 20:16:58 GMT
server:
- istio-envoy
transfer-encoding:
- chunked
x-envoy-upstream-service-time:
- '328'
status:
code: 200
message: OK
version: 1
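The second cassette exercises the client's streaming path; a hedged usage sketch follows, with a placeholder API key and the model and `max_tokens=20` taken from the recorded request:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="centml", api_key="<CENTML_API_KEY>")

# Stream the first few tokens, mirroring the recorded max_tokens=20 request.
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
    max_tokens=20,
    stream=True,
):
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
print()
```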
4 changes: 4 additions & 0 deletions tests/test_inference_client.py
@@ -63,6 +63,10 @@
"black-forest-labs": {
"text-to-image": "black-forest-labs/FLUX.1-dev",
},
"centml": {
"conversational": "meta-llama/Llama-3.3-70B-Instruct",
"text-generation": "meta-llama/Llama-3.2-3B-Instruct",
},
"cerebras": {
"conversational": "meta-llama/Llama-3.3-70B-Instruct",
},