[Inference Providers] Add CentML inference providers support #2959

Open · wants to merge 5 commits into main
2 changes: 1 addition & 1 deletion src/huggingface_hub/inference/_client.py
@@ -133,7 +133,7 @@ class InferenceClient:
path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
provider (`str`, *optional*):
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"centml"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
defaults to hf-inference (Hugging Face Serverless Inference API).
If model is a URL or `base_url` is passed, then `provider` is not used.
token (`str`, *optional*):
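For context, a minimal usage sketch of the new `provider` value documented above. The API key is a placeholder, and the model ID is the one used in this PR's test mapping; this is illustrative, not part of the diff:

```python
from huggingface_hub import InferenceClient

# "<CENTML_API_KEY>" is a placeholder for a real CentML API key.
client = InferenceClient(provider="centml", api_key="<CENTML_API_KEY>")

response = client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
)
print(response.choices[0].message.content)
```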
7 changes: 6 additions & 1 deletion src/huggingface_hub/inference/_providers/__init__.py
@@ -19,10 +19,11 @@
from .replicate import ReplicateTask, ReplicateTextToSpeechTask
from .sambanova import SambanovaConversationalTask
from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask

from .centml import CentmlConversationalTask, CentmlTextGenerationTask

PROVIDER_T = Literal[
    "black-forest-labs",
    "centml",
    "cerebras",
    "cohere",
    "fal-ai",
@@ -41,6 +42,10 @@
"black-forest-labs": {
"text-to-image": BlackForestLabsTextToImageTask(),
},
"centml": {
"conversational": CentmlConversationalTask(),
"text-generation": CentmlTextGenerationTask(),
},
"cerebras": {
"conversational": CerebrasConversationalTask(),
},
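As a sanity check on the wiring above, a small sketch of how the new entries can be resolved. It assumes the mapping shown in this hunk is the module-level `PROVIDERS` dict that `InferenceClient` consults; the registry name is not visible in the hunk itself:

```python
from huggingface_hub.inference._providers import PROVIDERS  # assumed registry name

# Look up the helper objects registered for the "centml" provider.
conversational_helper = PROVIDERS["centml"]["conversational"]
text_generation_helper = PROVIDERS["centml"]["text-generation"]

print(type(conversational_helper).__name__)   # CentmlConversationalTask
print(type(text_generation_helper).__name__)  # CentmlTextGenerationTask
```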
50 changes: 50 additions & 0 deletions src/huggingface_hub/inference/_providers/centml.py
@@ -0,0 +1,50 @@
from typing import Optional

from huggingface_hub.inference._providers._common import (
    BaseConversationalTask,
    BaseTextGenerationTask,
)


class CentmlConversationalTask(BaseConversationalTask):
    """
    Provider helper for CentML conversational (chat completions) tasks.
    This helper builds requests in the OpenAI API format.
    """

    def __init__(self):
        # Set the provider name to "centml" and use the CentML serverless endpoint URL.
        super().__init__(provider="centml", base_url="https://api.centml.com/openai")

    def _prepare_api_key(self, api_key: Optional[str]) -> str:
        if api_key is None:
            raise ValueError("An API key must be provided to use the centml provider.")
        return api_key

    def _prepare_mapped_model(self, model: Optional[str]) -> str:
        if model is None:
            raise ValueError("Please provide a centml model ID.")
        return model


class CentmlTextGenerationTask(BaseTextGenerationTask):
    """
    Provider helper for CentML text generation (completions) tasks.
    This helper builds requests in the OpenAI API format.
    """

    def __init__(self):
        super().__init__(provider="centml", base_url="https://api.centml.com/openai")

    def _prepare_api_key(self, api_key: Optional[str]) -> str:
        if api_key is None:
            raise ValueError("An API key must be provided to use the centml provider.")
        return api_key

    def _prepare_mapped_model(self, model: Optional[str]) -> str:
        if model is None:
            raise ValueError("Please provide a centml model ID.")
        return model
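A quick sketch of the behavior these helpers add, based only on the methods defined in this file: both classes pass model IDs through unchanged and reject a missing API key early rather than falling back to a default:

```python
from huggingface_hub.inference._providers.centml import CentmlConversationalTask

helper = CentmlConversationalTask()

# The model ID is returned as-is.
print(helper._prepare_mapped_model("meta-llama/Llama-3.3-70B-Instruct"))

# A missing API key raises a clear error.
try:
    helper._prepare_api_key(None)
except ValueError as err:
    print(err)  # An API key must be provided to use the centml provider.
```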

@@ -0,0 +1,75 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct",
"stream": false}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '195'
Content-Type:
- application/json
X-Amzn-Trace-Id:
- 9f95510c-8aae-4df7-820e-eafbc8ad396f
method: POST
uri: https://api.centml.com/openai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-de1b282d4615cdcf51313490db81295a","object":"chat.completion","created":1742933815,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":null,"content":"**Deep
Learning: An Overview**\n=====================================\n\nDeep learning
is a subset of machine learning that involves the use of artificial neural
networks to analyze and interpret data. These neural networks are designed
to mimic the structure and function of the human brain, with multiple layers
of interconnected nodes (neurons) that process and transmit information.\n\n**Key
Characteristics:**\n\n1. **Artificial Neural Networks**: Deep learning models
are based on artificial neural networks, which are composed of multiple layers
of nodes (neurons) that process and transmit information.\n2. **Multiple Layers**:
Deep learning models have multiple layers, each of which performs a specific
function, such as feature extraction, feature transformation, or classification.\n3.
**Hierarchical Representation**: Deep learning models learn hierarchical representations
of data, with early layers learning low-level features and later layers learning
higher-level features.\n4. **Large Amounts of Data**: Deep learning models
require large amounts of data to train, as they need to learn complex patterns
and relationships in the data.\n\n**Types of Deep Learning Models:**\n\n1.
**Convolutional Neural Networks (CNNs)**: Used for image and video processing,
CNNs are designed to extract features from spatially structured data.\n2.
**Recurrent Neural Networks (RNNs)**: Used for sequential data, such as speech
or text, RNNs are designed to model temporal relationships in data.\n3. **Autoencoders**:
Used for dimensionality reduction and generative modeling, autoencoders are
designed to learn compact representations of data.\n\n**Applications:**\n\n1.
**Computer Vision**: Deep learning models are widely used in computer vision
applications, such as image classification, object detection, and segmentation.\n2.
**Natural Language Processing**: Deep learning models are used in NLP applications,
such as language modeling, text classification, and machine translation.\n3.
**Speech Recognition**: Deep learning models are used in speech recognition
applications, such as speech-to-text and voice recognition.\n\n**Advantages:**\n\n1.
**High Accuracy**: Deep learning models can achieve high accuracy in complex
tasks, such as image recognition and speech recognition.\n2. **Flexibility**:
Deep learning models can be used in a wide range of applications, from computer
vision to NLP.\n3. **Scalability**: Deep learning models can be trained on
large datasets and can scale to large applications.\n\n**Challenges:**\n\n1.
**Computational Requirements**: Deep learning models require significant computational
resources to train and deploy.\n2. **Data Requirements**: Deep learning models
require large amounts of data to train, which can be difficult to obtain.\n3.
**Interpretability**: Deep learning models can be difficult to interpret,
making it challenging to understand why a particular decision was made.","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":46,"total_tokens":595,"completion_tokens":549,"prompt_tokens_details":null},"prompt_logprobs":null}'
headers:
content-type:
- application/json
date:
- Tue, 25 Mar 2025 20:16:54 GMT
server:
- istio-envoy
transfer-encoding:
- chunked
x-envoy-upstream-service-time:
- '3844'
status:
code: 200
message: OK
version: 1
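The recorded interaction above is a plain OpenAI-style chat completion call; a rough re-creation with `requests` is sketched below. The endpoint and payload are taken from the cassette, while the `Authorization` header is an assumption, since the cassette omits credentials:

```python
import requests

payload = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
    "stream": False,
}

# "<CENTML_API_KEY>" is a placeholder; the URL is the one recorded in the cassette.
resp = requests.post(
    "https://api.centml.com/openai/v1/chat/completions",
    headers={"Authorization": "Bearer <CENTML_API_KEY>"},
    json=payload,
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```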
@@ -0,0 +1,70 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct",
"max_tokens": 20, "stream": true}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '212'
Content-Type:
- application/json
X-Amzn-Trace-Id:
- ad425e9f-bc1f-48df-b84d-a5ebad74cd66
method: POST
uri: https://api.centml.com/openai/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"Deep"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
Learning Overview"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**\n=========================\n\nDeep
learning is a"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
subset"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
of machine learning that involves"},"logprobs":null,"finish_reason":null}]}


data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"
the use"},"logprobs":null,"finish_reason":"length","stop_reason":null}]}


data: [DONE]


'
headers:
content-type:
- text/event-stream; charset=utf-8; charset=utf-8
date:
- Tue, 25 Mar 2025 20:16:58 GMT
server:
- istio-envoy
transfer-encoding:
- chunked
x-envoy-upstream-service-time:
- '328'
status:
code: 200
message: OK
version: 1
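The second cassette exercises the client's streaming path; a hedged usage sketch follows, with a placeholder API key and the model and `max_tokens=20` taken from the recorded request:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="centml", api_key="<CENTML_API_KEY>")

# Stream the first few tokens, mirroring the recorded max_tokens=20 request.
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
    max_tokens=20,
    stream=True,
):
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
print()
```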
4 changes: 4 additions & 0 deletions tests/test_inference_client.py
@@ -63,6 +63,10 @@
"black-forest-labs": {
"text-to-image": "black-forest-labs/FLUX.1-dev",
},
"centml": {
"conversational": "meta-llama/Llama-3.3-70B-Instruct",
"text-generation": "meta-llama/Llama-3.2-3B-Instruct",
},
"cerebras": {
"conversational": "meta-llama/Llama-3.3-70B-Instruct",
},