1 | | -from abc import ABC, abstractmethod |
2 | | -from collections.abc import AsyncGenerator |
3 | | -from pathlib import Path |
4 | | -from typing import Any, Literal, Optional, Union |
| 1 | +""" |
| 2 | +Backend interface and registry for generative AI model interactions. |
5 | 3 |
6 | | -from loguru import logger |
7 | | -from PIL import Image |
| 4 | +Provides the abstract base class for implementing backends that communicate with |
| 5 | +generative AI models. Backends handle the lifecycle of generation requests. |
8 | 6 |
9 | | -from guidellm.backend.response import ResponseSummary, StreamingTextResponse |
10 | | -from guidellm.settings import settings |
| 7 | +Classes: |
| 8 | + Backend: Abstract base class for generative AI backends with registry support. |
| 9 | +
| 10 | +Type Aliases: |
| 11 | + BackendType: Literal type defining supported backend implementations. |
| 12 | +""" |
| 13 | + |
| 14 | +from __future__ import annotations |
| 15 | + |
| 16 | +from abc import abstractmethod |
| 17 | +from typing import Literal |
| 18 | + |
| 19 | +from guidellm.backend.objects import ( |
| 20 | + GenerationRequest, |
| 21 | + GenerationResponse, |
| 22 | +) |
| 23 | +from guidellm.scheduler import BackendInterface |
| 24 | +from guidellm.utils import RegistryMixin |
11 | 25 |
12 | 26 | __all__ = [ |
13 | 27 | "Backend", |
18 | 32 | BackendType = Literal["openai_http"] |
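Because `BackendType` is a `Literal` alias rather than a plain `str`, type checkers reject unsupported identifiers at the `Backend.create` call site, and the alias can be introspected at runtime. A self-contained sketch (`pick` and `"custom"` are hypothetical stand-ins, not part of this module):

```python
from typing import Literal, get_args

BackendType = Literal["openai_http"]

# Enumerate the allowed identifiers at runtime, e.g. to drive CLI choices.
SUPPORTED = list(get_args(BackendType))  # -> ["openai_http"]

def pick(type_: BackendType) -> str:
    """Stand-in for Backend.create; checkers reject unknown literals."""
    return type_

pick("openai_http")  # accepted
# pick("custom")     # flagged by mypy/pyright: not a member of BackendType
```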
19 | 33 |
20 | 34 |
21 | | -class Backend(ABC): |
| 35 | +class Backend( |
| 36 | + RegistryMixin["type[Backend]"], |
| 37 | + BackendInterface[GenerationRequest, GenerationResponse], |
| 38 | +): |
22 | 39 | """ |
23 | | - Abstract base class for generative AI backends. |
24 | | -
25 | | - This class provides a common interface for creating and interacting with different |
26 | | - generative AI backends. Subclasses should implement the abstract methods to |
27 | | - define specific backend behavior. |
28 | | -
29 | | - :cvar _registry: A registration dictionary that maps BackendType to backend classes. |
30 | | - :param type_: The type of the backend. |
| 40 | + Base class for generative AI backends with registry and lifecycle support.
| 41 | +
| 42 | + Provides a standard interface for backends that communicate with generative AI |
| 43 | + models. Combines the registry pattern for automatic discovery with a defined |
| 44 | + lifecycle for process-based distributed execution. |
| 45 | +
| 46 | + Backend lifecycle phases: |
| 47 | + 1. Creation and configuration |
| 48 | + 2. Process startup - Initialize resources in worker process |
| 49 | + 3. Validation - Verify backend readiness |
| 50 | + 4. Request resolution - Process generation requests |
| 51 | + 5. Process shutdown - Clean up resources |
| 52 | +
| 53 | + Backend state (excluding process_startup resources) must be pickleable for |
| 54 | + distributed execution across process boundaries. |
| 55 | +
| 56 | + Example: |
| 57 | + :: |
| 58 | + @Backend.register("my_backend") |
| 59 | + class MyBackend(Backend): |
| 60 | + def __init__(self, api_key: str): |
| 61 | + super().__init__("my_backend") |
| 62 | + self.api_key = api_key |
| 63 | +
| 64 | + async def process_startup(self): |
| 65 | + self.client = MyAPIClient(self.api_key) |
| 66 | +
| 67 | + backend = Backend.create("my_backend", api_key="secret") |
31 | 68 | """ |
32 | 69 |
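The phases above can be sketched end to end. A hypothetical subclass, assuming `BackendInterface` declares `validate`, `resolve`, and `process_shutdown` coroutines for phases 3-5 (only `process_startup` and `default_model` appear verbatim in this file), with `httpx` standing in for a real HTTP client and import paths assumed:

```python
from __future__ import annotations

import httpx  # assumption: any async-capable HTTP client works here

from guidellm.backend import Backend  # import path assumed
from guidellm.backend.objects import GenerationRequest, GenerationResponse


@Backend.register("sketch_http")
class SketchHTTPBackend(Backend):
    """Hypothetical backend walking the five lifecycle phases."""

    def __init__(self, base_url: str):
        super().__init__("sketch_http")  # phase 1: pickleable config only
        self.base_url = base_url
        self.client = None  # per-process resource, never pickled

    async def process_startup(self):
        # Phase 2: allocate resources inside the worker process.
        self.client = httpx.AsyncClient(base_url=self.base_url)

    async def validate(self):
        # Phase 3 (method name assumed): verify readiness before traffic.
        if self.client is None:
            raise RuntimeError("process_startup() has not been run")

    async def default_model(self) -> str | None:
        return "sketch-model"  # a real backend would query the server

    async def resolve(self, request: GenerationRequest):
        # Phase 4 (name/signature assumed): issue the request through
        # self.client and produce GenerationResponse objects as they stream.
        ...

    async def process_shutdown(self):
        # Phase 5 (method name assumed): release per-process resources.
        if self.client is not None:
            await self.client.aclose()
            self.client = None
```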
33 | | - _registry: dict[BackendType, "type[Backend]"] = {} |
34 | | - |
35 | | - @classmethod |
36 | | - def register(cls, backend_type: BackendType): |
37 | | - """ |
38 | | - A decorator to register a backend class in the backend registry. |
39 | | -
40 | | - :param backend_type: The type of backend to register. |
41 | | - :type backend_type: BackendType |
42 | | - :return: The decorated backend class. |
43 | | - :rtype: Type[Backend] |
44 | | - """ |
45 | | - if backend_type in cls._registry: |
46 | | - raise ValueError(f"Backend type already registered: {backend_type}") |
47 | | - |
48 | | - if not issubclass(cls, Backend): |
49 | | - raise TypeError("Only subclasses of Backend can be registered") |
50 | | - |
51 | | - def inner_wrapper(wrapped_class: type["Backend"]): |
52 | | - cls._registry[backend_type] = wrapped_class |
53 | | - logger.info("Registered backend type: {}", backend_type) |
54 | | - return wrapped_class |
55 | | - |
56 | | - return inner_wrapper |
57 | | - |
58 | 70 | @classmethod |
59 | | - def create(cls, type_: BackendType, **kwargs) -> "Backend": |
| 71 | + def create(cls, type_: BackendType, **kwargs) -> Backend: |
60 | 72 | """ |
61 | | - Factory method to create a backend instance based on the backend type. |
| 73 | + Create a backend instance based on the backend type. |
62 | 74 |
63 | 75 | :param type_: The type of backend to create. |
64 | | - :type type_: BackendType |
65 | 76 | :param kwargs: Additional arguments for backend initialization. |
66 | 77 | :return: An instance of a subclass of Backend. |
67 | | - :rtype: Backend |
68 | 78 | :raises ValueError: If the backend type is not registered. |
69 | 79 | """ |
70 | 80 |
71 | | - logger.info("Creating backend of type {}", type_) |
| 81 | + backend = cls.get_registered_object(type_) |
72 | 82 |
73 | | - if type_ not in cls._registry: |
74 | | - err = ValueError(f"Unsupported backend type: {type_}") |
75 | | - logger.error("{}", err) |
76 | | - raise err |
| 83 | + if backend is None: |
| 84 | + raise ValueError( |
| 85 | + f"Backend type '{type_}' is not registered. " |
| 86 | + f"Available types: {list(cls.registry.keys()) if cls.registry else []}" |
| 87 | + ) |
77 | 88 |
78 | | - return Backend._registry[type_](**kwargs) |
| 89 | + return backend(**kwargs) |
79 | 90 |
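The rewritten `create` replaces the old bare "Unsupported backend type" error with a message that lists the registered types, so callers can catch and report it directly. For example (the unregistered name is deliberate):

```python
try:
    backend = Backend.create("bogus_backend")  # not a registered type
except ValueError as err:
    # e.g. "Backend type 'bogus_backend' is not registered.
    #       Available types: ['openai_http']"
    print(f"cannot build backend: {err}")
```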
80 | 91 | def __init__(self, type_: BackendType): |
81 | | - self._type = type_ |
82 | | - |
83 | | - @property |
84 | | - def type_(self) -> BackendType: |
85 | 92 | """ |
86 | | - :return: The type of the backend. |
87 | | - """ |
88 | | - return self._type |
| 93 | + Initialize a backend instance. |
89 | 94 |
90 | | - @property |
91 | | - @abstractmethod |
92 | | - def target(self) -> str: |
93 | | - """ |
94 | | - :return: The target location for the backend. |
| 95 | + :param type_: The backend type identifier. |
95 | 96 | """ |
96 | | - ... |
| 97 | + self.type_ = type_ |
97 | 98 |
98 | 99 | @property |
99 | | - @abstractmethod |
100 | | - def model(self) -> Optional[str]: |
| 100 | + def processes_limit(self) -> int | None: |
101 | 101 | """ |
102 | | - :return: The model used for the backend requests. |
| 102 | + :return: Maximum number of worker processes supported. None if unlimited. |
103 | 103 | """ |
104 | | - ... |
| 104 | + return None |
105 | 105 |
106 | 106 | @property |
107 | | - @abstractmethod |
108 | | - def info(self) -> dict[str, Any]: |
109 | | - """ |
110 | | - :return: The information about the backend. |
111 | | - """ |
112 | | - ... |
113 | | - |
114 | | - @abstractmethod |
115 | | - async def reset(self) -> None: |
| 107 | + def requests_limit(self) -> int | None: |
116 | 108 | """ |
117 | | - Reset the connection object. This is useful for backends that |
118 | | - reuse connections or have state that needs to be cleared. |
| 109 | + :return: Maximum number of concurrent requests supported globally. |
| 110 | + None if unlimited. |
119 | 111 | """ |
120 | | - ... |
121 | | - |
122 | | - async def validate(self): |
123 | | - """ |
124 | | - Handle final setup and validate the backend is ready for use. |
125 | | - If not successful, raises the appropriate exception. |
126 | | - """ |
127 | | - logger.info("{} validating backend {}", self.__class__.__name__, self.type_) |
128 | | - await self.check_setup() |
129 | | - models = await self.available_models() |
130 | | - if not models: |
131 | | - raise ValueError("No models available for the backend") |
132 | | - |
133 | | - # Use the preferred route defined in the global settings when performing the |
134 | | - # validation request. This avoids calling an unavailable endpoint (ie |
135 | | - # /v1/completions) when the deployment only supports the chat completions |
136 | | - # endpoint. |
137 | | - if settings.preferred_route == "chat_completions": |
138 | | - async for _ in self.chat_completions( # type: ignore[attr-defined] |
139 | | - content="Test connection", output_token_count=1 |
140 | | - ): |
141 | | - pass |
142 | | - else: |
143 | | - async for _ in self.text_completions( # type: ignore[attr-defined] |
144 | | - prompt="Test connection", output_token_count=1 |
145 | | - ): |
146 | | - pass |
147 | | - |
148 | | - await self.reset() |
149 | | - |
150 | | - @abstractmethod |
151 | | - async def check_setup(self): |
152 | | - """ |
153 | | - Check the setup for the backend. |
154 | | - If unsuccessful, raises the appropriate exception. |
155 | | -
156 | | - :raises ValueError: If the setup check fails. |
157 | | - """ |
158 | | - ... |
159 | | - |
160 | | - @abstractmethod |
161 | | - async def prepare_multiprocessing(self): |
162 | | - """ |
163 | | - Prepare the backend for use in a multiprocessing environment. |
164 | | - This is useful for backends that have instance state that can not |
165 | | - be shared across processes and should be cleared out and re-initialized |
166 | | - for each new process. |
167 | | - """ |
168 | | - ... |
169 | | - |
170 | | - @abstractmethod |
171 | | - async def available_models(self) -> list[str]: |
172 | | - """ |
173 | | - Get the list of available models for the backend. |
174 | | -
175 | | - :return: The list of available models. |
176 | | - :rtype: List[str] |
177 | | - """ |
178 | | - ... |
| 112 | + return None |
179 | 113 |
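Both limits default to `None` (unlimited), so only constrained backends need to override them. A sketch reusing the hypothetical `SketchHTTPBackend` from above:

```python
from __future__ import annotations


class CappedBackend(SketchHTTPBackend):
    @property
    def processes_limit(self) -> int | None:
        return 1  # e.g. the client library is not safe across many processes

    @property
    def requests_limit(self) -> int | None:
        return 32  # cap global in-flight requests to a rate-limited service
```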
180 | 114 | @abstractmethod |
181 | | - async def text_completions( |
182 | | - self, |
183 | | - prompt: Union[str, list[str]], |
184 | | - request_id: Optional[str] = None, |
185 | | - prompt_token_count: Optional[int] = None, |
186 | | - output_token_count: Optional[int] = None, |
187 | | - **kwargs, |
188 | | - ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]: |
| 115 | + async def default_model(self) -> str | None: |
189 | 116 | """ |
190 | | - Generate text only completions for the given prompt. |
191 | | - Does not support multiple modalities, complicated chat interfaces, |
192 | | - or chat templates. Specifically, it requests with only the prompt. |
193 | | -
194 | | - :param prompt: The prompt (or list of prompts) to generate a completion for. |
195 | | - If a list is supplied, these are concatenated and run through the model |
196 | | - for a single prompt. |
197 | | - :param request_id: The unique identifier for the request, if any. |
198 | | - Added to logging statements and the response for tracking purposes. |
199 | | - :param prompt_token_count: The number of tokens measured in the prompt, if any. |
200 | | - Returned in the response stats for later analysis, if applicable. |
201 | | - :param output_token_count: If supplied, the number of tokens to enforce |
202 | | - generation of for the output for this request. |
203 | | - :param kwargs: Additional keyword arguments to pass with the request. |
204 | | - :return: An async generator that yields a StreamingTextResponse for start, |
205 | | - a StreamingTextResponse for each received iteration, |
206 | | - and a ResponseSummary for the final response. |
207 | | - """ |
208 | | - ... |
209 | | - |
210 | | - @abstractmethod |
211 | | - async def chat_completions( |
212 | | - self, |
213 | | - content: Union[ |
214 | | - str, |
215 | | - list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]], |
216 | | - Any, |
217 | | - ], |
218 | | - request_id: Optional[str] = None, |
219 | | - prompt_token_count: Optional[int] = None, |
220 | | - output_token_count: Optional[int] = None, |
221 | | - raw_content: bool = False, |
222 | | - **kwargs, |
223 | | - ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]: |
224 | | - """ |
225 | | - Generate chat completions for the given content. |
226 | | - Supports multiple modalities, complicated chat interfaces, and chat templates. |
227 | | - Specifically, it requests with the content, which can be any combination of |
228 | | - text, images, and audio provided the target model supports it, |
229 | | - and returns the output text. Additionally, any chat templates |
230 | | - for the model are applied within the backend. |
231 | | -
232 | | - :param content: The content (or list of content) to generate a completion for. |
233 | | - This supports any combination of text, images, and audio (model dependent). |
234 | | - Supported text only request examples: |
235 | | - content="Sample prompt", content=["Sample prompt", "Second prompt"], |
236 | | - content=[{"type": "text", "value": "Sample prompt"}. |
237 | | - Supported text and image request examples: |
238 | | - content=["Describe the image", PIL.Image.open("image.jpg")], |
239 | | - content=["Describe the image", Path("image.jpg")], |
240 | | - content=["Describe the image", {"type": "image_url", |
241 | | - "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}]. |
242 | | - Supported text and audio request examples: |
243 | | - content=["Transcribe the audio", Path("audio.wav")], |
244 | | - content=["Transcribe the audio", {"type": "input_audio", |
245 | | - "input_audio": {"data": f"{base64_bytes}", "format": "wav}]. |
246 | | - Additionally, if raw_content=True then the content is passed directly to the |
247 | | - backend without any processing. |
248 | | - :param request_id: The unique identifier for the request, if any. |
249 | | - Added to logging statements and the response for tracking purposes. |
250 | | - :param prompt_token_count: The number of tokens measured in the prompt, if any. |
251 | | - Returned in the response stats for later analysis, if applicable. |
252 | | - :param output_token_count: If supplied, the number of tokens to enforce |
253 | | - generation of for the output for this request. |
254 | | - :param kwargs: Additional keyword arguments to pass with the request. |
255 | | - :return: An async generator that yields a StreamingTextResponse for start, |
256 | | - a StreamingTextResponse for each received iteration, |
257 | | - and a ResponseSummary for the final response. |
| 117 | + :return: The default model name or identifier for generation requests, or None if unavailable.
258 | 118 | """ |
259 | 119 | ... |
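Putting the lifecycle together, a driver coroutine walks the phases in order. A minimal sketch; the no-argument construction and the `process_shutdown` name are assumptions carried over from above:

```python
import asyncio


async def main() -> None:
    # Phase 1: look up the registered class; real kwargs depend on the
    # concrete backend's __init__.
    backend = Backend.create("openai_http")
    await backend.process_startup()       # phase 2: per-process resources
    print(await backend.default_model())  # phase 3: confirm a model is ready
    # Phase 4 would stream GenerationRequest objects through the scheduler.
    await backend.process_shutdown()      # phase 5: cleanup (name assumed)


asyncio.run(main())
```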