add model_ready v2 endpoint (kserve#2617)
* add model ready v2

Signed-off-by: alexagriffith <[email protected]>

* lint

Signed-off-by: alexagriffith <[email protected]>

* add test back

Signed-off-by: alexagriffith <[email protected]>

Signed-off-by: alexagriffith <[email protected]>
alexagriffith authored Jan 3, 2023
1 parent 4778517 commit 9f2e6fe
Showing 3 changed files with 41 additions and 2 deletions.
13 changes: 13 additions & 0 deletions python/kserve/kserve/handlers/v2_datamodels.py
@@ -97,6 +97,19 @@ class ModelMetadataResponse(BaseModel):
outputs: List[MetadataTensor]


class ModelReadyResponse(BaseModel):
"""ModelReadyResponse
$ready_model_response =
{
"name": $string,
"ready": $bool
}
"""
name: str
ready: bool


class RequestInput(BaseModel):
"""RequestInput Model
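
For orientation, a minimal sketch of how the new response model behaves, assuming pydantic v1 semantics (parse_obj/json), which kserve used at the time; the model name "my-model" is purely illustrative:

from kserve.handlers.v2_datamodels import ModelReadyResponse

# Build the response the same way the endpoint handler does, from a plain dict.
resp = ModelReadyResponse.parse_obj({"name": "my-model", "ready": True})

# Serializing yields {"name": "my-model", "ready": true}, matching the
# $ready_model_response shape documented in the class docstring above.
print(resp.json())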
24 changes: 23 additions & 1 deletion python/kserve/kserve/handlers/v2_endpoints.py
@@ -17,8 +17,9 @@
from fastapi.responses import Response
from kserve.handlers.v2_datamodels import (
InferenceRequest, ServerMetadataResponse, ServerLiveResponse, ServerReadyResponse,
ModelMetadataResponse, InferenceResponse
ModelMetadataResponse, InferenceResponse, ModelReadyResponse
)
from kserve.errors import ModelNotReady
from kserve.handlers.dataplane import DataPlane
from kserve.handlers.model_repository_extension import ModelRepositoryExtension

@@ -74,6 +75,27 @@ async def model_metadata(self, model_name: str, model_version: Optional[str] = N
metadata = await self.dataplane.model_metadata(model_name)
return ModelMetadataResponse.parse_obj(metadata)

async def model_ready(self, model_name: str, model_version: Optional[str] = None) -> ModelReadyResponse:
"""Check if a given model is ready.
Args:
model_name (str): Model name.
model_version (str): Model version.
Returns:
ModelReadyResponse: Model ready object
"""
# TODO: support model_version
if model_version:
raise NotImplementedError("Model versioning not supported yet.")

model_ready = self.dataplane.model_ready(model_name)

if not model_ready:
raise ModelNotReady(model_name)

return ModelReadyResponse.parse_obj({"name": model_name, "ready": model_ready})

async def infer(
self,
raw_request: Request,
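
A rough usage sketch of the new REST route from a client's point of view, assuming a model server running locally on kserve's default HTTP port 8080 and a hypothetical model called "my-model"; a ready model returns the ModelReadyResponse body, while a model that is not ready surfaces the ModelNotReady error raised above:

import requests

# GET /v2/models/{model_name}/ready against a locally running server (address is an assumption).
r = requests.get("http://localhost:8080/v2/models/my-model/ready")

if r.status_code == 200:
    print(r.json())  # e.g. {"name": "my-model", "ready": True}
else:
    # The handler raised ModelNotReady, which the server maps to an error response.
    print("model not ready:", r.status_code)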
6 changes: 5 additions & 1 deletion python/kserve/kserve/model_server.py
@@ -37,7 +37,7 @@
from kserve.handlers.dataplane import DataPlane
from kserve.handlers.model_repository_extension import ModelRepositoryExtension
from kserve.handlers.v2_datamodels import InferenceResponse, ServerMetadataResponse, ServerLiveResponse, \
ServerReadyResponse, ModelMetadataResponse
ServerReadyResponse, ModelMetadataResponse, ModelReadyResponse
from kserve.model_repository import ModelRepository


@@ -168,6 +168,10 @@ def create_application(self) -> FastAPI:
v2_endpoints.model_metadata, response_model=ModelMetadataResponse, tags=["V2"]),
FastAPIRoute(r"/v2/models/{model_name}/versions/{model_version}",
v2_endpoints.model_metadata, tags=["V2"], include_in_schema=False),
FastAPIRoute(r"/v2/models/{model_name}/ready",
v2_endpoints.model_ready, response_model=ModelReadyResponse, tags=["V2"]),
FastAPIRoute(r"v2/models/{model_name}/versions/{model_version}/ready",
v2_endpoints.model_ready, response_model=ModelReadyResponse, tags=["V2"]),
FastAPIRoute(r"/v2/models/{model_name}/infer",
v2_endpoints.infer, methods=["POST"], response_model=InferenceResponse, tags=["V2"]),
FastAPIRoute(r"/v2/models/{model_name}/versions/{model_version}/infer",
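
Finally, a test-style sketch of exercising the newly registered route in-process with FastAPI's TestClient; DummyModel, its attributes, and the assertions are simplified assumptions for illustration rather than the repository's actual test fixtures:

from fastapi.testclient import TestClient
from kserve import Model, ModelServer

class DummyModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        self.ready = True  # mark the model as loaded/ready

    def predict(self, payload, headers=None):
        return {}

server = ModelServer()
server.register_model(DummyModel("my-model"))

client = TestClient(server.create_application())
resp = client.get("/v2/models/my-model/ready")
assert resp.status_code == 200
assert resp.json() == {"name": "my-model", "ready": True}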
