From 2cd19589042bd0a47bdfc60bb72450f500391b94 Mon Sep 17 00:00:00 2001 From: Alberto Ferrer Date: Tue, 20 Aug 2024 12:47:02 -0600 Subject: [PATCH 1/3] Update server_vllm.py Hello, There's an error occurring in the /v1/models endpoint of the API server. The error is happening because the served_model variable is a list instead of a string, which is causing validation errors when trying to create a ModelCard object. --- server_vllm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server_vllm.py b/server_vllm.py index 55e770e..c18eddb 100644 --- a/server_vllm.py +++ b/server_vllm.py @@ -48,10 +48,13 @@ @app.get("/v1/models") async def show_available_models(): - """Show available models. Right now we only have one model.""" - model_cards = [ - ModelCard(id=served_model, root=served_model, permission=[ModelPermission()]) - ] + """Show available models.""" + model_cards = [] + if isinstance(served_model, list): + for model in served_model: + model_cards.append(ModelCard(id=model, root=model, permission=[ModelPermission()])) + else: + model_cards.append(ModelCard(id=served_model, root=served_model, permission=[ModelPermission()])) return ModelList(data=model_cards) From 18b2e7c18133d36e6bfc7cb8ca60aac6315a9751 Mon Sep 17 00:00:00 2001 From: Jeffrey Fong Date: Thu, 22 Aug 2024 01:54:13 +0000 Subject: [PATCH 2/3] refactor served_model --- server_vllm.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/server_vllm.py b/server_vllm.py index c18eddb..d97d547 100644 --- a/server_vllm.py +++ b/server_vllm.py @@ -18,9 +18,9 @@ import argparse import asyncio import json +import logging import re from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Tuple, Union -import logging import fastapi import uvicorn @@ -36,13 +36,13 @@ TIMEOUT_KEEP_ALIVE = 5 # seconds -#logger = init_logger(__name__) +# logger = init_logger(__name__) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) -served_model = None +served_model = [] app = fastapi.FastAPI() @@ -52,9 +52,15 @@ async def show_available_models(): model_cards = [] if isinstance(served_model, list): for model in served_model: - model_cards.append(ModelCard(id=model, root=model, permission=[ModelPermission()])) + model_cards.append( + ModelCard(id=model, root=model, permission=[ModelPermission()]) + ) else: - model_cards.append(ModelCard(id=served_model, root=served_model, permission=[ModelPermission()])) + model_cards.append( + ModelCard( + id=served_model, root=served_model, permission=[ModelPermission()] + ) + ) return ModelList(data=model_cards) @@ -133,9 +139,11 @@ async def create_chat_completion(raw_request: Request): logger.info(f"args: {args}") if args.served_model_name is not None: - served_model = args.served_model_name - else: - served_model = args.model + logger.info( + "args.served_model_name is not used in this service and will be ignored. Served model will consist of args.model only." + ) + + served_model = [args.model] engine_args = AsyncEngineArgs.from_cli_args(args) # A separate tokenizer to map token IDs to strings. From 4c4561bf69a2e847905a0dbcebb54eccbfc280da Mon Sep 17 00:00:00 2001 From: Jeffrey Fong Date: Thu, 22 Aug 2024 02:29:14 +0000 Subject: [PATCH 3/3] fix unittest for contributors --- tests/test_prompt_creation.py | 2 +- tests/test_request_handling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_prompt_creation.py b/tests/test_prompt_creation.py index 3471fee..08a8bfb 100644 --- a/tests/test_prompt_creation.py +++ b/tests/test_prompt_creation.py @@ -42,7 +42,7 @@ def __init__(self, *args, **kwargs): "meetkai/functionary-small-v2.4", "meetkai/functionary-small-v2.5", "meetkai/functionary-medium-v3.0", - "meta-llama/Meta-Llama-3.1-8B-Instruct", + "meetkai/functionary-small-v3.1", ] def read_example_data(self, template_version: str): diff --git a/tests/test_request_handling.py b/tests/test_request_handling.py index a4832c2..6199af4 100644 --- a/tests/test_request_handling.py +++ b/tests/test_request_handling.py @@ -208,7 +208,7 @@ def __init__(self, *args, **kwargs): PromptTemplateV2: "meetkai/functionary-small-v2.4", Llama3Template: "meetkai/functionary-small-v2.5", Llama3TemplateV3: "meetkai/functionary-medium-v3.0", - Llama31Template: "meta-llama/Meta-Llama-3.1-8B-Instruct", + Llama31Template: "meetkai/functionary-small-v3.1", LlavaLlama: "lmms-lab/llama3-llava-next-8b", } self.default_text_str = "Normal text generation"