From dcf6bb300bb907468691def26be1c43365796904 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 20 Aug 2025 01:13:09 +0000
Subject: [PATCH 1/2] dynamic model switch

Signed-off-by: Ubuntu
---
 ChatQnA/chatqna.py | 4 ++--
 CodeGen/codegen.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index 2e462b0f6e..733084e8a4 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -73,7 +73,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
@@ -396,7 +396,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             chat_template=chat_request.chat_template if chat_request.chat_template else None,
-            model=chat_request.model if chat_request.model else None,
+            model=chat_request.model if chat_request.model else LLM_MODEL,
         )
         retriever_parameters = RetrieverParms(
             search_type=chat_request.search_type if chat_request.search_type else "similarity",
diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py
index af9afdf715..a535c8e8f7 100644
--- a/CodeGen/codegen.py
+++ b/CodeGen/codegen.py
@@ -76,7 +76,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL_ID
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["query"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
@@ -195,6 +195,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             index_name=chat_request.index_name,
+            model=chat_request.model if chat_request.model else LLM_MODEL_ID
         )
 
         # Initialize the initial inputs with the generated prompt

From 0e2472889b6e27629d16a3d9d82a4e6d7ee5df8c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 01:15:14 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 CodeGen/codegen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py
index a535c8e8f7..f95ec94e09 100644
--- a/CodeGen/codegen.py
+++ b/CodeGen/codegen.py
@@ -195,7 +195,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             index_name=chat_request.index_name,
-            model=chat_request.model if chat_request.model else LLM_MODEL_ID
+            model=chat_request.model if chat_request.model else LLM_MODEL_ID,
         )
 
         # Initialize the initial inputs with the generated prompt
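
Usage note (not part of the patches above): a minimal client sketch of how the per-request model override introduced by this diff could be exercised. The host, port, endpoint path (/v1/chatqna), and model name are assumptions about a typical ChatQnA megaservice deployment, not values taken from the patch; only the behavior of the "model" field (falling back to the service-level LLM_MODEL when omitted) comes from the diff.

    # Hypothetical client sketch exercising the per-request "model" field that
    # the patch threads from chat_request down to the LLM call.
    # Assumes the ChatQnA megaservice is reachable at localhost:8888; adjust
    # MEGASERVICE_URL for your deployment.
    import requests

    MEGASERVICE_URL = "http://localhost:8888/v1/chatqna"  # assumed default endpoint

    payload = {
        "messages": "What is OPEA?",
        # With the patch applied, this value overrides the service-level default
        # (LLM_MODEL / LLM_MODEL_ID); if omitted, the service falls back to that
        # default as before. The name must be a model the backing vLLM/TGI serves.
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "max_tokens": 128,
        "stream": False,
    }

    response = requests.post(MEGASERVICE_URL, json=payload, timeout=120)
    response.raise_for_status()
    print(response.json())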