ChatQnA/chatqna.py (4 changes: 2 additions & 2 deletions)

@@ -73,7 +73,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]

@@ -396,7 +396,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             chat_template=chat_request.chat_template if chat_request.chat_template else None,
-            model=chat_request.model if chat_request.model else None,
+            model=chat_request.model if chat_request.model else LLM_MODEL,
         )
         retriever_parameters = RetrieverParms(
             search_type=chat_request.search_type if chat_request.search_type else "similarity",
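Taken together, the two hunks make the model name flow end to end: handle_request resolves the request's optional model field against the LLM_MODEL default, and align_inputs forwards the resolved value instead of re-reading the hardcoded constant. A minimal sketch of that precedence, assuming a request object with an optional model attribute and an LLM_MODEL environment default (the default model name below is a placeholder, not taken from the PR):

    import os

    # Placeholder default; in the gateway this comes from the LLM_MODEL env var.
    LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")

    def resolve_model(requested: str | None) -> str:
        """Mirror the fallback in handle_request: the request wins, the env default otherwise."""
        return requested if requested else LLM_MODEL

    # align_inputs then forwards the resolved name, so the value sent to
    # TGI/vLLM is never None and never silently ignores the caller's choice.
    assert resolve_model("Qwen/Qwen2.5-7B-Instruct") == "Qwen/Qwen2.5-7B-Instruct"
    assert resolve_model(None) == LLM_MODEL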
CodeGen/codegen.py (3 changes: 2 additions & 1 deletion)

@@ -76,7 +76,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
     elif self.services[cur_node].service_type == ServiceType.LLM:
         # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
         next_inputs = {}
-        next_inputs["model"] = LLM_MODEL_ID
+        next_inputs["model"] = inputs["model"]
         next_inputs["messages"] = [{"role": "user", "content": inputs["query"]}]
         next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
         next_inputs["top_p"] = llm_parameters_dict["top_p"]

@@ -195,6 +195,7 @@ async def handle_request(self, request: Request):
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
             index_name=chat_request.index_name,
+            model=chat_request.model if chat_request.model else LLM_MODEL_ID,
         )

         # Initialize the initial inputs with the generated prompt
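CodeGen gets the same treatment with LLM_MODEL_ID as the fallback, and additionally starts passing model into LLMParams at all, where it was previously omitted. A hedged usage sketch of the per-request override; the endpoint path, port, and model name below are illustrative assumptions about a typical local deployment, not values from the PR:

    import requests

    CODEGEN_URL = "http://localhost:7778/v1/codegen"  # assumed default deployment

    # With "model" present, align_inputs forwards it to the LLM service;
    # omit it and handle_request substitutes LLM_MODEL_ID before dispatch.
    payload = {
        "messages": "Implement binary search in Python.",
        "model": "Qwen/Qwen2.5-Coder-7B-Instruct",  # per-request override
    }
    resp = requests.post(CODEGEN_URL, json=payload, timeout=120)
    resp.raise_for_status()
    print(resp.json())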