
Commit 2c0e43c

🐛 bugfix: Retrieval on llm-vllm service not working (opea-project#756)
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* 🐛 bugfix: Retrieval on llm-vllm service not working
  Signed-off-by: Krishna Murti <[email protected]>
* removed LLMParamsDocs instantiation
  Signed-off-by: Krishna Murti <[email protected]>
* applied patch for recent updates for llm-vllm
  Signed-off-by: Krishna Murti <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci

---------

Signed-off-by: Krishna Murti <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4638c1d commit 2c0e43c

File tree

  • comps/llms/text-generation/vllm/langchain

1 file changed: +18 -9 lines changed
comps/llms/text-generation/vllm/langchain/llm.py

Lines changed: 18 additions & 9 deletions
@@ -57,13 +57,6 @@ def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc])
     if not isinstance(input, SearchedDoc) and input.chat_template:
         prompt_template = PromptTemplate.from_template(input.chat_template)
         input_variables = prompt_template.input_variables
-    parameters = {
-        "max_tokens": input.max_tokens,
-        "top_p": input.top_p,
-        "temperature": input.temperature,
-        "frequency_penalty": input.frequency_penalty,
-        "presence_penalty": input.presence_penalty,
-    }
 
     if isinstance(input, SearchedDoc):
         if logflag:
@@ -81,6 +74,14 @@ def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc])
         # use default llm parameter for inference
         new_input = LLMParamsDoc(query=prompt)
 
+        parameters = {
+            "max_tokens": new_input.max_tokens,
+            "top_p": new_input.top_p,
+            "temperature": new_input.temperature,
+            "frequency_penalty": new_input.frequency_penalty,
+            "presence_penalty": new_input.presence_penalty,
+        }
+
         if logflag:
             logger.info(f"[ SearchedDoc ] final input: {new_input}")
 
@@ -113,6 +114,14 @@ async def stream_generator():
 
         prompt = input.query
 
+        parameters = {
+            "max_tokens": input.max_tokens,
+            "top_p": input.top_p,
+            "temperature": input.temperature,
+            "frequency_penalty": input.frequency_penalty,
+            "presence_penalty": input.presence_penalty,
+        }
+
         if prompt_template:
             if sorted(input_variables) == ["context", "question"]:
                 prompt = prompt_template.format(question=input.query, context="\n".join(input.documents))
@@ -131,7 +140,7 @@ async def stream_generator():
 
             async def stream_generator():
                 chat_response = ""
-                async for text in llm.astream(input.query, **parameters):
+                async for text in llm.astream(prompt, **parameters):
                     chat_response += text
                     chunk_repr = repr(text.encode("utf-8"))
                     if logflag:
@@ -144,7 +153,7 @@ async def stream_generator():
             return StreamingResponse(stream_generator(), media_type="text/event-stream")
 
         else:
-            response = llm.invoke(input.query, **parameters)
+            response = llm.invoke(prompt, **parameters)
             if logflag:
                 logger.info(response)
 