Increase the default `max_new_tokens` of `generate_response` from 150 to 500 so that generated responses are no longer cut off before they are complete.

SeverusYixin · SeverusYixin · commit 0164e69872fb · 2025-01-07T20:59:35.000+01:00
diff --git a/search_engine/chatbot/llm_utilities.py b/search_engine/chatbot/llm_utilities.py
@@ -31,7 +31,7 @@ def __init__(self, model_name="meta-llama-3.1-70b-instruct", use_gpu=True):
             f"KISSKI LLM configured with model '{self.model_name}'. GPU usage = {self.use_gpu}."
         )
 
-    def generate_response(self, prompt, max_new_tokens=150, num_return_sequences=1):
+    def generate_response(self, prompt, max_new_tokens=500, num_return_sequences=1):
         """
         Generate a response from the KISSKI LLM service using the new openai>=1.0.0 Chat interface.
         Args:

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ def __init__(self, model_name="meta-llama-3.1-70b-instruct", use_gpu=True):`
`31`	`31`	`f"KISSKI LLM configured with model '{self.model_name}'. GPU usage = {self.use_gpu}."`
`32`	`32`	`)`
`33`	`33`
`34`		`- def generate_response(self, prompt, max_new_tokens=150, num_return_sequences=1):`
	`34`	`+ def generate_response(self, prompt, max_new_tokens=500, num_return_sequences=1):`
`35`	`35`	`"""`
`36`	`36`	`Generate a response from the KISSKI LLM service using the new openai>=1.0.0 Chat interface.`
`37`	`37`	`Args:`