We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 22fa5a6 · commit 6595ad8 · Copy full SHA for 6595ad8
llama_cpp/llama.py
@@ -218,6 +218,7 @@ def generate(
218
top_p: float,
219
temp: float,
220
repeat_penalty: float,
221
+ reset: bool = True,
222
) -> Generator[
223
llama_cpp.llama_token, Optional[Sequence[llama_cpp.llama_token]], None
224
]:
@@ -235,12 +236,14 @@ def generate(
235
236
top_p: The top-p sampling parameter.
237
temp: The temperature parameter.
238
repeat_penalty: The repeat penalty parameter.
239
+ reset: Whether to reset the model state.
240
241
Yields:
242
The generated tokens.
243
"""
244
assert self.ctx is not None
- self.reset()
245
+ if reset:
246
+ self.reset()
247
while True:
248
self.eval(tokens)
249
token = self.sample(
0 commit comments