-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathchat.py
More file actions
39 lines (29 loc) · 921 Bytes
/
chat.py
File metadata and controls
39 lines (29 loc) · 921 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""
Create a chat completion request and receive the response, either
at once, or streaming chunk by chunk.
"""
from qstash import QStash
def main():
    """Send two chat completion requests and print both replies.

    The first request is made without streaming and its reply is printed
    in one piece. The second request passes ``stream=True`` and its reply
    is printed incrementally as chunks are iterated.

    The client is built with the placeholder token ``"<QSTASH-TOKEN>"``;
    replace it with a real token before running.
    """
    client = QStash(
        token="<QSTASH-TOKEN>",
    )

    res = client.chat.create(
        messages=[{"role": "user", "content": "How are you?"}],
        model="meta-llama/Meta-Llama-3-8B-Instruct",
    )

    # Get the response at once
    print(res.choices[0].message.content)

    stream_res = client.chat.create(
        messages=[{"role": "user", "content": "How are you again?"}],
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        stream=True,
    )

    # Get the response in chunks over time
    for chunk in stream_res:
        content = chunk.choices[0].delta.content
        if content is None:
            # Content is none for the first chunk
            continue
        # Flush on every chunk: without a newline, buffered stdout may hold
        # the text back and the reply would not appear incrementally.
        print(content, end="", flush=True)

    # The chunks are printed without a line ending, so terminate the
    # streamed reply explicitly.
    print()
# Run the example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()