-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathasync_demo.cc
More file actions
70 lines (54 loc) · 1.4 KB
/
async_demo.cc
File metadata and controls
70 lines (54 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <iostream>
#include "llm_client.h"
using namespace wfai;
/// Stop request flag: becomes non-zero once sig_handler() has run.
/// It is written from a signal handler, so it must be `volatile sig_atomic_t`;
/// that is the only plain object type a handler may portably store to
/// (a `volatile bool` carries no such guarantee).
volatile sig_atomic_t stop_flag = 0;

/// Signal handler: records that a stop was requested and does nothing else,
/// so it stays async-signal-safe.
///
/// @param signo  Number of the delivered signal; unused because every signal
///               routed here is handled the same way.
void sig_handler(int signo)
{
    (void)signo;
    stop_flag = 1;
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr, "USAGE: %s <api_key>\n", argv[0]);
exit(1);
}
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
LLMClient client(argv[1]);
printf("=== Asynchronous LLMClient DEMO ===\n\n");
printf("Async Streaming Mode (using AsyncResult):\n");
{
ChatCompletionRequest request;
request.stream = true;
request.max_tokens = 10;
request.messages.push_back({"user", "What is your tokenizer?"});
auto result = client.chat_completion_async(request);
// ... you may do anything else until you need the data ...
while (true)
{
ChatCompletionChunk *chunk = result.get_chunk();
if (!chunk /*non-streaming*/ || chunk->state != RESPONSE_SUCCESS)
break;
if (!chunk->choices.empty() && !chunk->choices[0].delta.content.empty())
{
printf("%s", chunk->choices[0].delta.content.c_str());
fflush(stdout);
}
if (chunk->last_chunk())
{
// print some other info
break;
}
}
// if non streaming, use this to get response
// ChatCompletionResponse *response = result.get_response();
printf("\n\n✓ Async streaming completed\n\n");
}
return 0;
}