Skip to content

Commit 41a8b8d

Browse files
committed
use std::string instead of const char* on C++ interfaces
1 parent 0b47bcf commit 41a8b8d

8 files changed

Lines changed: 126 additions & 92 deletions

File tree

include/LLM.h

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ struct LoraIdScalePath {
2828
};
2929

3030
void ensure_error_handlers_initialized();
31-
bool has_gpu_layers(const std::string& command);
3231

3332
class UNDREAMAI_API LLM {
3433
protected:
@@ -38,7 +37,8 @@ class UNDREAMAI_API LLM {
3837
virtual std::string completion_impl(const json& data, CharArrayFn callback = nullptr, httplib::Response* res = nullptr, std::function<bool()> is_connection_closed = always_false, int oaicompat = 0) = 0;
3938

4039
public:
41-
static std::string LLM_args_to_command(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
40+
static bool has_gpu_layers(const std::string& command);
41+
static std::string LLM_args_to_command(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
4242

4343
virtual json build_tokenize_json(const std::string& query);
4444
virtual std::vector<int> parse_tokenize_json(const json& result);
@@ -79,12 +79,12 @@ class UNDREAMAI_API LLMLocal : public LLM {
7979
virtual void cancel_impl(int id_slot) = 0;
8080

8181
public:
82-
virtual json build_slot_json(int id_slot, std::string action, std::string filepath);
82+
virtual json build_slot_json(int id_slot, const std::string& action, const std::string& filepath);
8383
virtual std::string parse_slot_json(const json& result);
8484
virtual std::string slot_json(const json& data, httplib::Response* res = nullptr);
85-
virtual std::string slot_json(int id_slot, std::string action, std::string filepath, httplib::Response* res = nullptr);
85+
virtual std::string slot_json(int id_slot, const std::string& action, const std::string& filepath, httplib::Response* res = nullptr);
8686
virtual std::string slot(const json& data, httplib::Response* res = nullptr);
87-
virtual std::string slot(int id_slot, std::string action, std::string filepath, httplib::Response* res = nullptr);
87+
virtual std::string slot(int id_slot, const std::string& action, const std::string& filepath, httplib::Response* res = nullptr);
8888

8989
virtual void cancel(int id_slot);
9090
};
@@ -111,13 +111,13 @@ class UNDREAMAI_API LLMProvider : public LLMLocal {
111111
virtual std::vector<LoraIdScalePath> parse_lora_list_json(const json& result);
112112
virtual std::vector<LoraIdScalePath> lora_list();
113113

114-
virtual void start_server(const char* host="0.0.0.0", int port=0, const char* API_key="") = 0;
114+
virtual void start_server(const std::string& host="0.0.0.0", int port=0, const std::string& API_key="") = 0;
115115
virtual void stop_server() = 0;
116116
virtual void join_server() = 0;
117117
virtual void start() = 0;
118118
virtual void stop() = 0;
119119
virtual void join_service() = 0;
120-
virtual void set_SSL(const char* SSL_cert, const char* SSL_key) = 0;
120+
virtual void set_SSL(const std::string& SSL_cert, const std::string& SSL_key) = 0;
121121
virtual bool started() = 0;
122122

123123
virtual int embedding_size() = 0;
@@ -157,7 +157,7 @@ class LLMProviderRegistry {
157157

158158

159159
extern "C" {
160-
UNDREAMAI_API bool Has_GPU_Layers(const std::string& command);
160+
UNDREAMAI_API bool Has_GPU_Layers(const char* command);
161161

162162
UNDREAMAI_API const char* LLM_Tokenize(LLM* llm, const char* json_data);
163163
UNDREAMAI_API const char* LLM_Detokenize(LLM* llm, const char* json_data);

include/LLM_runtime.h

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -71,26 +71,28 @@ class LLMService;
7171

7272
class UNDREAMAI_API LLMRuntime : public LLMProvider {
7373
public:
74-
LLMRuntime(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
75-
LLMRuntime(const std::string& command);
76-
LLMRuntime(int argc, char ** argv);
74+
LLMRuntime();
75+
LLMRuntime(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
7776
~LLMRuntime();
7877

78+
static LLMRuntime* from_command(const std::string& command);
79+
static LLMRuntime* from_command(int argc, char ** argv);
80+
7981
LibHandle handle = nullptr;
8082
LLMProvider* llm = nullptr;
8183

8284
bool create_LLM_library(const std::string& command);
8385

8486
//=================================== LLM METHODS START ===================================//
85-
void start_server(const char* host="0.0.0.0", int port=0, const char* API_key="") override { LLM_Start_Server((LLMProvider*)llm, host, port, API_key); }
86-
void stop_server() override { LLM_Stop_Server((LLMProvider*)llm); }
87-
void join_server() override { LLM_Join_Server((LLMProvider*)llm); }
88-
void start() override { LLM_Start((LLMProvider*)llm); }
89-
void stop() override { LLM_Stop((LLMProvider*)llm); }
90-
void join_service() override { LLM_Join_Service((LLMProvider*)llm); }
91-
void set_SSL(const char* cert, const char* key) override { LLM_Set_SSL((LLMProvider*)llm, cert, key); }
92-
bool started() override { return LLM_Started((LLMProvider*)llm); }
93-
int embedding_size() override { return LLM_Embedding_Size((LLMProvider*)llm);}
87+
void start_server(const std::string& host="0.0.0.0", int port=0, const std::string& API_key="") override { ((LLMProvider*)llm)->start_server(host, port, API_key); }
88+
void stop_server() override { ((LLMProvider*)llm)->stop_server(); }
89+
void join_server() override { ((LLMProvider*)llm)->join_server(); }
90+
void start() override { ((LLMProvider*)llm)->start(); }
91+
void stop() override { ((LLMProvider*)llm)->stop();; }
92+
void join_service() override { ((LLMProvider*)llm)->join_service(); }
93+
void set_SSL(const std::string& cert, const std::string& key) override { ((LLMProvider*)llm)->set_SSL(cert, key); }
94+
bool started() override { return ((LLMProvider*)llm)->started(); }
95+
int embedding_size() override { return ((LLMProvider*)llm)->embedding_size();}
9496
//=================================== LLM METHODS END ===================================//
9597

9698
#define DECLARE_FN(name, ret, ...) \
@@ -100,6 +102,7 @@ class UNDREAMAI_API LLMRuntime : public LLMProvider {
100102

101103
protected:
102104
std::vector<std::filesystem::path> search_paths;
105+
103106
//=================================== LLM METHODS START ===================================//
104107
std::string tokenize_impl(const json& data) override {
105108
return LLM_Tokenize((LLM*)llm, data.dump().c_str());

include/LLM_service.h

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,28 +11,30 @@ struct server_context;
1111

1212
class UNDREAMAI_API LLMService : public LLMProvider {
1313
public:
14-
LLMService(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
15-
LLMService(const json& params);
16-
LLMService(const std::string& params);
17-
LLMService(const char* params);
18-
LLMService(int argc, char ** argv);
14+
LLMService();
15+
LLMService(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
1916
~LLMService();
2017

21-
void init(int argc, char** argv);
22-
void init(const std::string& params);
23-
void init(const char* params);
18+
static LLMService* from_params(const json& params);
19+
static LLMService* from_command(const std::string& command);
20+
static LLMService* from_command(int argc, char ** argv);
2421

2522
static EVP_PKEY* load_key(const std::string& key_str);
2623
static X509* load_cert(const std::string& cert_str);
24+
static std::vector<char*> jsonToArguments(const json& params);
25+
26+
void init(int argc, char** argv);
27+
void init(const std::string& params);
28+
void init(const char* params);
2729

2830
//=================================== LLM METHODS START ===================================//
29-
void start_server(const char* host="0.0.0.0", int port=0, const char* API_key="") override;
31+
void start_server(const std::string& host="0.0.0.0", int port=0, const std::string& API_key="") override;
3032
void stop_server() override;
3133
void join_server() override;
3234
void start() override;
3335
void stop() override;
3436
void join_service() override;
35-
void set_SSL(const char* SSL_cert, const char* SSL_key) override;
37+
void set_SSL(const std::string& SSL_cert, const std::string& SSL_key) override;
3638
bool started() override;
3739

3840
int embedding_size() override;
@@ -61,7 +63,6 @@ class UNDREAMAI_API LLMService : public LLMProvider {
6163
std::string SSL_key = "";
6264
std::mutex start_stop_mutex;
6365

64-
std::vector<char*> jsonToArguments(const json& params);
6566
std::vector<std::string> splitArguments(const std::string& inputString);
6667
std::string completion_streaming(
6768
std::unordered_set<int> id_tasks,

src/LLM.cpp

Lines changed: 16 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -27,26 +27,22 @@ void ensure_error_handlers_initialized() {
2727

2828
//=========================== Helpers ===========================//
2929

30-
std::string LLM::LLM_args_to_command(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
31-
{
32-
std::string command = std::string("-m ") + model_path
33-
+ " -t " + std::to_string(num_threads)
34-
+ " -ngl " + std::to_string(num_GPU_layers)
35-
+ " -np " + std::to_string(num_parallel)
36-
+ " -c " + std::to_string(context_size)
37-
+ " -b " + std::to_string(batch_size);
30+
std::string LLM::LLM_args_to_command(const std::string& model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string>& lora_paths)
31+
{
32+
std::string command = "-m " + model_path +
33+
" -t " + std::to_string(num_threads) +
34+
" -ngl " + std::to_string(num_GPU_layers) +
35+
" -np " + std::to_string(num_parallel) +
36+
" -c " + std::to_string(context_size) +
37+
" -b " + std::to_string(batch_size);
38+
3839
if (flash_attention) command += " --flash-attn";
3940
if (embedding_only) command += " --embedding";
40-
if (lora_paths != nullptr && lora_count > 0)
41-
{
42-
for (int i = 0; i < lora_count; ++i) {
43-
command += " --lora " + std::string(lora_paths[i]);
44-
}
45-
}
41+
for (const auto& lora_path : lora_paths) command += " --lora " + lora_path;
4642
return command;
4743
}
4844

49-
bool has_gpu_layers(const std::string& command) {
45+
bool LLM::has_gpu_layers(const std::string& command) {
5046
std::istringstream iss(command);
5147
std::vector<std::string> args;
5248
std::string token;
@@ -258,7 +254,7 @@ std::string LLM::completion(const std::string& prompt, int id_slot, const json&
258254

259255
//=========================== Slot Action ===========================//
260256

261-
json LLMLocal::build_slot_json(int id_slot, std::string action, std::string filepath)
257+
json LLMLocal::build_slot_json(int id_slot, const std::string& action, const std::string& filepath)
262258
{
263259
json j;
264260
j["id_slot"] = id_slot;
@@ -281,7 +277,7 @@ std::string LLMLocal::slot_json(const json& data, httplib::Response* res)
281277
return slot_impl(data, res);
282278
}
283279

284-
std::string LLMLocal::slot_json(int id_slot, std::string action, std::string filepath, httplib::Response* res)
280+
std::string LLMLocal::slot_json(int id_slot, const std::string& action, const std::string& filepath, httplib::Response* res)
285281
{
286282
return slot_json(build_slot_json(id_slot, action, filepath), res);
287283
}
@@ -291,7 +287,7 @@ std::string LLMLocal::slot(const json& data, httplib::Response* res)
291287
return parse_slot_json(json::parse(slot_json(data, res)));
292288
}
293289

294-
std::string LLMLocal::slot(int id_slot, std::string action, std::string filepath, httplib::Response* res)
290+
std::string LLMLocal::slot(int id_slot, const std::string& action, const std::string& filepath, httplib::Response* res)
295291
{
296292
return slot(build_slot_json(id_slot, action, filepath), res);
297293
}
@@ -377,9 +373,9 @@ std::vector<LoraIdScalePath> LLMProvider::lora_list()
377373

378374
//=========================== API ===========================//
379375

380-
bool Has_GPU_Layers(const std::string& command)
376+
bool Has_GPU_Layers(const char* command)
381377
{
382-
return has_gpu_layers(command);
378+
return LLM::has_gpu_layers(command);
383379
}
384380

385381
const char* LLM_Tokenize(LLM* llm, const char* json_data) {

src/LLM_runtime.cpp

Lines changed: 39 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -247,16 +247,29 @@ bool LLMRuntime::create_LLM_library(const std::string& command) {
247247

248248
//============================= LLMRuntime =============================//
249249

250-
LLMRuntime::LLMRuntime(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
251-
: LLMRuntime(LLM::LLM_args_to_command(model_path, num_threads, num_GPU_layers, num_parallel, flash_attention, context_size, batch_size, embedding_only, lora_count, lora_paths)) { }
252-
253-
LLMRuntime::LLMRuntime(const std::string& command)
250+
LLMRuntime::LLMRuntime()
254251
{
255252
search_paths = get_search_directories();
253+
}
254+
255+
LLMRuntime::LLMRuntime(const std::string& model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string>& lora_paths)
256+
: LLMRuntime()
257+
{
258+
std::string command = LLM::LLM_args_to_command(model_path, num_threads, num_GPU_layers, num_parallel, flash_attention, context_size, batch_size, embedding_only, lora_paths);
256259
create_LLM_library(command);
257260
}
258261

259-
LLMRuntime::LLMRuntime(int argc, char ** argv) : LLMRuntime(args_to_command(argc, argv)) { }
262+
LLMRuntime* LLMRuntime::from_command(const std::string& command)
263+
{
264+
LLMRuntime* llmRuntime = new LLMRuntime();
265+
llmRuntime->create_LLM_library(command);
266+
return llmRuntime;
267+
}
268+
269+
LLMRuntime* LLMRuntime::from_command(int argc, char ** argv)
270+
{
271+
return from_command(args_to_command(argc, argv));
272+
}
260273

261274
LLMRuntime::~LLMRuntime() {
262275
if (llm) {
@@ -287,15 +300,30 @@ const char* Available_Architectures(bool gpu)
287300

288301
LLMRuntime* LLMRuntime_Construct(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
289302
{
290-
return LLMRuntime_From_Command(LLM::LLM_args_to_command(model_path, num_threads, num_GPU_layers, num_parallel, flash_attention, context_size, batch_size, embedding_only, lora_count, lora_paths).c_str());
303+
std::vector<std::string> lora_paths_vector;
304+
if (lora_paths != nullptr && lora_count > 0)
305+
{
306+
for (int i = 0; i < lora_count; ++i) {
307+
lora_paths_vector.push_back(std::string(lora_paths[i]));
308+
}
309+
}
310+
LLMRuntime* llmRuntime = new LLMRuntime(model_path, num_threads, num_GPU_layers, num_parallel, flash_attention, context_size, batch_size, embedding_only, lora_paths_vector);
311+
312+
if(llmRuntime->llm == nullptr)
313+
{
314+
delete llmRuntime;
315+
return nullptr;
316+
}
317+
return llmRuntime;
291318
}
292319

293-
LLMRuntime* LLMRuntime_From_Command(const char* command) {
294-
LLMRuntime* lib = new LLMRuntime(std::string(command));
295-
if(lib->llm == nullptr)
320+
LLMRuntime* LLMRuntime_From_Command(const char* command)
321+
{
322+
// Build the runtime from the raw command line via the from_command factory.
// Constructing LLMRuntime directly from a single string selects the
// (model_path, num_threads, ...) constructor, which would treat the whole
// command as a model path and wrap it in "-m <command> -t ...".
LLMRuntime* llmRuntime = LLMRuntime::from_command(std::string(command));
323+
if(llmRuntime->llm == nullptr)
296324
{
297-
delete lib;
325+
delete llmRuntime;
298326
return nullptr;
299327
}
300-
return lib;
328+
return llmRuntime;
301329
}

0 commit comments

Comments
 (0)