You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
static std::string LLM_args_to_command(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
static std::string LLM_args_to_command(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
Copy file name to clipboardExpand all lines: include/LLM_runtime.h
+15-12Lines changed: 15 additions & 12 deletions
Original file line number
Diff line number
Diff line change
@@ -71,26 +71,28 @@ class LLMService;
71
71
72
72
class UNDREAMAI_API LLMRuntime : public LLMProvider {
73
73
public:
74
-
LLMRuntime(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
75
-
LLMRuntime(const std::string& command);
76
-
LLMRuntime(int argc, char ** argv);
74
+
LLMRuntime();
75
+
LLMRuntime(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
Copy file name to clipboardExpand all lines: include/LLM_service.h
+12-11Lines changed: 12 additions & 11 deletions
Original file line number
Diff line number
Diff line change
@@ -11,28 +11,30 @@ struct server_context;
11
11
12
12
class UNDREAMAI_API LLMService : public LLMProvider {
13
13
public:
14
-
LLMService(const char* model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, int lora_count=0, const char** lora_paths=nullptr);
15
-
LLMService(const json& params);
16
-
LLMService(const std::string& params);
17
-
LLMService(const char* params);
18
-
LLMService(int argc, char ** argv);
14
+
LLMService();
15
+
LLMService(const std::string& model_path, int num_threads=-1, int num_GPU_layers=0, int num_parallel=1, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector<std::string>& lora_paths = {});
std::string LLM::LLM_args_to_command(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
std::string LLM::LLM_args_to_command(const std::string& model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string>& lora_paths)
LLMRuntime::LLMRuntime(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
LLMRuntime::LLMRuntime(const std::string& model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string>& lora_paths)
LLMRuntime* LLMRuntime_Construct(const char* model_path, int num_threads, int num_GPU_layers, int num_parallel, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char** lora_paths)
0 commit comments