Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/LLM_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ using LibHandle = void *; ///< Unix library handle type
#define LLM_FUNCTIONS_LIST(M) \
M(LLMService_Registry, void, LLMProviderRegistry *) \
M(LLMService_InjectErrorState, void, ErrorState *) \
M(LLMService_Supports_GPU, bool) \
M(LLMService_Construct, LLMProvider *, const char *, int, int, int, bool, int, int, bool, int, const char **) \
M(LLMService_From_Command, LLMProvider *, const char *)

Expand Down Expand Up @@ -227,7 +228,7 @@ class UNDREAMAI_API LLMService : public LLMProvider
/// @param llm_lib_filename Specific library filename to load
/// @return true if library loaded successfully, false otherwise
/// @details Internal method for loading specific library files
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename);
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library=false);
};

/// @brief Get OS-specific library directory
Expand Down
3 changes: 3 additions & 0 deletions include/LLM_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ extern "C"
UNDREAMAI_API const char *LLMService_Command(LLMService *llm_service);

UNDREAMAI_API void LLMService_InjectErrorState(ErrorState *error_state);

UNDREAMAI_API bool LLMService_Supports_GPU();

}

/// @}
26 changes: 20 additions & 6 deletions src/LLM_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ const std::string platform_name()
#endif
}

// Name fragments of the GPU-accelerated backend libraries. Used in two places:
// available_architectures() registers each entry as a loadable architecture when
// GPU support is requested, and create_LLM_library() substring-matches a library
// filename against these entries to decide whether it is a GPU build.
const std::vector<std::string> GPU_LIBRARIES = {"cublas", "tinyblas", "hip", "vulkan"};

const std::vector<std::string> available_architectures(bool gpu)
{
std::vector<std::string> architectures;
Expand Down Expand Up @@ -53,10 +55,8 @@ const std::vector<std::string> available_architectures(bool gpu)
#if defined(_WIN32) || defined(__linux__)
if (gpu)
{
add_library("cublas");
add_library("tinyblas");
add_library("hip");
add_library("vulkan");
for (std::string gpu_library: GPU_LIBRARIES)
add_library(gpu_library);
}
if (has_avx512())
add_library("avx512");
Expand Down Expand Up @@ -215,7 +215,7 @@ LibHandle load_library_safe(const std::string &path)
return handle_out;
}

bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)
bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library)
{
sigjmp_buf local_jump_point;
sigjmp_buf* old_jump_point = get_current_jump_point_ptr(); // Save the old one
Expand Down Expand Up @@ -269,6 +269,11 @@ bool LLMService::create_LLM_library_backend(const std::string &command, const st
}
LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
#undef DECLARE_AND_LOAD
if (is_gpu_library && !LLMService_Supports_GPU())
{
std::cout << "Doesn't support the GPU, skipping"<<std::endl;
continue;
}

LLMService_Registry(&LLMProviderRegistry::instance());
LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
Expand Down Expand Up @@ -299,7 +304,16 @@ bool LLMService::create_LLM_library(const std::string &command)
for (const auto &llm_lib_filename : available_architectures(gpu))
{
fail("", 0);
bool success = create_LLM_library_backend(command, llm_lib_filename);
bool is_gpu_library = false;
for (std::string gpu_library: GPU_LIBRARIES)
{
if (llm_lib_filename.find(gpu_library) != std::string::npos) {
is_gpu_library = true;
break;
}
}

bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
if (success)
{
std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
Expand Down
5 changes: 5 additions & 0 deletions src/LLM_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,11 @@ void LLMService_Registry(LLMProviderRegistry *existing_instance)
LLMProviderRegistry::inject_registry(existing_instance);
}

/// @brief C-API query for GPU offload capability of the loaded backend.
/// @return true if the underlying llama backend reports GPU offload support,
///         false otherwise.
/// @details Thin extern-"C" wrapper around llama_supports_gpu_offload() so the
///          runtime loader can probe a candidate library before registering it.
bool LLMService_Supports_GPU()
{
    const bool gpu_offload_available = llama_supports_gpu_offload();
    return gpu_offload_available;
}

LLMService *LLMService_Construct(const char *model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, int lora_count, const char **lora_paths)
{
std::vector<std::string> lora_paths_vector;
Expand Down
1 change: 1 addition & 0 deletions tools/llamalib_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ int main(int argc, char **argv)
llm->debug(1);
llm->start();
llm->start_server();
std::cout << "service started" << std::endl;
llm->join_server();

return 0;
Expand Down