diff --git a/runtime/ggma/CMakeLists.txt b/runtime/ggma/CMakeLists.txt index 12e1c432042..508e35330a3 100644 --- a/runtime/ggma/CMakeLists.txt +++ b/runtime/ggma/CMakeLists.txt @@ -8,7 +8,7 @@ set(GGMA_DEV ggma-dev) add_library(${GGMA_DEV} SHARED ${API_SRC}) # Public headers to publish -set(GGMA_API_HEADERS include/ggma_api.h include/ggma_macro.h include/ggma_types.h +set(GGMA_API_HEADERS include/ggma_api.h include/ggma_types.h include/ggma_context.h include/ggma_generate.h) # GGMA install directory (same as ONERT_INSTALL_APIDIR) diff --git a/runtime/ggma/src/config.cc b/runtime/ggma/src/Config.cc similarity index 99% rename from runtime/ggma/src/config.cc rename to runtime/ggma/src/Config.cc index e0cf46c1577..1affb925f5f 100644 --- a/runtime/ggma/src/config.cc +++ b/runtime/ggma/src/Config.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "config.h" +#include "Config.h" #include #include diff --git a/runtime/ggma/src/config.h b/runtime/ggma/src/Config.h similarity index 99% rename from runtime/ggma/src/config.h rename to runtime/ggma/src/Config.h index 26f22c40550..9caffe9a50f 100644 --- a/runtime/ggma/src/config.h +++ b/runtime/ggma/src/Config.h @@ -17,7 +17,7 @@ #ifndef __GGMA_CONFIG_H__ #define __GGMA_CONFIG_H__ -#include "kv_cache.h" +#include "KVCache.h" #include #include diff --git a/runtime/ggma/src/context.cc b/runtime/ggma/src/Context.cc similarity index 88% rename from runtime/ggma/src/context.cc rename to runtime/ggma/src/Context.cc index 9e3b936bbb2..399991754be 100644 --- a/runtime/ggma/src/context.cc +++ b/runtime/ggma/src/Context.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "config.h" -#include "context.h" -#include "kv_cache.h" +#include "Config.h" +#include "Context.h" +#include "KVCache.h" #include #include @@ -75,37 +75,13 @@ uint64_t bufsize_for(const nnfw_tensorinfo *ti) return elmsize[ti->dtype] * num_elems(ti); } -ggma::context::context(const char *package_path) : _package_path(package_path) +ggma::Context::Context(const char *package_path) : _package_path(package_path) { _cfg = load_config(_package_path); _cache.init(_cfg, _cfg.cache_size); } -GGMA_STATUS context::from_package(ggma_context **session, const char *package_path) -{ - if (session == nullptr) - return GGMA_STATUS_UNEXPECTED_NULL; - try - { - auto new_session = std::unique_ptr(new context(package_path)); - *session = reinterpret_cast(new_session.release()); - } - catch (const std::bad_alloc &e) - { - std::cerr << "Error during session creation" << std::endl; - *session = nullptr; // Set nullptr on error to keep the old behavior - return GGMA_STATUS_OUT_OF_MEMORY; - } - catch (const std::exception &e) - { - std::cerr << "Error during session initialization : " << e.what() << std::endl; - *session = nullptr; // Set nullptr on error to keep the old behavior - return GGMA_STATUS_ERROR; - } - return GGMA_STATUS_NO_ERROR; -} - -ggma::GGMAConfig ggma::context::load_config(const std::string &package_path) +ggma::GGMAConfig ggma::Context::load_config(const std::string &package_path) { GGMAConfig config; @@ -116,7 +92,7 @@ ggma::GGMAConfig ggma::context::load_config(const std::string &package_path) return config; } -void context::prefill(ggma_token *tokens, size_t n_tokens, std::vector &hidden_state) +void Context::prefill(ggma_token *tokens, size_t n_tokens, std::vector &hidden_state) { std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "prefill"; nnfw_session *session = create_and_prepare_session(nnpkg_path.string()); @@ -182,7 +158,7 @@ void context::prefill(ggma_token *tokens, size_t n_tokens, std::vector nnfw_close_session(session); } -void context::unemb(std::vector &hidden_state, size_t n_tokens, std::vector &logits) +void Context::unemb(std::vector &hidden_state, size_t n_tokens, std::vector &logits) { std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "unemb"; nnfw_session *session = create_and_prepare_session(nnpkg_path.string()); @@ -228,7 +204,7 @@ void context::unemb(std::vector &hidden_state, size_t n_tokens, std::ve // Template implementation to eliminate code duplication template -void context::decode_impl(ggma_token token_id, OutputType &output) +void Context::decode_impl(ggma_token token_id, OutputType &output) { std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "decode"; nnfw_session *session = create_and_prepare_session(nnpkg_path.string()); @@ -297,25 +273,25 @@ void context::decode_impl(ggma_token token_id, OutputType &output) } // Public interface functions - delegate to template implementation -void context::decode(ggma_token token_id, std::vector &hidden_state) +void Context::decode(ggma_token token_id, std::vector &hidden_state) { decode_impl>(token_id, hidden_state); } -void context::decode(ggma_token token_id, std::vector &logits) +void Context::decode(ggma_token token_id, std::vector &logits) { decode_impl>(token_id, logits); } // Template instantiation (required for template implementation in .cpp file) -template void context::decode_impl>(ggma_token token_id, +template void Context::decode_impl>(ggma_token token_id, std::vector &output); -template void context::decode_impl>(ggma_token token_id, +template void Context::decode_impl>(ggma_token token_id, std::vector &output); // Sample token from logits using greedy sampling // Input shape: [n_seq, vocab_size], sample from last token -ggma_token context::sample(const std::vector &logits) +ggma_token Context::sample(const std::vector &logits) { if (logits.empty()) throw std::runtime_error("Empty logits tensor"); diff --git a/runtime/ggma/src/context.h b/runtime/ggma/src/Context.h similarity index 80% rename from runtime/ggma/src/context.h rename to runtime/ggma/src/Context.h index 518f5d66816..4559b1edc89 100644 --- a/runtime/ggma/src/context.h +++ b/runtime/ggma/src/Context.h @@ -17,10 +17,9 @@ #ifndef __GGMA_CONTEXT_H__ #define __GGMA_CONTEXT_H__ -#include "config.h" -#include "ggma_api.h" -#include "kv_cache.h" -#include "nnfw.h" +#include "ggma_types.h" +#include "Config.h" +#include "KVCache.h" #include #include @@ -30,18 +29,10 @@ namespace ggma { -class context +class Context { public: - /** - * @brief Factory method. It creates and initialize ggma_context - * - * @note Use factory instead of constructor to get status - */ - static GGMA_STATUS from_package(ggma_context **session, const char *package_path); - -private: - context(const char *package_path); + Context(const char *package_path); GGMAConfig load_config(const std::string &package_path); void prefill(ggma_token *tokens, size_t n_tokens, std::vector &hidden_state); @@ -57,7 +48,7 @@ class context void init_kv_cache(); public: - ~context() = default; + ~Context() = default; GGMA_STATUS generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict); diff --git a/runtime/ggma/src/generate.cc b/runtime/ggma/src/Generate.cc similarity index 96% rename from runtime/ggma/src/generate.cc rename to runtime/ggma/src/Generate.cc index 3ede101b98b..f6ef246ee63 100644 --- a/runtime/ggma/src/generate.cc +++ b/runtime/ggma/src/Generate.cc @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "config.h" -#include "context.h" -#include "kv_cache.h" +#include "Config.h" +#include "Context.h" +#include "KVCache.h" #include #include @@ -36,7 +36,7 @@ namespace ggma // // The function ensures no buffer overflow by checking against n_tokens_max // and stops generation when either the requested number is reached or the array is full. -GGMA_STATUS context::generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, +GGMA_STATUS Context::generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict) { try diff --git a/runtime/ggma/src/kv_cache.cc b/runtime/ggma/src/KVCache.cc similarity index 98% rename from runtime/ggma/src/kv_cache.cc rename to runtime/ggma/src/KVCache.cc index ec6e9a6127e..57e6a114e23 100644 --- a/runtime/ggma/src/kv_cache.cc +++ b/runtime/ggma/src/KVCache.cc @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "config.h" -#include "kv_cache.h" +#include "Config.h" +#include "KVCache.h" #include #include diff --git a/runtime/ggma/src/kv_cache.h b/runtime/ggma/src/KVCache.h similarity index 100% rename from runtime/ggma/src/kv_cache.h rename to runtime/ggma/src/KVCache.h diff --git a/runtime/ggma/include/ggma_macro.h b/runtime/ggma/src/Macro.h similarity index 63% rename from runtime/ggma/include/ggma_macro.h rename to runtime/ggma/src/Macro.h index fc828e0eb75..9c6252910c7 100644 --- a/runtime/ggma/include/ggma_macro.h +++ b/runtime/ggma/src/Macro.h @@ -17,29 +17,19 @@ #include /** - * @file ggma_macro.h + * @file Macro.h * @brief Common macros for GGMA error handling and utilities */ -#ifndef __GGMA_GGMA_MACRO_H__ -#define __GGMA_GGMA_MACRO_H__ +#ifndef __GGMA_MACRO_H__ +#define __GGMA_MACRO_H__ -#define GGMA_ENSURE(a) \ - do \ - { \ - if ((a) != GGMA_STATUS_NO_ERROR) \ - { \ - exit(-1); \ - } \ +#define GGMA_RETURN_ERROR_IF_NULL(p) \ + do \ + { \ + if ((p) == NULL) \ + return GGMA_STATUS_UNEXPECTED_NULL; \ } while (0) #define GGMA_UNUSED(x) (void)(x) -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -} -#endif - -#endif // __GGMA_GGMA_MACRO_H__ +#endif // __GGMA_MACRO_H__ diff --git a/runtime/ggma/src/ggma_context.cc b/runtime/ggma/src/ggma_context.cc new file mode 100644 index 00000000000..e1bfc81cd86 --- /dev/null +++ b/runtime/ggma/src/ggma_context.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ggma_context.h" +#include "Context.h" +#include + +extern "C" { + +GGMA_STATUS ggma_create_context(ggma_context **context, const char *package_path) +{ + if (context == nullptr) + return GGMA_STATUS_UNEXPECTED_NULL; + try + { + *context = reinterpret_cast(new ggma::Context(package_path)); + } + catch (const std::bad_alloc &e) + { + std::cerr << "Error during context creation" << std::endl; + *context = nullptr; + return GGMA_STATUS_OUT_OF_MEMORY; + } + catch (const std::exception &e) + { + std::cerr << "Error during context initialization : " << e.what() << std::endl; + *context = nullptr; + return GGMA_STATUS_ERROR; + } + return GGMA_STATUS_NO_ERROR; +} + +GGMA_STATUS ggma_free_context(ggma_context *context) +{ + delete reinterpret_cast(context); + return GGMA_STATUS_NO_ERROR; +} + +} // extern "C" diff --git a/runtime/ggma/src/ggma_api.cc b/runtime/ggma/src/ggma_generate.cc similarity index 55% rename from runtime/ggma/src/ggma_api.cc rename to runtime/ggma/src/ggma_generate.cc index 0931750286c..669fbb05410 100644 --- a/runtime/ggma/src/ggma_api.cc +++ b/runtime/ggma/src/ggma_generate.cc @@ -14,37 +14,15 @@ * limitations under the License. */ -#include "context.h" -#include "ggma_api.h" +#include "ggma_generate.h" -#include -#include - -// Double-check enum value changes - -#define GGMA_RETURN_ERROR_IF_NULL(p) \ - do \ - { \ - if ((p) == NULL) \ - return GGMA_STATUS_UNEXPECTED_NULL; \ - } while (0) - -GGMA_STATUS ggma_create_context(ggma_context **context, const char *package_path) -{ - GGMA_RETURN_ERROR_IF_NULL(context); - return ggma::context::from_package(context, package_path); -} - -GGMA_STATUS ggma_free_context(ggma_context *context) -{ - delete reinterpret_cast(context); - return GGMA_STATUS_NO_ERROR; -} +#include "Context.h" +#include "Macro.h" GGMA_STATUS ggma_generate(ggma_context *context, ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_tokens_out) { GGMA_RETURN_ERROR_IF_NULL(context); - return reinterpret_cast(context)->generate(tokens, n_tokens, n_tokens_max, + return reinterpret_cast(context)->generate(tokens, n_tokens, n_tokens_max, n_tokens_out); }