diff --git a/runtime/ggma/CMakeLists.txt b/runtime/ggma/CMakeLists.txt
index 12e1c432042..508e35330a3 100644
--- a/runtime/ggma/CMakeLists.txt
+++ b/runtime/ggma/CMakeLists.txt
@@ -8,7 +8,7 @@ set(GGMA_DEV ggma-dev)
 add_library(${GGMA_DEV} SHARED ${API_SRC})
 
 # Public headers to publish
-set(GGMA_API_HEADERS include/ggma_api.h include/ggma_macro.h include/ggma_types.h
+set(GGMA_API_HEADERS include/ggma_api.h include/ggma_types.h
                      include/ggma_context.h include/ggma_generate.h)
 
 # GGMA install directory (same as ONERT_INSTALL_APIDIR)
diff --git a/runtime/ggma/src/config.cc b/runtime/ggma/src/Config.cc
similarity index 99%
rename from runtime/ggma/src/config.cc
rename to runtime/ggma/src/Config.cc
index e0cf46c1577..1affb925f5f 100644
--- a/runtime/ggma/src/config.cc
+++ b/runtime/ggma/src/Config.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "config.h"
+#include "Config.h"
 
 #include <fstream>
 #include <json/json.h>
diff --git a/runtime/ggma/src/config.h b/runtime/ggma/src/Config.h
similarity index 99%
rename from runtime/ggma/src/config.h
rename to runtime/ggma/src/Config.h
index 26f22c40550..9caffe9a50f 100644
--- a/runtime/ggma/src/config.h
+++ b/runtime/ggma/src/Config.h
@@ -17,7 +17,7 @@
 #ifndef __GGMA_CONFIG_H__
 #define __GGMA_CONFIG_H__
 
-#include "kv_cache.h"
+#include "KVCache.h"
 
 #include <optional>
 #include <string>
diff --git a/runtime/ggma/src/context.cc b/runtime/ggma/src/Context.cc
similarity index 88%
rename from runtime/ggma/src/context.cc
rename to runtime/ggma/src/Context.cc
index 9e3b936bbb2..399991754be 100644
--- a/runtime/ggma/src/context.cc
+++ b/runtime/ggma/src/Context.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "config.h"
-#include "context.h"
-#include "kv_cache.h"
+#include "Config.h"
+#include "Context.h"
+#include "KVCache.h"
 
 #include <algorithm>
 #include <cassert>
@@ -75,37 +75,13 @@ uint64_t bufsize_for(const nnfw_tensorinfo *ti)
   return elmsize[ti->dtype] * num_elems(ti);
 }
 
-ggma::context::context(const char *package_path) : _package_path(package_path)
+ggma::Context::Context(const char *package_path) : _package_path(package_path)
 {
   _cfg = load_config(_package_path);
   _cache.init(_cfg, _cfg.cache_size);
 }
 
-GGMA_STATUS context::from_package(ggma_context **session, const char *package_path)
-{
-  if (session == nullptr)
-    return GGMA_STATUS_UNEXPECTED_NULL;
-  try
-  {
-    auto new_session = std::unique_ptr<context>(new context(package_path));
-    *session = reinterpret_cast<ggma_context *>(new_session.release());
-  }
-  catch (const std::bad_alloc &e)
-  {
-    std::cerr << "Error during session creation" << std::endl;
-    *session = nullptr; // Set nullptr on error to keep the old behavior
-    return GGMA_STATUS_OUT_OF_MEMORY;
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << "Error during session initialization : " << e.what() << std::endl;
-    *session = nullptr; // Set nullptr on error to keep the old behavior
-    return GGMA_STATUS_ERROR;
-  }
-  return GGMA_STATUS_NO_ERROR;
-}
-
-ggma::GGMAConfig ggma::context::load_config(const std::string &package_path)
+ggma::GGMAConfig ggma::Context::load_config(const std::string &package_path)
 {
   GGMAConfig config;
 
@@ -116,7 +92,7 @@ ggma::GGMAConfig ggma::context::load_config(const std::string &package_path)
   return config;
 }
 
-void context::prefill(ggma_token *tokens, size_t n_tokens, std::vector<uint8_t> &hidden_state)
+void Context::prefill(ggma_token *tokens, size_t n_tokens, std::vector<uint8_t> &hidden_state)
 {
   std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "prefill";
   nnfw_session *session = create_and_prepare_session(nnpkg_path.string());
@@ -182,7 +158,7 @@ void context::prefill(ggma_token *tokens, size_t n_tokens, std::vector<uint8_t>
   nnfw_close_session(session);
 }
 
-void context::unemb(std::vector<uint8_t> &hidden_state, size_t n_tokens, std::vector<float> &logits)
+void Context::unemb(std::vector<uint8_t> &hidden_state, size_t n_tokens, std::vector<float> &logits)
 {
   std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "unemb";
   nnfw_session *session = create_and_prepare_session(nnpkg_path.string());
@@ -228,7 +204,7 @@ void context::unemb(std::vector<uint8_t> &hidden_state, size_t n_tokens, std::ve
 
 // Template implementation to eliminate code duplication
 template <bool ReturnLogits, typename OutputType>
-void context::decode_impl(ggma_token token_id, OutputType &output)
+void Context::decode_impl(ggma_token token_id, OutputType &output)
 {
   std::filesystem::path nnpkg_path = std::filesystem::path(_package_path) / "decode";
   nnfw_session *session = create_and_prepare_session(nnpkg_path.string());
@@ -297,25 +273,25 @@ void context::decode_impl(ggma_token token_id, OutputType &output)
 }
 
 // Public interface functions - delegate to template implementation
-void context::decode(ggma_token token_id, std::vector<uint8_t> &hidden_state)
+void Context::decode(ggma_token token_id, std::vector<uint8_t> &hidden_state)
 {
   decode_impl<false, std::vector<uint8_t>>(token_id, hidden_state);
 }
 
-void context::decode(ggma_token token_id, std::vector<float> &logits)
+void Context::decode(ggma_token token_id, std::vector<float> &logits)
 {
   decode_impl<true, std::vector<float>>(token_id, logits);
 }
 
 // Template instantiation (required for template implementation in .cpp file)
-template void context::decode_impl<false, std::vector<uint8_t>>(ggma_token token_id,
+template void Context::decode_impl<false, std::vector<uint8_t>>(ggma_token token_id,
                                                                 std::vector<uint8_t> &output);
-template void context::decode_impl<true, std::vector<float>>(ggma_token token_id,
+template void Context::decode_impl<true, std::vector<float>>(ggma_token token_id,
                                                              std::vector<float> &output);
 
 // Sample token from logits using greedy sampling
 // Input shape: [n_seq, vocab_size], sample from last token
-ggma_token context::sample(const std::vector<float> &logits)
+ggma_token Context::sample(const std::vector<float> &logits)
 {
   if (logits.empty())
     throw std::runtime_error("Empty logits tensor");
diff --git a/runtime/ggma/src/context.h b/runtime/ggma/src/Context.h
similarity index 80%
rename from runtime/ggma/src/context.h
rename to runtime/ggma/src/Context.h
index 518f5d66816..4559b1edc89 100644
--- a/runtime/ggma/src/context.h
+++ b/runtime/ggma/src/Context.h
@@ -17,10 +17,9 @@
 #ifndef __GGMA_CONTEXT_H__
 #define __GGMA_CONTEXT_H__
 
-#include "config.h"
-#include "ggma_api.h"
-#include "kv_cache.h"
-#include "nnfw.h"
+#include "ggma_types.h"
+#include "Config.h"
+#include "KVCache.h"
 
 #include <cstdint>
 #include <memory>
@@ -30,18 +29,10 @@
 namespace ggma
 {
 
-class context
+class Context
 {
 public:
-  /**
-   * @brief Factory method. It creates and initialize ggma_context
-   *
-   * @note  Use factory instead of constructor to get status
-   */
-  static GGMA_STATUS from_package(ggma_context **session, const char *package_path);
-
-private:
-  context(const char *package_path);
+  explicit Context(const char *package_path); // no implicit conversion from const char *
   GGMAConfig load_config(const std::string &package_path);
 
   void prefill(ggma_token *tokens, size_t n_tokens, std::vector<uint8_t> &hidden_state);
@@ -57,7 +48,7 @@ class context
   void init_kv_cache();
 
 public:
-  ~context() = default;
+  ~Context() = default;
 
   GGMA_STATUS generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict);
 
diff --git a/runtime/ggma/src/generate.cc b/runtime/ggma/src/Generate.cc
similarity index 96%
rename from runtime/ggma/src/generate.cc
rename to runtime/ggma/src/Generate.cc
index 3ede101b98b..f6ef246ee63 100644
--- a/runtime/ggma/src/generate.cc
+++ b/runtime/ggma/src/Generate.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "config.h"
-#include "context.h"
-#include "kv_cache.h"
+#include "Config.h"
+#include "Context.h"
+#include "KVCache.h"
 
 #include <iostream>
 #include <vector>
@@ -36,7 +36,7 @@ namespace ggma
 //
 // The function ensures no buffer overflow by checking against n_tokens_max
 // and stops generation when either the requested number is reached or the array is full.
-GGMA_STATUS context::generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max,
+GGMA_STATUS Context::generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max,
                               size_t *n_predict)
 {
   try
diff --git a/runtime/ggma/src/kv_cache.cc b/runtime/ggma/src/KVCache.cc
similarity index 98%
rename from runtime/ggma/src/kv_cache.cc
rename to runtime/ggma/src/KVCache.cc
index ec6e9a6127e..57e6a114e23 100644
--- a/runtime/ggma/src/kv_cache.cc
+++ b/runtime/ggma/src/KVCache.cc
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "config.h"
-#include "kv_cache.h"
+#include "Config.h"
+#include "KVCache.h"
 
 #include <cstring>
 #include <stdexcept>
diff --git a/runtime/ggma/src/kv_cache.h b/runtime/ggma/src/KVCache.h
similarity index 100%
rename from runtime/ggma/src/kv_cache.h
rename to runtime/ggma/src/KVCache.h
diff --git a/runtime/ggma/include/ggma_macro.h b/runtime/ggma/src/Macro.h
similarity index 63%
rename from runtime/ggma/include/ggma_macro.h
rename to runtime/ggma/src/Macro.h
index fc828e0eb75..9c6252910c7 100644
--- a/runtime/ggma/include/ggma_macro.h
+++ b/runtime/ggma/src/Macro.h
@@ -17,29 +17,26 @@
 #include <cstdlib>
 
 /**
- * @file  ggma_macro.h
+ * @file  Macro.h
  * @brief Common macros for GGMA error handling and utilities
  */
-#ifndef __GGMA_GGMA_MACRO_H__
-#define __GGMA_GGMA_MACRO_H__
+#ifndef __GGMA_MACRO_H__
+#define __GGMA_MACRO_H__
 
-#define GGMA_ENSURE(a)               \
-  do                                 \
-  {                                  \
-    if ((a) != GGMA_STATUS_NO_ERROR) \
-    {                                \
-      exit(-1);                      \
-    }                                \
+/**
+ * @brief Return GGMA_STATUS_UNEXPECTED_NULL from the calling function when p is a null pointer
+ *
+ * @note  Expands to a statement containing `return`, so it is only usable inside a function
+ *        whose return type accepts GGMA_STATUS_UNEXPECTED_NULL
+ */
+#define GGMA_RETURN_ERROR_IF_NULL(p)      \
+  do                                      \
+  {                                       \
+    if ((p) == nullptr)                   \
+      return GGMA_STATUS_UNEXPECTED_NULL; \
   } while (0)
 
+/** @brief Evaluate x and discard the result (cast to void) */
 #define GGMA_UNUSED(x) (void)(x)
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __GGMA_GGMA_MACRO_H__
+#endif // __GGMA_MACRO_H__
diff --git a/runtime/ggma/src/ggma_context.cc b/runtime/ggma/src/ggma_context.cc
new file mode 100644
index 00000000000..e1bfc81cd86
--- /dev/null
+++ b/runtime/ggma/src/ggma_context.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ggma_context.h"
+
+#include "Context.h"
+#include "Macro.h"
+
+#include <exception>
+#include <iostream>
+#include <new>
+
+extern "C" {
+
+/**
+ * @brief Create a GGMA context for the package at package_path
+ *
+ * @param[out] context      Set to the new context on success, or to nullptr on failure
+ * @param[in]  package_path Path of the package to load
+ * @return GGMA_STATUS_NO_ERROR on success,
+ *         GGMA_STATUS_UNEXPECTED_NULL if context or package_path is null,
+ *         GGMA_STATUS_OUT_OF_MEMORY if allocation fails,
+ *         GGMA_STATUS_ERROR on any other failure
+ */
+GGMA_STATUS ggma_create_context(ggma_context **context, const char *package_path)
+{
+  GGMA_RETURN_ERROR_IF_NULL(context);
+  *context = nullptr; // Callers observe nullptr on every failure path below
+
+  // A null path cannot name a package; reject it before constructing ggma::Context
+  GGMA_RETURN_ERROR_IF_NULL(package_path);
+
+  try
+  {
+    *context = reinterpret_cast<ggma_context *>(new ggma::Context(package_path));
+  }
+  catch (const std::bad_alloc &)
+  {
+    std::cerr << "Error during context creation" << std::endl;
+    return GGMA_STATUS_OUT_OF_MEMORY;
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during context initialization : " << e.what() << std::endl;
+    return GGMA_STATUS_ERROR;
+  }
+  catch (...)
+  {
+    // Exceptions must not propagate across the extern "C" boundary
+    std::cerr << "Error during context initialization : unknown exception" << std::endl;
+    return GGMA_STATUS_ERROR;
+  }
+  return GGMA_STATUS_NO_ERROR;
+}
+
+/**
+ * @brief Destroy a context created by ggma_create_context
+ *
+ * @param[in] context Context to destroy; nullptr is accepted and has no effect
+ * @return GGMA_STATUS_NO_ERROR always
+ */
+GGMA_STATUS ggma_free_context(ggma_context *context)
+{
+  delete reinterpret_cast<ggma::Context *>(context);
+  return GGMA_STATUS_NO_ERROR;
+}
+
+} // extern "C"
diff --git a/runtime/ggma/src/ggma_api.cc b/runtime/ggma/src/ggma_generate.cc
similarity index 55%
rename from runtime/ggma/src/ggma_api.cc
rename to runtime/ggma/src/ggma_generate.cc
index 0931750286c..669fbb05410 100644
--- a/runtime/ggma/src/ggma_api.cc
+++ b/runtime/ggma/src/ggma_generate.cc
@@ -14,37 +14,23 @@
  * limitations under the License.
  */
 
-#include "context.h"
-#include "ggma_api.h"
+#include "ggma_generate.h"
 
-#include <cstring>
-#include <iostream>
-
-// Double-check enum value changes
-
-#define GGMA_RETURN_ERROR_IF_NULL(p)      \
-  do                                      \
-  {                                       \
-    if ((p) == NULL)                      \
-      return GGMA_STATUS_UNEXPECTED_NULL; \
-  } while (0)
-
-GGMA_STATUS ggma_create_context(ggma_context **context, const char *package_path)
-{
-  GGMA_RETURN_ERROR_IF_NULL(context);
-  return ggma::context::from_package(context, package_path);
-}
-
-GGMA_STATUS ggma_free_context(ggma_context *context)
-{
-  delete reinterpret_cast<ggma::context *>(context);
-  return GGMA_STATUS_NO_ERROR;
-}
+#include "Context.h"
+#include "Macro.h"
 
+/**
+ * @brief C API entry point that forwards a generation request to ggma::Context::generate
+ *
+ * @return GGMA_STATUS_UNEXPECTED_NULL if context is null; otherwise the status returned by
+ *         ggma::Context::generate
+ * @note   NOTE(review): only context is null-checked at this layer; tokens and n_tokens_out
+ *         are passed through unchanged - confirm ggma::Context::generate validates them
+ */
 GGMA_STATUS ggma_generate(ggma_context *context, ggma_token *tokens, size_t n_tokens,
                           size_t n_tokens_max, size_t *n_tokens_out)
 {
   GGMA_RETURN_ERROR_IF_NULL(context);
-  return reinterpret_cast<ggma::context *>(context)->generate(tokens, n_tokens, n_tokens_max,
+  return reinterpret_cast<ggma::Context *>(context)->generate(tokens, n_tokens, n_tokens_max,
                                                               n_tokens_out);
 }