Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions runtime/infra/cmake/CfgOptionFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" ON)
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite_run test driver" ON)
option(BUILD_ONERT_RUN "Build onert_run test driver" ON)
# ggma_run: test driver that runs a GGMA package (runtime/tests/tools/ggma_run), ON by default
option(BUILD_GGMA_RUN "Build ggma_run test driver" ON)
option(BUILD_ONERT_TRAIN "Build onert_train test driver" ON)
option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build testing tool to compare runtime result with TFLite" ON)
option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
Expand Down
1 change: 1 addition & 0 deletions runtime/infra/cmake/options/options_armv7l-tizen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OF
option(BUILD_XNNPACK "Build XNNPACK" OFF)

# GGMA API, the ggma_run test driver and the SentencePiece download/build are all OFF for this target
option(BUILD_GGMA_API "Build GGMA API for Generative AI" OFF)
option(BUILD_GGMA_RUN "Build ggma_run test driver" OFF)
option(DOWNLOAD_SENTENCEPIECE "Download SentencePiece source" OFF)
option(BUILD_SENTENCEPIECE "Build SentencePiece library from the source" OFF)
1 change: 1 addition & 0 deletions runtime/infra/cmake/options/options_i686-tizen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OF
option(BUILD_XNNPACK "Build XNNPACK" OFF)

# GGMA API and its ggma_run test driver are OFF for this target
# (to be turned on once SentencePiece is handled for Tizen)
option(BUILD_GGMA_API "Build GGMA API for Generative AI" OFF)
option(BUILD_GGMA_RUN "Build ggma_run test driver" OFF)
Copy link
Copy Markdown
Contributor Author

@glistening glistening Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reviewers, I will turn BUILD_GGMA_API and BUILD_GGMA_RUN on after sentencepiece is handled for Tizen.

# SentencePiece is neither downloaded nor built from source for this target
option(DOWNLOAD_SENTENCEPIECE "Download SentencePiece source" OFF)
option(BUILD_SENTENCEPIECE "Build SentencePiece library from the source" OFF)
22 changes: 22 additions & 0 deletions runtime/tests/tools/ggma_run/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ggma_run command line test driver; skipped entirely unless BUILD_GGMA_RUN is enabled
if(NOT BUILD_GGMA_RUN)
  return()
endif()

list(APPEND GGMA_RUN_SRCS "src/args.cc" "src/ggma_run.cc")

add_executable(ggma_run ${GGMA_RUN_SRCS})
target_include_directories(ggma_run PRIVATE src)

# Argument parser, GGMA API and nnfw runtime libraries
target_link_libraries(ggma_run arser ggma-dev nnfw_common nnfw-dev)

# Installed binary searches lib/ggma (GGMA library), lib/ and its own directory
# relative to its location, then /usr/local/lib
set_property(TARGET ggma_run PROPERTY
  INSTALL_RPATH "$ORIGIN/../lib/ggma:$ORIGIN/../lib/:$ORIGIN/:/usr/local/lib")

install(TARGETS ggma_run DESTINATION ${CMAKE_INSTALL_BINDIR})
22 changes: 22 additions & 0 deletions runtime/tests/tools/ggma_run/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ggma_run

`ggma_run` is a tool to run an LLM model.

It takes a GGMA package as input. It uses the **GGMA API** internally.

## Usage

```
$ ./ggma_run path_to_ggma_package
```

It will run a GGMA package to generate the output using the default prompt.

## Example

```
$ Product/out/bin/ggma_run tinyllama
prompt: Lily picked up a flower.
generated: { 1100, 7899, 289, 826, 351, 600, 2439, 288, 266, 3653, 31843, 1100, 7899, 289, 1261, 291, 5869, 291, 1261, 31843, 1100, 7899 }
detokenized: She liked to play with her friends. She liked to run and jump in the water. She was
```
91 changes: 91 additions & 0 deletions runtime/tests/tools/ggma_run/src/args.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "args.h"
#include "nnfw.h"

#include <iostream>
#include <string>
#include <sys/stat.h>

// Evaluate the nnfw call `a` exactly once; when it does not return
// NNFW_STATUS_NO_ERROR, report the failing expression and its location on
// stderr and terminate the process with exit code -1.
#define NNPR_ENSURE_STATUS(a)                                                        \
  do                                                                                 \
  {                                                                                  \
    if ((a) != NNFW_STATUS_NO_ERROR)                                                 \
    {                                                                                \
      std::cerr << "E: " << #a << " failed at " << __FILE__ << ":" << __LINE__       \
                << std::endl;                                                        \
      exit(-1);                                                                      \
    }                                                                                \
  } while (0)

Comment on lines +24 to +32
Copy link
Copy Markdown
Contributor Author

@glistening glistening Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Beyond this PR. (Just for recording)

The NNPR_ENSURE_STATUS definition appears in several files:

  • runtime/tests/tools/ggma_run/src/ggma_run.cc
  • runtime/tests/tools/onert_run/src/nnfw_util.h
  • runtime/tests/tools/onert_train/src/nnfw_util.h
  • runtime/tests/custom_op/FillFrom/FillFrom_runner.cc
  • runtime/contrib/style_transfer_app/src/style_transfer_app.cc

This is why I tried to define GGMA_ENSURE in ggma_macro.h. To minimize the exposed API, however, we could follow the suggestion in #16260 and place it under test/tools/libs instead.

/**
 * @brief Print the ggma_run version together with the nnfw runtime version
 *
 * The runtime version is queried through nnfw_query_info_u32() and decoded as
 * 0xMMmmmmPP: 8-bit major, 16-bit minor, 8-bit patch (layout per nnfw_version.h).
 * The process exits via NNPR_ENSURE_STATUS when the query fails.
 */
static void print_version()
{
  uint32_t version = 0;
  NNPR_ENSURE_STATUS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &version));

  const uint32_t major = version >> 24;
  // The minor field is 16 bits wide; masking with 0xFF00 would drop its upper byte
  const uint32_t minor = (version >> 8) & 0xFFFF;
  const uint32_t patch = version & 0xFF;

  std::cout << "ggma_run v0.1.0 (nnfw runtime: v" << major << "." << minor << "." << patch << ")"
            << std::endl;
}

namespace ggma_run
{

/**
 * @brief Register the command line options and parse them immediately
 * @param[in] argc Number of command line arguments
 * @param[in] argv Command line arguments
 */
Args::Args(const int argc, char **argv)
{
  initialize();
  parse(argc, argv);
}

/**
 * @brief Declare the positional "path" argument and the version option
 */
void Args::initialize(void)
{
  _arser.add_argument("path").type(arser::DataType::STR).help("GGMA package path");
  arser::Helper::add_version(_arser, print_version);
}

/**
 * @brief Parse the command line and validate the package path
 *
 * The package path is stored only when it names an existing directory.
 * A missing path, or one that is not a directory, is reported on stderr
 * and the process exits with status 1.
 *
 * @param[in] argc Number of command line arguments
 * @param[in] argv Command line arguments
 */
void Args::parse(const int argc, char **argv)
{
  try
  {
    _arser.parse(argc, argv);

    if (_arser.get<bool>("--version"))
    {
      _print_version = true;
      return;
    }

    if (_arser["path"])
    {
      const auto path = _arser.get<std::string>("path");
      struct stat sb;
      if (stat(path.c_str(), &sb) != 0)
      {
        std::cerr << "Cannot find: " << path << "\n";
        exit(1);
      }
      // S_ISDIR compares the file type field; a plain `st_mode & S_IFDIR` bit test
      // would also accept other file types that share that bit (e.g. block devices)
      if (!S_ISDIR(sb.st_mode))
      {
        std::cerr << "Not a directory: " << path << "\n";
        exit(1);
      }
      _package_path = path;
    }
  }
  catch (const std::bad_cast &e)
  {
    std::cerr << "Bad cast error - " << e.what() << '\n';
    exit(1);
  }
}

} // end of namespace ggma_run
51 changes: 51 additions & 0 deletions runtime/tests/tools/ggma_run/src/args.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __GGMA_RUN_ARGS_H__
#define __GGMA_RUN_ARGS_H__

#include "arser/arser.h"

#include <string>
#include <unordered_map>
#include <vector>

namespace ggma_run
{

/**
 * @brief Command line arguments of the ggma_run tool
 */
class Args
{
public:
  /**
   * @brief Construct from the arguments passed to main()
   * @param[in] argc Number of command line arguments
   * @param[in] argv Command line arguments
   */
  Args(const int argc, char **argv);

  // NOTE(review): no definition of print() is visible alongside this declaration - confirm
  void print();

  /// @brief Package path stored by the parser (empty when none was stored)
  const std::string &packagePath() const { return _package_path; }

  /// @brief Value of the version flag recorded by the parser
  bool printVersion() const { return _print_version; }

private:
  void initialize();
  void parse(const int argc, char **argv);

  arser::Arser _arser;
  std::string _package_path;
  bool _print_version = false;
};

} // end of namespace ggma_run

#endif // __GGMA_RUN_ARGS_H__
88 changes: 88 additions & 0 deletions runtime/tests/tools/ggma_run/src/ggma_run.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "args.h"
#include "ggma_api.h"

#include <cstdlib>
#include <iostream>
#include <stdexcept>

// Evaluate the GGMA call `a` exactly once; when it does not return
// GGMA_STATUS_NO_ERROR, report the failing expression and its location on
// stderr and terminate the process with exit code -1.
#define GGMA_ENSURE(a)                                                               \
  do                                                                                 \
  {                                                                                  \
    if ((a) != GGMA_STATUS_NO_ERROR)                                                 \
    {                                                                                \
      std::cerr << "E: " << #a << " failed at " << __FILE__ << ":" << __LINE__       \
                << std::endl;                                                        \
      exit(-1);                                                                      \
    }                                                                                \
  } while (0)

/**
 * @brief Entry point of ggma_run
 *
 * Tokenizes a fixed default prompt with the tokenizer found under
 * "<package>/tokenizer", generates up to 22 tokens with a context created
 * from the package, then prints the prompt, the generated token ids and the
 * detokenized text. Any failing GGMA call terminates the process through
 * GGMA_ENSURE.
 *
 * @param[in] argc Number of command line arguments
 * @param[in] argv Command line arguments
 * @return 0 on success
 */
int main(const int argc, char **argv)
{
  using namespace ggma_run;

  try
  {
    Args args(argc, argv);

    // Nothing to run when only the version was requested
    if (args.printVersion())
      return 0;

    // An empty package path means no usable package directory was given; fail
    // early with a message instead of probing "/tokenizer" at the filesystem root
    if (args.packagePath().empty())
    {
      std::cerr << "E: GGMA package path is missing or is not a directory" << std::endl;
      exit(-1);
    }

    std::string prompt = "Lily picked up a flower.";
    constexpr size_t n_tokens_max = 32;
    ggma_token tokens[n_tokens_max];
    size_t n_tokens = 0;

    // Create tokenizer first
    ggma_tokenizer *tokenizer = nullptr;
    GGMA_ENSURE(ggma_create_tokenizer(&tokenizer, (args.packagePath() + "/tokenizer").c_str()));

    // Tokenize using the created tokenizer
    GGMA_ENSURE(
      ggma_tokenize(tokenizer, prompt.c_str(), prompt.size(), tokens, n_tokens_max, &n_tokens));

    ggma_context *context = nullptr;
    GGMA_ENSURE(ggma_create_context(&context, args.packagePath().c_str()));

    // In: requested number of tokens; read back below as the number of generated tokens
    size_t n_predict = 22;
    GGMA_ENSURE(ggma_generate(context, tokens, n_tokens, n_tokens_max, &n_predict));

    // Output generated token IDs (they follow the prompt tokens in the same buffer)
    std::cout << "prompt: " << prompt << std::endl;
    std::cout << "generated: { ";
    const size_t n_total = n_tokens + n_predict;
    for (size_t i = n_tokens; i < n_total; ++i)
    {
      std::cout << tokens[i];
      if (i + 1 < n_total)
      {
        std::cout << ", ";
      }
    }
    std::cout << " }" << std::endl;

    // Detokenize and output the generated text
    constexpr size_t detokenize_max = 256;
    // Zero-initialised so streaming the buffer never reads indeterminate bytes
    // NOTE(review): confirm that ggma_detokenize NUL-terminates its output
    char detokenized[detokenize_max] = {0};
    GGMA_ENSURE(
      ggma_detokenize(tokenizer, tokens + n_tokens, n_predict, detokenized, detokenize_max));
    std::cout << "detokenized: " << detokenized << std::endl;

    GGMA_ENSURE(ggma_free_context(context));
    GGMA_ENSURE(ggma_free_tokenizer(tokenizer));
  }
  catch (const std::runtime_error &e)
  {
    std::cerr << "E: Fail to run by runtime error: " << e.what() << std::endl;
    exit(-1);
  }

  return 0;
}