From a5681acb72c3b5125e4cd450b12f21454176c501 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 13:57:00 -0800 Subject: [PATCH 01/13] Upgrade to RAI 1.7.0 incl. model-gen --- setup.py | 6 +- src/lemonade/tools/oga/load.py | 150 ++++++++++++++++++++++++------ src/lemonade/tools/oga/ryzenai.py | 8 +- 3 files changed, 126 insertions(+), 38 deletions(-) diff --git a/setup.py b/setup.py index a03cfdf..9ae46e0 100644 --- a/setup.py +++ b/setup.py @@ -53,15 +53,15 @@ extras_require={ # Extras for specific backends "oga-ryzenai": [ - "onnxruntime-genai-directml-ryzenai==0.9.2.1", + "onnxruntime-genai-directml-ryzenai==0.11.2", "protobuf>=6.30.1", ], "oga-cpu": [ - "onnxruntime-genai==0.9.2", + "onnxruntime-genai==0.11.2", "onnxruntime >=1.22.0", ], "model-generate": [ - "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'", + "model-generate==1.7.0; platform_system=='Windows' and python_version=='3.12'", ], }, classifiers=[], diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py index 7592a8d..19a2fb2 100644 --- a/src/lemonade/tools/oga/load.py +++ b/src/lemonade/tools/oga/load.py @@ -232,6 +232,62 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: f'{", ".join([value + " for " + key for key, value in execution_providers.items()])}.', ) + parser.add_argument( + "--packed-const", + action="store_true", + default=False, + help="[model-generate] Pass this if packed constants are required", + ) + + parser.add_argument( + "--script-option", + choices=["jit_npu", "non_jit"], + default=None, + help="[model-generate] Script variant (default: jit_npu for hybrid, non_jit for NPU basic)", + ) + + parser.add_argument( + "--optimize", + choices=["prefill", "prefill_llama3", "decode", "full_fusion", "full_fusion_llama3"], + default=None, + help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)", + ) + + parser.add_argument( + "--max-seq-len", 
+ default=None, + type=int, + help="[model-generate] Max sequence length for prefill fusion (default: 4096)", + ) + + parser.add_argument( + "--npu-op-version", + choices=["v1", "v2"], + default=None, + help="[model-generate] NPU LLM op version (v1 / v2)", + ) + + parser.add_argument( + "--npu-basic", + action="store_true", + default=False, + help="[model-generate] Use basic NPU flow with matmulnbits pass file", + ) + + parser.add_argument( + "--npu-use-ep", + action="store_true", + default=False, + help="[model-generate] Use EP (Execution Provider) flow (only applies to --npu --optimize decode)", + ) + + parser.add_argument( + "--no-prune-logits", + action="store_true", + default=False, + help="[model-generate] Disable logits pruning by setting prune_logits=false", + ) + return parser @staticmethod @@ -378,24 +434,6 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path dll_source_path = os.path.join( env_path, "Lib", "site-packages", "onnxruntime_genai" ) - required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"] - - # Validate that all required DLLs exist in the source directory - missing_dlls = [] - - for dll_name in required_dlls: - dll_source = os.path.join(dll_source_path, dll_name) - if not os.path.exists(dll_source): - missing_dlls.append(dll_source) - - if missing_dlls: - dll_list = "\n - ".join(missing_dlls) - raise RuntimeError( - f"Required DLLs not found for {device} inference:\n - {dll_list}\n" - f"Please ensure your RyzenAI installation is complete and supports {device}.\n" - "See installation instructions at:\n" - "https://github.com/lemonade-sdk/lemonade-eval#installation\n" - ) # Add the DLL source directory to PATH current_path = os.environ.get("PATH", "") @@ -543,7 +581,22 @@ def _cleanup_environment(saved_state): os.chdir(saved_state["cwd"]) os.environ["PATH"] = saved_state["path"] - def _generate_model_for_oga(self, output_model_path, device, input_model_path): + def _generate_model_for_oga( + self, + 
output_model_path, + device, + input_model_path, + packed_const=False, + script_option=None, + optimize=None, + max_seq_len=None, + npu_op_version=None, + npu_basic=False, + npu_use_ep=False, + no_prune_logits=False, + dml_only=False, + cpu_only=False, + ): """ Uses the model_generate tool to generate the model for OGA hybrid or npu targets. """ @@ -569,18 +622,31 @@ def _generate_model_for_oga(self, output_model_path, device, input_model_path): try: if device_flag == "npu": + script_opt = script_option if script_option is not None else "non_jit" model_generate.generate_npu_model( input_model=input_model_path, output_dir=output_model_path, - packed_const=False, + packed_const=packed_const, + script_option=script_opt, + optimize=optimize, + max_seq_len=max_seq_len, + npu_op_version=npu_op_version, + basic=npu_basic, + use_ep=npu_use_ep, + no_prune_logits=no_prune_logits, + cpu_only=cpu_only, + ) else: # hybrid + script_opt = script_option if script_option is not None else "jit_npu" model_generate.generate_hybrid_model( input_model=input_model_path, output_dir=output_model_path, - script_option="jit_npu", - mode="bf16", - dml_only=False, + script_option=script_opt, + optimize=optimize, + max_seq_len=max_seq_len, + no_prune_logits=no_prune_logits, + dml_only=dml_only, ) except Exception as e: raise RuntimeError( @@ -600,6 +666,16 @@ def run( trust_remote_code=False, subfolder: str = None, do_not_upgrade: bool = False, + packed_const: bool = False, + script_option: str = None, + optimize: str = None, + max_seq_len: int = None, + npu_op_version: str = None, + npu_basic: bool = False, + npu_use_ep: bool = False, + no_prune_logits: bool = False, + dml_only: bool = False, + cpu_only: bool = False, ) -> State: from lemonade.common.network import ( custom_snapshot_download, @@ -714,6 +790,7 @@ def run( "It does not contain ONNX or safetensors files." 
) if device in ["npu", "hybrid"]: + needs_generation = False if is_onnx_model: if is_preoptimized_onnx: # Use HuggingFace cache path as it is @@ -721,11 +798,7 @@ def run( else: # If ONNX but not modified yet for Hybrid or NPU, # needs further optimization - self._generate_model_for_oga( - full_model_path, - device, - input_model_path, - ) + needs_generation = True elif is_safetensors_model: config_path = os.path.join(input_model_path, "config.json") if os.path.exists(config_path): @@ -733,9 +806,7 @@ def run( config = json.load(f) if "quantization_config" in config: # If quantized, use subprocess to generate the model - self._generate_model_for_oga( - full_model_path, device, input_model_path - ) + needs_generation = True else: raise ValueError( f"The safetensors model {checkpoint} is not quantized. " @@ -750,6 +821,23 @@ def run( raise ValueError( f"Unsupported model type for checkpoint: {checkpoint}" ) + + if needs_generation: + self._generate_model_for_oga( + full_model_path, + device, + input_model_path, + packed_const, + script_option, + optimize, + max_seq_len, + npu_op_version, + npu_basic, + npu_use_ep, + no_prune_logits, + dml_only, + cpu_only, + ) else: if is_onnx_model: # Use HuggingFace cache path as it is diff --git a/src/lemonade/tools/oga/ryzenai.py b/src/lemonade/tools/oga/ryzenai.py index eb1f691..c45dce9 100644 --- a/src/lemonade/tools/oga/ryzenai.py +++ b/src/lemonade/tools/oga/ryzenai.py @@ -45,17 +45,17 @@ def get_ryzenai_version_info(): if Version(og.__version__) >= Version("0.7.0"): oga_path = os.path.dirname(og.__file__) - if og.__version__ in ("0.9.2", "0.9.2.1"): - return "1.6.0", oga_path + if og.__version__ in ("0.11.2", "0.11.2.1"): + return "1.7.0", oga_path else: raise ValueError( f"Unsupported onnxruntime-genai-directml-ryzenai version: {og.__version__}\n" - "Only RyzenAI 1.6.0 is currently supported.\n" + "Only RyzenAI 1.7.0 is currently supported.\n" "See installation instructions at: 
https://github.com/lemonade-sdk/lemonade-eval#installation" # pylint: disable=line-too-long ) else: raise ValueError( "Legacy RyzenAI installation detected (version < 0.7.0).\n" - "RyzenAI 1.4.0 and 1.5.0 are no longer supported. Please upgrade to 1.6.0.\n" + "RyzenAI 1.4.0, 1.5.0 and 1.6.0 are no longer supported. Please upgrade to 1.7.0.\n" "See installation instructions at: https://github.com/lemonade-sdk/lemonade-eval#installation" # pylint: disable=line-too-long ) From 95950651425181ce76a58862e8a127fc736c95ac Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:05:51 -0800 Subject: [PATCH 02/13] lint --- README.md | 8 ++++---- src/lemonade/tools/oga/load.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f9dcf93..fb91228 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ pip install -e .[oga-cpu] # For RyzenAI NPU support (Windows + Python 3.12 only): pip install -e .[oga-ryzenai] --extra-index-url=https://pypi.amd.com/simple -# For model generation/export (Windows + Python 3.12 only): +# For model generation/custom export (Windows + Python 3.12 only): pip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple ``` @@ -171,15 +171,15 @@ See the [Models List](https://lemonade-server.ai/docs/server/server_models/) for ## OGA-Load for Model Preparation -The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build and quantize models from Hugging Face for use on NPU, iGPU, or CPU. - +The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build quark-quantized models from Hugging Face for use on NPU, iGPU, or CPU. +Check out the official [Ryzen AI Model Preparation guide](https://ryzenai.docs.amd.com/en/latest/oga_model_prepare.html) for more details. > **Note**: For running pre-built NPU/Hybrid models, use the server-based workflow above with `-NPU` or `-Hybrid` models. 
The `oga-load` tool is primarily for model preparation and testing custom checkpoints. ### Usage ```bash # Prepare and test a model on CPU -lemonade-eval -i microsoft/Phi-3-mini-4k-instruct oga-load --device cpu --dtype int4 llm-prompt -p "Hello!" +lemonade-eval -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10 ``` ### Installation for OGA diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py index 19a2fb2..b2db43b 100644 --- a/src/lemonade/tools/oga/load.py +++ b/src/lemonade/tools/oga/load.py @@ -248,7 +248,13 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: parser.add_argument( "--optimize", - choices=["prefill", "prefill_llama3", "decode", "full_fusion", "full_fusion_llama3"], + choices=[ + "prefill", + "prefill_llama3", + "decode", + "full_fusion", + "full_fusion_llama3", + ], default=None, help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)", ) @@ -396,7 +402,7 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path 3. Check NPU driver version if required for device and ryzenai_version. 
""" - # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices + # For RyzenAI 1.7.0, check NPU driver version for NPU and hybrid devices if device in ["npu", "hybrid"]: required_driver_version = REQUIRED_NPU_DRIVER_VERSION @@ -635,7 +641,6 @@ def _generate_model_for_oga( use_ep=npu_use_ep, no_prune_logits=no_prune_logits, cpu_only=cpu_only, - ) else: # hybrid script_opt = script_option if script_option is not None else "jit_npu" From 797f2953a833f56831f64bdf8faf38fec3fd4432 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:11:57 -0800 Subject: [PATCH 03/13] lint --- src/lemonade/tools/oga/load.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py index b2db43b..050a612 100644 --- a/src/lemonade/tools/oga/load.py +++ b/src/lemonade/tools/oga/load.py @@ -236,14 +236,16 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: "--packed-const", action="store_true", default=False, - help="[model-generate] Pass this if packed constants are required", + help="[model-generate] Pass this if packed constants are\n" + "required (packed constants).", ) parser.add_argument( "--script-option", choices=["jit_npu", "non_jit"], default=None, - help="[model-generate] Script variant (default: jit_npu for hybrid, non_jit for NPU basic)", + help="[model-generate] Script variant: jit_npu (hybrid),\n" + "non_jit (NPU basic) (default depends on device)", ) parser.add_argument( @@ -256,14 +258,16 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: "full_fusion_llama3", ], default=None, - help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)", + help="[model-generate] Optimization: prefill(_llama3) (hybrid),\n" + "decode/full_fusion(_llama3) (NPU basic)", ) parser.add_argument( "--max-seq-len", default=None, type=int, - help="[model-generate] Max sequence length for prefill 
fusion (default: 4096)", + help="[model-generate] Max sequence length for prefill\n" + "fusion (default: 4096)", ) parser.add_argument( From 6a27a448fa31858bc0d422dcc36a19d7565505cb Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:15:50 -0800 Subject: [PATCH 04/13] lint --- src/lemonade/tools/oga/load.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py index 050a612..93b711a 100644 --- a/src/lemonade/tools/oga/load.py +++ b/src/lemonade/tools/oga/load.py @@ -237,7 +237,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: action="store_true", default=False, help="[model-generate] Pass this if packed constants are\n" - "required (packed constants).", + "required (packed constants).", ) parser.add_argument( @@ -245,7 +245,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: choices=["jit_npu", "non_jit"], default=None, help="[model-generate] Script variant: jit_npu (hybrid),\n" - "non_jit (NPU basic) (default depends on device)", + "non_jit (NPU basic) (default depends on device)", ) parser.add_argument( @@ -259,7 +259,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: ], default=None, help="[model-generate] Optimization: prefill(_llama3) (hybrid),\n" - "decode/full_fusion(_llama3) (NPU basic)", + "decode/full_fusion(_llama3) (NPU basic)", ) parser.add_argument( @@ -267,7 +267,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: default=None, type=int, help="[model-generate] Max sequence length for prefill\n" - "fusion (default: 4096)", + "fusion (default: 4096)", ) parser.add_argument( From 968e2f5a6aac0070bf858ad21a1a84be4e0e64b8 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:23:17 -0800 Subject: [PATCH 05/13] lint --- src/lemonade/tools/oga/load.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py 
index 93b711a..68903ce 100644 --- a/src/lemonade/tools/oga/load.py +++ b/src/lemonade/tools/oga/load.py @@ -288,7 +288,8 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser: "--npu-use-ep", action="store_true", default=False, - help="[model-generate] Use EP (Execution Provider) flow (only applies to --npu --optimize decode)", + help="[model-generate] Use EP (Execution Provider) flow\n" + "(only applies to --npu --optimize decode)", ) parser.add_argument( From 6378f8a0b82ee17df563b0fc97f39e3e34503aba Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:44:48 -0800 Subject: [PATCH 06/13] Add tests for model prep --- .github/workflows/test_lemonade_eval.yml | 39 +++++++++++++++ test/oga_hybrid_model_prep_api.py | 61 ++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 test/oga_hybrid_model_prep_api.py diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index e36f102..065afd4 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -115,6 +115,45 @@ jobs: echo "Running OGA CPU API tests..." 
$venvPython test/oga_cpu_api.py + test-oga-hybrid-model-prep: + # This job requires Ryzen AI 300-series hardware (e.g., Strix Point, Krackan Point) + # It will only run if a self-hosted runner with the 'rai-300' label is available + env: + LEMONADE_CI_MODE: "True" + runs-on: [self-hosted, windows, rai-300] + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Create virtual environment and install dependencies + shell: bash + run: | + python -m venv .venv + venvPython=".venv/Scripts/python" + venvPip=".venv/Scripts/pip" + $venvPython -m pip install --upgrade pip + $venvPython -m pip check + $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple + + - name: Test OGA Hybrid Model Prep + shell: bash + run: | + venvPython=".venv/Scripts/python" + venvLemonade=".venv/Scripts/lemonade-eval" + + echo "Testing OGA-Load with Hybrid model generation..." + $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dml-only --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10 + + echo "Running OGA Hybrid Model Prep API tests..." 
+ $venvPython test/oga_hybrid_model_prep_api.py + - name: Test Server Integration (GGUF model) shell: PowerShell run: | diff --git a/test/oga_hybrid_model_prep_api.py b/test/oga_hybrid_model_prep_api.py new file mode 100644 index 0000000..7454032 --- /dev/null +++ b/test/oga_hybrid_model_prep_api.py @@ -0,0 +1,61 @@ +import unittest +import shutil +import os +from lemonade.state import State +import lemonade.common.test_helpers as common +from lemonade.common.build import builds_dir +from lemonade.tools.prompt import LLMPrompt +from lemonade.tools.oga.load import OgaLoad +import sys + +ci_mode = os.getenv("LEMONADE_CI_MODE", False) + +checkpoint = "amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead" +device = "hybrid" +dtype = "int4" +force = False +prompt = "Alice and Bob" + + +class Testing(unittest.TestCase): + + def setUp(self) -> None: + shutil.rmtree(builds_dir(cache_dir), ignore_errors=True) + + def test_001_oga_model_prep_hybrid(self): + # Test the OgaLoad with model generation (oga_model_prep) for hybrid device + # and LLMPrompt tools + + state = State(cache_dir=cache_dir, build_name="test") + + state = OgaLoad().run( + state, + input=checkpoint, + device=device, + dtype=dtype, + force=force, + dml_only=True, + ) + state = LLMPrompt().run(state, prompt=prompt, max_new_tokens=10) + + assert len(state.response) > 0, state.response + + +if __name__ == "__main__": + cache_dir, _ = common.create_test_dir( + "lemonade_oga_hybrid_model_prep_api", base_dir=os.path.abspath(".") + ) + + suite = unittest.TestSuite() + suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Testing)) + + # Run the test suite + runner = unittest.TextTestRunner() + result = runner.run(suite) + + # Set exit code based on test results + if not result.wasSuccessful(): + sys.exit(1) + +# This file was originally licensed under Apache 2.0. It has been modified. 
+# Modifications Copyright (c) 2025 AMD From 464af561f745c2098c8ebce393894c74390a2dbd Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:52:31 -0800 Subject: [PATCH 07/13] update workflow config --- .github/workflows/test_lemonade_eval.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index 065afd4..26bc2b6 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -120,9 +120,9 @@ jobs: # It will only run if a self-hosted runner with the 'rai-300' label is available env: LEMONADE_CI_MODE: "True" - runs-on: [self-hosted, windows, rai-300] + runs-on: [windows-latest] concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} cancel-in-progress: true steps: - uses: actions/checkout@v3 From 7c16b2f9ecbc7aeb176f88ec5dd07b9588c1fa97 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 15:52:31 -0800 Subject: [PATCH 08/13] update workflow config --- .github/workflows/test_lemonade_eval.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index 065afd4..f80abec 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -120,9 +120,9 @@ jobs: # It will only run if a self-hosted runner with the 'rai-300' label is available env: LEMONADE_CI_MODE: "True" - runs-on: [self-hosted, windows, rai-300] + runs-on: [windows-latest] concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} cancel-in-progress: true steps: - uses: actions/checkout@v3 @@ -149,8 +149,7 @@ jobs: venvLemonade=".venv/Scripts/lemonade-eval" echo "Testing OGA-Load with Hybrid model generation..." 
- $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dml-only --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10 - + $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 echo "Running OGA Hybrid Model Prep API tests..." $venvPython test/oga_hybrid_model_prep_api.py From cdde62a0891cce0d5e319e9617b0b30f829177a2 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 6 Feb 2026 16:19:56 -0800 Subject: [PATCH 09/13] Modify tests --- .github/workflows/test_lemonade_eval.yml | 40 +----------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index f80abec..4242307 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -115,44 +115,6 @@ jobs: echo "Running OGA CPU API tests..." $venvPython test/oga_cpu_api.py - test-oga-hybrid-model-prep: - # This job requires Ryzen AI 300-series hardware (e.g., Strix Point, Krackan Point) - # It will only run if a self-hosted runner with the 'rai-300' label is available - env: - LEMONADE_CI_MODE: "True" - runs-on: [windows-latest] - concurrency: - group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} - cancel-in-progress: true - steps: - - uses: actions/checkout@v3 - - - name: Set up Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Create virtual environment and install dependencies - shell: bash - run: | - python -m venv .venv - venvPython=".venv/Scripts/python" - venvPip=".venv/Scripts/pip" - $venvPython -m pip install --upgrade pip - $venvPython -m pip check - $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple - - - name: Test OGA Hybrid Model Prep - shell: bash - run: | - venvPython=".venv/Scripts/python" - venvLemonade=".venv/Scripts/lemonade-eval" - - echo 
"Testing OGA-Load with Hybrid model generation..." - $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 - echo "Running OGA Hybrid Model Prep API tests..." - $venvPython test/oga_hybrid_model_prep_api.py - - name: Test Server Integration (GGUF model) shell: PowerShell run: | @@ -238,4 +200,4 @@ jobs: Write-Host "Server stopped." -ForegroundColor Green # This file was originally licensed under Apache 2.0. It has been modified. -# Modifications Copyright (c) 2025 AMD +# Modifications Copyright (c) 2025 AMD \ No newline at end of file From 7be8300acf4610f13382f393a20b66ca8264ff18 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Mon, 9 Feb 2026 09:55:41 -0800 Subject: [PATCH 10/13] edit setup.py --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 9ae46e0..c76f4ad 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,8 @@ ], "model-generate": [ "model-generate==1.7.0; platform_system=='Windows' and python_version=='3.12'", + "numpy<2", + "onnx_ir", ], }, classifiers=[], From 6eb3ea1aff252d41fe07feef1a5e9f4d29bce4e9 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Mon, 9 Feb 2026 11:15:34 -0800 Subject: [PATCH 11/13] test new workflow --- .github/workflows/test_lemonade_eval.yml | 38 ++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index 4242307..008d284 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -199,5 +199,43 @@ jobs: Get-Process -Name "lemonade-server", "lemonade-router", "llama-server" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue Write-Host "Server stopped." 
-ForegroundColor Green + test-oga-model-prep: + # This job requires Ryzen AI hardware with NPU support (e.g., RAI 160 SDK) + # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available + env: + LEMONADE_CI_MODE: "True" + runs-on: [rai-160-sdk, Windows] + concurrency: + group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} + cancel-in-progress: true + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Create virtual environment and install dependencies + shell: bash + run: | + python -m venv .venv + venvPython=".venv/Scripts/python" + venvPip=".venv/Scripts/pip" + $venvPython -m pip install --upgrade pip + $venvPython -m pip check + $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple + + - name: Test OGA Hybrid Model Prep + shell: bash + run: | + venvPython=".venv/Scripts/python" + venvLemonade=".venv/Scripts/lemonade-eval" + + echo "Testing OGA-Load with Hybrid model generation..." + $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 + echo "Running OGA Hybrid Model Prep API tests..." + $venvPython test/oga_hybrid_model_prep_api.py + # This file was originally licensed under Apache 2.0. It has been modified. 
# Modifications Copyright (c) 2025 AMD \ No newline at end of file From 39d3555d87054a9ef7f810969fd90d2b7f25fe67 Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:40:13 -0800 Subject: [PATCH 12/13] Change runner from 'rai-160-sdk' to 'stx' --- .github/workflows/test_lemonade_eval.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index 008d284..5971086 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -204,7 +204,7 @@ jobs: # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available env: LEMONADE_CI_MODE: "True" - runs-on: [rai-160-sdk, Windows] + runs-on: [stx, Windows] concurrency: group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} cancel-in-progress: true @@ -238,4 +238,4 @@ jobs: $venvPython test/oga_hybrid_model_prep_api.py # This file was originally licensed under Apache 2.0. It has been modified. -# Modifications Copyright (c) 2025 AMD \ No newline at end of file +# Modifications Copyright (c) 2025 AMD From 457df746b201b1316dbcc245d35fdbba9d3b2eab Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:52:16 -0800 Subject: [PATCH 13/13] test workflow Removed the test-oga-model-prep job from the workflow. --- .github/workflows/test_lemonade_eval.yml | 38 ------------------------ 1 file changed, 38 deletions(-) diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml index 5971086..e36f102 100644 --- a/.github/workflows/test_lemonade_eval.yml +++ b/.github/workflows/test_lemonade_eval.yml @@ -199,43 +199,5 @@ jobs: Get-Process -Name "lemonade-server", "lemonade-router", "llama-server" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue Write-Host "Server stopped." 
-ForegroundColor Green - test-oga-model-prep: - # This job requires Ryzen AI hardware with NPU support (e.g., RAI 160 SDK) - # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available - env: - LEMONADE_CI_MODE: "True" - runs-on: [stx, Windows] - concurrency: - group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }} - cancel-in-progress: true - steps: - - uses: actions/checkout@v3 - - - name: Set up Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Create virtual environment and install dependencies - shell: bash - run: | - python -m venv .venv - venvPython=".venv/Scripts/python" - venvPip=".venv/Scripts/pip" - $venvPython -m pip install --upgrade pip - $venvPython -m pip check - $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple - - - name: Test OGA Hybrid Model Prep - shell: bash - run: | - venvPython=".venv/Scripts/python" - venvLemonade=".venv/Scripts/lemonade-eval" - - echo "Testing OGA-Load with Hybrid model generation..." - $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 - echo "Running OGA Hybrid Model Prep API tests..." - $venvPython test/oga_hybrid_model_prep_api.py - # This file was originally licensed under Apache 2.0. It has been modified. # Modifications Copyright (c) 2025 AMD