From a5681acb72c3b5125e4cd450b12f21454176c501 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 13:57:00 -0800
Subject: [PATCH 01/13] Upgrade to RAI 1.7.0 incl. model-gen

---
 setup.py                          |   6 +-
 src/lemonade/tools/oga/load.py    | 150 ++++++++++++++++++++++++------
 src/lemonade/tools/oga/ryzenai.py |   8 +-
 3 files changed, 126 insertions(+), 38 deletions(-)

diff --git a/setup.py b/setup.py
index a03cfdf..9ae46e0 100644
--- a/setup.py
+++ b/setup.py
@@ -53,15 +53,15 @@
     extras_require={
         # Extras for specific backends
         "oga-ryzenai": [
-            "onnxruntime-genai-directml-ryzenai==0.9.2.1",
+            "onnxruntime-genai-directml-ryzenai==0.11.2",
             "protobuf>=6.30.1",
         ],
         "oga-cpu": [
-            "onnxruntime-genai==0.9.2",
+            "onnxruntime-genai==0.11.2",
             "onnxruntime >=1.22.0",
         ],
         "model-generate": [
-            "model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
+            "model-generate==1.7.0; platform_system=='Windows' and python_version=='3.12'",
         ],
     },
     classifiers=[],
diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py
index 7592a8d..19a2fb2 100644
--- a/src/lemonade/tools/oga/load.py
+++ b/src/lemonade/tools/oga/load.py
@@ -232,6 +232,62 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             f'{", ".join([value + " for " + key for key, value in execution_providers.items()])}.',
         )
 
+        parser.add_argument(
+            "--packed-const",
+            action="store_true",
+            default=False,
+            help="[model-generate] Pass this if packed constants are required",
+        )
+
+        parser.add_argument(
+            "--script-option",
+            choices=["jit_npu", "non_jit"],
+            default=None,
+            help="[model-generate] Script variant (default: jit_npu for hybrid, non_jit for NPU basic)",
+        )
+
+        parser.add_argument(
+            "--optimize",
+            choices=["prefill", "prefill_llama3", "decode", "full_fusion", "full_fusion_llama3"],
+            default=None,
+            help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)",
+        )
+
+        parser.add_argument(
+            "--max-seq-len",
+            default=None,
+            type=int,
+            help="[model-generate] Max sequence length for prefill fusion (default: 4096)",
+        )
+
+        parser.add_argument(
+            "--npu-op-version",
+            choices=["v1", "v2"],
+            default=None,
+            help="[model-generate] NPU LLM op version (v1 / v2)",
+        )
+
+        parser.add_argument(
+            "--npu-basic",
+            action="store_true",
+            default=False,
+            help="[model-generate] Use basic NPU flow with matmulnbits pass file",
+        )
+
+        parser.add_argument(
+            "--npu-use-ep",
+            action="store_true",
+            default=False,
+            help="[model-generate] Use EP (Execution Provider) flow (only applies to --npu --optimize decode)",
+        )
+
+        parser.add_argument(
+            "--no-prune-logits",
+            action="store_true",
+            default=False,
+            help="[model-generate] Disable logits pruning by setting prune_logits=false",
+        )
+
         return parser
 
     @staticmethod
@@ -378,24 +434,6 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path
             dll_source_path = os.path.join(
                 env_path, "Lib", "site-packages", "onnxruntime_genai"
             )
-            required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
-
-            # Validate that all required DLLs exist in the source directory
-            missing_dlls = []
-
-            for dll_name in required_dlls:
-                dll_source = os.path.join(dll_source_path, dll_name)
-                if not os.path.exists(dll_source):
-                    missing_dlls.append(dll_source)
-
-            if missing_dlls:
-                dll_list = "\n  - ".join(missing_dlls)
-                raise RuntimeError(
-                    f"Required DLLs not found for {device} inference:\n  - {dll_list}\n"
-                    f"Please ensure your RyzenAI installation is complete and supports {device}.\n"
-                    "See installation instructions at:\n"
-                    "https://github.com/lemonade-sdk/lemonade-eval#installation\n"
-                )
 
             # Add the DLL source directory to PATH
             current_path = os.environ.get("PATH", "")
@@ -543,7 +581,22 @@ def _cleanup_environment(saved_state):
             os.chdir(saved_state["cwd"])
             os.environ["PATH"] = saved_state["path"]
 
-    def _generate_model_for_oga(self, output_model_path, device, input_model_path):
+    def _generate_model_for_oga(
+        self,
+        output_model_path,
+        device,
+        input_model_path,
+        packed_const=False,
+        script_option=None,
+        optimize=None,
+        max_seq_len=None,
+        npu_op_version=None,
+        npu_basic=False,
+        npu_use_ep=False,
+        no_prune_logits=False,
+        dml_only=False,
+        cpu_only=False,
+    ):
         """
         Uses the model_generate tool to generate the model for OGA hybrid or npu targets.
         """
@@ -569,18 +622,31 @@ def _generate_model_for_oga(self, output_model_path, device, input_model_path):
 
         try:
             if device_flag == "npu":
+                script_opt = script_option if script_option is not None else "non_jit"
                 model_generate.generate_npu_model(
                     input_model=input_model_path,
                     output_dir=output_model_path,
-                    packed_const=False,
+                    packed_const=packed_const,
+                    script_option=script_opt,
+                    optimize=optimize,
+                    max_seq_len=max_seq_len,
+                    npu_op_version=npu_op_version,
+                    basic=npu_basic,
+                    use_ep=npu_use_ep,
+                    no_prune_logits=no_prune_logits,
+                    cpu_only=cpu_only,
+
                 )
             else:  # hybrid
+                script_opt = script_option if script_option is not None else "jit_npu"
                 model_generate.generate_hybrid_model(
                     input_model=input_model_path,
                     output_dir=output_model_path,
-                    script_option="jit_npu",
-                    mode="bf16",
-                    dml_only=False,
+                    script_option=script_opt,
+                    optimize=optimize,
+                    max_seq_len=max_seq_len,
+                    no_prune_logits=no_prune_logits,
+                    dml_only=dml_only,
                 )
         except Exception as e:
             raise RuntimeError(
@@ -600,6 +666,16 @@ def run(
         trust_remote_code=False,
         subfolder: str = None,
         do_not_upgrade: bool = False,
+        packed_const: bool = False,
+        script_option: str = None,
+        optimize: str = None,
+        max_seq_len: int = None,
+        npu_op_version: str = None,
+        npu_basic: bool = False,
+        npu_use_ep: bool = False,
+        no_prune_logits: bool = False,
+        dml_only: bool = False,
+        cpu_only: bool = False,
     ) -> State:
         from lemonade.common.network import (
             custom_snapshot_download,
@@ -714,6 +790,7 @@ def run(
                         "It does not contain ONNX or safetensors files."
                     )
                 if device in ["npu", "hybrid"]:
+                    needs_generation = False
                     if is_onnx_model:
                         if is_preoptimized_onnx:
                             # Use HuggingFace cache path as it is
@@ -721,11 +798,7 @@ def run(
                         else:
                             # If ONNX but not modified yet for Hybrid or NPU,
                             # needs further optimization
-                            self._generate_model_for_oga(
-                                full_model_path,
-                                device,
-                                input_model_path,
-                            )
+                            needs_generation = True
                     elif is_safetensors_model:
                         config_path = os.path.join(input_model_path, "config.json")
                         if os.path.exists(config_path):
@@ -733,9 +806,7 @@ def run(
                                 config = json.load(f)
                             if "quantization_config" in config:
                                 # If quantized, use subprocess to generate the model
-                                self._generate_model_for_oga(
-                                    full_model_path, device, input_model_path
-                                )
+                                needs_generation = True
                             else:
                                 raise ValueError(
                                     f"The safetensors model {checkpoint} is not quantized. "
@@ -750,6 +821,23 @@ def run(
                         raise ValueError(
                             f"Unsupported model type for checkpoint: {checkpoint}"
                         )
+
+                    if needs_generation:
+                        self._generate_model_for_oga(
+                            full_model_path,
+                            device,
+                            input_model_path,
+                            packed_const,
+                            script_option,
+                            optimize,
+                            max_seq_len,
+                            npu_op_version,
+                            npu_basic,
+                            npu_use_ep,
+                            no_prune_logits,
+                            dml_only,
+                            cpu_only,
+                        )
                 else:
                     if is_onnx_model:
                         # Use HuggingFace cache path as it is
diff --git a/src/lemonade/tools/oga/ryzenai.py b/src/lemonade/tools/oga/ryzenai.py
index eb1f691..c45dce9 100644
--- a/src/lemonade/tools/oga/ryzenai.py
+++ b/src/lemonade/tools/oga/ryzenai.py
@@ -45,17 +45,17 @@ def get_ryzenai_version_info():
 
     if Version(og.__version__) >= Version("0.7.0"):
         oga_path = os.path.dirname(og.__file__)
-        if og.__version__ in ("0.9.2", "0.9.2.1"):
-            return "1.6.0", oga_path
+        if og.__version__ in ("0.11.2", "0.11.2.1"):
+            return "1.7.0", oga_path
         else:
             raise ValueError(
                 f"Unsupported onnxruntime-genai-directml-ryzenai version: {og.__version__}\n"
-                "Only RyzenAI 1.6.0 is currently supported.\n"
+                "Only RyzenAI 1.7.0 is currently supported.\n"
                 "See installation instructions at: https://github.com/lemonade-sdk/lemonade-eval#installation"  # pylint: disable=line-too-long
             )
     else:
         raise ValueError(
             "Legacy RyzenAI installation detected (version < 0.7.0).\n"
-            "RyzenAI 1.4.0 and 1.5.0 are no longer supported. Please upgrade to 1.6.0.\n"
+            "RyzenAI 1.4.0, 1.5.0 and 1.6.0 are no longer supported. Please upgrade to 1.7.0.\n"
             "See installation instructions at: https://github.com/lemonade-sdk/lemonade-eval#installation"  # pylint: disable=line-too-long
         )

From 95950651425181ce76a58862e8a127fc736c95ac Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:05:51 -0800
Subject: [PATCH 02/13] lint

---
 README.md                      |  8 ++++----
 src/lemonade/tools/oga/load.py | 11 ++++++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index f9dcf93..fb91228 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ pip install -e .[oga-cpu]
 # For RyzenAI NPU support (Windows + Python 3.12 only):
 pip install -e .[oga-ryzenai] --extra-index-url=https://pypi.amd.com/simple
 
-# For model generation/export (Windows + Python 3.12 only):
+# For model generation/custom export (Windows + Python 3.12 only):
 pip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
 ```
 
@@ -171,15 +171,15 @@ See the [Models List](https://lemonade-server.ai/docs/server/server_models/) for
 
 ## OGA-Load for Model Preparation
 
-The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build and quantize models from Hugging Face for use on NPU, iGPU, or CPU.
-
+The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build Quark-quantized models from Hugging Face for use on NPU, iGPU, or CPU.
+Check out the official [Ryzen AI Model Preparation guide](https://ryzenai.docs.amd.com/en/latest/oga_model_prepare.html) for more details.
 > **Note**: For running pre-built NPU/Hybrid models, use the server-based workflow above with `-NPU` or `-Hybrid` models. The `oga-load` tool is primarily for model preparation and testing custom checkpoints.
 
 ### Usage
 
 ```bash
 # Prepare and test a model on CPU
-lemonade-eval -i microsoft/Phi-3-mini-4k-instruct oga-load --device cpu --dtype int4 llm-prompt -p "Hello!"
+lemonade-eval -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10
 ```
 
 ### Installation for OGA
diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py
index 19a2fb2..b2db43b 100644
--- a/src/lemonade/tools/oga/load.py
+++ b/src/lemonade/tools/oga/load.py
@@ -248,7 +248,13 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
 
         parser.add_argument(
             "--optimize",
-            choices=["prefill", "prefill_llama3", "decode", "full_fusion", "full_fusion_llama3"],
+            choices=[
+                "prefill",
+                "prefill_llama3",
+                "decode",
+                "full_fusion",
+                "full_fusion_llama3",
+            ],
             default=None,
             help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)",
         )
@@ -396,7 +402,7 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path
         3. Check NPU driver version if required for device and ryzenai_version.
         """
 
-        # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
+        # For RyzenAI 1.7.0, check NPU driver version for NPU and hybrid devices
         if device in ["npu", "hybrid"]:
             required_driver_version = REQUIRED_NPU_DRIVER_VERSION
 
@@ -635,7 +641,6 @@ def _generate_model_for_oga(
                     use_ep=npu_use_ep,
                     no_prune_logits=no_prune_logits,
                     cpu_only=cpu_only,
-
                 )
             else:  # hybrid
                 script_opt = script_option if script_option is not None else "jit_npu"

From 797f2953a833f56831f64bdf8faf38fec3fd4432 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:11:57 -0800
Subject: [PATCH 03/13] lint

---
 src/lemonade/tools/oga/load.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py
index b2db43b..050a612 100644
--- a/src/lemonade/tools/oga/load.py
+++ b/src/lemonade/tools/oga/load.py
@@ -236,14 +236,16 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             "--packed-const",
             action="store_true",
             default=False,
-            help="[model-generate] Pass this if packed constants are required",
+            help="[model-generate] Pass this if packed constants are\n"
+                 "required (packed constants).",
         )
 
         parser.add_argument(
             "--script-option",
             choices=["jit_npu", "non_jit"],
             default=None,
-            help="[model-generate] Script variant (default: jit_npu for hybrid, non_jit for NPU basic)",
+            help="[model-generate] Script variant: jit_npu (hybrid),\n"
+                 "non_jit (NPU basic) (default depends on device)",
         )
 
         parser.add_argument(
@@ -256,14 +258,16 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
                 "full_fusion_llama3",
             ],
             default=None,
-            help="[model-generate] Optimization mode (prefill/prefill_llama3 for hybrid, decode/full_fusion/full_fusion_llama3 for NPU)",
+            help="[model-generate] Optimization: prefill(_llama3) (hybrid),\n"
+                 "decode/full_fusion(_llama3) (NPU basic)",
         )
 
         parser.add_argument(
             "--max-seq-len",
             default=None,
             type=int,
-            help="[model-generate] Max sequence length for prefill fusion (default: 4096)",
+            help="[model-generate] Max sequence length for prefill\n"
+                 "fusion (default: 4096)",
         )
 
         parser.add_argument(

From 6a27a448fa31858bc0d422dcc36a19d7565505cb Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:15:50 -0800
Subject: [PATCH 04/13] lint

---
 src/lemonade/tools/oga/load.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py
index 050a612..93b711a 100644
--- a/src/lemonade/tools/oga/load.py
+++ b/src/lemonade/tools/oga/load.py
@@ -237,7 +237,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             action="store_true",
             default=False,
             help="[model-generate] Pass this if packed constants are\n"
-                 "required (packed constants).",
+            "required (packed constants).",
         )
 
         parser.add_argument(
@@ -245,7 +245,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             choices=["jit_npu", "non_jit"],
             default=None,
             help="[model-generate] Script variant: jit_npu (hybrid),\n"
-                 "non_jit (NPU basic) (default depends on device)",
+            "non_jit (NPU basic) (default depends on device)",
         )
 
         parser.add_argument(
@@ -259,7 +259,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             ],
             default=None,
             help="[model-generate] Optimization: prefill(_llama3) (hybrid),\n"
-                 "decode/full_fusion(_llama3) (NPU basic)",
+            "decode/full_fusion(_llama3) (NPU basic)",
         )
 
         parser.add_argument(
@@ -267,7 +267,7 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             default=None,
             type=int,
             help="[model-generate] Max sequence length for prefill\n"
-                 "fusion (default: 4096)",
+            "fusion (default: 4096)",
         )
 
         parser.add_argument(

From 968e2f5a6aac0070bf858ad21a1a84be4e0e64b8 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:23:17 -0800
Subject: [PATCH 05/13] lint

---
 src/lemonade/tools/oga/load.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lemonade/tools/oga/load.py b/src/lemonade/tools/oga/load.py
index 93b711a..68903ce 100644
--- a/src/lemonade/tools/oga/load.py
+++ b/src/lemonade/tools/oga/load.py
@@ -288,7 +288,8 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
             "--npu-use-ep",
             action="store_true",
             default=False,
-            help="[model-generate] Use EP (Execution Provider) flow (only applies to --npu --optimize decode)",
+            help="[model-generate] Use EP (Execution Provider) flow\n"
+            "(only applies to --npu --optimize decode)",
         )
 
         parser.add_argument(

From 6378f8a0b82ee17df563b0fc97f39e3e34503aba Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:44:48 -0800
Subject: [PATCH 06/13] Add tests for model prep

---
 .github/workflows/test_lemonade_eval.yml | 39 +++++++++++++++
 test/oga_hybrid_model_prep_api.py        | 61 ++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 test/oga_hybrid_model_prep_api.py

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index e36f102..065afd4 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -115,6 +115,45 @@ jobs:
           echo "Running OGA CPU API tests..."
           $venvPython test/oga_cpu_api.py
 
+  test-oga-hybrid-model-prep:
+    # This job requires Ryzen AI 300-series hardware (e.g., Strix Point, Krackan Point)
+    # It will only run if a self-hosted runner with the 'rai-300' label is available
+    env:
+      LEMONADE_CI_MODE: "True"
+    runs-on: [self-hosted, windows, rai-300]
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.ref }}
+      cancel-in-progress: true
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Create virtual environment and install dependencies
+        shell: bash
+        run: |
+          python -m venv .venv
+          venvPython=".venv/Scripts/python"
+          venvPip=".venv/Scripts/pip"
+          $venvPython -m pip install --upgrade pip
+          $venvPython -m pip check
+          $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
+
+      - name: Test OGA Hybrid Model Prep
+        shell: bash
+        run: |
+          venvPython=".venv/Scripts/python"
+          venvLemonade=".venv/Scripts/lemonade-eval"
+          
+          echo "Testing OGA-Load with Hybrid model generation..."
+          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dml-only --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10
+          
+          echo "Running OGA Hybrid Model Prep API tests..."
+          $venvPython test/oga_hybrid_model_prep_api.py
+
       - name: Test Server Integration (GGUF model)
         shell: PowerShell
         run: |
diff --git a/test/oga_hybrid_model_prep_api.py b/test/oga_hybrid_model_prep_api.py
new file mode 100644
index 0000000..7454032
--- /dev/null
+++ b/test/oga_hybrid_model_prep_api.py
@@ -0,0 +1,61 @@
+import unittest
+import shutil
+import os
+from lemonade.state import State
+import lemonade.common.test_helpers as common
+from lemonade.common.build import builds_dir
+from lemonade.tools.prompt import LLMPrompt
+from lemonade.tools.oga.load import OgaLoad
+import sys
+
+ci_mode = os.getenv("LEMONADE_CI_MODE", False)
+
+checkpoint = "amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead"
+device = "hybrid"
+dtype = "int4"
+force = False
+prompt = "Alice and Bob"
+
+
+class Testing(unittest.TestCase):
+
+    def setUp(self) -> None:
+        shutil.rmtree(builds_dir(cache_dir), ignore_errors=True)
+
+    def test_001_oga_model_prep_hybrid(self):
+        # Test the OgaLoad with model generation (oga_model_prep) for hybrid device
+        # and LLMPrompt tools
+
+        state = State(cache_dir=cache_dir, build_name="test")
+
+        state = OgaLoad().run(
+            state,
+            input=checkpoint,
+            device=device,
+            dtype=dtype,
+            force=force,
+            dml_only=True,
+        )
+        state = LLMPrompt().run(state, prompt=prompt, max_new_tokens=10)
+
+        assert len(state.response) > 0, state.response
+
+
+if __name__ == "__main__":
+    cache_dir, _ = common.create_test_dir(
+        "lemonade_oga_hybrid_model_prep_api", base_dir=os.path.abspath(".")
+    )
+
+    suite = unittest.TestSuite()
+    suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Testing))
+
+    # Run the test suite
+    runner = unittest.TextTestRunner()
+    result = runner.run(suite)
+
+    # Set exit code based on test results
+    if not result.wasSuccessful():
+        sys.exit(1)
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD

From 464af561f745c2098c8ebce393894c74390a2dbd Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:52:31 -0800
Subject: [PATCH 07/13] update workflow config

---
 .github/workflows/test_lemonade_eval.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index 065afd4..26bc2b6 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -120,9 +120,9 @@ jobs:
     # It will only run if a self-hosted runner with the 'rai-300' label is available
     env:
       LEMONADE_CI_MODE: "True"
-    runs-on: [self-hosted, windows, rai-300]
+    runs-on: [windows-latest]
     concurrency:
-      group: ${{ github.workflow }}-${{ github.ref }}
+      group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
       cancel-in-progress: true
     steps:
       - uses: actions/checkout@v3

From 7c16b2f9ecbc7aeb176f88ec5dd07b9588c1fa97 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 15:52:31 -0800
Subject: [PATCH 08/13] update workflow config

---
 .github/workflows/test_lemonade_eval.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index 065afd4..f80abec 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -120,9 +120,9 @@ jobs:
     # It will only run if a self-hosted runner with the 'rai-300' label is available
     env:
       LEMONADE_CI_MODE: "True"
-    runs-on: [self-hosted, windows, rai-300]
+    runs-on: [windows-latest]
     concurrency:
-      group: ${{ github.workflow }}-${{ github.ref }}
+      group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
       cancel-in-progress: true
     steps:
       - uses: actions/checkout@v3
@@ -149,8 +149,7 @@ jobs:
           venvLemonade=".venv/Scripts/lemonade-eval"
           
           echo "Testing OGA-Load with Hybrid model generation..."
-          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dml-only --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10
-          
+          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4
           echo "Running OGA Hybrid Model Prep API tests..."
           $venvPython test/oga_hybrid_model_prep_api.py
 

From cdde62a0891cce0d5e319e9617b0b30f829177a2 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Fri, 6 Feb 2026 16:19:56 -0800
Subject: [PATCH 09/13] Modify tests

---
 .github/workflows/test_lemonade_eval.yml | 40 +-----------------------
 1 file changed, 1 insertion(+), 39 deletions(-)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index f80abec..4242307 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -115,44 +115,6 @@ jobs:
           echo "Running OGA CPU API tests..."
           $venvPython test/oga_cpu_api.py
 
-  test-oga-hybrid-model-prep:
-    # This job requires Ryzen AI 300-series hardware (e.g., Strix Point, Krackan Point)
-    # It will only run if a self-hosted runner with the 'rai-300' label is available
-    env:
-      LEMONADE_CI_MODE: "True"
-    runs-on: [windows-latest]
-    concurrency:
-      group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
-      cancel-in-progress: true
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up Python 3.12
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-
-      - name: Create virtual environment and install dependencies
-        shell: bash
-        run: |
-          python -m venv .venv
-          venvPython=".venv/Scripts/python"
-          venvPip=".venv/Scripts/pip"
-          $venvPython -m pip install --upgrade pip
-          $venvPython -m pip check
-          $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
-
-      - name: Test OGA Hybrid Model Prep
-        shell: bash
-        run: |
-          venvPython=".venv/Scripts/python"
-          venvLemonade=".venv/Scripts/lemonade-eval"
-          
-          echo "Testing OGA-Load with Hybrid model generation..."
-          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4
-          echo "Running OGA Hybrid Model Prep API tests..."
-          $venvPython test/oga_hybrid_model_prep_api.py
-
       - name: Test Server Integration (GGUF model)
         shell: PowerShell
         run: |
@@ -238,4 +200,4 @@ jobs:
           Write-Host "Server stopped." -ForegroundColor Green
 
 # This file was originally licensed under Apache 2.0. It has been modified.
-# Modifications Copyright (c) 2025 AMD
+# Modifications Copyright (c) 2025 AMD
\ No newline at end of file

From 7be8300acf4610f13382f393a20b66ca8264ff18 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Mon, 9 Feb 2026 09:55:41 -0800
Subject: [PATCH 10/13] edit setup.py

---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 9ae46e0..c76f4ad 100644
--- a/setup.py
+++ b/setup.py
@@ -62,6 +62,8 @@
         ],
         "model-generate": [
             "model-generate==1.7.0; platform_system=='Windows' and python_version=='3.12'",
+            "numpy<2",
+            "onnx_ir",
         ],
     },
     classifiers=[],

From 6eb3ea1aff252d41fe07feef1a5e9f4d29bce4e9 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <iswaalex@amd.com>
Date: Mon, 9 Feb 2026 11:15:34 -0800
Subject: [PATCH 11/13] test new workflow

---
 .github/workflows/test_lemonade_eval.yml | 38 ++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index 4242307..008d284 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -199,5 +199,43 @@ jobs:
           Get-Process -Name "lemonade-server", "lemonade-router", "llama-server" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
           Write-Host "Server stopped." -ForegroundColor Green
 
+  test-oga-model-prep:
+    # This job requires Ryzen AI hardware with NPU support (e.g., RAI 160 SDK)
+    # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available
+    env:
+      LEMONADE_CI_MODE: "True"
+    runs-on: [rai-160-sdk, Windows]
+    concurrency:
+      group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
+      cancel-in-progress: true
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Create virtual environment and install dependencies
+        shell: bash
+        run: |
+          python -m venv .venv
+          venvPython=".venv/Scripts/python"
+          venvPip=".venv/Scripts/pip"
+          $venvPython -m pip install --upgrade pip
+          $venvPython -m pip check
+          $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
+
+      - name: Test OGA Hybrid Model Prep
+        shell: bash
+        run: |
+          venvPython=".venv/Scripts/python"
+          venvLemonade=".venv/Scripts/lemonade-eval"
+          
+          echo "Testing OGA-Load with Hybrid model generation..."
+          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4
+          echo "Running OGA Hybrid Model Prep API tests..."
+          $venvPython test/oga_hybrid_model_prep_api.py
+
 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD
\ No newline at end of file

From 39d3555d87054a9ef7f810969fd90d2b7f25fe67 Mon Sep 17 00:00:00 2001
From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:40:13 -0800
Subject: [PATCH 12/13] Change runner from 'rai-160-sdk' to 'stx'

---
 .github/workflows/test_lemonade_eval.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index 008d284..5971086 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -204,7 +204,7 @@ jobs:
     # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available
     env:
       LEMONADE_CI_MODE: "True"
-    runs-on: [rai-160-sdk, Windows]
+    runs-on: [stx, Windows]
     concurrency:
       group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
       cancel-in-progress: true
@@ -238,4 +238,4 @@ jobs:
           $venvPython test/oga_hybrid_model_prep_api.py
 
 # This file was originally licensed under Apache 2.0. It has been modified.
-# Modifications Copyright (c) 2025 AMD
\ No newline at end of file
+# Modifications Copyright (c) 2025 AMD

From 457df746b201b1316dbcc245d35fdbba9d3b2eab Mon Sep 17 00:00:00 2001
From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:52:16 -0800
Subject: [PATCH 13/13] Remove test-oga-model-prep job from test workflow

Removed the test-oga-model-prep job from the workflow.
---
 .github/workflows/test_lemonade_eval.yml | 38 ------------------------
 1 file changed, 38 deletions(-)

diff --git a/.github/workflows/test_lemonade_eval.yml b/.github/workflows/test_lemonade_eval.yml
index 5971086..e36f102 100644
--- a/.github/workflows/test_lemonade_eval.yml
+++ b/.github/workflows/test_lemonade_eval.yml
@@ -199,43 +199,5 @@ jobs:
           Get-Process -Name "lemonade-server", "lemonade-router", "llama-server" -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
           Write-Host "Server stopped." -ForegroundColor Green
 
-  test-oga-model-prep:
-    # This job requires Ryzen AI hardware with NPU support (e.g., RAI 160 SDK)
-    # It will only run if a self-hosted runner with the 'rai-160-sdk' label is available
-    env:
-      LEMONADE_CI_MODE: "True"
-    runs-on: [stx, Windows]
-    concurrency:
-      group: ${{ github.workflow }}-hybrid-model-prep-${{ github.ref }}
-      cancel-in-progress: true
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up Python 3.12
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-
-      - name: Create virtual environment and install dependencies
-        shell: bash
-        run: |
-          python -m venv .venv
-          venvPython=".venv/Scripts/python"
-          venvPip=".venv/Scripts/pip"
-          $venvPython -m pip install --upgrade pip
-          $venvPython -m pip check
-          $venvPip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
-
-      - name: Test OGA Hybrid Model Prep
-        shell: bash
-        run: |
-          venvPython=".venv/Scripts/python"
-          venvLemonade=".venv/Scripts/lemonade-eval"
-          
-          echo "Testing OGA-Load with Hybrid model generation..."
-          $venvLemonade -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4
-          echo "Running OGA Hybrid Model Prep API tests..."
-          $venvPython test/oga_hybrid_model_prep_api.py
-
 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD