Skip to content

Commit 22dde33

Browse files
authored
Merge pull request #6 from lemonade-sdk/iswarya/upgrade-lemonade-eval-rai1.7.0
Upgrade to RAI 1.7.0 incl. model-gen
2 parents 3a92093 + 457df74 commit 22dde33

File tree

5 files changed

+204
-43
lines changed

5 files changed

+204
-43
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pip install -e .[oga-cpu]
9696
# For RyzenAI NPU support (Windows + Python 3.12 only):
9797
pip install -e .[oga-ryzenai] --extra-index-url=https://pypi.amd.com/simple
9898

99-
# For model generation/export (Windows + Python 3.12 only):
99+
# For model generation/custom export (Windows + Python 3.12 only):
100100
pip install -e .[oga-ryzenai,model-generate] --extra-index-url=https://pypi.amd.com/simple
101101
```
102102

@@ -171,15 +171,15 @@ See the [Models List](https://lemonade-server.ai/docs/server/server_models/) for
171171

172172
## OGA-Load for Model Preparation
173173

174-
The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build and quantize models from Hugging Face for use on NPU, iGPU, or CPU.
175-
174+
The `oga-load` tool is for preparing custom OGA (ONNX Runtime GenAI) models. It can build quark-quantized models from Hugging Face for use on NPU, iGPU, or CPU.
175+
Check out the official [Ryzen AI Model Preparation guide](https://ryzenai.docs.amd.com/en/latest/oga_model_prepare.html) for more details.
176176
> **Note**: For running pre-built NPU/Hybrid models, use the server-based workflow above with `-NPU` or `-Hybrid` models. The `oga-load` tool is primarily for model preparation and testing custom checkpoints.
177177
178178
### Usage
179179

180180
```bash
181181
# Prepare and test a model on CPU
182-
lemonade-eval -i microsoft/Phi-3-mini-4k-instruct oga-load --device cpu --dtype int4 llm-prompt -p "Hello!"
182+
lemonade-eval -i amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead oga-load --device hybrid --dtype int4 llm-prompt -p "Alice and Bob" --max-new-tokens 10
183183
```
184184

185185
### Installation for OGA

setup.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,17 @@
5353
extras_require={
5454
# Extras for specific backends
5555
"oga-ryzenai": [
56-
"onnxruntime-genai-directml-ryzenai==0.9.2.1",
56+
"onnxruntime-genai-directml-ryzenai==0.11.2",
5757
"protobuf>=6.30.1",
5858
],
5959
"oga-cpu": [
60-
"onnxruntime-genai==0.9.2",
60+
"onnxruntime-genai==0.11.2",
6161
"onnxruntime >=1.22.0",
6262
],
6363
"model-generate": [
64-
"model-generate==1.5.0; platform_system=='Windows' and python_version=='3.10'",
64+
"model-generate==1.7.0; platform_system=='Windows' and python_version=='3.12'",
65+
"numpy<2",
66+
"onnx_ir",
6567
],
6668
},
6769
classifiers=[],

src/lemonade/tools/oga/load.py

Lines changed: 130 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,73 @@ def parser(add_help: bool = True) -> argparse.ArgumentParser:
232232
f'{", ".join([value + " for " + key for key, value in execution_providers.items()])}.',
233233
)
234234

235+
parser.add_argument(
236+
"--packed-const",
237+
action="store_true",
238+
default=False,
239+
help="[model-generate] Pass this if packed constants are\n"
240+
"required (packed constants).",
241+
)
242+
243+
parser.add_argument(
244+
"--script-option",
245+
choices=["jit_npu", "non_jit"],
246+
default=None,
247+
help="[model-generate] Script variant: jit_npu (hybrid),\n"
248+
"non_jit (NPU basic) (default depends on device)",
249+
)
250+
251+
parser.add_argument(
252+
"--optimize",
253+
choices=[
254+
"prefill",
255+
"prefill_llama3",
256+
"decode",
257+
"full_fusion",
258+
"full_fusion_llama3",
259+
],
260+
default=None,
261+
help="[model-generate] Optimization: prefill(_llama3) (hybrid),\n"
262+
"decode/full_fusion(_llama3) (NPU basic)",
263+
)
264+
265+
parser.add_argument(
266+
"--max-seq-len",
267+
default=None,
268+
type=int,
269+
help="[model-generate] Max sequence length for prefill\n"
270+
"fusion (default: 4096)",
271+
)
272+
273+
parser.add_argument(
274+
"--npu-op-version",
275+
choices=["v1", "v2"],
276+
default=None,
277+
help="[model-generate] NPU LLM op version (v1 / v2)",
278+
)
279+
280+
parser.add_argument(
281+
"--npu-basic",
282+
action="store_true",
283+
default=False,
284+
help="[model-generate] Use basic NPU flow with matmulnbits pass file",
285+
)
286+
287+
parser.add_argument(
288+
"--npu-use-ep",
289+
action="store_true",
290+
default=False,
291+
help="[model-generate] Use EP (Execution Provider) flow\n"
292+
"(only applies to --npu --optimize decode)",
293+
)
294+
295+
parser.add_argument(
296+
"--no-prune-logits",
297+
action="store_true",
298+
default=False,
299+
help="[model-generate] Disable logits pruning by setting prune_logits=false",
300+
)
301+
235302
return parser
236303

237304
@staticmethod
@@ -340,7 +407,7 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path
340407
3. Check NPU driver version if required for device and ryzenai_version.
341408
"""
342409

343-
# For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
410+
# For RyzenAI 1.7.0, check NPU driver version for NPU and hybrid devices
344411
if device in ["npu", "hybrid"]:
345412
required_driver_version = REQUIRED_NPU_DRIVER_VERSION
346413

@@ -378,24 +445,6 @@ def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path
378445
dll_source_path = os.path.join(
379446
env_path, "Lib", "site-packages", "onnxruntime_genai"
380447
)
381-
required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
382-
383-
# Validate that all required DLLs exist in the source directory
384-
missing_dlls = []
385-
386-
for dll_name in required_dlls:
387-
dll_source = os.path.join(dll_source_path, dll_name)
388-
if not os.path.exists(dll_source):
389-
missing_dlls.append(dll_source)
390-
391-
if missing_dlls:
392-
dll_list = "\n - ".join(missing_dlls)
393-
raise RuntimeError(
394-
f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
395-
f"Please ensure your RyzenAI installation is complete and supports {device}.\n"
396-
"See installation instructions at:\n"
397-
"https://github.com/lemonade-sdk/lemonade-eval#installation\n"
398-
)
399448

400449
# Add the DLL source directory to PATH
401450
current_path = os.environ.get("PATH", "")
@@ -543,7 +592,22 @@ def _cleanup_environment(saved_state):
543592
os.chdir(saved_state["cwd"])
544593
os.environ["PATH"] = saved_state["path"]
545594

546-
def _generate_model_for_oga(self, output_model_path, device, input_model_path):
595+
def _generate_model_for_oga(
596+
self,
597+
output_model_path,
598+
device,
599+
input_model_path,
600+
packed_const=False,
601+
script_option=None,
602+
optimize=None,
603+
max_seq_len=None,
604+
npu_op_version=None,
605+
npu_basic=False,
606+
npu_use_ep=False,
607+
no_prune_logits=False,
608+
dml_only=False,
609+
cpu_only=False,
610+
):
547611
"""
548612
Uses the model_generate tool to generate the model for OGA hybrid or npu targets.
549613
"""
@@ -569,18 +633,30 @@ def _generate_model_for_oga(self, output_model_path, device, input_model_path):
569633

570634
try:
571635
if device_flag == "npu":
636+
script_opt = script_option if script_option is not None else "non_jit"
572637
model_generate.generate_npu_model(
573638
input_model=input_model_path,
574639
output_dir=output_model_path,
575-
packed_const=False,
640+
packed_const=packed_const,
641+
script_option=script_opt,
642+
optimize=optimize,
643+
max_seq_len=max_seq_len,
644+
npu_op_version=npu_op_version,
645+
basic=npu_basic,
646+
use_ep=npu_use_ep,
647+
no_prune_logits=no_prune_logits,
648+
cpu_only=cpu_only,
576649
)
577650
else: # hybrid
651+
script_opt = script_option if script_option is not None else "jit_npu"
578652
model_generate.generate_hybrid_model(
579653
input_model=input_model_path,
580654
output_dir=output_model_path,
581-
script_option="jit_npu",
582-
mode="bf16",
583-
dml_only=False,
655+
script_option=script_opt,
656+
optimize=optimize,
657+
max_seq_len=max_seq_len,
658+
no_prune_logits=no_prune_logits,
659+
dml_only=dml_only,
584660
)
585661
except Exception as e:
586662
raise RuntimeError(
@@ -600,6 +676,16 @@ def run(
600676
trust_remote_code=False,
601677
subfolder: str = None,
602678
do_not_upgrade: bool = False,
679+
packed_const: bool = False,
680+
script_option: str = None,
681+
optimize: str = None,
682+
max_seq_len: int = None,
683+
npu_op_version: str = None,
684+
npu_basic: bool = False,
685+
npu_use_ep: bool = False,
686+
no_prune_logits: bool = False,
687+
dml_only: bool = False,
688+
cpu_only: bool = False,
603689
) -> State:
604690
from lemonade.common.network import (
605691
custom_snapshot_download,
@@ -714,28 +800,23 @@ def run(
714800
"It does not contain ONNX or safetensors files."
715801
)
716802
if device in ["npu", "hybrid"]:
803+
needs_generation = False
717804
if is_onnx_model:
718805
if is_preoptimized_onnx:
719806
# Use HuggingFace cache path as it is
720807
full_model_path = input_model_path
721808
else:
722809
# If ONNX but not modified yet for Hybrid or NPU,
723810
# needs further optimization
724-
self._generate_model_for_oga(
725-
full_model_path,
726-
device,
727-
input_model_path,
728-
)
811+
needs_generation = True
729812
elif is_safetensors_model:
730813
config_path = os.path.join(input_model_path, "config.json")
731814
if os.path.exists(config_path):
732815
with open(config_path, "r", encoding="utf-8") as f:
733816
config = json.load(f)
734817
if "quantization_config" in config:
735818
# If quantized, use subprocess to generate the model
736-
self._generate_model_for_oga(
737-
full_model_path, device, input_model_path
738-
)
819+
needs_generation = True
739820
else:
740821
raise ValueError(
741822
f"The safetensors model {checkpoint} is not quantized. "
@@ -750,6 +831,23 @@ def run(
750831
raise ValueError(
751832
f"Unsupported model type for checkpoint: {checkpoint}"
752833
)
834+
835+
if needs_generation:
836+
self._generate_model_for_oga(
837+
full_model_path,
838+
device,
839+
input_model_path,
840+
packed_const,
841+
script_option,
842+
optimize,
843+
max_seq_len,
844+
npu_op_version,
845+
npu_basic,
846+
npu_use_ep,
847+
no_prune_logits,
848+
dml_only,
849+
cpu_only,
850+
)
753851
else:
754852
if is_onnx_model:
755853
# Use HuggingFace cache path as it is

src/lemonade/tools/oga/ryzenai.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,17 @@ def get_ryzenai_version_info():
4545

4646
if Version(og.__version__) >= Version("0.7.0"):
4747
oga_path = os.path.dirname(og.__file__)
48-
if og.__version__ in ("0.9.2", "0.9.2.1"):
49-
return "1.6.0", oga_path
48+
if og.__version__ in ("0.11.2", "0.11.2.1"):
49+
return "1.7.0", oga_path
5050
else:
5151
raise ValueError(
5252
f"Unsupported onnxruntime-genai-directml-ryzenai version: {og.__version__}\n"
53-
"Only RyzenAI 1.6.0 is currently supported.\n"
53+
"Only RyzenAI 1.7.0 is currently supported.\n"
5454
"See installation instructions at: https://github.com/lemonade-sdk/lemonade-eval#installation" # pylint: disable=line-too-long
5555
)
5656
else:
5757
raise ValueError(
5858
"Legacy RyzenAI installation detected (version < 0.7.0).\n"
59-
"RyzenAI 1.4.0 and 1.5.0 are no longer supported. Please upgrade to 1.6.0.\n"
59+
"RyzenAI 1.4.0, 1.5.0 and 1.6.0 are no longer supported. Please upgrade to 1.7.0.\n"
6060
"See installation instructions at: https://github.com/lemonade-sdk/lemonade-eval#installation" # pylint: disable=line-too-long
6161
)

test/oga_hybrid_model_prep_api.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
import unittest
import shutil
import os
import sys

from lemonade.state import State
import lemonade.common.test_helpers as common
from lemonade.common.build import builds_dir
from lemonade.tools.prompt import LLMPrompt
from lemonade.tools.oga.load import OgaLoad

# Truthy when running under CI (set by the workflow); note os.getenv returns the
# raw string value, so this is the env var's text or the default False.
ci_mode = os.getenv("LEMONADE_CI_MODE", False)

# Quark-quantized checkpoint exercised by the hybrid model-prep flow.
checkpoint = "amd/Llama-3.2-1B-Instruct-awq-uint4-asym-g128-bf16-lmhead"
device = "hybrid"
dtype = "int4"
force = False
prompt = "Alice and Bob"

# NOTE(fix): cache_dir was previously created only inside the
# `if __name__ == "__main__":` guard, so importing this module (e.g. via
# pytest discovery) raised NameError in setUp/test methods. Create it at
# module scope so the tests are runnable regardless of entry point.
cache_dir, _ = common.create_test_dir(
    "lemonade_oga_hybrid_model_prep_api", base_dir=os.path.abspath(".")
)


class Testing(unittest.TestCase):
    """End-to-end check of OgaLoad model generation for the hybrid device."""

    def setUp(self) -> None:
        # Start each test from a clean builds directory so stale artifacts
        # from a previous run cannot mask generation failures.
        shutil.rmtree(builds_dir(cache_dir), ignore_errors=True)

    def test_001_oga_model_prep_hybrid(self):
        # Test the OgaLoad with model generation (oga_model_prep) for hybrid device
        # and LLMPrompt tools

        state = State(cache_dir=cache_dir, build_name="test")

        state = OgaLoad().run(
            state,
            input=checkpoint,
            device=device,
            dtype=dtype,
            force=force,
            dml_only=True,
        )
        state = LLMPrompt().run(state, prompt=prompt, max_new_tokens=10)

        # The generated model must produce a non-empty response to the prompt.
        assert len(state.response) > 0, state.response


if __name__ == "__main__":
    suite = unittest.TestSuite()
    suite.addTests(unittest.TestLoader().loadTestsFromTestCase(Testing))

    # Run the test suite
    runner = unittest.TextTestRunner()
    result = runner.run(suite)

    # Set exit code based on test results
    if not result.wasSuccessful():
        sys.exit(1)

# This file was originally licensed under Apache 2.0. It has been modified.
# Modifications Copyright (c) 2025 AMD

0 commit comments

Comments
 (0)