microsoft · xieofxie · Jun 16, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
@@ -1,19 +1,19 @@
 {
-    "configCheck": 173,
-    "copyCheck": 183,
+    "configCheck": 174,
+    "copyCheck": 184,
     "executePatchPyCheck": 0,
-    "executeRuntimeCheck": 106,
+    "executeRuntimeCheck": 107,
     "extensionCheck": 2,
-    "gitignoreCheck": 44,
-    "inferenceModelCheck": 25,
-    "ipynbCheck": 51,
-    "licenseCheck": 41,
-    "modelProjectCheck": 46,
-    "oliveCheck": 82,
-    "oliveJsonCheck": 173,
-    "pathCheck": 1463,
-    "requirementsCheck": 44,
+    "gitignoreCheck": 45,
+    "inferenceModelCheck": 26,
+    "ipynbCheck": 52,
+    "licenseCheck": 42,
+    "modelProjectCheck": 47,
+    "oliveCheck": 85,
+    "oliveJsonCheck": 174,
+    "pathCheck": 1468,
+    "requirementsCheck": 45,
     "templateCheck": 3,
-    "venvRequirementsCheck": 23,
-    "winmlCopyCheck": 38
+    "venvRequirementsCheck": 24,
+    "winmlCopyCheck": 39
 }
@@ -485,7 +485,7 @@
             "relativePath": "sam-vit-base/aitk",
             "version": 2,
             "pipeline_tags": [
-                "fill-mask"
+                "mask-generation"
             ]
         },
         {
@@ -501,7 +501,7 @@
             "relativePath": "sam2.1-hiera-small/aitk",
             "version": 2,
             "pipeline_tags": [
-                "fill-mask"
+                "mask-generation"
             ]
         },
         {
@@ -837,6 +837,22 @@
                 "text-generation"
             ]
         },
+        {
+            "displayName": "Qwen/Qwen3.5-2B",
+            "icon": "qwen",
+            "modelLink": "https://huggingface.co/Qwen/Qwen3.5-2B",
+            "id": "huggingface/Qwen/Qwen3.5-2B",
+            "runtimes": [
+                "NvidiaTRTRTX"
+            ],
+            "architecture": "Transformer",
+            "status": "Ready",
+            "relativePath": "Qwen-Qwen3.5-2B/aitk",
+            "version": 1,
+            "pipeline_tags": [
+                "image-text-to-text"
+            ]
+        },
         {
             "displayName": "sd2-community/stable-diffusion-2-1",
             "icon": "HuggingFace",

@@ -30,6 +30,7 @@
 | [Qwen2.5 Coder 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-Coder-14B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-Coder-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-Coder-14B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-Coder-14B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 Coder 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct) | [Intel CPU](../../../Qwen-Qwen2.5-Coder-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-Coder-3B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-Coder-3B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
 | [Qwen2.5 Coder 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | [AMD NPU](../../../Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel CPU](../../../Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel GPU](../../../Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) |
+| [Qwen3.5 2B](https://huggingface.co/Qwen/Qwen3.5-2B) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen3.5-2B/aitk/qwen_trtrtx_workflow.json) |
 ## Non-LLM Models
 
 | Model Name | Supported Runtimes |

@@ -0,0 +1,8 @@
+olive-ai==0.13.0
+onnx-ir==0.2.1
+onnxscript==0.7.0
+transformers==5.2.0
+onnxruntime-genai-winml==0.14.1
+torch==2.10.0
+torchmetrics==1.9.0
+torchvision==0.25.0
@@ -0,0 +1,5 @@
+__pycache__
+/cache
+/history/*/*
+!/history/*/history.config
+!/history/*/olive_config.json
@@ -0,0 +1,37 @@
+# Qwen3.5-2B Model Optimization — NVIDIA TensorRT for RTX
+
+This recipe converts the [Qwen3.5-2B](https://huggingface.co/Qwen/Qwen3.5-2B)
+vision-language model to ONNX for the **NVIDIA TensorRT for RTX** execution
+provider (`NvTensorRTRTXExecutionProvider`) and runs it with ONNX Runtime
+GenAI.
+
+Qwen3.5 is a hybrid architecture combining GatedDeltaNet linear attention
+layers with standard full attention layers. The pipeline exports three
+sub-models and assembles them into a single ONNX Runtime GenAI model folder:
+
+- **embedding.json** — token embedding + image feature fusion (FP16)
+- **vision.json** — vision encoder, packed patches → image features (FP16)
+- **text.json** — text decoder via ModelBuilder (INT4, hybrid GatedDeltaNet + full attention)
+
+Because AITK runs a single Olive workflow per recipe, the three inner Olive
+configs are wrapped behind one `AitkPython` pass
+(`qwen_trtrtx_workflow.py`). The script runs each inner config, then patches
+`genai_config.json` / `processor_config.json` and the tokenizer for the GenAI
+runtime.
+
+## Optimization
+
+| Sub-model | Precision |
+|-----------|-----------|
+| Vision encoder | FP16 |
+| Text embedding | FP16 |
+| Text decoder | INT4 (block size 128, accuracy level 4) |
+
+## Inference
+
+Run the provided `inference_sample.ipynb`. It loads the optimized model from
+`./model`, registers the NVIDIA TensorRT for RTX execution provider, and
+streams a response to a text prompt, optionally accompanied by an image.
+
+> Metrics (latency and accuracy on a specific device) will be added after a
+> benchmark run on the target RTX hardware.
@@ -0,0 +1,8 @@
+{
+    "copies": [
+        {
+            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py",
+            "dst": "winml.py"
+        }
+    ]
+}