5 changes: 5 additions & 0 deletions .github/workflows/linux-cpu-x64-build.yml
@@ -39,6 +39,11 @@ jobs:
with:
gradle-version: '8.6'

- uses: actions/setup-python@v6
with:
python-version: '3.11.x'
architecture: 'x64'

- uses: microsoft/onnxruntime-github-actions/setup-build-tools@v0.0.8
with:
vcpkg-version: '2025.03.19'
2 changes: 1 addition & 1 deletion .github/workflows/linux-gpu-x64-build.yml
@@ -20,7 +20,7 @@ env:
jobs:
linux-cuda-x64-build:
env:
- PYTHON_EXECUTABLE: "/opt/python/cp310-cp310/bin/python3.10"
+ PYTHON_EXECUTABLE: "/opt/python/cp311-cp311/bin/python3.11"
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2204-A10"]
steps:
- name: Checkout OnnxRuntime GenAI repo
2 changes: 1 addition & 1 deletion .github/workflows/win-cpu-arm64-build.yml
@@ -94,7 +94,7 @@ jobs:
# Uninstalling LLVM/Clang as it is no longer required and causes issues with numpy installation
choco uninstall llvm --yes
python -m pip install "numpy<2" coloredlogs flatbuffers packaging protobuf sympy pytest
- python -m pip install onnxruntime-qnn
+ python -m pip install onnxruntime-qnn==1.25.0.dev20260126001 -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
Reviewer comment (Contributor): Do we need to specify a version here, or would omitting the version and using the latest nightly package be sufficient?
python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
- name: Run the Python Tests
2 changes: 1 addition & 1 deletion .github/workflows/win-directml-x64-build.yml
@@ -55,7 +55,7 @@ jobs:
run: |
$resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
# $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
- $ORT_NIGHTLY_VERSION = "1.23.0"
+ $ORT_NIGHTLY_VERSION = "1.25.0-dev-20260125-0556-727db0d3dc"
Write-Host "$ORT_NIGHTLY_VERSION"
"ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
6 changes: 3 additions & 3 deletions .pipelines/nuget-publishing.yml
@@ -61,7 +61,7 @@ parameters:
- name: ort_version
displayName: 'OnnxRuntime version'
type: string
- default: '1.23.0'
+ default: '1.25.0-dev-20260125-1205-727db0d3dc'
Reviewer comment (Contributor): These are default values for publishing the official packages for ORT GenAI. I think we should keep the defaults as a stable version of ORT; we can always override which version of ORT to use when the packages are built.
- name: ort_winml_version
displayName: 'Microsoft.WindowsAppSDK.ML Version (should match CMakeList.txt)'
@@ -71,12 +71,12 @@ parameters:
- name: ort_cuda_version
displayName: 'OnnxRuntime GPU version'
type: string
- default: '1.23.0'
+ default: '1.25.0-dev-20260125-0617-727db0d3dc'

- name: ort_dml_version
displayName: 'OnnxRuntime DML version'
type: string
- default: '1.23.0'
+ default: '1.25.0-dev-20260125-0556-727db0d3dc'

- name: cuda_version
displayName: 'CUDA version'
8 changes: 4 additions & 4 deletions cmake/ortlib.cmake
@@ -81,16 +81,16 @@ if(ORT_HOME)
endif()
else()
# If ORT_HOME is not specified, download the onnxruntime headers and libraries from the nightly feed
- set(ORT_VERSION "1.23.0")
+ set(ORT_VERSION "1.25.0-dev-20260125-1205-727db0d3dc")
set(ORT_FEED_ORG_NAME "aiinfra")
set(ORT_FEED_PROJECT "2692857e-05ef-43b4-ba9c-ccf1c22c437c")
set(ORT_NIGHTLY_FEED_ID "7982ae20-ed19-4a35-a362-a96ac99897b7")

if (USE_DML)
- set(ORT_VERSION "1.23.0")
+ set(ORT_VERSION "1.25.0-dev-20260125-0556-727db0d3dc")
set(ORT_PACKAGE_NAME "Microsoft.ML.OnnxRuntime.DirectML")
elseif(USE_CUDA)
- set(ORT_VERSION "1.23.0")
+ set(ORT_VERSION "1.25.0-dev-20260125-0617-727db0d3dc")
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(ORT_PACKAGE_NAME "Microsoft.ML.OnnxRuntime.Gpu.Linux")
elseif(WIN32)
@@ -99,7 +99,7 @@ else()
message(FATAL_ERROR "Unsupported platform for CUDA")
endif()
elseif(USE_ROCM)
- set(ORT_VERSION "1.23.0")
+ set(ORT_VERSION "1.25.0-dev-20260125-0617-727db0d3dc")
set(ORT_PACKAGE_NAME "Microsoft.ML.OnnxRuntime.Rocm")
else()
set(ORT_PACKAGE_NAME "Microsoft.ML.OnnxRuntime")
23 changes: 23 additions & 0 deletions documents/Runtime_option.md
@@ -13,3 +13,26 @@ To recover from a terminated state, use this key value pair: ("terminate_session
Key: "terminate_session"

Accepted values: ("0", "1")

## Enable Profiling

Enable Profiling is a runtime option that dynamically enables or disables ONNX Runtime profiling during generation. Once enabled, each subsequent token-generation step saves its profiling data to a separate JSON file. Profiling can be stopped at any time.

To enable profiling with the default file prefix "onnxruntime_run_profile", use this key value pair: ("enable_profiling", "1")

To disable profiling, use this key value pair: ("enable_profiling", "0")

To enable profiling with a custom file prefix, use this key value pair: ("enable_profiling", "<your_custom_prefix>")

Key: "enable_profiling"

Accepted values: ("0", "1", or a custom profile file prefix string)

Note: Difference from SessionOptions `enable_profiling` in genai_config.json

The `enable_profiling` option in `genai_config.json` under `SessionOptions` is a session-level configuration. When enabled, it collects all profiling data from session creation to session end and aggregates them into a single JSON file. This configuration cannot be started or stopped dynamically during inference.

In contrast, `enable_profiling` as a runtime option provides dynamic control:
- Can be enabled or disabled at any point during generation
- Each token generation produces its own profiling file when enabled
- Useful for profiling specific portions of the generation process
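The mapping between the accepted values above and the resulting behavior can be sketched from the Python side. The `profiling_value` helper below is illustrative, not part of the onnxruntime-genai API, and the commented model path and generation calls are assumptions for context:

```python
# Sketch: mapping a desired profiling state to the "enable_profiling"
# value string documented above. profiling_value is an illustrative
# helper, not part of the onnxruntime-genai API.

def profiling_value(enabled, prefix=None):
    """Return the runtime-option value: "0" to disable, "1" for the
    default file prefix, or a custom prefix string."""
    if not enabled:
        return "0"
    return prefix if prefix else "1"

# Hypothetical usage via the set_runtime_option binding (the model
# path and generation loop are assumptions, not verified here):
#
#   import onnxruntime_genai as og
#   model = og.Model("path/to/model")
#   params = og.GeneratorParams(model)
#   generator = og.Generator(model, params)
#   # Profile only the early tokens, with a custom file prefix:
#   generator.set_runtime_option("enable_profiling", profiling_value(True, "warmup"))
#   # ... generate a few tokens ...
#   generator.set_runtime_option("enable_profiling", profiling_value(False))

print(profiling_value(True))            # prints: 1
print(profiling_value(True, "warmup"))  # prints: warmup
print(profiling_value(False))           # prints: 0
```

Because each enabled token step writes its own JSON file, toggling the option around a specific region of generation keeps the profiling output focused on that region.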
6 changes: 3 additions & 3 deletions examples/slm_engine/build_scripts/build_deps.py
@@ -577,9 +577,9 @@ def main():
ort_home = None
if args.build_ort_from_source:
if args.ort_version_to_use is None:
- # If not Windows then use 1.23.0
+ # If not Windows then use 1.25.0-dev-20260125-1205-727db0d3dc
if platform.system() != "Windows":
- args.ort_version_to_use = "v1.23.0"
+ args.ort_version_to_use = "v1.25.0-dev-20260125-1205-727db0d3dc"
else:
args.ort_version_to_use = "main"
ort_home = build_ort(args, dep_src_dir, artifacts_dir)
@@ -590,7 +590,7 @@ def main():
# The ORT binaries are available as they were downloaded during the GenAI build
# This is the supported version for most platforms
if args.ort_version_to_use is None:
- ORT_VERSION = "1.23.0"
+ ORT_VERSION = "1.25.0-dev-20260125-1205-727db0d3dc"
else:
ORT_VERSION = args.ort_version_to_use
# Copy the ORT artifacts to the artifacts directory.
13 changes: 13 additions & 0 deletions src/models/model.cpp
@@ -173,6 +173,19 @@ void State::SetRunOption(const char* key, const char* value) {
throw std::runtime_error(std::string("terminate_session key value unexpected: ") + value);
}
return;
} else if (strcmp(key, "enable_profiling") == 0) {
if (strcmp(value, "0") == 0) {
run_options_->DisableProfiling();
} else if (strcmp(value, "1") == 0) {
run_options_->EnableProfiling(ORT_TSTR("onnxruntime_run_profile"));
} else {
auto ToProfileString = [](const char* s) -> std::basic_string<ORTCHAR_T> {
Reviewer comment (Contributor):
1. Why is converting a char* to basic_string<ORTCHAR_T> and then back to a char* needed?
2. Can you add a comment here to explain that this else condition is for a custom prefix for the log file?
3. Can the else if and else conditions be merged, since they both enable profiling?
std::string str(s);
return std::basic_string<ORTCHAR_T>(str.begin(), str.end());
};
run_options_->EnableProfiling(ToProfileString(value).c_str());
}
return;
}
run_options_->AddConfigEntry(key, value);
}
4 changes: 3 additions & 1 deletion src/models/onnxruntime_api.h
@@ -555,7 +555,9 @@ struct OrtRunOptions {
*/
OrtRunOptions& UnsetTerminate();

OrtRunOptions& AddActiveLoraAdapter(const OrtLoraAdapter& adapter);  ///< Wraps OrtApi::RunOptionsSetActiveLoraAdapter
OrtRunOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix); ///< Wraps OrtApi::RunOptionsEnableProfiling
OrtRunOptions& DisableProfiling(); ///< Wraps OrtApi::RunOptionsDisableProfiling

static void operator delete(void* p) { Ort::api->ReleaseRunOptions(reinterpret_cast<OrtRunOptions*>(p)); }
Ort::Abstract make_abstract;
10 changes: 10 additions & 0 deletions src/models/onnxruntime_inline.h
@@ -532,6 +532,16 @@ inline OrtRunOptions& OrtRunOptions::AddActiveLoraAdapter(const OrtLoraAdapter&
return *this;
}

inline OrtRunOptions& OrtRunOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) {
Ort::ThrowOnError(Ort::api->RunOptionsEnableProfiling(this, profile_file_prefix));
return *this;
}

inline OrtRunOptions& OrtRunOptions::DisableProfiling() {
Ort::ThrowOnError(Ort::api->RunOptionsDisableProfiling(this));
return *this;
}

inline std::unique_ptr<OrtCUDAProviderOptionsV2> OrtCUDAProviderOptionsV2::Create() {
OrtCUDAProviderOptionsV2* p;
Ort::ThrowOnError(Ort::api->CreateCUDAProviderOptions(&p));
7 changes: 6 additions & 1 deletion src/python/python.cpp
@@ -264,6 +264,10 @@ struct PyGenerator {
generator_->SetActiveAdapter(adapters, adapter_name.c_str());
}

void SetRuntimeOption(const std::string& key, const std::string& value) {
generator_->SetRuntimeOption(key.c_str(), value.c_str());
}

private:
std::unique_ptr<OgaGenerator> generator_;
};
@@ -467,7 +471,8 @@ PYBIND11_MODULE(onnxruntime_genai, m) {
.def("rewind_to", &PyGenerator::RewindTo)
.def("get_next_tokens", &PyGenerator::GetNextTokens)
.def("get_sequence", &PyGenerator::GetSequence)
- .def("set_active_adapter", &PyGenerator::SetActiveAdapter);
+ .def("set_active_adapter", &PyGenerator::SetActiveAdapter)
+ .def("set_runtime_option", &PyGenerator::SetRuntimeOption);

pybind11::class_<OgaImages>(m, "Images")
.def_static("open", [](pybind11::args image_paths) {
3 changes: 2 additions & 1 deletion test/python/cpu/ort/requirements.txt
@@ -1 +1,2 @@
- onnxruntime==1.23.0
+ -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+ onnxruntime==1.25.0.dev20260126001
3 changes: 2 additions & 1 deletion test/python/cuda/ort/requirements.txt
@@ -1 +1,2 @@
- onnxruntime-gpu==1.23.0
+ -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+ onnxruntime-gpu==1.25.0.dev20260123001
3 changes: 2 additions & 1 deletion test/python/directml/ort/requirements.txt
@@ -1 +1,2 @@
- onnxruntime-directml==1.23.0
+ -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+ onnxruntime-directml==1.25.0.dev20260125001
3 changes: 2 additions & 1 deletion test/python/macos/ort/requirements.txt
@@ -1 +1,2 @@
- onnxruntime==1.23.0
+ -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+ onnxruntime==1.25.0.dev20260126001
2 changes: 1 addition & 1 deletion test/python/requirements.txt
@@ -7,5 +7,5 @@ sympy
pytest
onnx
onnx_ir>=0.1.3
- transformers
+ transformers<5.0.0
huggingface_hub[cli]