Skip to content

Commit 97d44f6

Browse files
authored
Cherry pick round 3 (#1245)
1 parent 8d45fdf commit 97d44f6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1365
-866
lines changed

.github/workflows/android-build.yml

+8-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ env:
1515
ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json"
1616
# ANDROID_ABI: "arm64-v8a"
1717
ANDROID_ABI: "x86_64"
18+
ANDROID_NDK_VERSION: "27.2.12479018" # LTS version
1819
jobs:
1920
android_x64:
2021
# Note: linux is the only good option for the Android emulator currently.
@@ -49,8 +50,11 @@ jobs:
4950
ls -l $ANDROID_HOME
5051
5152
echo "ANDROID_SDK_ROOT=$ANDROID_SDK_ROOT"
53+
"${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager" --install "ndk;${{ env.ANDROID_NDK_VERSION }}"
54+
55+
echo "ANDROID_NDK_HOME=${ANDROID_SDK_ROOT}/ndk/${{ env.ANDROID_NDK_VERSION }}" >> $GITHUB_ENV
56+
echo "ANDROID_AVD_HOME=${{ runner.temp }}" >> $GITHUB_ENV
5257
53-
echo "ANDROID_NDK_HOME=$ANDROID_NDK_HOME"
5458
echo "ANDROID_NDK_LATEST_HOME=$ANDROID_NDK_LATEST_HOME"
5559
ls -l $ANDROID_HOME/ndk
5660
@@ -76,12 +80,12 @@ jobs:
7680
run: |
7781
set -e -x
7882
rm -rf build
79-
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_LATEST_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --update
83+
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --update
8084
8185
- name: Run Android build
8286
run: |
8387
set -e -x
84-
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_LATEST_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --build
88+
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --build
8589
8690
- name: Enable KVM group perms so Android emulator can run
8791
run: |
@@ -92,4 +96,4 @@ jobs:
9296
- name: Run Android tests
9397
run: |
9498
set -e -x
95-
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_LATEST_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --android_run_emulator --test
99+
./build.sh --android --android_api=27 --android_ndk_path=${ANDROID_NDK_HOME} --config=RelWithDebInfo --android_abi=${{ env.ANDROID_ABI }} --parallel --build_java --android_run_emulator --test

.pipelines/stages/jobs/py-packaging-job.yml

+3
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ jobs:
6060
Python312:
6161
PyDotVer: '3.12'
6262
PyNoDotVer: '312'
63+
Python313:
64+
PyDotVer: '3.13'
65+
PyNoDotVer: '313'
6366

6467
timeoutInMinutes: 240
6568
workspace:

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.
1515

1616
|Support matrix|Supported now|Under development|On the roadmap|
1717
| -------------- | ------------- | ----------------- | -------------- |
18-
| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
18+
| Model architectures | DeepSeek <br/> Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
1919
|API| Python <br/>C# <br/>C/C++ <br/> Java ^ |Objective-C||
2020
|Platform| Linux <br/> Windows <br/>Mac ^ <br/>Android ^ ||iOS |||
2121
|Architecture|x86 <br/> x64 <br/> Arm64 ~ ||||

VERSION_INFO

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.6.0-rc4
1+
0.6.0

build.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class HelpFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescript
7777
parser.add_argument("--build", action="store_true", help="Build.")
7878
parser.add_argument("--test", action="store_true", help="Run tests.")
7979
parser.add_argument(
80-
"--clean", action="store_true", help="Run 'cmake --build --target clean' for the selected config/s."
80+
"--clean", action="store_true", help="Run 'cmake --build --target clean' for the selected config."
8181
)
8282

8383
parser.add_argument("--skip_tests", action="store_true", help="Skip all tests. Overrides --test.")
@@ -320,7 +320,7 @@ def _validate_cmake_args(args: argparse.Namespace):
320320

321321
def _validate_args(args: argparse.Namespace):
322322
# default to all 3 stages
323-
if not args.update and not args.build and not args.test:
323+
if not any((args.update, args.clean, args.build, args.test)):
324324
args.update = True
325325
args.build = True
326326
args.test = True
@@ -639,7 +639,7 @@ def clean(args: argparse.Namespace, env: dict[str, str]):
639639
Clean the build output.
640640
"""
641641
log.info("Cleaning targets")
642-
cmd_args = [str(args.cmake), "--build", str(args.build_dir), "--config", args.config, "--target", "clean"]
642+
cmd_args = [str(args.cmake_path), "--build", str(args.build_dir), "--config", args.config, "--target", "clean"]
643643
util.run(cmd_args, env=env)
644644

645645

@@ -655,6 +655,9 @@ def clean(args: argparse.Namespace, env: dict[str, str]):
655655
if arguments.update:
656656
update(arguments, environment)
657657

658+
if arguments.clean:
659+
clean(arguments, environment)
660+
658661
if arguments.build:
659662
build(arguments, environment)
660663

cmake/check_cuda.cmake

-5
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
4646
"${GENERATORS_ROOT}/cuda/*.cuh"
4747
)
4848

49-
file(GLOB test_cuda_srcs CONFIGURE_DEPENDS
50-
"${TESTS_ROOT}/*.cu"
51-
"${TESTS_ROOT}/*.cuh"
52-
)
53-
list(APPEND test_srcs ${test_cuda_srcs})
5449
add_compile_definitions(USE_CUDA=1)
5550
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
5651
elseif(USE_CUDA)

examples/c/CMakeLists.txt

+11-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ set(CMAKE_CXX_STANDARD 20)
66
option(USE_CUDA "Build with CUDA support" OFF)
77
option(USE_CXX "Invoke the C++ example" ON)
88
option(PHI3 "Build the Phi example" OFF)
9+
option(PHI3_QA "Build the Phi Q&A example without multi-turn prompting" OFF)
910
option(PHI3V "Build the Phi3v example" OFF)
1011
option(WHISPER "Build the Whisper example" OFF)
1112

@@ -53,17 +54,24 @@ function(prepare_executable executable)
5354
endforeach()
5455
endfunction()
5556

57+
set(EXAMPLES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src)
58+
5659
if(PHI3)
57-
add_executable(phi3 ${CMAKE_SOURCE_DIR}/src/phi3.cpp)
60+
add_executable(phi3 ${EXAMPLES_SOURCE_DIR}/phi3.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp)
5861
prepare_executable(phi3)
5962
endif()
6063

64+
if(PHI3_QA)
65+
add_executable(phi3_qa ${EXAMPLES_SOURCE_DIR}/phi3_qa.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp)
66+
prepare_executable(phi3_qa)
67+
endif()
68+
6169
if(PHI3V)
62-
add_executable(phi3v ${CMAKE_SOURCE_DIR}/src/phi3v.cpp)
70+
add_executable(phi3v ${EXAMPLES_SOURCE_DIR}/phi3v.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp)
6371
prepare_executable(phi3v)
6472
endif()
6573

6674
if(WHISPER)
67-
add_executable(whisper ${CMAKE_SOURCE_DIR}/src/whisper.cpp)
75+
add_executable(whisper ${CMAKE_SOURCE_DIR}/src/whisper.cpp ${EXAMPLES_SOURCE_DIR}/common.cpp)
6876
prepare_executable(whisper)
6977
endif()

examples/c/README.md

+17-12
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@ This example uses the [Phi-3.5 mini model](https://huggingface.co/microsoft/Phi-
2727
You can clone this entire model repository or download individual model variants. To download individual variants, you need to install the Hugging Face CLI.
2828

2929
```bash
30-
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
30+
huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/* --local-dir .
3131
```
3232

33+
### Chat vs Question Answering
34+
35+
The examples below are for chat scenarios, where the model retains the previous conversation. If you only want question answering, where the model does not remember past conversation, use `-DPHI3_QA` instead of `-DPHI3`. In that case the executable is named `phi3_qa` instead of `phi3`.
36+
3337
### Windows x64 CPU
3438

3539
#### Install the onnxruntime and onnxruntime-genai binaries
@@ -69,7 +73,7 @@ cmake --build . --config Release
6973

7074
```bash
7175
cd Release
72-
.\phi3.exe path_to_model
76+
.\phi3.exe <path_to_model> <execution_provider>
7377
```
7478

7579
### Windows x64 DirectML
@@ -114,7 +118,7 @@ cmake --build . --config Release
114118

115119
```bash
116120
cd Release
117-
.\phi3.exe path_to_model
121+
.\phi3.exe <path_to_model> <execution_provider>
118122
```
119123

120124
### Windows arm64 CPU
@@ -156,7 +160,7 @@ cmake --build . --config Release
156160

157161
```bash
158162
cd Release
159-
.\phi3.exe path_to_model
163+
.\phi3.exe <path_to_model> <execution_provider>
160164
```
161165

162166
### Windows arm64 DirectML
@@ -201,7 +205,7 @@ cmake --build . --config Release
201205

202206
```bash
203207
cd Release
204-
.\phi3.exe path_to_model
208+
.\phi3.exe <path_to_model> <execution_provider>
205209
```
206210

207211
### Linux
@@ -216,8 +220,8 @@ Change into the onnxruntime-genai directory.
216220
cd examples/c
217221
curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-1.20.1.tgz -o onnxruntime-linux-x64-1.20.1.tgz
218222
tar xvzf onnxruntime-linux-x64-1.20.1.tgz
219-
cp onnxruntime-linux-x64-1.20.1/include/* include
220-
cp onnxruntime-linux-x64-1.20.1/lib/* lib
223+
cp -r onnxruntime-linux-x64-1.20.1/include/* include
224+
cp -r onnxruntime-linux-x64-1.20.1/lib/* lib
221225
cd ../..
222226
```
223227

@@ -257,7 +261,8 @@ cmake --build . --config Release
257261
#### Run the sample
258262

259263
```bash
260-
./phi3 path_to_model
264+
./phi3 <path_to_model> <execution_provider>
265+
# Example for CPU: ./phi3 ../cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/ cpu
261266
```
262267

263268
## Phi-3.5 vision
@@ -314,7 +319,7 @@ cmake --build . --config Release
314319

315320
```bash
316321
cd Release
317-
.\phi3v.exe path_to_model
322+
.\phi3v.exe <path_to_model> <execution_provider>
318323
```
319324

320325
### Run on Linux
@@ -329,8 +334,8 @@ Change into the onnxruntime-genai directory.
329334
cd examples/c
330335
curl -L https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-1.20.1.tgz -o onnxruntime-linux-x64-1.20.1.tgz
331336
tar xvzf onnxruntime-linux-x64-1.20.1.tgz
332-
cp onnxruntime-linux-x64-1.20.1/include/* include
333-
cp onnxruntime-linux-x64-1.20.1/lib/* lib
337+
cp -r onnxruntime-linux-x64-1.20.1/include/* include
338+
cp -r onnxruntime-linux-x64-1.20.1/lib/* lib
334339
cd ../..
335340
```
336341

@@ -369,5 +374,5 @@ cmake --build . --config Release
369374

370375
```bash
371376
cd build/Release
372-
./phi3v path_to_model
377+
./phi3v <path_to_model> <execution_provider>
373378
```

examples/c/src/common.cpp

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#include "common.h"
5+
#include <cassert>
6+
7+
// Stores the current Clock time as the start timestamp.
// The timestamp is expected to still be at its zero value; setting it a
// second time trips the assert in debug builds.
void Timing::RecordStartTimestamp() {
  [[maybe_unused]] const bool not_yet_recorded = start_timestamp_.time_since_epoch().count() == 0;
  assert(not_yet_recorded);
  start_timestamp_ = Clock::now();
}
11+
12+
// Stores the current Clock time as the first-token timestamp.
// The timestamp is expected to still be at its zero value; setting it a
// second time trips the assert in debug builds.
void Timing::RecordFirstTokenTimestamp() {
  [[maybe_unused]] const bool not_yet_recorded = first_token_timestamp_.time_since_epoch().count() == 0;
  assert(not_yet_recorded);
  first_token_timestamp_ = Clock::now();
}
16+
17+
// Stores the current Clock time as the end timestamp.
// The timestamp is expected to still be at its zero value; setting it a
// second time trips the assert in debug builds.
void Timing::RecordEndTimestamp() {
  [[maybe_unused]] const bool not_yet_recorded = end_timestamp_.time_since_epoch().count() == 0;
  assert(not_yet_recorded);
  end_timestamp_ = Clock::now();
}
21+
22+
void Timing::Log(const int prompt_tokens_length, const int new_tokens_length) {
23+
assert(start_timestamp_.time_since_epoch().count() != 0);
24+
assert(first_token_timestamp_.time_since_epoch().count() != 0);
25+
assert(end_timestamp_.time_since_epoch().count() != 0);
26+
27+
Duration prompt_time = (first_token_timestamp_ - start_timestamp_);
28+
Duration run_time = (end_timestamp_ - first_token_timestamp_);
29+
30+
const auto default_precision{std::cout.precision()};
31+
std::cout << std::endl;
32+
std::cout << "-------------" << std::endl;
33+
std::cout << std::fixed << std::showpoint << std::setprecision(2)
34+
<< "Prompt length: " << prompt_tokens_length << ", New tokens: " << new_tokens_length
35+
<< ", Time to first: " << prompt_time.count() << "s"
36+
<< ", Prompt tokens per second: " << prompt_tokens_length / prompt_time.count() << " tps"
37+
<< ", New tokens per second: " << new_tokens_length / run_time.count() << " tps"
38+
<< std::setprecision(default_precision) << std::endl;
39+
std::cout << "-------------" << std::endl;
40+
}
41+
42+
void TerminateSession::signalHandler(int signum) {
43+
std::cout << "Interrupt signal received. Terminating current session...\n";
44+
std::unique_lock<std::mutex> lock(mtx);
45+
stopFlag = true;
46+
cv.notify_one();
47+
}
48+
49+
// Watches `generator` (C++ API) until it is done or a stop is requested.
// When stopFlag is observed set while the generator is still running, the
// "terminate_session" runtime option is set to "1", stopFlag is cleared, and
// the function returns.
void TerminateSession::Generator_SetTerminate_Call(OgaGenerator* generator) {
  // Upper bound on each wait so that IsDone() is re-checked at least once a
  // second even when nobody notifies the condition variable.
  const auto poll_interval = std::chrono::milliseconds(1000);

  std::unique_lock<std::mutex> guard(mtx);
  for (;;) {
    if (generator->IsDone()) {
      return;
    }
    if (stopFlag) {
      generator->SetRuntimeOption("terminate_session", "1");
      stopFlag = false;
      return;
    }
    // Sleep until stopFlag becomes true or the poll interval elapses.
    cv.wait_for(guard, poll_interval, [this] { return stopFlag; });
  }
}
62+
63+
// C-API counterpart of Generator_SetTerminate_Call: watches `generator` until
// it is done or a stop is requested. When stopFlag is observed set while the
// generator is still running, the "terminate_session" runtime option is set
// to "1", stopFlag is cleared, and the function returns.
void TerminateSession::Generator_SetTerminate_Call_C(OgaGenerator* generator) {
  // Upper bound on each wait so that OgaGenerator_IsDone() is re-checked at
  // least once a second even when nobody notifies the condition variable.
  const auto poll_interval = std::chrono::milliseconds(1000);

  std::unique_lock<std::mutex> guard(mtx);
  for (;;) {
    if (OgaGenerator_IsDone(generator)) {
      return;
    }
    if (stopFlag) {
      OgaGenerator_SetRuntimeOption(generator, "terminate_session", "1");
      stopFlag = false;
      return;
    }
    // Sleep until stopFlag becomes true or the poll interval elapses.
    cv.wait_for(guard, poll_interval, [this] { return stopFlag; });
  }
}
76+
77+
// Returns true when `path` can be opened for reading as an input file stream.
bool FileExists(const char* path) {
  std::ifstream probe(path);
  // A stream's boolean conversion is defined as !fail().
  return !probe.fail();
}
80+
81+
// Returns a copy of `str` with leading and trailing space characters (' ')
// removed. Only the space character is stripped; tabs and newlines are kept.
// An empty or all-space input yields an empty string.
std::string trim(const std::string& str) {
  const size_t first = str.find_first_not_of(' ');
  if (first == std::string::npos) {
    // Nothing but spaces (or nothing at all): the trimmed result is empty.
    return std::string{};
  }
  // `last` is guaranteed to exist and be >= first because at least one
  // non-space character was found above.
  const size_t last = str.find_last_not_of(' ');
  return str.substr(first, last - first + 1);
}
89+
90+
// Prints the program name and the received model path and execution provider
// arguments to std::cerr.
//
// argc/argv: the arguments as passed to main(). Positions 1 (model path) and
// 2 (execution provider) are printed when present; a placeholder is printed
// for any that are missing, so this is safe to call when too few arguments
// were supplied.
void print_usage(int argc, char** argv) {
  // Never dereference past argc or through a null entry: streaming a null
  // char* is undefined behaviour.
  const auto arg_or_placeholder = [argc, argv](int index) -> const char* {
    return (index < argc && argv[index] != nullptr) ? argv[index] : "<not provided>";
  };

  std::cerr << "usage: " << arg_or_placeholder(0) << std::endl;
  std::cerr << "model_path = " << arg_or_placeholder(1) << std::endl;
  std::cerr << "execution_provider = " << arg_or_placeholder(2) << std::endl;
}

0 commit comments

Comments
 (0)