Skip to content

Commit fe82dac

Browse files
committed
Merge branch 'master' into docs-pages
2 parents 42cbfbe + 2e08d78 commit fe82dac

File tree

134 files changed

+6252
-5301
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

134 files changed

+6252
-5301
lines changed

.github/workflows/causal_lm_cpp.yml

Lines changed: 1 addition & 254 deletions
Original file line numberDiff line numberDiff line change
@@ -22,165 +22,6 @@ env:
2222
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18141-b0a120c9684/openvino_toolkit_windows_2025.1.0.dev20250211_x86_64.zip
2323

2424
jobs:
25-
cpp-beam_search_causal_lm-ubuntu:
26-
strategy:
27-
matrix:
28-
executable:
29-
[
30-
./build/samples/cpp/text_generation/beam_search_causal_lm,
31-
python ./samples/python/text_generation/beam_search_causal_lm.py,
32-
]
33-
runs-on: ubuntu-22.04
34-
defaults:
35-
run:
36-
shell: bash
37-
steps:
38-
- uses: actions/checkout@v4
39-
with:
40-
submodules: recursive
41-
- uses: actions/setup-python@v4
42-
with:
43-
python-version: '3.10'
44-
- name: Install OpenVINO
45-
run: |
46-
mkdir ./ov/
47-
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
48-
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
49-
- name: Build app
50-
run: |
51-
source ./ov/setupvars.sh
52-
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
53-
cmake --build ./build/ --config Release -j
54-
- name: Download and convert and model
55-
run: |
56-
source ./ov/setupvars.sh
57-
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
58-
python -m pip install -r ./samples/requirements.txt
59-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
60-
- name: Compare
61-
env:
62-
PYTHONPATH: "./build/" # C++ ignores that
63-
run: |
64-
source ./ov/setupvars.sh
65-
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
66-
python -c "
67-
import transformers
68-
with open('pred.txt', 'r') as file:
69-
predictions = file.read()
70-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
71-
prompt = 'Why is the Sun yellow?'
72-
if tokenizer.chat_template:
73-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
74-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
75-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
76-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
77-
idx = predictions.find(ref)
78-
if -1 == idx:
79-
raise RuntimeError(f'Missing "{ref=}" from predictions')
80-
predictions = predictions[:idx] + predictions[idx + len(ref):]
81-
"
82-
echo "Why is the Sun yellow?" passed
83-
84-
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
85-
python -c "
86-
import transformers
87-
with open('pred.txt', 'r') as file:
88-
predictions = file.read()
89-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
90-
prompt = '69'
91-
if tokenizer.chat_template:
92-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
93-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
94-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
95-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
96-
idx = predictions.find(ref)
97-
if -1 == idx:
98-
raise RuntimeError(f'Missing "{ref=}" from predictions')
99-
predictions = predictions[:idx] + predictions[idx + len(ref):]
100-
"
101-
echo 69 passed
102-
103-
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
104-
python -c "
105-
import transformers
106-
with open('pred.txt', 'r') as file:
107-
predictions = file.read()
108-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
109-
prompt = 'Hi'
110-
if tokenizer.chat_template:
111-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
112-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
113-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
114-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
115-
idx = predictions.find(ref)
116-
if -1 == idx:
117-
raise RuntimeError(f'Missing "{ref=}" from predictions')
118-
predictions = predictions[:idx] + predictions[idx + len(ref):]
119-
"
120-
echo "Hi" passed
121-
122-
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
123-
python -c "
124-
import transformers
125-
with open('pred.txt', 'r') as file:
126-
predictions = file.read()
127-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
128-
prompt = 'return 0'
129-
if tokenizer.chat_template:
130-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
131-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
132-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
133-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
134-
idx = predictions.find(ref)
135-
if -1 == idx:
136-
raise RuntimeError(f'Missing "{ref=}" from predictions')
137-
predictions = predictions[:idx] + predictions[idx + len(ref):]
138-
"
139-
echo "return 0" passed
140-
141-
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt
142-
python -c "
143-
import transformers
144-
with open('pred.txt', 'r', errors='ignore') as file:
145-
predictions = file.read()
146-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
147-
prompt = '你好! 你好嗎?'
148-
if tokenizer.chat_template:
149-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
150-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
151-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
152-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
153-
idx = predictions.find(ref.replace('�', ''))
154-
if -1 == idx:
155-
raise RuntimeError(f'Missing "{ref=}" from predictions')
156-
predictions = predictions[:idx] + predictions[idx + len(ref):]
157-
"
158-
echo "你好! 你好嗎?" passed
159-
160-
timeout 1m ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" "return 0" "你好! 你好嗎?" > ./pred.txt
161-
python -c "
162-
import transformers
163-
with open('pred.txt', 'r', errors='ignore') as file:
164-
predictions = file.read()
165-
tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
166-
prompts = [
167-
'Why is the Sun yellow?',
168-
'return 0',
169-
'你好! 你好嗎?'
170-
]
171-
for prompt in prompts:
172-
if tokenizer.chat_template:
173-
prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)
174-
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
175-
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
176-
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
177-
idx = predictions.find(ref.replace('�', ''))
178-
if -1 == idx:
179-
raise RuntimeError(f'Missing "{ref=}" from predictions')
180-
predictions = predictions[:idx] + predictions[idx + len(ref):]
181-
"
182-
echo "Multi prompt" passed
183-
18425
cpp-greedy_causal_lm-windows:
18526
runs-on: windows-latest
18627
env:
@@ -245,63 +86,6 @@ jobs:
24586
&& call .\ov\setupvars.bat
24687
&& python samples\python\text_generation\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?"
24788
248-
cpp-speculative_decoding_lm-ubuntu:
249-
runs-on: ubuntu-22.04-16-cores
250-
defaults:
251-
run:
252-
shell: bash
253-
steps:
254-
- uses: actions/checkout@v4
255-
with:
256-
submodules: recursive
257-
- uses: actions/setup-python@v4
258-
with:
259-
python-version: 3.11
260-
- name: Install OpenVINO
261-
run: |
262-
mkdir ./ov/
263-
curl ${{ env.l_u22_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
264-
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
265-
- name: Build app
266-
run: |
267-
source ./ov/setupvars.sh
268-
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
269-
cmake --build ./build/ --config Release -j
270-
- name: Download and convert and model
271-
run: |
272-
source ./ov/setupvars.sh
273-
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
274-
python -m pip install -r ./samples/requirements.txt
275-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
276-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
277-
- name: run and compare
278-
run: |
279-
source ./ov/setupvars.sh
280-
echo Running speculative_decoding_lm C++ sample...
281-
./build/samples/cpp/text_generation/speculative_decoding_lm ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_speculative.txt
282-
echo Running greedy_causal_lm C++ sample...
283-
./build/samples/cpp/text_generation/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
284-
echo Running speculative_decoding_lm Python sample...
285-
python ./samples/python/text_generation/speculative_decoding_lm.py ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_py.txt
286-
echo All samples executed, checking result correctness...
287-
python -c "
288-
with open('predictions_greedy.txt', 'r') as f:
289-
predicted_greedy = f.readline()
290-
with open('predictions_speculative.txt', 'r') as f:
291-
predicted_speculative = f.readline()
292-
with open('predictions_py.txt', 'r') as f:
293-
predicted_py = f.readline()
294-
print(f'Predicted greedy: {predicted_greedy}')
295-
print(f'Predicted speculative: {predicted_speculative}')
296-
assert predicted_greedy == predicted_speculative
297-
assert predicted_greedy == predicted_py
298-
assert predicted_speculative == predicted_py
299-
"
300-
echo "Alan Turing was a" passed
301-
env:
302-
PYTHONPATH: "./build/:$PYTHONPATH"
303-
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
304-
30589
cpp-Phi-1_5:
30690
runs-on: ubuntu-22.04-16-cores
30791
defaults:
@@ -485,41 +269,6 @@ jobs:
485269
diff pred2.txt ref.txt
486270
echo "Chat sample python" passed
487271
488-
cpp-encrypted_model_causal_lm-ubuntu:
489-
runs-on: ubuntu-24.04
490-
defaults:
491-
run:
492-
shell: bash
493-
steps:
494-
- uses: actions/checkout@v4
495-
with:
496-
submodules: recursive
497-
- uses: actions/setup-python@v4
498-
with:
499-
python-version: 3.11
500-
- name: Install OpenVINO
501-
run: |
502-
mkdir ./ov/
503-
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
504-
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
505-
- name: Build app
506-
run: |
507-
source ./ov/setupvars.sh
508-
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
509-
cmake --build ./build/ --config Release -j
510-
- name: Download and convert and model
511-
run: |
512-
source ./ov/setupvars.sh
513-
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
514-
python -m pip install -r ./samples/requirements.txt
515-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
516-
- name: Run
517-
env:
518-
PYTHONPATH: "./build"
519-
run: |
520-
source ./ov/setupvars.sh
521-
timeout 30s ./build/samples/cpp/text_generation/encrypted_model_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the sun yellow?"
522-
523272
benchmark_genai-ubuntu:
524273
runs-on: ubuntu-24.04
525274
defaults:
@@ -861,9 +610,7 @@ jobs:
861610
862611
Overall_Status:
863612
name: ci/gha_overall_status_causal_lm
864-
needs: [cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows,
865-
cpp-speculative_decoding_lm-ubuntu, cpp-Phi-1_5,
866-
cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
613+
needs: [cpp-greedy_causal_lm-windows, cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
867614
visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-llava_1_5, visual_language_chat_sample-ubuntu-llava_next, visual_language_chat_sample-ubuntu-internvl2,
868615
cpp-continuous-batching-windows, cpp-continuous-batching-macos]
869616
if: ${{ always() }}

.github/workflows/genai-tools.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@ jobs:
4444
with:
4545
platform: ubuntu22
4646
commit_packages_to_provide: wheels
47-
revision: latest_available_commit
47+
revision: a8aba4e104f027c2ba8a21fd6c4c861110c57ed9
4848

4949
llm_bench:
5050
name: 'LLM bench tests'
5151
defaults:
5252
run:
5353
shell: bash
54-
runs-on: ubuntu-22.04
54+
runs-on: ubuntu-22.04-8-cores
5555
strategy:
5656
fail-fast: false
5757
matrix:
@@ -151,7 +151,7 @@ jobs:
151151
defaults:
152152
run:
153153
shell: bash
154-
runs-on: ubuntu-22.04
154+
runs-on: ubuntu-22.04-8-cores
155155
strategy:
156156
fail-fast: false
157157
matrix:

.github/workflows/lcm_dreamshaper_cpp.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ env:
2424

2525
jobs:
2626
lcm_dreamshaper_v7_cpp-linux:
27-
runs-on: ubuntu-22.04
27+
runs-on: ubuntu-22.04-8-cores
2828
defaults:
2929
run:
3030
shell: bash
@@ -108,7 +108,7 @@ jobs:
108108
${{ env.build_dir }}/samples/cpp/image_generation/benchmark_image_gen -t inpainting -m ./models/lcm_dreamshaper_v7 -p "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" -i ./image.png --mi ./mask_image.png
109109
110110
lcm_dreamshaper_v7_cpp-windows:
111-
runs-on: windows-2022
111+
runs-on: aks-win-4-cores-8gb-staging
112112
defaults:
113113
run:
114114
shell: pwsh

.github/workflows/linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
with:
5353
platform: ubuntu22
5454
commit_packages_to_provide: wheels,openvino_js_package.tar.gz
55-
revision: 1643337771eec0816aada5a6245a93fbff4b976a
55+
revision: a8aba4e104f027c2ba8a21fd6c4c861110c57ed9
5656

5757
- name: Clone docker tag from OpenVINO repo
5858
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

0 commit comments

Comments
 (0)