|
22 | 22 | w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18141-b0a120c9684/openvino_toolkit_windows_2025.1.0.dev20250211_x86_64.zip |
23 | 23 |
|
24 | 24 | jobs: |
25 | | - cpp-beam_search_causal_lm-ubuntu: |
26 | | - strategy: |
27 | | - matrix: |
28 | | - executable: |
29 | | - [ |
30 | | - ./build/samples/cpp/text_generation/beam_search_causal_lm, |
31 | | - python ./samples/python/text_generation/beam_search_causal_lm.py, |
32 | | - ] |
33 | | - runs-on: ubuntu-22.04 |
34 | | - defaults: |
35 | | - run: |
36 | | - shell: bash |
37 | | - steps: |
38 | | - - uses: actions/checkout@v4 |
39 | | - with: |
40 | | - submodules: recursive |
41 | | - - uses: actions/setup-python@v4 |
42 | | - with: |
43 | | - python-version: '3.10' |
44 | | - - name: Install OpenVINO |
45 | | - run: | |
46 | | - mkdir ./ov/ |
47 | | - curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
48 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
49 | | - - name: Build app |
50 | | - run: | |
51 | | - source ./ov/setupvars.sh |
52 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
53 | | - cmake --build ./build/ --config Release -j |
54 | | - - name: Download and convert model
55 | | - run: | |
56 | | - source ./ov/setupvars.sh |
57 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
58 | | - python -m pip install -r ./samples/requirements.txt |
59 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 |
60 | | - - name: Compare |
61 | | - env: |
62 | | - PYTHONPATH: "./build/" # C++ ignores that |
63 | | - run: | |
64 | | - source ./ov/setupvars.sh |
65 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt |
66 | | - python -c " |
67 | | - import transformers |
68 | | - with open('pred.txt', 'r') as file: |
69 | | - predictions = file.read() |
70 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
71 | | - prompt = 'Why is the Sun yellow?' |
72 | | - if tokenizer.chat_template: |
73 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
74 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
75 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
76 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
77 | | - idx = predictions.find(ref) |
78 | | - if -1 == idx: |
79 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
80 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
81 | | - " |
82 | | - echo "Why is the Sun yellow?" passed |
83 | | -
|
84 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt |
85 | | - python -c " |
86 | | - import transformers |
87 | | - with open('pred.txt', 'r') as file: |
88 | | - predictions = file.read() |
89 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
90 | | - prompt = '69' |
91 | | - if tokenizer.chat_template: |
92 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
93 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
94 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
95 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
96 | | - idx = predictions.find(ref) |
97 | | - if -1 == idx: |
98 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
99 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
100 | | - " |
101 | | - echo 69 passed |
102 | | -
|
103 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt |
104 | | - python -c " |
105 | | - import transformers |
106 | | - with open('pred.txt', 'r') as file: |
107 | | - predictions = file.read() |
108 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
109 | | - prompt = 'Hi' |
110 | | - if tokenizer.chat_template: |
111 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
112 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
113 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
114 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
115 | | - idx = predictions.find(ref) |
116 | | - if -1 == idx: |
117 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
118 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
119 | | - " |
120 | | - echo "Hi" passed |
121 | | -
|
122 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt |
123 | | - python -c " |
124 | | - import transformers |
125 | | - with open('pred.txt', 'r') as file: |
126 | | - predictions = file.read() |
127 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
128 | | - prompt = 'return 0' |
129 | | - if tokenizer.chat_template: |
130 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
131 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
132 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
133 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
134 | | - idx = predictions.find(ref) |
135 | | - if -1 == idx: |
136 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
137 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
138 | | - " |
139 | | - echo "return 0" passed |
140 | | -
|
141 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt |
142 | | - python -c " |
143 | | - import transformers |
144 | | - with open('pred.txt', 'r', errors='ignore') as file: |
145 | | - predictions = file.read() |
146 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
147 | | - prompt = '你好! 你好嗎?' |
148 | | - if tokenizer.chat_template: |
149 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
150 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
151 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
152 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
153 | | - idx = predictions.find(ref.replace('�', '')) |
154 | | - if -1 == idx: |
155 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
156 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
157 | | - " |
158 | | - echo "你好! 你好嗎?" passed |
159 | | -
|
160 | | - timeout 1m ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" "return 0" "你好! 你好嗎?" > ./pred.txt |
161 | | - python -c " |
162 | | - import transformers |
163 | | - with open('pred.txt', 'r', errors='ignore') as file: |
164 | | - predictions = file.read() |
165 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
166 | | - prompts = [ |
167 | | - 'Why is the Sun yellow?', |
168 | | - 'return 0', |
169 | | - '你好! 你好嗎?' |
170 | | - ] |
171 | | - for prompt in prompts: |
172 | | - if tokenizer.chat_template: |
173 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
174 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
175 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
176 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
177 | | - idx = predictions.find(ref.replace('�', '')) |
178 | | - if -1 == idx: |
179 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
180 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
181 | | - " |
182 | | - echo "Multi prompt" passed |
183 | | -
|
184 | 25 | cpp-greedy_causal_lm-windows: |
185 | 26 | runs-on: windows-latest |
186 | 27 | env: |
@@ -245,63 +86,6 @@ jobs: |
245 | 86 | && call .\ov\setupvars.bat |
246 | 87 | && python samples\python\text_generation\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?" |
247 | 88 |
|
248 | | - cpp-speculative_decoding_lm-ubuntu: |
249 | | - runs-on: ubuntu-22.04-16-cores |
250 | | - defaults: |
251 | | - run: |
252 | | - shell: bash |
253 | | - steps: |
254 | | - - uses: actions/checkout@v4 |
255 | | - with: |
256 | | - submodules: recursive |
257 | | - - uses: actions/setup-python@v4 |
258 | | - with: |
259 | | - python-version: 3.11 |
260 | | - - name: Install OpenVINO |
261 | | - run: | |
262 | | - mkdir ./ov/ |
263 | | - curl ${{ env.l_u22_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
264 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
265 | | - - name: Build app |
266 | | - run: | |
267 | | - source ./ov/setupvars.sh |
268 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
269 | | - cmake --build ./build/ --config Release -j |
270 | | - - name: Download and convert model
271 | | - run: | |
272 | | - source ./ov/setupvars.sh |
273 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
274 | | - python -m pip install -r ./samples/requirements.txt |
275 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b |
276 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b |
277 | | - - name: run and compare |
278 | | - run: | |
279 | | - source ./ov/setupvars.sh |
280 | | - echo Running speculative_decoding_lm C++ sample... |
281 | | - ./build/samples/cpp/text_generation/speculative_decoding_lm ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_speculative.txt |
282 | | - echo Running greedy_causal_lm C++ sample... |
283 | | - ./build/samples/cpp/text_generation/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt |
284 | | - echo Running speculative_decoding_lm Python sample... |
285 | | - python ./samples/python/text_generation/speculative_decoding_lm.py ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_py.txt |
286 | | - echo All samples executed, checking result correctness... |
287 | | - python -c " |
288 | | - with open('predictions_greedy.txt', 'r') as f: |
289 | | - predicted_greedy = f.readline() |
290 | | - with open('predictions_speculative.txt', 'r') as f: |
291 | | - predicted_speculative = f.readline() |
292 | | - with open('predictions_py.txt', 'r') as f: |
293 | | - predicted_py = f.readline() |
294 | | - print(f'Predicted greedy: {predicted_greedy}') |
295 | | - print(f'Predicted speculative: {predicted_speculative}') |
296 | | - assert predicted_greedy == predicted_speculative |
297 | | - assert predicted_greedy == predicted_py |
298 | | - assert predicted_speculative == predicted_py |
299 | | - " |
300 | | - echo "Alan Turing was a" passed |
301 | | - env: |
302 | | - PYTHONPATH: "./build/:$PYTHONPATH" |
303 | | - LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH" |
304 | | - |
305 | 89 | cpp-Phi-1_5: |
306 | 90 | runs-on: ubuntu-22.04-16-cores |
307 | 91 | defaults: |
@@ -485,41 +269,6 @@ jobs: |
485 | 269 | diff pred2.txt ref.txt |
486 | 270 | echo "Chat sample python" passed |
487 | 271 |
|
488 | | - cpp-encrypted_model_causal_lm-ubuntu: |
489 | | - runs-on: ubuntu-24.04 |
490 | | - defaults: |
491 | | - run: |
492 | | - shell: bash |
493 | | - steps: |
494 | | - - uses: actions/checkout@v4 |
495 | | - with: |
496 | | - submodules: recursive |
497 | | - - uses: actions/setup-python@v4 |
498 | | - with: |
499 | | - python-version: 3.11 |
500 | | - - name: Install OpenVINO |
501 | | - run: | |
502 | | - mkdir ./ov/ |
503 | | - curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
504 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
505 | | - - name: Build app |
506 | | - run: | |
507 | | - source ./ov/setupvars.sh |
508 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
509 | | - cmake --build ./build/ --config Release -j |
510 | | - - name: Download and convert model
511 | | - run: | |
512 | | - source ./ov/setupvars.sh |
513 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
514 | | - python -m pip install -r ./samples/requirements.txt |
515 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 |
516 | | - - name: Run |
517 | | - env: |
518 | | - PYTHONPATH: "./build" |
519 | | - run: | |
520 | | - source ./ov/setupvars.sh |
521 | | - timeout 30s ./build/samples/cpp/text_generation/encrypted_model_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the sun yellow?" |
522 | | -
|
523 | 272 | benchmark_genai-ubuntu: |
524 | 273 | runs-on: ubuntu-24.04 |
525 | 274 | defaults: |
@@ -861,9 +610,7 @@ jobs: |
861 | 610 |
|
862 | 611 | Overall_Status: |
863 | 612 | name: ci/gha_overall_status_causal_lm |
864 | | - needs: [cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows, |
865 | | - cpp-speculative_decoding_lm-ubuntu, cpp-Phi-1_5, |
866 | | - cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu, |
| 613 | + needs: [cpp-greedy_causal_lm-windows, cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu, |
867 | 614 | visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-llava_1_5, visual_language_chat_sample-ubuntu-llava_next, visual_language_chat_sample-ubuntu-internvl2, |
868 | 615 | cpp-continuous-batching-windows, cpp-continuous-batching-macos] |
869 | 616 | if: ${{ always() }} |
|
0 commit comments