Skip to content

Commit a64cae7

Browse files
[GHA] Replaced cpp-chat_sample-ubuntu pipeline (openvinotoolkit#1913)
Replaced cpp-chat_sample-ubuntu pipeline to Linux samples pipeline --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent 93d63f8 commit a64cae7

File tree

2 files changed

+63
-68
lines changed

2 files changed

+63
-68
lines changed

.github/workflows/causal_lm_cpp.yml

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,73 +22,6 @@ env:
2222
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_windows_2025.1.0.dev20250304_x86_64.zip
2323

2424
jobs:
25-
cpp-chat_sample-ubuntu:
26-
runs-on: ubuntu-24.04
27-
defaults:
28-
run:
29-
shell: bash
30-
steps:
31-
- uses: actions/checkout@v4
32-
with:
33-
submodules: recursive
34-
- uses: actions/setup-python@v4
35-
with:
36-
python-version: 3.11
37-
- name: Install OpenVINO
38-
run: |
39-
mkdir ./ov/
40-
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
41-
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
42-
- name: Build app
43-
run: |
44-
source ./ov/setupvars.sh
45-
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
46-
cmake --build ./build/ --config Release -j
47-
- name: Download and convert model
48-
run: |
49-
source ./ov/setupvars.sh
50-
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
51-
python -m pip install -r ./samples/requirements.txt
52-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
53-
- name: Compare
54-
env:
55-
PYTHONPATH: "./build"
56-
run: |
57-
source ./ov/setupvars.sh
58-
printf 'What is 2 + 2?\nWhat is the previous answer?\nAdd 1 to it.\nSubtract 5 from it.\nWhy is the sun yellow?\nWhat was my first question?\n' > ./input.txt
59-
timeout 30s ./build/samples/cpp/text_generation/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt
60-
python -c "
61-
from transformers import AutoTokenizer, AutoModelForCausalLM
62-
model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
63-
tokenizer = AutoTokenizer.from_pretrained(model_id)
64-
model = AutoModelForCausalLM.from_pretrained(model_id)
65-
prompts = ['What is 2 + 2?', 'What is the previous answer?', 'Add 1 to it.', 'Subtract 5 from it.', 'Why is the sun yellow?', 'What was my first question?']
66-
def gen_prompt(prompt):
67-
return {'role': 'user', 'content': prompt}
68-
def gen_answer(answer):
69-
return {'role': 'assistant', 'content': answer}
70-
chat_history = []
71-
chat_prompt = ''
72-
output = open('ref.txt', 'w')
73-
for prompt in prompts:
74-
output.write('question:\n')
75-
chat_history.append(gen_prompt(prompt))
76-
chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
77-
tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False)
78-
answer = model.generate(**tokenized, max_length=1000, do_sample=False)
79-
answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
80-
chat_history.append(gen_answer(answer_str))
81-
output.write(answer_str)
82-
output.write('\n----------\n')
83-
output.write('question:\n')
84-
output.close()
85-
"
86-
diff pred.txt ref.txt
87-
echo "Chat sample cpp" passed
88-
timeout 30s ./samples/python/text_generation/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
89-
diff pred2.txt ref.txt
90-
echo "Chat sample python" passed
91-
9225
cpp-continuous-batching-ubuntu:
9326
runs-on: ubuntu-22.04-8-cores
9427
defaults:
@@ -226,7 +159,7 @@ jobs:
226159
227160
Overall_Status:
228161
name: ci/gha_overall_status_causal_lm
229-
needs: [cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu, cpp-continuous-batching-windows, cpp-continuous-batching-macos]
162+
needs: [cpp-continuous-batching-ubuntu, cpp-continuous-batching-windows, cpp-continuous-batching-macos]
230163
if: ${{ always() }}
231164
runs-on: ubuntu-latest
232165
steps:
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import os
5+
import pytest
6+
import sys
7+
8+
from conftest import logger, SAMPLES_PY_DIR, SAMPLES_CPP_DIR, MODELS
9+
from test_utils import run_sample
10+
11+
class TestChatSample:
12+
@pytest.mark.llm
13+
@pytest.mark.samples
14+
@pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
15+
@pytest.mark.parametrize("prompts",
16+
[
17+
['What is 2 + 2?', 'What is the previous answer?', 'Add 1 to it.', 'Subtract 5 from it.', 'Why is the sun yellow?', 'What was my first question?'],
18+
],
19+
)
20+
def test_chat_sample_refs(self, request, convert_model, prompts):
21+
# Python test
22+
py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/chat_sample.py")
23+
py_command = [sys.executable, py_script, convert_model]
24+
py_result = run_sample(py_command, '\n'.join(prompts))
25+
py_predictions = py_result.stdout
26+
27+
# C++ test
28+
cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'chat_sample')
29+
cpp_command = [cpp_sample, convert_model]
30+
cpp_result = run_sample(cpp_command, '\n'.join(prompts))
31+
cpp_predictions = cpp_result.stdout
32+
33+
# Compare results
34+
assert py_predictions == cpp_predictions, "Python and C++ results should match"
35+
36+
model_name = request.node.callspec.params['convert_model']
37+
model = MODELS[model_name]
38+
39+
from transformers import AutoTokenizer, AutoModelForCausalLM
40+
tokenizer = AutoTokenizer.from_pretrained(model['name'])
41+
model = AutoModelForCausalLM.from_pretrained(model['name'])
42+
43+
def gen_prompt(prompt):
44+
return {'role': 'user', 'content': prompt}
45+
def gen_answer(answer):
46+
return {'role': 'assistant', 'content': answer}
47+
48+
chat_history = []
49+
50+
for prompt in prompts:
51+
chat_history.append(gen_prompt(prompt))
52+
if tokenizer.chat_template:
53+
prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
54+
tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
55+
for answer in model.generate(**tokenized, max_length=1000, do_sample=False):
56+
ref = tokenizer.decode(answer[tokenized['input_ids'].numel():], skip_special_tokens=True)
57+
chat_history.append(gen_answer(ref))
58+
59+
logger.info(f'Checking for "{ref=}"')
60+
idx = cpp_predictions.find(ref)
61+
assert -1 != idx, f'Missing "{ref=}" from predictions'
62+
cpp_predictions = cpp_predictions[:idx] + cpp_predictions[idx + len(ref):]

0 commit comments

Comments
 (0)