|
22 | 22 | w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18141-b0a120c9684/openvino_toolkit_windows_2025.1.0.dev20250211_x86_64.zip |
23 | 23 |
|
24 | 24 | jobs: |
25 | | - cpp-beam_search_causal_lm-ubuntu: |
26 | | - strategy: |
27 | | - matrix: |
28 | | - executable: |
29 | | - [ |
30 | | - ./build/samples/cpp/text_generation/beam_search_causal_lm, |
31 | | - python ./samples/python/text_generation/beam_search_causal_lm.py, |
32 | | - ] |
33 | | - runs-on: ubuntu-22.04 |
34 | | - defaults: |
35 | | - run: |
36 | | - shell: bash |
37 | | - steps: |
38 | | - - uses: actions/checkout@v4 |
39 | | - with: |
40 | | - submodules: recursive |
41 | | - - uses: actions/setup-python@v4 |
42 | | - with: |
43 | | - python-version: '3.10' |
44 | | - - name: Install OpenVINO |
45 | | - run: | |
46 | | - mkdir ./ov/ |
47 | | - curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
48 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
49 | | - - name: Build app |
50 | | - run: | |
51 | | - source ./ov/setupvars.sh |
52 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
53 | | - cmake --build ./build/ --config Release -j |
54 | | - - name: Download and convert model
55 | | - run: | |
56 | | - source ./ov/setupvars.sh |
57 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
58 | | - python -m pip install -r ./samples/requirements.txt |
59 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 |
60 | | - - name: Compare |
61 | | - env: |
62 | | - PYTHONPATH: "./build/" # C++ ignores that |
63 | | - run: | |
64 | | - source ./ov/setupvars.sh |
65 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt |
66 | | - python -c " |
67 | | - import transformers |
68 | | - with open('pred.txt', 'r') as file: |
69 | | - predictions = file.read() |
70 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
71 | | - prompt = 'Why is the Sun yellow?' |
72 | | - if tokenizer.chat_template: |
73 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
74 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
75 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
76 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
77 | | - idx = predictions.find(ref) |
78 | | - if -1 == idx: |
79 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
80 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
81 | | - " |
82 | | - echo "Why is the Sun yellow?" passed |
83 | | -
|
84 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt |
85 | | - python -c " |
86 | | - import transformers |
87 | | - with open('pred.txt', 'r') as file: |
88 | | - predictions = file.read() |
89 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
90 | | - prompt = '69' |
91 | | - if tokenizer.chat_template: |
92 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
93 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
94 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
95 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
96 | | - idx = predictions.find(ref) |
97 | | - if -1 == idx: |
98 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
99 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
100 | | - " |
101 | | - echo 69 passed |
102 | | -
|
103 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt |
104 | | - python -c " |
105 | | - import transformers |
106 | | - with open('pred.txt', 'r') as file: |
107 | | - predictions = file.read() |
108 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
109 | | - prompt = 'Hi' |
110 | | - if tokenizer.chat_template: |
111 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
112 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
113 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
114 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
115 | | - idx = predictions.find(ref) |
116 | | - if -1 == idx: |
117 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
118 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
119 | | - " |
120 | | - echo "Hi" passed |
121 | | -
|
122 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt |
123 | | - python -c " |
124 | | - import transformers |
125 | | - with open('pred.txt', 'r') as file: |
126 | | - predictions = file.read() |
127 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
128 | | - prompt = 'return 0' |
129 | | - if tokenizer.chat_template: |
130 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
131 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
132 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
133 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
134 | | - idx = predictions.find(ref) |
135 | | - if -1 == idx: |
136 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
137 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
138 | | - " |
139 | | - echo "return 0" passed |
140 | | -
|
141 | | - timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt |
142 | | - python -c " |
143 | | - import transformers |
144 | | - with open('pred.txt', 'r', errors='ignore') as file: |
145 | | - predictions = file.read() |
146 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
147 | | - prompt = '你好! 你好嗎?' |
148 | | - if tokenizer.chat_template: |
149 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
150 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
151 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
152 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
153 | | - idx = predictions.find(ref.replace('�', '')) |
154 | | - if -1 == idx: |
155 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
156 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
157 | | - " |
158 | | - echo "你好! 你好嗎?" passed |
159 | | -
|
160 | | - timeout 1m ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" "return 0" "你好! 你好嗎?" > ./pred.txt |
161 | | - python -c " |
162 | | - import transformers |
163 | | - with open('pred.txt', 'r', errors='ignore') as file: |
164 | | - predictions = file.read() |
165 | | - tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') |
166 | | - prompts = [ |
167 | | - 'Why is the Sun yellow?', |
168 | | - 'return 0', |
169 | | - '你好! 你好嗎?' |
170 | | - ] |
171 | | - for prompt in prompts: |
172 | | - if tokenizer.chat_template: |
173 | | - prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) |
174 | | - tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) |
175 | | - for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): |
176 | | - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) |
177 | | - idx = predictions.find(ref.replace('�', '')) |
178 | | - if -1 == idx: |
179 | | - raise RuntimeError(f'Missing "{ref=}" from predictions') |
180 | | - predictions = predictions[:idx] + predictions[idx + len(ref):] |
181 | | - " |
182 | | - echo "Multi prompt" passed |
183 | | -
|
184 | 25 | cpp-greedy_causal_lm-windows: |
185 | 26 | runs-on: windows-latest |
186 | 27 | env: |
@@ -245,63 +86,6 @@ jobs: |
245 | 86 | && call .\ov\setupvars.bat |
246 | 87 | && python samples\python\text_generation\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?" |
247 | 88 |
|
248 | | - cpp-speculative_decoding_lm-ubuntu: |
249 | | - runs-on: ubuntu-22.04-16-cores |
250 | | - defaults: |
251 | | - run: |
252 | | - shell: bash |
253 | | - steps: |
254 | | - - uses: actions/checkout@v4 |
255 | | - with: |
256 | | - submodules: recursive |
257 | | - - uses: actions/setup-python@v4 |
258 | | - with: |
259 | | - python-version: 3.11 |
260 | | - - name: Install OpenVINO |
261 | | - run: | |
262 | | - mkdir ./ov/ |
263 | | - curl ${{ env.l_u22_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
264 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
265 | | - - name: Build app |
266 | | - run: | |
267 | | - source ./ov/setupvars.sh |
268 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
269 | | - cmake --build ./build/ --config Release -j |
270 | | - - name: Download and convert model
271 | | - run: | |
272 | | - source ./ov/setupvars.sh |
273 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
274 | | - python -m pip install -r ./samples/requirements.txt |
275 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b |
276 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b |
277 | | - - name: run and compare |
278 | | - run: | |
279 | | - source ./ov/setupvars.sh |
280 | | - echo Running speculative_decoding_lm C++ sample... |
281 | | - ./build/samples/cpp/text_generation/speculative_decoding_lm ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_speculative.txt |
282 | | - echo Running greedy_causal_lm C++ sample... |
283 | | - ./build/samples/cpp/text_generation/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt |
284 | | - echo Running speculative_decoding_lm Python sample... |
285 | | - python ./samples/python/text_generation/speculative_decoding_lm.py ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_py.txt |
286 | | - echo All samples executed, checking result correctness... |
287 | | - python -c " |
288 | | - with open('predictions_greedy.txt', 'r') as f: |
289 | | - predicted_greedy = f.readline() |
290 | | - with open('predictions_speculative.txt', 'r') as f: |
291 | | - predicted_speculative = f.readline() |
292 | | - with open('predictions_py.txt', 'r') as f: |
293 | | - predicted_py = f.readline() |
294 | | - print(f'Predicted greedy: {predicted_greedy}') |
295 | | - print(f'Predicted speculative: {predicted_speculative}') |
296 | | - assert predicted_greedy == predicted_speculative |
297 | | - assert predicted_greedy == predicted_py |
298 | | - assert predicted_speculative == predicted_py |
299 | | - " |
300 | | - echo "Alan Turing was a" passed |
301 | | - env: |
302 | | - PYTHONPATH: "./build/:$PYTHONPATH" |
303 | | - LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH" |
304 | | - |
305 | 89 | cpp-Phi-1_5: |
306 | 90 | runs-on: ubuntu-22.04-16-cores |
307 | 91 | defaults: |
@@ -485,41 +269,6 @@ jobs: |
485 | 269 | diff pred2.txt ref.txt |
486 | 270 | echo "Chat sample python" passed |
487 | 271 |
|
488 | | - cpp-encrypted_model_causal_lm-ubuntu: |
489 | | - runs-on: ubuntu-24.04 |
490 | | - defaults: |
491 | | - run: |
492 | | - shell: bash |
493 | | - steps: |
494 | | - - uses: actions/checkout@v4 |
495 | | - with: |
496 | | - submodules: recursive |
497 | | - - uses: actions/setup-python@v4 |
498 | | - with: |
499 | | - python-version: 3.11 |
500 | | - - name: Install OpenVINO |
501 | | - run: | |
502 | | - mkdir ./ov/ |
503 | | - curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz |
504 | | - sudo ./ov/install_dependencies/install_openvino_dependencies.sh |
505 | | - - name: Build app |
506 | | - run: | |
507 | | - source ./ov/setupvars.sh |
508 | | - cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ |
509 | | - cmake --build ./build/ --config Release -j |
510 | | - - name: Download and convert model
511 | | - run: | |
512 | | - source ./ov/setupvars.sh |
513 | | - python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly |
514 | | - python -m pip install -r ./samples/requirements.txt |
515 | | - optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 |
516 | | - - name: Run |
517 | | - env: |
518 | | - PYTHONPATH: "./build" |
519 | | - run: | |
520 | | - source ./ov/setupvars.sh |
521 | | - timeout 30s ./build/samples/cpp/text_generation/encrypted_model_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the sun yellow?" |
522 | | -
|
523 | 272 | benchmark_genai-ubuntu: |
524 | 273 | runs-on: ubuntu-24.04 |
525 | 274 | defaults: |
@@ -861,9 +610,7 @@ jobs: |
861 | 610 |
|
862 | 611 | Overall_Status: |
863 | 612 | name: ci/gha_overall_status_causal_lm |
864 | | - needs: [cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows, |
865 | | - cpp-speculative_decoding_lm-ubuntu, cpp-Phi-1_5, |
866 | | - cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu, |
| 613 | + needs: [cpp-greedy_causal_lm-windows, cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu, |
867 | 614 | visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-llava_1_5, visual_language_chat_sample-ubuntu-llava_next, visual_language_chat_sample-ubuntu-internvl2, |
868 | 615 | cpp-continuous-batching-windows, cpp-continuous-batching-macos] |
869 | 616 | if: ${{ always() }} |
|
0 commit comments