Skip to content

Commit e96d58e

Browse files
Update nightly tests for Haystack 2 (#312)
* Correctly pick latest 1.x version for nightly * Adapt matrix generation script to handle 2.x * Add Haystack 2 tutorial nightly testing * CI trigger * Fix matrix generation * Fix jq parsing * Use raw jq output * Add dependencies installation * Add dependencies in index for all Haystack 2 tutorials * Update matrix generator to add tutorial dependencies * Install tutorial dependencies if they exist * Add some env var in Haystack 2 tests * Cache common dependencies * Change how secrets are handled in Haystack 2 tutorials * Add missing env var secret * Remove caching for the time being * Install other stuffs too * Try again with cache * Remove caching * Re enable testing for Haystack 1 tutorials * Remove CI trigger * Fix tutorials deps install * Trigger CI * Fix if * Fix if again * Try to fix the install if again * Yet another attempt trying to fix if * Restore max-parallel * Remove CI trigger * Update tutorials to run tests * Fix more errors * Add missing deps * Change api key name for 2.0 tutorials * Introduce a new way of file upload that works for both colab and local envs 🤞 * Update how to download files in tut 30 * Add new dependencies * Update `run_tutorials.yml` for 2.x tutorials (#314) * Update run_tutorials.yml for 2.x tutorials * Update .github/workflows/run_tutorials.yml --------- Co-authored-by: Bilge Yücel <[email protected]> * Rename the workflow for v2 tutorials * Rework workflow to test Haystack 1.x tutorials in PR (#313) * Rework workflow to test Haystack 1.x tutorials in PR * Update .github/workflows/run_tutorials_v1.yml * Update .github/workflows/run_tutorials_v1.yml --------- Co-authored-by: Bilge Yücel <[email protected]> --------- Co-authored-by: bilgeyucel <[email protected]>
1 parent bfe4db4 commit e96d58e

17 files changed

+1006
-729
lines changed

.github/workflows/nightly.yml

+79-9
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,30 @@ jobs:
99
# Builds the two job matrices (Haystack 1.x and 2.x tutorials) consumed by the
# run-tutorials-v1 and run-tutorials-v2 jobs through `needs.generate-matrix.outputs`.
generate-matrix:
  runs-on: ubuntu-latest
  outputs:
    matrix_v1: ${{ steps.generator.outputs.matrix_v1 }}
    matrix_v2: ${{ steps.generator.outputs.matrix_v2 }}
  steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: "3.11"
    - id: generator
      env:
        # `gh` reads its credentials from GH_TOKEN
        GH_TOKEN: ${{ github.token }}
      run: |
        # Prints the first release tag of the given major version found in the
        # release list. Dots in the regex are escaped so they only match a
        # literal "."; per_page=100 widens the window so an older major version
        # is not pushed off the first page by newer releases.
        latest_release() {
          gh api "/repos/deepset-ai/haystack/releases?per_page=100" | \
            jq -r --arg major "$1" \
              '[.[].tag_name | select(test("^v" + $major + "\\.[0-9]+\\.[0-9]+$"))] | first'
        }

        # Get tutorial notebooks for v1
        VERSION=$(latest_release 1)
        # jq prints the literal string "null" when no tag matched
        if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
          echo "Could not find a Haystack 1.x release" >&2
          exit 1
        fi
        NOTEBOOKS=$(python ./scripts/generate_matrix.py --haystack-version "$VERSION" --include-main)
        echo "matrix_v1={\"include\":$NOTEBOOKS}" >> "$GITHUB_OUTPUT"

        # Get tutorial notebooks for v2
        VERSION=$(latest_release 2)
        if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
          echo "Could not find a Haystack 2.x release" >&2
          exit 1
        fi
        NOTEBOOKS=$(python ./scripts/generate_matrix.py --haystack-version "$VERSION" --include-main)
        echo "matrix_v2={\"include\":$NOTEBOOKS}" >> "$GITHUB_OUTPUT"
34+
35+
run-tutorials-v1:
2736
needs: generate-matrix
2837
runs-on: ubuntu-latest
2938
container: deepset/haystack:base-cpu-${{ matrix.haystack_version }}
@@ -38,7 +47,7 @@ jobs:
3847
strategy:
3948
max-parallel: 2
4049
fail-fast: false
41-
matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
50+
matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix_v1) }}
4251

4352
env:
4453
HAYSTACK_TELEMETRY_ENABLED: "False"
@@ -66,10 +75,71 @@ jobs:
6675
- name: Install Hugging Face datasets
6776
run: |
6877
pip install "datasets>=2.6.1"
69-
78+
7079
- name: Install ipywidgets for tutorial 24
7180
run: |
72-
pip install ipywidgets
81+
pip install ipywidgets
82+
83+
- name: Convert notebook to Python
84+
run: |
85+
jupyter nbconvert --to python --RegexRemovePreprocessor.patterns '%%bash' ./tutorials/${{ matrix.notebook }}.ipynb
86+
87+
- name: Run the converted notebook
88+
run: |
89+
python ./tutorials/${{ matrix.notebook }}.py
90+
91+
- name: Send Failure to Datadog
92+
if: failure()
93+
uses: masci/datadog@v1
94+
with:
95+
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
96+
api-url: https://api.datadoghq.eu
97+
events: |
98+
- title: "Tutorial ${{ matrix.notebook }} failed"
99+
text: "Branch ${{ github.ref_name }} tests failed"
100+
alert_type: "error"
101+
source_type_name: "Github"
102+
host: ${{ github.repository_owner }}
103+
tags:
104+
- "project:${{ github.repository }}"
105+
- "name:${{ matrix.notebook }}"
106+
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
107+
108+
run-tutorials-v2:
109+
needs: generate-matrix
110+
runs-on: ubuntu-latest
111+
container: deepset/haystack:base-${{ matrix.haystack_version }}
112+
113+
strategy:
114+
max-parallel: 2
115+
fail-fast: false
116+
matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix_v2) }}
117+
118+
env:
119+
HAYSTACK_TELEMETRY_ENABLED: "False"
120+
HF_API_TOKEN: ${{ secrets.HF_API_KEY }}
121+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
122+
SERPERDEV_API_KEY: ${{ secrets.SERPERDEV_API_KEY }}
123+
124+
steps:
125+
- name: Checkout
126+
uses: actions/checkout@v3
127+
128+
- name: Install common dependencies
129+
run: |
130+
apt-get update && apt-get install -y \
131+
build-essential \
132+
gcc \
133+
libsndfile1 \
134+
ffmpeg
135+
136+
pip install nbconvert ipython
137+
pip install "pyworld<=0.2.12" espnet espnet-model-zoo pydub
138+
139+
- name: Install tutorial dependencies
140+
if: toJSON(matrix.dependencies) != '[]'
141+
run: |
142+
pip install "${{ join(matrix.dependencies, '" "')}}"
73143
74144
- name: Convert notebook to Python
75145
run: |

.github/workflows/run_tutorials.yml

-96
This file was deleted.
+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Runs the Haystack 1.x tutorial notebooks touched by a pull request.
# The first job builds a matrix restricted to the changed notebooks, the second
# job converts each of those notebooks to a Python script and executes it.
name: Run Tutorials for v1.x

on:
  workflow_dispatch: # Activate this workflow manually
  pull_request:
    paths:
      # This workflow must run only for v1.x tutorials.
      # Some v1 tutorials are left out on purpose because they can't be run
      # in CI for various reasons.
      - "tutorials/01_Basic_QA_Pipeline.ipynb"
      - "tutorials/03_Scalable_QA_System.ipynb"
      - "tutorials/04_FAQ_style_QA.ipynb"
      - "tutorials/05_Evaluation.ipynb"
      - "tutorials/06_Better_Retrieval_via_Embedding_Retrieval.ipynb"
      - "tutorials/07_RAG_Generator.ipynb"
      - "tutorials/08_Preprocessing.ipynb"
      - "tutorials/10_Knowledge_Graph.ipynb"
      - "tutorials/11_Pipelines.ipynb"
      - "tutorials/14_Query_Classifier.ipynb"
      - "tutorials/15_TableQA.ipynb"
      - "tutorials/16_Document_Classifier_at_Index_Time.ipynb"
      - "tutorials/17_Audio.ipynb"
      - "tutorials/19_Text_to_Image_search_pipeline_with_MultiModal_Retriever.ipynb"
      - "tutorials/20_Using_Haystack_with_REST_API.ipynb"
      - "tutorials/21_Customizing_PromptNode.ipynb"
      - "tutorials/22_Pipeline_with_PromptNode.ipynb"
      - "tutorials/23_Answering_Multihop_Questions_with_Agents.ipynb"
      - "tutorials/24_Building_Chat_App.ipynb"
      - "tutorials/25_Customizing_Agents.ipynb"
      - "tutorials/26_Hybrid_Retrieval.ipynb"

jobs:
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      # The filtered matrix, i.e. only the notebooks changed in this PR
      matrix: ${{ steps.filter.outputs.matrix }}
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - id: generator
        env:
          # `gh` reads its credentials from GH_TOKEN
          GH_TOKEN: ${{ github.token }}
        run: |
          # Get tutorial notebooks for 1.x
          # Dots in the regex are escaped so they only match a literal ".";
          # per_page=100 widens the window so 1.x tags are not pushed off the
          # first page by newer 2.x releases.
          VERSION=$(gh api "/repos/deepset-ai/haystack/releases?per_page=100" | \
            jq -r '[.[].tag_name | select(test("^v1\\.[0-9]+\\.[0-9]+$"))] | first')
          # jq prints the literal string "null" when no tag matched
          if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
            echo "Could not find a Haystack 1.x release" >&2
            exit 1
          fi
          NOTEBOOKS=$(python ./scripts/generate_matrix.py --haystack-version "$VERSION" --include-main)
          echo "matrix={\"include\":$NOTEBOOKS}" >> "$GITHUB_OUTPUT"

      - name: Get changed files
        id: files
        uses: tj-actions/changed-files@v44
        with:
          # We only want v1 tutorials, this is a necessary duplication
          # of the `paths` filter above
          files: |
            tutorials/01_Basic_QA_Pipeline.ipynb
            tutorials/03_Scalable_QA_System.ipynb
            tutorials/04_FAQ_style_QA.ipynb
            tutorials/05_Evaluation.ipynb
            tutorials/06_Better_Retrieval_via_Embedding_Retrieval.ipynb
            tutorials/07_RAG_Generator.ipynb
            tutorials/08_Preprocessing.ipynb
            tutorials/10_Knowledge_Graph.ipynb
            tutorials/11_Pipelines.ipynb
            tutorials/14_Query_Classifier.ipynb
            tutorials/15_TableQA.ipynb
            tutorials/16_Document_Classifier_at_Index_Time.ipynb
            tutorials/17_Audio.ipynb
            tutorials/19_Text_to_Image_search_pipeline_with_MultiModal_Retriever.ipynb
            tutorials/20_Using_Haystack_with_REST_API.ipynb
            tutorials/21_Customizing_PromptNode.ipynb
            tutorials/22_Pipeline_with_PromptNode.ipynb
            tutorials/23_Answering_Multihop_Questions_with_Agents.ipynb
            tutorials/24_Building_Chat_App.ipynb
            tutorials/25_Customizing_Agents.ipynb
            tutorials/26_Hybrid_Retrieval.ipynb

      - name: Filter non changed notebooks
        id: filter
        shell: python
        env:
          MATRIX: ${{ steps.generator.outputs.matrix }}
          CHANGED_FILES: ${{ steps.files.outputs.all_changed_files }}
        run: |
          import json
          import os

          # MATRIX has the shape {"include": [{"notebook": ..., ...}, ...]}
          matrix = json.loads(os.environ["MATRIX"])

          # The `json` input of the changed-files action is not set, so
          # all_changed_files is a whitespace separated list of paths and
          # not a JSON document.
          changed_files = set(os.environ["CHANGED_FILES"].split())

          # Matrix entries carry the notebook name without its extension,
          # changed files are full paths ending in ".ipynb".
          new_matrix = {
              "include": [
                  item
                  for item in matrix["include"]
                  if f"tutorials/{item['notebook']}.ipynb" in changed_files
              ]
          }

          with open(os.environ["GITHUB_OUTPUT"], "a") as f:
              print(f"matrix={json.dumps(new_matrix)}", file=f)

  run-tutorials:
    runs-on: ubuntu-latest
    # Must name the job whose outputs are read in `strategy.matrix` below
    needs: generate-matrix
    # `haystack_version` is the key the nightly workflow reads from the
    # matrix produced by the same generate_matrix.py invocation
    container: deepset/haystack:base-cpu-${{ matrix.haystack_version }}

    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}

    services:
      elasticsearch:
        image: elasticsearch:7.9.2
        env:
          discovery.type: "single-node"
          ES_JAVA_OPTS: "-Xms128m -Xmx256m"

    env:
      HAYSTACK_TELEMETRY_ENABLED: "False"
      ELASTICSEARCH_HOST: "elasticsearch"

    steps:
      - name: Install dependencies
        run: |
          apt-get update && apt-get install -y git build-essential gcc libsndfile1 ffmpeg && rm -rf /var/lib/apt/lists/*
          pip install nbconvert ipython
          pip install "pyworld<=0.2.12" espnet espnet-model-zoo pydub
          # Quoted so the shell never treats the brackets as a glob pattern
          pip install "farm-haystack[pdf]"

      - name: Install Haystack Extras text2speech dependencies
        run: |
          pip install farm-haystack-text2speech

      - name: Install Hugging Face datasets
        run: |
          pip install "datasets>=2.6.1"

      - name: Checkout
        uses: actions/checkout@v3

      # See https://github.com/actions/runner-images/issues/6775
      - name: Change Owner of Container Working Directory
        run: chown root:root .

      - name: Convert notebook to Python
        run: |
          jupyter nbconvert --to python --RegexRemovePreprocessor.patterns '%%bash' ./tutorials/${{ matrix.notebook }}.ipynb

      - name: Run the converted notebook
        run: |
          python ./tutorials/${{ matrix.notebook }}.py

0 commit comments

Comments
 (0)