-
Notifications
You must be signed in to change notification settings - Fork 0
160 lines (131 loc) · 5.4 KB
/
ci.yml
File metadata and controls
160 lines (131 loc) · 5.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# CI workflow header: triggers, token permissions, and workflow-level
# environment variables.
name: ci-workflow

# Run on every push, every pull request, and on manual dispatch.
on: [push, pull_request, workflow_dispatch]

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# Locations of the third-party tools, all under `<workspace>/third`.
# Declared at workflow level, so every job sees them.
env:
  THIRD_PARTY_DIR: ${{ github.workspace }}/third
  # CORENLP and CORENLP_MODELS point at the same directory.
  CORENLP: ${{ github.workspace }}/third/stanford-corenlp
  CORENLP_MODELS: ${{ github.workspace }}/third/stanford-corenlp
  STANFORD_PARSER: ${{ github.workspace }}/third/stanford-parser
  # STANFORD_MODELS and STANFORD_POSTAGGER point at the same directory.
  STANFORD_MODELS: ${{ github.workspace }}/third/stanford-postagger
  STANFORD_POSTAGGER: ${{ github.workspace }}/third/stanford-postagger
  SENNA: ${{ github.workspace }}/third/senna
  PROVER9: ${{ github.workspace }}/third/prover9/bin
  MEGAM: ${{ github.workspace }}/third/megam
  MALT_PARSER: ${{ github.workspace }}/third/maltparser
jobs:
  # Lint gate: runs every configured pre-commit hook against all files.
  pre-commit:
    name: pre-commit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"  # or your chosen version
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit hooks
        run: pre-commit run --all-files

  # Smoke test on each OS: with only `regex` pip-installed, import nltk and
  # download the `wordnet` package into NLTK_DATA.
  minimal_download_test:
    name: Minimal NLTK Download Test
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"
      - name: Install regex
        run: pip install regex
      # Writing to $GITHUB_ENV makes NLTK_DATA visible to the later steps.
      - name: Set NLTK_DATA environment variable
        shell: bash
        run: echo "NLTK_DATA=${{ github.workspace }}/nltk_data" >> "$GITHUB_ENV"
      - name: Show NLTK_DATA in shell
        shell: bash
        run: |
          echo "NLTK_DATA in shell: $NLTK_DATA"
      - name: Ensure minimal NLTK data for cache
        shell: bash
        run: |
          python -c "import os, nltk; d = os.environ['NLTK_DATA']; import pathlib; pathlib.Path(d).mkdir(parents=True, exist_ok=True); nltk.download('wordnet', download_dir=d)"

  # Full test matrix (Python version x OS). Starts only after the lint gate
  # and the download smoke test have both succeeded.
  test:
    name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
    needs: [pre-commit, minimal_download_test]
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', '3.14t']
        os: [ubuntu-latest, macos-latest, windows-latest]
        exclude:
          - os: windows-latest
            python-version: '3.14t'  # scikit-learn issue on Py3.14t on Windows
      # Let the remaining matrix legs finish even when one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v6
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      # Writing to $GITHUB_ENV makes NLTK_DATA visible to the later steps.
      - name: Set NLTK_DATA environment variable
        shell: bash
        run: echo "NLTK_DATA=${{ github.workspace }}/nltk_data" >> "$GITHUB_ENV"
      # Invoke pip as a module: on Windows the `pip` launcher cannot replace
      # itself while upgrading pip.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade --requirement requirements-ci.txt
      - name: Ensure minimal NLTK data for cache
        shell: bash
        run: |
          python -c "import os, nltk; d = os.environ['NLTK_DATA']; import pathlib; pathlib.Path(d).mkdir(parents=True, exist_ok=True); nltk.download('wordnet', download_dir=d)"
      - name: Show NLTK_DATA and workspace
        shell: bash
        run: |
          echo "GITHUB_WORKSPACE is: $GITHUB_WORKSPACE"
          echo "NLTK_DATA is: $NLTK_DATA"
          python -c "import os; print('Python sees GITHUB_WORKSPACE:', os.environ.get('GITHUB_WORKSPACE')); print('Python sees NLTK_DATA:', os.environ.get('NLTK_DATA'))"
      # Diagnostic only: `|| echo` keeps the step green if the dir is missing.
      - name: List contents of NLTK data dir
        shell: bash
        run: ls -lR "${{ github.workspace }}/nltk_data" || echo "nltk_data not found"
      # The key is per-OS with a manual `_v1` suffix.
      - name: Cache nltk data
        uses: actions/cache@v5
        id: nltk-data-cache
        with:
          path: ${{ github.workspace }}/nltk_data
          key: nltk_data_${{ runner.os }}_v1
      - name: Download nltk data on cache miss
        if: steps.nltk-data-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          python -c "import os; import nltk; from pathlib import Path; path = Path(os.environ['NLTK_DATA']); path.mkdir(parents=True, exist_ok=True); nltk.download('all', download_dir=path)"
      # --- THIRD PARTY TOOLS CACHE SECTION ---
      # `mkdir -p` is POSIX syntax; pin bash so this step does not run under
      # the Windows default shell, matching the other shell steps in this job.
      - name: Ensure third-party directory exists
        shell: bash
        run: mkdir -p "${{ env.THIRD_PARTY_DIR }}"
      # The key includes the hash of the download script, so editing the
      # script changes the key.
      - name: Cache third-party tools
        uses: actions/cache@v5
        id: third-party-cache
        with:
          path: ${{ env.THIRD_PARTY_DIR }}
          key: third_${{ runner.os }}_${{ hashFiles('tools/github_actions/third-party.sh') }}_v1
      - name: List contents of third-party dir before download
        shell: bash
        run: ls -lR "${{ env.THIRD_PARTY_DIR }}" || echo "third-party dir not found"
      - name: Download third-party data on cache miss
        if: steps.third-party-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          chmod +x ./tools/github_actions/third-party.sh
          ./tools/github_actions/third-party.sh
      - name: List contents of third-party dir after download/cache
        shell: bash
        run: ls -lR "${{ env.THIRD_PARTY_DIR }}" || echo "third-party dir not found"
      - name: Print NLTK data search paths
        shell: bash
        run: python -c "import nltk; print('NLTK data search paths:', nltk.data.path)"
      # Runs the doctests in the `nltk` package, parallelised across workers.
      - name: Run pytest
        shell: bash
        run: |
          pytest --numprocesses auto -rsx --doctest-modules nltk