|
40 | 40 | - "requirements/**" |
41 | 41 | - ".azure-pipelines/**" |
42 | 42 | - ".actions/**" |
| 43 | + drafts: false # Only run for PRs that are "ready for review" |
43 | 44 |
|
44 | 45 | jobs: |
45 | 46 | - job: pytest |
@@ -68,59 +69,94 @@ jobs: |
68 | 69 | steps: |
69 | 70 |
|
70 | 71 | - bash: | |
71 | | - . /tmp/venvs/fts_dev/bin/activate |
72 | | - pip install --upgrade pip requests setuptools |
73 | | - pip install -e . --no-warn-script-location --requirement requirements/devel.txt |
74 | | - # pip install lightning --upgrade # rather than upgrade, we now use the relevant pin |
| 72 | + set -e # Exit on any error |
| 73 | + source /tmp/venvs/fts_dev/bin/activate |
| 74 | +
|
| 75 | + echo "=== Installing finetuning-scheduler in editable mode ===" |
| 76 | + if ! uv pip install -e ".[all]" --override requirements/ci/overrides.txt; then |
| 77 | + echo "ERROR: Failed to install finetuning-scheduler in editable mode" |
| 78 | + exit 1 |
| 79 | + fi |
| 80 | + echo "✓ Finetuning-scheduler installation completed" |
| 81 | +
|
| 82 | + echo "=== Installing locked CI requirements ===" |
| 83 | + if ! uv pip install -r requirements/ci/requirements.txt; then |
| 84 | + echo "ERROR: Failed to install locked CI requirements" |
| 85 | + exit 1 |
| 86 | + fi |
| 87 | + echo "✓ CI requirements installation completed" |
| 88 | +
|
| 89 | + echo "=== Installed packages ===" |
| 90 | + uv pip list |
75 | 91 | env: |
76 | 92 | USE_CI_COMMIT_PIN: "1" |
77 | 93 | displayName: 'Install dependencies' |
78 | 94 |
|
79 | 95 | - bash: | |
80 | | - . /tmp/venvs/fts_dev/bin/activate |
| 96 | + source /tmp/venvs/fts_dev/bin/activate |
81 | 97 | python requirements/collect_env_details.py |
82 | 98 | python -c "import torch ; print(f'PyTorch CUDA version: {torch.version.cuda}') ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" |
83 | 99 | displayName: 'Env details' |
84 | 100 |
|
85 | 101 | - bash: | |
86 | | - . /tmp/venvs/fts_dev/bin/activate |
| 102 | + source /tmp/venvs/fts_dev/bin/activate |
87 | 103 | python -m coverage run --source src/finetuning_scheduler -m pytest src/finetuning_scheduler tests -v --junitxml=$(Build.Repository.LocalPath)/test-results.xml --durations=50 |
88 | 104 | displayName: 'Testing: standard' |
89 | 105 |
|
90 | 106 | - bash: | |
91 | | - . /tmp/venvs/fts_dev/bin/activate |
| 107 | + source /tmp/venvs/fts_dev/bin/activate |
92 | 108 | bash ./tests/special_tests.sh --mark_type=standalone --filter_pattern='test_f' |
93 | 109 | displayName: 'Testing: standalone multi-gpu' |
94 | 110 |
|
95 | 111 | # - bash: | |
96 | | - # . /tmp/venvs/fts_dev/bin/activate |
| 112 | + # source /tmp/venvs/fts_dev/bin/activate |
97 | 113 | # bash ./tests/special_tests.sh --mark_type=exp_patch --filter_pattern='test_f' --experiment_patch_mask="1 0 0 1" |
98 | 114 | # displayName: 'Testing: Experimental Multi-GPU' |
99 | 115 |
|
100 | 116 | - bash: | |
101 | | - . /tmp/venvs/fts_dev/bin/activate |
| 117 | + source /tmp/venvs/fts_dev/bin/activate |
102 | 118 | python -m coverage report |
103 | 119 | python -m coverage xml |
104 | 120 | python -m coverage html |
105 | | - curl -Os https://uploader.codecov.io/latest/linux/codecov |
| 121 | + # curl -Os https://uploader.codecov.io/latest/linux/codecov |
| 122 | +
|
| 123 | + curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import |
| 124 | + curl -Os https://cli.codecov.io/latest/linux/codecov |
| 125 | + curl -Os https://cli.codecov.io/latest/linux/codecov.SHA256SUM |
| 126 | + curl -Os https://cli.codecov.io/latest/linux/codecov.SHA256SUM.sig |
| 127 | + gpg --no-default-keyring --keyring trustedkeys.gpg --verify codecov.SHA256SUM.sig codecov.SHA256SUM |
| 128 | + shasum -a 256 -c codecov.SHA256SUM |
106 | 129 | chmod +x codecov |
107 | | - ./codecov -t $CODECOV_TOK --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure |
| 130 | + # ./codecov -t $CODECOV_TOK --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure |
| 131 | + ./codecov upload-process --slug 'speediedan/finetuning-scheduler' -t $CODECOV_TOK --commit-sha $(Build.SourceVersion) --git-service 'github' -n "GPU-coverage" -F 'gpu,pytest' --env 'linux,azure' -f 'coverage.xml' |
108 | 132 | env: |
109 | 133 | CODECOV_TOK: $(CODECOV_TOKEN) # explicit mapping required for secret azure pipeline variables |
110 | 134 | displayName: 'Statistics' |
111 | 135 |
|
112 | 136 | - bash: | |
113 | 137 | set -e |
114 | | - . /tmp/venvs/fts_dev/bin/activate |
| 138 | + source /tmp/venvs/fts_dev/bin/activate |
115 | 139 | python -m pytest src/fts_examples -v --maxfail=1 --durations=0 -W ignore:\`np.object\`:DeprecationWarning -W ignore:'`np.int` is':DeprecationWarning |
116 | 140 | # condition: notIn(variables['scope'], '2.0.1') |
117 | 141 | displayName: 'Testing: Examples' |
118 | 142 |
|
119 | 143 | - bash: | |
120 | | - . /tmp/venvs/fts_dev/bin/activate |
| 144 | + source /tmp/venvs/fts_dev/bin/activate |
121 | 145 | mkdir -p /__w/_temp/kernel_cache |
122 | 146 | bash ./tests/special_tests.sh --mark_type=standalone --collect_dir='src/fts_examples' --filter_pattern='model_parallel_examples' |
123 | 147 | # condition: notIn(variables['scope'], '2.0.1') |
124 | 148 | env: |
125 | 149 | PYTORCH_KERNEL_CACHE_PATH: "/__w/_temp/kernel_cache" |
126 | 150 | displayName: 'Testing: Multi-GPU Examples' |
| 151 | +
|
| 152 | + - bash: | |
| 153 | + # since we use rootless docker and userns-remapping, we need to ensure all files/directories in previous |
| 154 | + # steps that may have been written with Azure's `az_pipeline_agent_azpcontainer` user (100997 in the host |
| 155 | + # subuid range) are chmod'd or removed |
| 156 | + echo "Adjusting ownership/permissions..." |
| 157 | + sudo chmod -R 775 /__w/1/s || true |
| 158 | + echo "Cleaning up ephemeral directories..." |
| 159 | + sudo rm -rf /__w/1/s/.pytest_cache || true |
| 160 | + echo 'Agent workspace cleanup completed' |
| 161 | + condition: always() |
| 162 | + displayName: 'Cleaning up agent workspace' |
0 commit comments