@@ -102,13 +102,13 @@ jobs:
102102 workers : 4
103103 - backend : argo-kubernetes
104104 services : " minio,postgresql,metadata-service,argo-workflows"
105- workers : 2
105+ workers : 1
106106 - backend : airflow-kubernetes
107107 services : " minio,postgresql,metadata-service,airflow"
108- workers : 2
108+ workers : 1
109109 - backend : sfn-batch
110110 services : " minio,postgresql,metadata-service,localbatch,ddb-local,sfn-local"
111- workers : 4
111+ workers : 2
112112
113113 runs-on : ubuntu-latest
114114
@@ -130,15 +130,15 @@ jobs:
130130 - name : Install Metaflow and test dependencies  # editable install of this repo with its [dev] extra plus pytest plugins; NOTE(review): localbatch is installed from the main branch, i.e. not pinned to a commit
131131 run : |
132132 pip install --upgrade pip
133- pip install -e ".[dev]" pytest-xdist pytest-timeout pytest-cov
133+ pip install -e ".[dev]" pytest-xdist pytest-timeout pytest-cov pytest-rerunfailures
134134 pip install "git+https://github.com/npow/localbatch.git@main#egg=localbatch"
135135
136136 - name : Set up minikube  # runs the setup-minikube action, pinned to a full commit SHA
137137 uses : medyagh/setup-minikube@aba8d5ff1666c72adf94ccd078b2ca12e7756382
138138 with :
139139 driver : docker
140140 cpus : 2
141- memory : 6144
141+ memory : 4096  # presumably megabytes (minikube's default unit) — TODO confirm against the action's inputs
142142
143143 - name : Restore minikube image cache
144144 id : image-cache
@@ -236,11 +236,17 @@ jobs:
236236 if : matrix.backend == 'airflow-kubernetes'
237237 run : devtools/ci/wait-airflow-api.sh
238238
239- - name : Clean up completed pods (argo/airflow only)
239+ - name : Clean up completed pods and start background cleanup
240240 if : matrix.backend == 'argo-kubernetes' || matrix.backend == 'airflow-kubernetes'
241241 run : |
242242 kubectl delete pods --field-selector=status.phase=Succeeded --all-namespaces 2>/dev/null || true
243243 kubectl delete pods --field-selector=status.phase=Failed --all-namespaces 2>/dev/null || true
244+ # Background loop: every 60s delete Succeeded/Failed pods to free cluster resources during the test run; its output is discarded so it never writes to this step's stdout/stderr after the step has finished
245+ while true; do
246+ sleep 60
247+ kubectl delete pods --field-selector=status.phase=Succeeded --all-namespaces 2>/dev/null || true
248+ kubectl delete pods --field-selector=status.phase=Failed --all-namespaces 2>/dev/null || true
249+ done >/dev/null 2>&1 &
244250
245251 - name : Run UX tests — ${{ matrix.backend }}
246252 run : |
@@ -253,6 +259,8 @@ jobs:
253259 -v \
254260 --tb=short \
255261 --timeout=1800 \
262+ --reruns 1 \
263+ --reruns-delay 10 \
256264 --cov=metaflow \
257265 --cov-report=term-missing \
258266 --cov-report=xml:coverage.xml \
0 commit comments