diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 14274cc89b..a70782307b 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -124,7 +124,7 @@ jobs:
           JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
       - uses: julia-actions/julia-runtest@v1
         if: matrix.version != 'nightly' || steps.buildpkg.outcome == 'success'
-        continue-on-error: ${{ matrix.version == 'nightly' || matrix.arch == 'x86' }}
+        continue-on-error: ${{ matrix.version == 'nightly' }}
         id: run_tests
         # TODO restore coverage post https://github.com/JuliaGPU/GPUCompiler.jl/pull/711
         with:
diff --git a/test/runtests.jl b/test/runtests.jl
index b074da9e46..786fc713f6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -118,10 +118,24 @@ else
     end
 end
 
+# Always set the max RSS so that tests which add large global variables (and they do) don't make the GC's life too hard
+if Sys.WORD_SIZE == 64
+    const JULIA_TEST_MAXRSS_MB = 3800
+else
+    # Assume that we only have 3.5GB available to a single process, and that a single
+    # test can take up to 2GB of RSS. This means that we should instruct the test
+    # framework to restart any worker that comes into a test set with 1.5GB of RSS.
+    const JULIA_TEST_MAXRSS_MB = 1536
+end
+
+const max_worker_rss = JULIA_TEST_MAXRSS_MB * 2^20
+
 # determine parallelism
 if !set_jobs
     jobs = Sys.CPU_THREADS
-    memory_jobs = Int(Sys.free_memory()) ÷ (2 * 2^30)
+    # Do the whole computation in Int64: on 32-bit Julia the free-memory byte count may
+    # not fit in `Int`, and the literal `2 * 2^30` would wrap around in Int32.
+    memory_jobs = Int64(Sys.free_memory()) ÷ (Int64(2) * 2^30)
     jobs = max(1, min(jobs, memory_jobs))
 end
 @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the `JULIA_CPU_THREADS` environment variable."
@@ -298,6 +312,11 @@ try
                         p = recycle_worker(p)
                     else
                         print_testworker_stats(test, wrkr, resp)
+                        if resp[end] > max_worker_rss
+                            # the worker has reached the max-rss limit, recycle it
+                            # so future tests start with a smaller working set
+                            p = recycle_worker(p)
+                        end
                     end
                 end
 