Skip to content

Commit 96ca5c0

Browse files
Merge pull request #147 from CliMA/ne/hotfix
Fix broken worker polling
2 parents 6a73eb6 + 58c7c6f commit 96ca5c0

File tree

3 files changed

+3
-3
lines changed

3 files changed

+3
-3
lines changed

.buildkite/derecho_pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1 1   agents:
2 2   queue: derecho
3 3   modules: climacommon
4   - pbs_q: preempt
  4 + pbs_q: main
5 5   pbs_l_select: "1:ngpus=1:ncpus=1" # We are out of CPU-only hours...
6 6
7 7   env:

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1 1   name = "ClimaCalibrate"
2 2   uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
3 3   authors = ["Climate Modeling Alliance"]
4   - version = "0.0.11"
  4 + version = "0.0.12"
5 5
6 6   [deps]
7 7   Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"

src/workers.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ function poll_file_for_worker_startup(
202 202   launched_workers = 0
203 203
204 204   for retry_delay in push!(collect(retry_delays), 0)
205     - if process_exited(pid)
    205 + if process_exited(pid) && pid.exitcode != 0
206 206   error(
207 207   """Worker launch process exited with code $(pid.exitcode).
208 208   Please check the terminal for error messages from the job scheduler.""",

0 commit comments

Comments
 (0)