Skip to content

Commit d0ecdb4

Browse files
authored
Merge pull request #33 from pfnet-research/kill-task-subprocesses-cleanly
Worker should cleanly kill the task handler process and all of its descendant processes on timeout
2 parents df55148 + 69e313c commit d0ecdb4

File tree

6 files changed

+15
-8
lines changed

6 files changed

+15
-8
lines changed

.github/workflows/ci.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Set up Go 1.x
1919
uses: actions/setup-go@v2
2020
with:
21-
go-version: ^1.13
21+
go-version: ^1.14
2222
id: go
2323
- name: Check out code into the Go module directory
2424
uses: actions/checkout@v2
@@ -50,7 +50,7 @@ jobs:
5050
- name: Set up Go 1.x
5151
uses: actions/setup-go@v2
5252
with:
53-
go-version: ^1.13
53+
go-version: ^1.14
5454
id: go
5555
- name: Check out
5656
uses: actions/checkout@v2

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- name: Set up Go 1.x
1818
uses: actions/setup-go@v2
1919
with:
20-
go-version: ^1.13
20+
go-version: ^1.14
2121
id: go
2222
- name: Check out
2323
uses: actions/checkout@v2
@@ -42,7 +42,7 @@ jobs:
4242
- name: Set up Go 1.x
4343
uses: actions/setup-go@v2
4444
with:
45-
go-version: ^1.13
45+
go-version: ^1.14
4646
id: go
4747
- name: Check out
4848
uses: actions/checkout@v2

.goreleaser.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ builds:
99
binary: pftaskqueue
1010
goos:
1111
- linux
12-
- windows
1312
- darwin
1413
goarch:
1514
- amd64

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.13.5 as builder
1+
FROM golang:1.14 as builder
22
ARG RELEASE
33
ARG VERSION
44
WORKDIR /workspace

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/pfnet-research/pftaskqueue
22

3-
go 1.13
3+
go 1.14
44

55
require (
66
github.com/MakeNowJust/heredoc/v2 v2.0.1

pkg/worker/worker.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"strconv"
3232
"strings"
3333
"sync"
34+
"syscall"
3435
"time"
3536

3637
"github.com/pfnet-research/pftaskqueue/pkg/apis/taskqueue"
@@ -301,7 +302,11 @@ func (w *Worker) runCommand(logger zerolog.Logger, t *task.Task) (task.TaskResul
301302

302303
cmdCtx, cmdCtxCancel := context.WithTimeout(w.ctx, t.Spec.ActualTimeout(w.config.TaskHandler.DefaultCommandTimeout))
303304
defer cmdCtxCancel()
304-
cmd := exec.CommandContext(cmdCtx, w.config.TaskHandler.Commands[0], w.config.TaskHandler.Commands[1:]...)
305+
306+
cmd := exec.Command(w.config.TaskHandler.Commands[0], w.config.TaskHandler.Commands[1:]...)
307+
cmd.SysProcAttr = &syscall.SysProcAttr{
308+
Setpgid: true,
309+
}
305310

306311
// Inject workspace path to stdin
307312
rStdout, wStdout := io.Pipe()
@@ -330,6 +335,9 @@ func (w *Worker) runCommand(logger zerolog.Logger, t *task.Task) (task.TaskResul
330335
}()
331336
select {
332337
case <-cmdCtx.Done():
338+
if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil {
339+
streamLogger.Error().Int("pid", cmd.Process.Pid).Err(err).Msg("Failed to kill the process and its descendants")
340+
}
333341
cmdErr = cmdCtx.Err()
334342
case err := <-cmdDone:
335343
cmdErr = err

0 commit comments

Comments
 (0)