Skip to content

Commit 78a264c

Browse files
Migrate parser to Gardener v2 Jobs API (#1098)
* Migrate to the v2 gardener Jobs API
* Update go mod and build to go1.18
1 parent 49b87f3 commit 78a264c

File tree

8 files changed

+129
-163
lines changed

8 files changed

+129
-163
lines changed

.travis.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
dist: bionic
99
language: go
1010
go:
11-
- "1.15.8"
11+
- 1.18
1212

1313
env:
1414
- PATH=$PATH:$HOME/gopath/bin
@@ -17,8 +17,8 @@ before_install:
1717
- echo Branch is ${TRAVIS_BRANCH} and Tag is ${TRAVIS_TAG}
1818

1919
# Install coverage tools.
20-
- go get github.com/mattn/goveralls
21-
- go get github.com/wadey/gocovmerge
20+
- go install github.com/mattn/goveralls@latest
21+
- go install github.com/wadey/gocovmerge@latest
2222

2323
# Install gcloud, for integration tests.
2424
- $TRAVIS_BUILD_DIR/travis/install_gcloud.sh

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Dockerfile to contain the generate_schema_docs CLI.
22

3-
FROM golang:1.16.7-alpine3.14 AS build
3+
FROM golang:1.18-alpine3.14 AS build
44
RUN apk update
55
RUN apk add --virtual build-dependencies build-base gcc wget git linux-headers
66
# Build the command.
77
COPY . /go/src/github.com/m-lab/etl
88
WORKDIR /go/src/github.com/m-lab/etl
9-
RUN go get -v github.com/m-lab/etl/cmd/generate_schema_docs
9+
RUN go install -v github.com/m-lab/etl/cmd/generate_schema_docs
1010

1111
# Now copy the resulting command into the minimal base image.
1212
FROM alpine:3.14

Dockerfile.testing

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
FROM gcr.io/google.com/cloudsdktool/cloud-sdk:331.0.0
22

3-
43
# Fetch recent go version.
5-
ENV GOLANG_VERSION 1.15.8
4+
ENV GOLANG_VERSION 1.18.3
65
ENV GOLANG_DOWNLOAD_URL https://golang.org/dl/go$GOLANG_VERSION.linux-amd64.tar.gz
7-
ENV GOLANG_DOWNLOAD_SHA256 d3379c32a90fdf9382166f8f48034c459a8cc433730bc9476d39d9082c94583b
6+
ENV GOLANG_DOWNLOAD_SHA256 956f8507b302ab0bb747613695cdae10af99bbd39a90cae522b7c0302cc27245
87

98
RUN curl -fsSL "$GOLANG_DOWNLOAD_URL" -o golang.tar.gz \
109
&& echo "$GOLANG_DOWNLOAD_SHA256 golang.tar.gz" | sha256sum -c - \
@@ -19,6 +18,6 @@ WORKDIR $GOPATH
1918
# Install binaries needed for builds and testing.
2019
RUN apt-get --allow-releaseinfo-change update
2120
RUN apt-get install -y jq gcc netcat
22-
RUN go get -v github.com/m-lab/gcp-config/cmd/cbif
21+
RUN go install -v github.com/m-lab/gcp-config/cmd/cbif@v1.3.12
2322

2423
ENTRYPOINT ["/go/bin/cbif"]

active/poller.go

Lines changed: 24 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@ package active
22

33
import (
44
"context"
5-
"errors"
65
"fmt"
7-
"io/ioutil"
86
"log"
97
"net/http"
108
"net/url"
@@ -19,7 +17,7 @@ import (
1917
"google.golang.org/api/iterator"
2018
"google.golang.org/api/option"
2119

22-
job "github.com/m-lab/etl-gardener/client"
20+
gardener "github.com/m-lab/etl-gardener/client/v2"
2321
"github.com/m-lab/etl-gardener/tracker"
2422
"github.com/m-lab/go/cloud/gcs"
2523
"github.com/m-lab/go/rtx"
@@ -46,11 +44,13 @@ var JobFailures = promauto.NewCounterVec(
4644
type GardenerAPI struct {
4745
trackerBase url.URL
4846
gcs stiface.Client
47+
jobs *gardener.JobClient
4948
}
5049

5150
// NewGardenerAPI creates a GardenerAPI.
5251
func NewGardenerAPI(trackerBase url.URL, gcs stiface.Client) *GardenerAPI {
53-
return &GardenerAPI{trackerBase: trackerBase, gcs: gcs}
52+
c := gardener.NewJobClient(trackerBase)
53+
return &GardenerAPI{trackerBase: trackerBase, gcs: gcs, jobs: c}
5454
}
5555

5656
// MustStorageClient creates a default GCS client.
@@ -60,43 +60,12 @@ func MustStorageClient(ctx context.Context) stiface.Client {
6060
return stiface.AdaptClient(c)
6161
}
6262

63-
// TODO migrate this to m-lab/go
64-
func post(ctx context.Context, url url.URL) ([]byte, int, error) {
65-
ctx, cancel := context.WithTimeout(ctx, time.Minute)
66-
defer cancel()
67-
req, reqErr := http.NewRequestWithContext(ctx, "POST", url.String(), nil)
68-
if reqErr != nil {
69-
return nil, 0, reqErr
70-
}
71-
resp, postErr := http.DefaultClient.Do(req)
72-
if postErr != nil {
73-
return nil, 0, postErr // Documentation says we can ignore body.
74-
}
75-
76-
// Gauranteed to have a non-nil response and body.
77-
defer resp.Body.Close()
78-
b, err := ioutil.ReadAll(resp.Body) // Documentation recommends reading body.
79-
return b, resp.StatusCode, err
80-
}
81-
82-
// TODO add retry in case gardener is offline (during redeployment)
83-
// TODO add metrics to track latency, retries, and errors.
84-
func postAndIgnoreResponse(ctx context.Context, url url.URL) error {
85-
_, status, err := post(ctx, url)
86-
if err != nil {
87-
return err
88-
}
89-
if status != http.StatusOK {
90-
return errors.New(http.StatusText(status))
91-
}
92-
return nil
93-
}
94-
9563
// RunAll will execute functions provided by Next() until there are no more,
9664
// or the context is canceled.
97-
func (g *GardenerAPI) RunAll(ctx context.Context, rSrc RunnableSource, job tracker.Job) (*errgroup.Group, error) {
65+
func (g *GardenerAPI) RunAll(ctx context.Context, rSrc RunnableSource, jt *tracker.JobWithTarget) (*errgroup.Group, error) {
9866
eg := &errgroup.Group{}
9967
count := 0
68+
job := jt.Job
10069
for {
10170
run, err := rSrc.Next(ctx)
10271
if err != nil {
@@ -111,9 +80,8 @@ func (g *GardenerAPI) RunAll(ctx context.Context, rSrc RunnableSource, job track
11180
}
11281
}
11382

114-
heartbeat := tracker.HeartbeatURL(g.trackerBase, job)
115-
if postErr := postAndIgnoreResponse(ctx, *heartbeat); postErr != nil {
116-
log.Println(postErr, "on heartbeat for", job.Path())
83+
if err := g.jobs.Heartbeat(ctx, jt.ID); err != nil {
84+
log.Println(err, "on heartbeat for", job.Path())
11785
}
11886

11987
debug.Println("Starting func")
@@ -131,9 +99,8 @@ func (g *GardenerAPI) RunAll(ctx context.Context, rSrc RunnableSource, job track
13199

132100
err = run.Run(ctx)
133101
if err == nil {
134-
update := tracker.UpdateURL(g.trackerBase, job, tracker.Parsing, run.Info())
135-
if postErr := postAndIgnoreResponse(ctx, *update); postErr != nil {
136-
log.Println(postErr, "on update for", job.Path())
102+
if err := g.jobs.Update(ctx, jt.ID, tracker.Parsing, run.Info()); err != nil {
103+
log.Println(err, "on update for", job.Path())
137104
}
138105
}
139106
return
@@ -178,52 +145,48 @@ func (g *GardenerAPI) JobFileSource(ctx context.Context, job tracker.Job,
178145
}
179146

180147
// NextJob requests a new job from Gardener service.
181-
func (g *GardenerAPI) NextJob(ctx context.Context) (tracker.JobWithTarget, error) {
182-
return job.NextJob(ctx, g.trackerBase)
148+
func (g *GardenerAPI) NextJob(ctx context.Context) (*tracker.JobWithTarget, error) {
149+
return g.jobs.Next(ctx)
183150
}
184151

185152
func (g *GardenerAPI) pollAndRun(ctx context.Context,
186153
toRunnable func(o *storage.ObjectAttrs) Runnable, tokens TokenSource) error {
187-
job, err := g.NextJob(ctx)
154+
jt, err := g.jobs.Next(ctx)
188155
if err != nil {
189156
log.Println(err, "on Gardener client.NextJob()")
190157
return err
191158
}
192159

193-
log.Println(job, "filter:", job.Filter)
194-
gcsSource, err := g.JobFileSource(ctx, job.Job, toRunnable)
160+
log.Println(jt, "filter:", jt.Job.Filter)
161+
gcsSource, err := g.JobFileSource(ctx, jt.Job, toRunnable)
195162
if err != nil {
196163
log.Println(err, "on JobFileSource")
197164
return err
198165
}
199166
src := Throttle(gcsSource, tokens)
200167

201-
log.Println("Running", job.Path())
202-
203-
update := tracker.UpdateURL(g.trackerBase, job.Job, tracker.Parsing, "starting tasks")
204-
if postErr := postAndIgnoreResponse(ctx, *update); postErr != nil {
205-
log.Println(postErr)
168+
log.Println("Running", jt.Job.Path())
169+
if err := g.jobs.Update(ctx, jt.ID, tracker.Parsing, "starting tasks"); err != nil {
170+
log.Println(err)
206171
}
207172

208-
eg, err := g.RunAll(ctx, src, job.Job)
173+
eg, err := g.RunAll(ctx, src, jt)
209174
if err != nil {
210175
log.Println(err)
211176
}
212177

213178
// Once all are dispatched, we want to wait until all have completed
214179
// before posting the state change.
215180
go func() {
216-
log.Println("all tasks dispatched for", job.Path())
181+
log.Println("all tasks dispatched for", jt.Job.Path())
217182
err := eg.Wait()
218183
if err != nil {
219-
log.Println(err, "on wait for", job.Path())
184+
log.Println(err, "on wait for", jt.Job.Path())
220185
} else {
221-
log.Println("finished", job.Path())
186+
log.Println("finished", jt.Job.Path())
222187
}
223-
update := tracker.UpdateURL(g.trackerBase, job.Job, tracker.ParseComplete, "")
224-
// TODO - should this have a retry?
225-
if postErr := postAndIgnoreResponse(ctx, *update); postErr != nil {
226-
log.Println(postErr)
188+
if err := g.jobs.Update(ctx, jt.ID, tracker.ParseComplete, ""); err != nil {
189+
log.Println(err)
227190
}
228191
}()
229192

active/poller_test.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,17 @@ type fakeGardener struct {
2828
t *testing.T // for logging
2929

3030
lock sync.Mutex
31-
jobs []tracker.Job
31+
jobs []*tracker.JobWithTarget
3232
heartbeats int
3333
updates int
3434
}
3535

3636
func (g *fakeGardener) AddJob(job tracker.Job) {
37-
g.jobs = append(g.jobs, job)
37+
jt := &tracker.JobWithTarget{
38+
ID: job.Key(),
39+
Job: job,
40+
}
41+
g.jobs = append(g.jobs, jt)
3842
}
3943

4044
func (g *fakeGardener) ServeHTTP(w http.ResponseWriter, r *http.Request) {
@@ -43,21 +47,21 @@ func (g *fakeGardener) ServeHTTP(w http.ResponseWriter, r *http.Request) {
4347
log.Fatal("Should be POST") // Not t.Fatal because this is asynchronous.
4448
}
4549
g.lock.Lock()
46-
g.lock.Unlock()
47-
switch r.URL.Path {
48-
case "/job":
50+
defer g.lock.Unlock()
51+
switch {
52+
case r.URL.Path == "/v2/job/next":
4953
if len(g.jobs) < 1 {
5054
w.WriteHeader(http.StatusInternalServerError)
5155
return
5256
}
5357
j := g.jobs[0]
5458
g.jobs = g.jobs[1:]
5559
w.Write(j.Marshal())
56-
case "/heartbeat":
60+
case r.URL.Path == "/v2/job/heartbeat":
5761
g.t.Log(r.URL.Path, r.URL.Query())
5862
g.heartbeats++
5963

60-
case "/update":
64+
case r.URL.Path == "/v2/job/update":
6165
g.t.Log(r.URL.Path, r.URL.Query())
6266
g.updates++
6367

@@ -75,7 +79,7 @@ func TestGardenerAPI_JobFileSource(t *testing.T) {
7579
c := testClient()
7680

7781
// set up a fake gardener service.
78-
fg := fakeGardener{t: t, jobs: make([]tracker.Job, 0)}
82+
fg := fakeGardener{t: t, jobs: make([]*tracker.JobWithTarget, 0)}
7983
fg.AddJob(tracker.NewJob("foobar", "ndt", "ndt5", time.Date(2019, 01, 01, 0, 0, 0, 0, time.UTC)))
8084
tracker := httptest.NewServer(&fg)
8185
defer tracker.Close()
@@ -85,12 +89,12 @@ func TestGardenerAPI_JobFileSource(t *testing.T) {
8589
// Set up GardenerAPI using the fakes.
8690
g := active.NewGardenerAPI(*tkURL, c)
8791

88-
job, err := g.NextJob(ctx)
92+
jt, err := g.NextJob(ctx)
8993
rtx.Must(err, "next job")
9094

9195
// The test counter creates runnables for the jobs.
9296
p := newCounter(t)
93-
src, err := g.JobFileSource(ctx, job.Job, p.toRunnable)
97+
src, err := g.JobFileSource(ctx, jt.Job, p.toRunnable)
9498
rtx.Must(err, "file source")
9599
log.Println(src)
96100

@@ -119,7 +123,7 @@ func TestGardenerAPI_RunAll(t *testing.T) {
119123
c := testClient()
120124

121125
// set up a fake gardener service.
122-
fg := fakeGardener{t: t, jobs: make([]tracker.Job, 0)}
126+
fg := fakeGardener{t: t, jobs: make([]*tracker.JobWithTarget, 0)}
123127
fg.AddJob(tracker.NewJob("foobar", "ndt", "ndt5", time.Date(2019, 01, 01, 0, 0, 0, 0, time.UTC)))
124128
tracker := httptest.NewServer(&fg)
125129
defer tracker.Close()
@@ -129,14 +133,14 @@ func TestGardenerAPI_RunAll(t *testing.T) {
129133
// Set up GardenerAPI using the fakes.
130134
g := active.NewGardenerAPI(*tkURL, c)
131135

132-
job, err := g.NextJob(ctx)
136+
jt, err := g.NextJob(ctx)
133137
rtx.Must(err, "next job")
134138

135139
// The test counter creates runnables for the jobs.
136140
p := newCounter(t)
137-
src, err := g.JobFileSource(ctx, job.Job, p.toRunnable)
141+
src, err := g.JobFileSource(ctx, jt.Job, p.toRunnable)
138142
rtx.Must(err, "file source")
139-
eg, err := g.RunAll(ctx, src, job.Job)
143+
eg, err := g.RunAll(ctx, src, jt)
140144
if err != nil {
141145
t.Fatal(err)
142146
}
@@ -168,7 +172,7 @@ func TestGardenerAPI_Poll(t *testing.T) {
168172
c := testClient()
169173

170174
// set up a fake gardener service.
171-
fg := fakeGardener{t: t, jobs: make([]tracker.Job, 0)}
175+
fg := fakeGardener{t: t, jobs: make([]*tracker.JobWithTarget, 0)}
172176
fg.AddJob(tracker.NewJob("foobar", "ndt", "ndt5", time.Date(2019, 01, 01, 0, 0, 0, 0, time.UTC)))
173177
tracker := httptest.NewServer(&fg)
174178
defer tracker.Close()

cmd/etl_worker/Dockerfile.k8s

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,21 @@
1-
FROM golang:1.15.8 as builder
2-
1+
FROM golang:1.18 as builder
32
ARG VERSION
4-
53
WORKDIR /go/src/github.com/m-lab/etl
64
COPY . .
7-
85
RUN go get -v ./...
9-
106
# This leave some dynamically linked lookups, but seems to work anyway. But
117
# it is unclear whether we might see random segfaults.
12-
RUN go get \
8+
RUN go install \
139
-tags netgo -a -v \
1410
-ldflags "-linkmode external -extldflags -static \
1511
-X github.com/m-lab/go/prometheusx.GitShortCommit=$(git log -1 --format=%h) \
1612
-X github.com/m-lab/etl/etl.Version=$VERSION \
1713
-X github.com/m-lab/etl/etl.GitCommit=$(git log -1 --format=%H)" \
1814
./cmd/etl_worker
1915

20-
FROM alpine:3.12
16+
FROM alpine:3.14
2117
RUN apk add --no-cache ca-certificates
22-
2318
COPY --from=builder /go/bin/etl_worker /bin/etl_worker
24-
2519
EXPOSE 9090 8080
26-
2720
WORKDIR /
2821
ENTRYPOINT [ "/bin/etl_worker" ]

0 commit comments

Comments
 (0)