Skip to content

Commit 5bdc452

Browse files
authored
[#22737] Re-write Go SDK data plane to support timers. (#25982)
* commit with lock * add throughput benchmark * Move closed signal to atomic. * gofmt datasource_test.go * Retain session runner receives in this PR. * timerWriter copypasta --------- Co-authored-by: lostluck <[email protected]>
1 parent 3efd3c3 commit 5bdc452

File tree

6 files changed

+862
-380
lines changed

6 files changed

+862
-380
lines changed

sdks/go/pkg/beam/core/runtime/exec/data.go

+12-4
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,12 @@ type SideCache interface {
5757
// DataManager manages external data byte streams. Each data stream can be
5858
// opened by one consumer only.
5959
type DataManager interface {
60-
// OpenRead opens a closable byte stream for reading.
61-
OpenRead(ctx context.Context, id StreamID) (io.ReadCloser, error)
62-
// OpenWrite opens a closable byte stream for writing.
60+
// OpenElementChan opens a channel for data and timers.
61+
OpenElementChan(ctx context.Context, id StreamID, expectedTimerTransforms []string) (<-chan Elements, error)
62+
// OpenWrite opens a closable byte stream for data writing.
6363
OpenWrite(ctx context.Context, id StreamID) (io.WriteCloser, error)
64+
// OpenTimerWrite opens a closable byte stream for writing timers.
65+
OpenTimerWrite(ctx context.Context, id StreamID, family string) (io.WriteCloser, error)
6466
}
6567

6668
// StateReader is the interface for reading side input data.
@@ -91,4 +93,10 @@ type StateReader interface {
9193
GetSideInputCache() SideCache
9294
}
9395

94-
// TODO(herohde) 7/20/2018: user state management
96+
// Elements holds data or timers sent across the data channel.
97+
// If TimerFamilyID is populated, it's a timer, otherwise it's
98+
// data elements.
99+
type Elements struct {
100+
Data, Timers []byte
101+
TimerFamilyID, PtransformID string
102+
}

sdks/go/pkg/beam/core/runtime/exec/datasource.go

+122-60
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/ioutilx"
3131
"github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors"
3232
"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
33+
"golang.org/x/exp/maps"
3334
)
3435

3536
// DataSource is a Root execution unit.
@@ -40,9 +41,12 @@ type DataSource struct {
4041
Coder *coder.Coder
4142
Out Node
4243
PCol PCollection // Handles size metrics. Value instead of pointer so it's initialized by default in tests.
44+
// OnTimerTransforms maps PtransformIDs to their execution nodes that handle OnTimer callbacks.
45+
OnTimerTransforms map[string]*ParDo
4346

44-
source DataManager
45-
state StateReader
47+
source DataManager
48+
state StateReader
49+
curInst string
4650

4751
index int64
4852
splitIdx int64
@@ -94,20 +98,79 @@ func (n *DataSource) Up(ctx context.Context) error {
9498
// StartBundle initializes this datasource for the bundle.
9599
func (n *DataSource) StartBundle(ctx context.Context, id string, data DataContext) error {
96100
n.mu.Lock()
101+
n.curInst = id
97102
n.source = data.Data
98103
n.state = data.State
99104
n.start = time.Now()
100-
n.index = -1
105+
n.index = 0
101106
n.splitIdx = math.MaxInt64
102107
n.mu.Unlock()
103108
return n.Out.StartBundle(ctx, id, data)
104109
}
105110

111+
// splitSuccess is a marker error to indicate we've reached the split index.
112+
// Akin to io.EOF.
113+
var splitSuccess = errors.New("split index reached")
114+
115+
// process reads element buffers from the data channel and dispatches them to the data and timer callbacks.
116+
//
117+
// The data and timer callback functions must return an io.EOF if the reader terminates to signal that an additional
118+
// buffer is desired. On successful splits, [splitSuccess] must be returned to indicate that the
119+
// PTransform is done processing data for this instruction.
120+
func (n *DataSource) process(ctx context.Context, data func(bcr *byteCountReader, ptransformID string) error, timer func(bcr *byteCountReader, ptransformID, timerFamilyID string) error) error {
121+
// The SID contains this instruction's expected data processing transform (this one).
122+
elms, err := n.source.OpenElementChan(ctx, n.SID, maps.Keys(n.OnTimerTransforms))
123+
if err != nil {
124+
return err
125+
}
126+
127+
n.PCol.resetSize() // initialize the size distribution for this bundle.
128+
var r bytes.Reader
129+
130+
var byteCount int
131+
bcr := byteCountReader{reader: &r, count: &byteCount}
132+
133+
splitPrimaryComplete := map[string]bool{}
134+
for {
135+
var err error
136+
select {
137+
case e, ok := <-elms:
138+
// Channel closed, so time to exit
139+
if !ok {
140+
return nil
141+
}
142+
if splitPrimaryComplete[e.PtransformID] {
143+
continue
144+
}
145+
if len(e.Data) > 0 {
146+
r.Reset(e.Data)
147+
err = data(&bcr, e.PtransformID)
148+
}
149+
if len(e.Timers) > 0 {
150+
r.Reset(e.Timers)
151+
err = timer(&bcr, e.PtransformID, e.TimerFamilyID)
152+
}
153+
154+
if err == splitSuccess {
155+
// Returning splitSuccess means we've split, and aren't consuming the remaining buffer.
156+
// We mark the PTransform done to ignore further data.
157+
splitPrimaryComplete[e.PtransformID] = true
158+
} else if err != nil && err != io.EOF {
159+
return errors.Wrap(err, "source failed")
160+
}
161+
// io.EOF means the reader successfully drained.
162+
// We're ready for a new buffer.
163+
case <-ctx.Done():
164+
return nil
165+
}
166+
}
167+
}
168+
106169
// byteCountReader is a passthrough reader that counts all the bytes read through it.
107170
// It trusts the nested reader to return accurate byte information.
108171
type byteCountReader struct {
109172
count *int
110-
reader io.ReadCloser
173+
reader io.Reader
111174
}
112175

113176
func (r *byteCountReader) Read(p []byte) (int, error) {
@@ -117,7 +180,10 @@ func (r *byteCountReader) Read(p []byte) (int, error) {
117180
}
118181

119182
func (r *byteCountReader) Close() error {
120-
return r.reader.Close()
183+
if c, ok := r.reader.(io.Closer); ok {
184+
c.Close()
185+
}
186+
return nil
121187
}
122188

123189
func (r *byteCountReader) reset() int {
@@ -128,15 +194,6 @@ func (r *byteCountReader) reset() int {
128194

129195
// Process opens the data source, reads and decodes data, kicking off element processing.
130196
func (n *DataSource) Process(ctx context.Context) ([]*Checkpoint, error) {
131-
r, err := n.source.OpenRead(ctx, n.SID)
132-
if err != nil {
133-
return nil, err
134-
}
135-
defer r.Close()
136-
n.PCol.resetSize() // initialize the size distribution for this bundle.
137-
var byteCount int
138-
bcr := byteCountReader{reader: r, count: &byteCount}
139-
140197
c := coder.SkipW(n.Coder)
141198
wc := MakeWindowDecoder(n.Coder.Window)
142199

@@ -155,58 +212,63 @@ func (n *DataSource) Process(ctx context.Context) ([]*Checkpoint, error) {
155212
}
156213

157214
var checkpoints []*Checkpoint
158-
for {
159-
if n.incrementIndexAndCheckSplit() {
160-
break
161-
}
162-
// TODO(lostluck) 2020/02/22: Should we include window headers or just count the element sizes?
163-
ws, t, pn, err := DecodeWindowedValueHeader(wc, r)
164-
if err != nil {
165-
if err == io.EOF {
166-
break
215+
err := n.process(ctx, func(bcr *byteCountReader, ptransformID string) error {
216+
for {
217+
// TODO(lostluck) 2020/02/22: Should we include window headers or just count the element sizes?
218+
ws, t, pn, err := DecodeWindowedValueHeader(wc, bcr.reader)
219+
if err != nil {
220+
return err
167221
}
168-
return nil, errors.Wrap(err, "source failed")
169-
}
170-
171-
// Decode key or parallel element.
172-
pe, err := cp.Decode(&bcr)
173-
if err != nil {
174-
return nil, errors.Wrap(err, "source decode failed")
175-
}
176-
pe.Timestamp = t
177-
pe.Windows = ws
178-
pe.Pane = pn
179222

180-
var valReStreams []ReStream
181-
for _, cv := range cvs {
182-
values, err := n.makeReStream(ctx, cv, &bcr, len(cvs) == 1 && n.singleIterate)
223+
// Decode key or parallel element.
224+
pe, err := cp.Decode(bcr)
183225
if err != nil {
184-
return nil, err
226+
return errors.Wrap(err, "source decode failed")
185227
}
186-
valReStreams = append(valReStreams, values)
187-
}
228+
pe.Timestamp = t
229+
pe.Windows = ws
230+
pe.Pane = pn
188231

189-
if err := n.Out.ProcessElement(ctx, pe, valReStreams...); err != nil {
190-
return nil, err
191-
}
192-
// Collect the actual size of the element, and reset the bytecounter reader.
193-
n.PCol.addSize(int64(bcr.reset()))
194-
bcr.reader = r
195-
196-
// Check if there's a continuation and return residuals
197-
// Needs to be done immeadiately after processing to not lose the element.
198-
if c := n.getProcessContinuation(); c != nil {
199-
cp, err := n.checkpointThis(ctx, c)
200-
if err != nil {
201-
// Errors during checkpointing should fail a bundle.
202-
return nil, err
232+
var valReStreams []ReStream
233+
for _, cv := range cvs {
234+
values, err := n.makeReStream(ctx, cv, bcr, len(cvs) == 1 && n.singleIterate)
235+
if err != nil {
236+
return err
237+
}
238+
valReStreams = append(valReStreams, values)
203239
}
204-
if cp != nil {
205-
checkpoints = append(checkpoints, cp)
240+
241+
if err := n.Out.ProcessElement(ctx, pe, valReStreams...); err != nil {
242+
return err
243+
}
244+
// Collect the actual size of the element, and reset the bytecounter reader.
245+
n.PCol.addSize(int64(bcr.reset()))
246+
247+
// Check if there's a continuation and return residuals
248+
// Needs to be done immediately after processing to not lose the element.
249+
if c := n.getProcessContinuation(); c != nil {
250+
cp, err := n.checkpointThis(ctx, c)
251+
if err != nil {
252+
// Errors during checkpointing should fail a bundle.
253+
return err
254+
}
255+
if cp != nil {
256+
checkpoints = append(checkpoints, cp)
257+
}
258+
}
259+
// We've finished processing an element, check if we have finished a split.
260+
if n.incrementIndexAndCheckSplit() {
261+
return splitSuccess
206262
}
207263
}
208-
}
209-
return checkpoints, nil
264+
},
265+
func(bcr *byteCountReader, ptransformID, timerFamilyID string) error {
266+
tmap, err := decodeTimer(cp, wc, bcr)
267+
log.Infof(ctx, "DEBUGLOG: timer received for: %v and %v - %+v err: %v", ptransformID, timerFamilyID, tmap, err)
268+
return nil
269+
})
270+
271+
return checkpoints, err
210272
}
211273

212274
func (n *DataSource) makeReStream(ctx context.Context, cv ElementDecoder, bcr *byteCountReader, onlyStream bool) (ReStream, error) {
@@ -313,7 +375,7 @@ func (n *DataSource) makeReStream(ctx context.Context, cv ElementDecoder, bcr *b
313375
}
314376
}
315377

316-
func readStreamToBuffer(cv ElementDecoder, r io.ReadCloser, size int64, buf []FullValue) ([]FullValue, error) {
378+
func readStreamToBuffer(cv ElementDecoder, r io.Reader, size int64, buf []FullValue) ([]FullValue, error) {
317379
for i := int64(0); i < size; i++ {
318380
value, err := cv.Decode(r)
319381
if err != nil {
@@ -472,7 +534,7 @@ func (n *DataSource) checkpointThis(ctx context.Context, pc sdf.ProcessContinuat
472534
// The bufSize param specifies the estimated number of elements that will be
473535
// sent to this DataSource, and is used to be able to perform accurate splits
474536
// even if the DataSource has not yet received all its elements. A bufSize of
475-
// 0 or less indicates that its unknown, and so uses the current known size.
537+
// 0 or less indicates that it's unknown, and so uses the current known size.
476538
func (n *DataSource) Split(ctx context.Context, splits []int64, frac float64, bufSize int64) (SplitResult, error) {
477539
if n == nil {
478540
return SplitResult{}, fmt.Errorf("failed to split at requested splits: {%v}, DataSource not initialized", splits)

0 commit comments

Comments
 (0)