Skip to content

Commit 76384ba

Browse files
MatanLevy and Matan Levy
authored
Storage write api - support default stream (#226)
* storage write api support default stream * commit in case of committed stream * Storage write api - support default stream: add test cases * empty commit --------- Co-authored-by: Matan Levy <[email protected]>
1 parent 316038b commit 76384ba

File tree

2 files changed

+91
-13
lines changed

2 files changed

+91
-13
lines changed

server/storage_handler.go

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,6 @@ func (s *storageWriteServer) CreateWriteStream(ctx context.Context, req *storage
409409
TableSchema: schema,
410410
WriteMode: storagepb.WriteStream_INSERT,
411411
}
412-
413412
s.mu.Lock()
414413
s.streamMap[streamName] = &writeStreamStatus{
415414
streamType: streamType,
@@ -525,6 +524,7 @@ func (s *storageWriteServer) appendRows(req *storagepb.AppendRowsRequest, msgDes
525524
status.rows = append(status.rows, data...)
526525
}
527526
return s.sendResult(stream, streamName, offset+int64(len(rows)))
527+
528528
}
529529

530530
func (s *storageWriteServer) sendResult(stream storagepb.BigQueryWrite_AppendRowsServer, streamName string, offset int64) error {
@@ -677,10 +677,14 @@ func (s *storageWriteServer) insertTableData(ctx context.Context, tx *connection
677677

678678
func (s *storageWriteServer) GetWriteStream(ctx context.Context, req *storagepb.GetWriteStreamRequest) (*storagepb.WriteStream, error) {
679679
s.mu.RLock()
680-
defer s.mu.RUnlock()
681680
status, exists := s.streamMap[req.Name]
681+
s.mu.RUnlock()
682682
if !exists {
683-
return nil, fmt.Errorf("failed to find stream from %s", req.Name)
683+
stream, err := s.createDefaultStream(ctx, req)
684+
if err != nil {
685+
return nil, fmt.Errorf("failed to find stream from %s", req.Name)
686+
}
687+
return stream, err
684688
}
685689
return status.stream, nil
686690
}
@@ -775,6 +779,58 @@ func (s *storageWriteServer) FlushRows(ctx context.Context, req *storagepb.Flush
775779
}, nil
776780
}
777781

782+
/*
783+
*
784+
According to google documentation (https://pkg.go.dev/cloud.google.com/go/bigquery/storage/apiv1#BigQueryWriteClient.GetWriteStream)
785+
every table has a special stream named ‘_default’ to which data can be written. This stream doesn’t need to be created using CreateWriteStream
786+
787+
Here we create the default stream and add it to map in case it not exists yet, the GetWriteStreamRequest given as second
788+
argument should have Name in this format: projects/<projectId>/datasets/<datasetId>/tables/<tableId>/streams/_default
789+
*/
790+
func (s *storageWriteServer) createDefaultStream(ctx context.Context, req *storagepb.GetWriteStreamRequest) (*storagepb.WriteStream, error) {
791+
streamId := req.Name
792+
suffix := "_default"
793+
streams := "/streams/"
794+
if !strings.HasSuffix(streamId, suffix) {
795+
return nil, fmt.Errorf("unexpected stream id: %s, expected '%s' suffix", streamId, suffix)
796+
}
797+
index := strings.LastIndex(streamId, streams)
798+
if index == -1 {
799+
return nil, fmt.Errorf("unexpected stream id: %s, expected containg '%s'", streamId, streams)
800+
}
801+
streamPart := streamId[:index]
802+
writeStreamReq := &storagepb.CreateWriteStreamRequest{
803+
Parent: streamPart,
804+
WriteStream: &storagepb.WriteStream{
805+
Type: storagepb.WriteStream_COMMITTED,
806+
},
807+
}
808+
stream, err := s.CreateWriteStream(ctx, writeStreamReq)
809+
if err != nil {
810+
return nil, err
811+
}
812+
projectID, datasetID, tableID, err := getIDsFromPath(streamPart)
813+
if err != nil {
814+
return nil, err
815+
}
816+
tableMetadata, err := getTableMetadata(ctx, s.server, projectID, datasetID, tableID)
817+
if err != nil {
818+
return nil, err
819+
}
820+
streamStatus := &writeStreamStatus{
821+
streamType: storagepb.WriteStream_COMMITTED,
822+
stream: stream,
823+
projectID: projectID,
824+
datasetID: datasetID,
825+
tableID: tableID,
826+
tableMetadata: tableMetadata,
827+
}
828+
s.mu.Lock()
829+
defer s.mu.Unlock()
830+
s.streamMap[streamId] = streamStatus
831+
return stream, nil
832+
}
833+
778834
func getIDsFromPath(path string) (string, string, string, error) {
779835
paths := strings.Split(path, "/")
780836
if len(paths)%2 != 0 {

server/storage_test.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ func TestStorageWrite(t *testing.T) {
395395
for _, test := range []struct {
396396
name string
397397
streamType storagepb.WriteStream_Type
398+
isDefaultStream bool
398399
expectedRowsAfterFirstWrite int
399400
expectedRowsAfterSecondWrite int
400401
expectedRowsAfterThirdWrite int
@@ -416,6 +417,15 @@ func TestStorageWrite(t *testing.T) {
416417
expectedRowsAfterThirdWrite: 6,
417418
expectedRowsAfterExplicitCommit: 6,
418419
},
420+
{
421+
name: "default",
422+
streamType: storagepb.WriteStream_COMMITTED,
423+
isDefaultStream: true,
424+
expectedRowsAfterFirstWrite: 1,
425+
expectedRowsAfterSecondWrite: 4,
426+
expectedRowsAfterThirdWrite: 6,
427+
expectedRowsAfterExplicitCommit: 6,
428+
},
419429
} {
420430
const (
421431
projectID = "test"
@@ -490,24 +500,36 @@ func TestStorageWrite(t *testing.T) {
490500
}
491501
defer client.Close()
492502
t.Run(test.name, func(t *testing.T) {
493-
writeStream, err := client.CreateWriteStream(ctx, &storagepb.CreateWriteStreamRequest{
494-
Parent: fmt.Sprintf("projects/%s/datasets/%s/tables/%s", projectID, datasetID, tableID),
495-
WriteStream: &storagepb.WriteStream{
496-
Type: test.streamType,
497-
},
498-
})
499-
if err != nil {
500-
t.Fatalf("CreateWriteStream: %v", err)
503+
var writeStreamName string
504+
fullTableName := fmt.Sprintf("projects/%s/datasets/%s/tables/%s", projectID, datasetID, tableID)
505+
if !test.isDefaultStream {
506+
writeStream, err := client.CreateWriteStream(ctx, &storagepb.CreateWriteStreamRequest{
507+
Parent: fullTableName,
508+
WriteStream: &storagepb.WriteStream{
509+
Type: test.streamType,
510+
},
511+
})
512+
if err != nil {
513+
t.Fatalf("CreateWriteStream: %v", err)
514+
}
515+
writeStreamName = writeStream.GetName()
501516
}
502517
m := &exampleproto.SampleData{}
503518
descriptorProto, err := adapt.NormalizeDescriptor(m.ProtoReflect().Descriptor())
504519
if err != nil {
505520
t.Fatalf("NormalizeDescriptor: %v", err)
506521
}
522+
var writerOptions []managedwriter.WriterOption
523+
if test.isDefaultStream {
524+
writerOptions = append(writerOptions, managedwriter.WithType(managedwriter.DefaultStream))
525+
writerOptions = append(writerOptions, managedwriter.WithDestinationTable(fullTableName))
526+
} else {
527+
writerOptions = append(writerOptions, managedwriter.WithStreamName(writeStreamName))
528+
}
529+
writerOptions = append(writerOptions, managedwriter.WithSchemaDescriptor(descriptorProto))
507530
managedStream, err := client.NewManagedStream(
508531
ctx,
509-
managedwriter.WithStreamName(writeStream.GetName()),
510-
managedwriter.WithSchemaDescriptor(descriptorProto),
532+
writerOptions...,
511533
)
512534
if err != nil {
513535
t.Fatalf("NewManagedStream: %v", err)

0 commit comments

Comments
 (0)