Skip to content

Commit df4bdda

Browse files
Add ArchiveSize and FileSize to parser records (#1118)
* Add Metadata.ArchiveSize and TestSource.GetSize()
* Add ArchiveSize and FileSize to ParseInfo
* Populate ArchiveSize and FileSize for all datatypes
1 parent f4993ea commit df4bdda

File tree

18 files changed

+109
-71
lines changed

18 files changed

+109
-71
lines changed

etl/etl.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,12 @@ type InserterParams struct {
105105

106106
// Metadata provides metadata about the parser and archive files.
107107
type Metadata struct {
108-
Version string
109-
ArchiveURL string
110-
GitCommit string
111-
Date civil.Date
112-
Start time.Time
108+
Version string
109+
ArchiveURL string
110+
GitCommit string
111+
Date civil.Date
112+
Start time.Time
113+
ArchiveSize int64
113114
}
114115

115116
// ErrHighInsertionFailureRate should be returned by TaskError when there are more than 10% BQ insertion errors.
@@ -153,6 +154,7 @@ type TestSource interface {
153154
// Returns io.EOF when there are no more tests.
154155
NextTest(maxSize int64) (string, []byte, error)
155156
Close() error
157+
GetSize() int64
156158

157159
Detail() string // Detail for logs.
158160
Type() string // Data type for logs and metrics

parser/annotation2.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@ func (ap *Annotation2Parser) ParseAndInsert(meta etl.Metadata, testName string,
6363

6464
row := schema.Annotation2Row{
6565
Parser: schema.ParseInfo{
66-
Version: meta.Version,
67-
Time: time.Now(),
68-
ArchiveURL: meta.ArchiveURL,
69-
Filename: testName,
70-
GitCommit: meta.GitCommit,
66+
Version: meta.Version,
67+
Time: time.Now(),
68+
ArchiveURL: meta.ArchiveURL,
69+
Filename: testName,
70+
GitCommit: meta.GitCommit,
71+
ArchiveSize: meta.ArchiveSize,
72+
FileSize: int64(len(test)),
7173
},
7274
}
7375

parser/annotation2_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ func TestAnnotation2Parser_ParseAndInsert(t *testing.T) {
6868
Filename: tt.file,
6969
Priority: 0,
7070
GitCommit: "12345678",
71+
FileSize: int64(len(data)),
7172
}
7273

7374
if diff := deep.Equal(row.Parser, expPI); diff != nil {

parser/hopannotation2.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,13 @@ func (p *HopAnnotation2Parser) ParseAndInsert(meta etl.Metadata, testName string
4848

4949
row := schema.HopAnnotation2Row{
5050
Parser: schema.ParseInfo{
51-
Version: meta.Version,
52-
Time: time.Now(),
53-
ArchiveURL: meta.ArchiveURL,
54-
Filename: testName,
55-
GitCommit: meta.GitCommit,
51+
Version: meta.Version,
52+
Time: time.Now(),
53+
ArchiveURL: meta.ArchiveURL,
54+
Filename: testName,
55+
GitCommit: meta.GitCommit,
56+
ArchiveSize: meta.ArchiveSize,
57+
FileSize: int64(len(rawContent)),
5658
},
5759
}
5860

parser/hopannotation2_test.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@ func TestHopAnnotation2Parser_ParseAndInsert(t *testing.T) {
3232
date := civil.Date{Year: 2021, Month: 07, Day: 30}
3333

3434
meta := etl.Metadata{
35-
ArchiveURL: path.Join(hopAnnotation2GCSPath, hopAnnotation2Filename),
36-
Date: date,
37-
Version: parser.Version(),
38-
GitCommit: parser.GitCommit(),
35+
ArchiveURL: path.Join(hopAnnotation2GCSPath, hopAnnotation2Filename),
36+
Date: date,
37+
Version: parser.Version(),
38+
GitCommit: parser.GitCommit(),
39+
ArchiveSize: int64(len(data)),
3940
}
4041

4142
if err := n.ParseAndInsert(meta, hopAnnotation2Filename, data); err != nil {
@@ -50,12 +51,14 @@ func TestHopAnnotation2Parser_ParseAndInsert(t *testing.T) {
5051
row := ins.data[0].(*schema.HopAnnotation2Row)
5152

5253
expectedParseInfo := schema.ParseInfo{
53-
Version: "https://github.com/m-lab/etl/tree/foobar",
54-
Time: row.Parser.Time,
55-
ArchiveURL: path.Join(hopAnnotation2GCSPath, hopAnnotation2Filename),
56-
Filename: hopAnnotation2Filename,
57-
Priority: 0,
58-
GitCommit: "12345678",
54+
Version: "https://github.com/m-lab/etl/tree/foobar",
55+
Time: row.Parser.Time,
56+
ArchiveURL: path.Join(hopAnnotation2GCSPath, hopAnnotation2Filename),
57+
Filename: hopAnnotation2Filename,
58+
Priority: 0,
59+
GitCommit: "12345678",
60+
ArchiveSize: int64(len(data)),
61+
FileSize: int64(len(data)),
5962
}
6063

6164
expectedGeolocation := annotator.Geolocation{

parser/ndt5_result.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,13 @@ func (dp *NDT5ResultParser) ParseAndInsert(meta etl.Metadata, testName string, t
8484
}
8585

8686
parser := schema.ParseInfo{
87-
Version: meta.Version,
88-
Time: time.Now(),
89-
ArchiveURL: meta.ArchiveURL,
90-
Filename: testName,
91-
GitCommit: meta.GitCommit,
87+
Version: meta.Version,
88+
Time: time.Now(),
89+
ArchiveURL: meta.ArchiveURL,
90+
Filename: testName,
91+
GitCommit: meta.GitCommit,
92+
ArchiveSize: meta.ArchiveSize,
93+
FileSize: int64(len(test)),
9294
}
9395
date := meta.Date
9496

parser/ndt7_result.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,13 @@ func (dp *NDT7ResultParser) ParseAndInsert(meta etl.Metadata, testName string, t
6969

7070
row := schema.NDT7ResultRow{
7171
Parser: schema.ParseInfo{
72-
Version: meta.Version,
73-
Time: time.Now(),
74-
ArchiveURL: meta.ArchiveURL,
75-
Filename: testName,
76-
GitCommit: meta.GitCommit,
72+
Version: meta.Version,
73+
Time: time.Now(),
74+
ArchiveURL: meta.ArchiveURL,
75+
Filename: testName,
76+
GitCommit: meta.GitCommit,
77+
ArchiveSize: meta.ArchiveSize,
78+
FileSize: int64(len(test)),
7779
},
7880
}
7981

parser/ndt7_result_test.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"github.com/m-lab/go/pretty"
1616
)
1717

18-
func setupNDT7InMemoryParser(t *testing.T, testName string) (*schema.NDT7ResultRow, error) {
18+
func setupNDT7InMemoryParser(t *testing.T, testName string) (*schema.NDT7ResultRow, int64, error) {
1919
ins := newInMemorySink()
2020
n := parser.NewNDT7ResultParser(ins, "test", "_suffix")
2121

@@ -31,14 +31,14 @@ func setupNDT7InMemoryParser(t *testing.T, testName string) (*schema.NDT7ResultR
3131
}
3232
err = n.ParseAndInsert(meta, testName, resultData)
3333
if err != nil {
34-
return nil, err
34+
return nil, 0, err
3535
}
3636
if n.Accepted() != 1 {
3737
t.Fatal("Failed to insert snaplog data.", ins)
3838
}
3939
n.Flush()
4040
row := ins.data[0].(*schema.NDT7ResultRow)
41-
return row, err
41+
return row, int64(len(resultData)), err
4242
}
4343

4444
func TestNDT7ResultParser_ParseAndInsert(t *testing.T) {
@@ -58,7 +58,7 @@ func TestNDT7ResultParser_ParseAndInsert(t *testing.T) {
5858
}
5959
for _, tt := range tests {
6060
t.Run(tt.name, func(t *testing.T) {
61-
row, err := setupNDT7InMemoryParser(t, tt.testName)
61+
row, size, err := setupNDT7InMemoryParser(t, tt.testName)
6262
if (err != nil) != tt.wantErr {
6363
t.Errorf("NDT7ResultParser.ParseAndInsert() error = %v, wantErr %v", err, tt.wantErr)
6464
}
@@ -82,6 +82,7 @@ func TestNDT7ResultParser_ParseAndInsert(t *testing.T) {
8282
Filename: "ndt7-download-20200318T000657.568382877Z.ndt-knwp4_1583603744_000000000000590E.json",
8383
Priority: 0,
8484
GitCommit: "12345678",
85+
FileSize: size,
8586
}
8687
if diff := deep.Equal(row.Parser, expPI); diff != nil {
8788
pretty.Print(row.Parser)
@@ -121,6 +122,7 @@ func TestNDT7ResultParser_ParseAndInsert(t *testing.T) {
121122
Filename: "ndt7-upload-20200318T001352.496224022Z.ndt-knwp4_1583603744_0000000000005CF2.json",
122123
Priority: 0,
123124
GitCommit: "12345678",
125+
FileSize: size,
124126
}
125127
if diff := deep.Equal(row.Parser, expPI); diff != nil {
126128
t.Errorf("NDT7ResultParser.ParseAndInsert() different summary: %s", strings.Join(diff, "\n"))
@@ -160,7 +162,7 @@ func TestNDT7ResultParser_ParseAndInsertUnsafe(t *testing.T) {
160162
}
161163
for _, tt := range tests {
162164
t.Run(tt.name, func(t *testing.T) {
163-
row, err := setupNDT7InMemoryParser(t, tt.testName)
165+
row, _, err := setupNDT7InMemoryParser(t, tt.testName)
164166
if (err != nil) != tt.wantErr {
165167
t.Errorf("NDT7ResultParser.ParseAndInsert() error = %v, wantErr %v", err, tt.wantErr)
166168
}

parser/pcap.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,13 @@ func (p *PCAPParser) ParseAndInsert(meta etl.Metadata, testName string, rawConte
148148

149149
row := schema.PCAPRow{
150150
Parser: schema.ParseInfo{
151-
Version: meta.Version,
152-
Time: time.Now(),
153-
ArchiveURL: meta.ArchiveURL,
154-
Filename: testName,
155-
GitCommit: meta.GitCommit,
151+
Version: meta.Version,
152+
Time: time.Now(),
153+
ArchiveURL: meta.ArchiveURL,
154+
Filename: testName,
155+
GitCommit: meta.GitCommit,
156+
ArchiveSize: meta.ArchiveSize,
157+
FileSize: int64(len(rawContent)),
156158
},
157159
}
158160

parser/pcap_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ func TestPCAPParser_ParseAndInsert(t *testing.T) {
5656
Filename: pcapFilename,
5757
Priority: 0,
5858
GitCommit: "12345678",
59+
FileSize: int64(len(data)),
5960
}
6061

6162
expectedPCAPRow := schema.PCAPRow{

0 commit comments

Comments
 (0)