Skip to content

Commit cfd7374

Browse files
authored
Add new field Counter to switch schema. (#951)
* Add new field Counter to switch schema. * Changes Counter to a uint64, since that is what DISCOv2 outputs. * Change Value to a float64 and Counter to int64. * Removes all JSON type hints from switch schema struct fields. * Removes all code/logic related to Cloud Functions. * Adds comment on why struct field types differ from DISCOv2. * Adds update-schema back to Travis. Adds auto detect of SwitchStats for update-schema. * Removes partField definition from CreateOrUpdateSwitchStats. * Replaces receiver var s with row. * Removes an outdated comment. Removes cf-prod case for update-schema deployment. * Adds cases for SwitchStats in switch block. * Changes int64 'timestamp's to time.Time type. * Manually changes type of Sample.Timestamp for BigQuery. * Adds a comment about why we substitute an int64 for a TIMESTAMP. * Moves substitutions before search for docs for schema.
1 parent 2fc1687 commit cfd7374

File tree

3 files changed

+88
-65
lines changed

3 files changed

+88
-65
lines changed

.travis.yml

Lines changed: 14 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,6 @@
99
# * test and build the go code
1010
# * on success, deploy the result when the origin branch matches a supported
1111
# deployment target.
12-
#
13-
# NOTE: Cloud functions only support primitive IAM roles: Owner, Editor, Viewer.
14-
# See: https://cloud.google.com/functions/docs/concepts/iam
15-
# TODO(soltesz): Add deployment automation when fine-grained permissions are
16-
# possible.
1712

1813
dist: bionic
1914
language: go
@@ -25,12 +20,6 @@ env:
2520

2621
before_install:
2722
- sudo apt-get install -y jq # Dependency for sync_tables_with_schema.sh.
28-
# Install javascript libraries
29-
- pushd $TRAVIS_BUILD_DIR/functions
30-
- npm install --verbose
31-
- pushd embargo
32-
- npm install --verbose
33-
- popd; popd
3423

3524
# Coverage tools
3625
- go get github.com/mattn/goveralls
@@ -84,13 +73,6 @@ script:
8473
- diff /tmp/current-bindata.go schema/bindata.go || (
8574
echo "Files do not match; run 'update go-bindata, go generate ./schema' and commit changes" && false )
8675

87-
# Run all javascript tests.
88-
- pushd $TRAVIS_BUILD_DIR/functions
89-
- npm test
90-
- pushd embargo
91-
- npm test
92-
- popd; popd
93-
9476
# To start the Go tests, run all the non-integration tests.
9577
# Currently skipping storage tests, because they depend on GCS, and there is
9678
# no emulator.
@@ -185,20 +167,6 @@ deploy:
185167
all_branches: true
186168
condition: $TRAVIS_BRANCH == qp-sandbox-*
187169

188-
## Service: cloud function -- AppEngine Flexible Environment.
189-
- provider: script
190-
script:
191-
$TRAVIS_BUILD_DIR/travis/activate_service_account.sh SERVICE_ACCOUNT_mlab_sandbox
192-
&& cd $TRAVIS_BUILD_DIR/functions
193-
&& gcloud config set project mlab-sandbox
194-
&& gcloud functions deploy createSandboxTaskOnFileNotification --stage-bucket=functions-mlab-sandbox --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=archive-mlab-sandbox
195-
&& gcloud functions deploy createSandboxTaskOnEmbargoFileNotification --stage-bucket=functions-mlab-sandbox --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=embargo-mlab-sandbox
196-
skip_cleanup: true
197-
on:
198-
repo: m-lab/etl
199-
all_branches: true
200-
condition: $TRAVIS_BRANCH == cf-sandbox-* || $TRAVIS_BRANCH == sandbox-*
201-
202170
## Task Queues
203171
- provider: script
204172
script:
@@ -294,11 +262,6 @@ deploy:
294262
$TRAVIS_BUILD_DIR/travis/kubectl.sh mlab-staging data-processing ./apply-cluster.sh
295263
&& $TRAVIS_BUILD_DIR/travis/deploy_app.sh mlab-staging
296264
SERVICE_ACCOUNT_mlab_staging $TRAVIS_BUILD_DIR/cmd/etl_worker app-batch.yaml
297-
&& cd $TRAVIS_BUILD_DIR/functions
298-
&& gcloud functions deploy createStagingTaskOnFileNotification --project=mlab-staging --stage-bucket=functions-mlab-staging --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=archive-mlab-staging
299-
&& gcloud functions deploy createStagingTaskOnEmbargoFileNotification --project=mlab-staging --stage-bucket=functions-mlab-staging --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=embargo-mlab-staging
300-
&& cd $TRAVIS_BUILD_DIR/functions/embargo
301-
&& gcloud functions deploy embargoOnFileNotificationStaging --project=mlab-staging --stage-bucket=functions-mlab-staging --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=scraper-mlab-staging
302265
# && $TRAVIS_BUILD_DIR/etl-schema/schema/sync_tables_with_schema.sh mlab-staging batch nodryrun
303266
# && $TRAVIS_BUILD_DIR/etl-schema/schema/sync_tables_with_schema.sh mlab-staging base_tables nodryrun
304267
skip_cleanup: true
@@ -313,24 +276,6 @@ deploy:
313276
# Triggers when *ANY* branch is tagged with one of these tags'
314277
######################################################################
315278

316-
## Service: cloud function -- AppEngine Flexible Environment.
317-
# TODO - update-schema is run on many triggers, often multiple times. Should clean it up.
318-
- provider: script
319-
script:
320-
$TRAVIS_BUILD_DIR/travis/activate_service_account.sh SERVICE_ACCOUNT_mlab_oti
321-
&& $TRAVIS_BUILD_DIR/travis/run_with_application_credentials.sh mlab-oti
322-
SERVICE_ACCOUNT_mlab_oti $TRAVIS_BUILD_DIR/cmd/update-schema update-schema
323-
&& cd $TRAVIS_BUILD_DIR/functions
324-
&& gcloud functions deploy createProdTaskOnFileNotification --project=mlab-oti --stage-bucket=functions-mlab-oti --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=archive-mlab-oti
325-
&& gcloud functions deploy createProdTaskOnEmbargoFileNotification --project=mlab-oti --stage-bucket=functions-mlab-oti --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=embargo-mlab-oti
326-
&& cd $TRAVIS_BUILD_DIR/functions/embargo
327-
&& gcloud functions deploy embargoOnFileNotificationOti --project=mlab-oti --stage-bucket=functions-mlab-oti --trigger-event=providers/cloud.storage/eventTypes/object.change --trigger-resource=scraper-mlab-oti
328-
skip_cleanup: true
329-
on:
330-
repo: m-lab/etl
331-
all_branches: true
332-
condition: $TRAVIS_BRANCH == cf-prod-* || $TRAVIS_BRANCH == ndt-prod-* || $TRAVIS_BRANCH == prod-*
333-
334279
###################### UNIVERSAL PARSER ###############################
335280
# Deploys Universal K8S Parser.
336281
# Triggers when *ANY* branch is tagged with u-prod-* OR prod-*
@@ -400,3 +345,17 @@ deploy:
400345
repo: m-lab/etl
401346
all_branches: true
402347
condition: $TRAVIS_TAG == ndt-prod-* || $TRAVIS_TAG == prod-*
348+
349+
350+
##################### UPDATE SCHEMAS ###############################
351+
# Updates or creates schemas in BigQuery
352+
- provider: script
353+
script:
354+
$TRAVIS_BUILD_DIR/travis/activate_service_account.sh SERVICE_ACCOUNT_mlab_oti
355+
&& $TRAVIS_BUILD_DIR/travis/run_with_application_credentials.sh mlab-oti
356+
SERVICE_ACCOUNT_mlab_oti $TRAVIS_BUILD_DIR/cmd/update-schema update-schema
357+
skip_cleanup: true
358+
on:
359+
repo: m-lab/etl
360+
all_branches: true
361+
condition: $TRAVIS_BRANCH == ndt-prod-* || $TRAVIS_BRANCH == prod-*

cmd/update-schema/update.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ func CreateOrUpdateAnnotationRow(project string, dataset string, table string) e
6666
return CreateOrUpdate(schema, project, dataset, table, "Date")
6767
}
6868

69+
func CreateOrUpdateSwitchStats(project string, dataset string, table string) error {
70+
row := schema.SwitchStats{}
71+
schema, err := row.Schema()
72+
rtx.Must(err, "SwitchStats.Schema")
73+
return CreateOrUpdate(schema, project, dataset, table, "")
74+
}
75+
6976
// CreateOrUpdate will update or create a table from the given schema.
7077
func CreateOrUpdate(schema bigquery.Schema, project, dataset, table, partField string) error {
7178
name := project + "." + dataset + "." + table
@@ -141,6 +148,12 @@ func updateLegacyTables(project string) int {
141148
if err := CreateOrUpdateNDT5ResultRow(project, "batch", "ndt5"); err != nil {
142149
errCount++
143150
}
151+
if err := CreateOrUpdateSwitchStats(project, "base_tables", "switch"); err != nil {
152+
errCount++
153+
}
154+
if err := CreateOrUpdateSwitchStats(project, "batch", "switch"); err != nil {
155+
errCount++
156+
}
144157
return errCount
145158
}
146159

@@ -216,6 +229,14 @@ func main() {
216229
errCount++
217230
}
218231

232+
case "switch":
233+
if err := CreateOrUpdateSwitchStats(*project, "base_tables", "switch"); err != nil {
234+
errCount++
235+
}
236+
if err := CreateOrUpdateSwitchStats(*project, "batch", "switch"); err != nil {
237+
errCount++
238+
}
239+
219240
default:
220241
log.Fatal("invalid updateType: ", *updateType)
221242
}

schema/switch_schema.go

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,71 @@
11
package schema
22

3-
import "time"
3+
import (
4+
"cloud.google.com/go/bigquery"
5+
6+
"github.com/m-lab/go/cloud/bqx"
7+
8+
"time"
9+
)
410

511
// Sample is an individual measurement taken by DISCO.
12+
// NOTE: the types of the fields in this struct differ from the types used
13+
// natively by the structs in DISCOv2. In DISCOv2 Value is a uint64, but must
14+
// be a float here because DISCOv1 outputs floats. float64 should be able to
15+
// accommodate both types of input values safely. For Counter, DISCOv2 uses a
16+
// uint64, but BigQuery does not support the notion of unsigned integers, so we
17+
// use int64 here, which should be safe, too.
618
type Sample struct {
7-
Timestamp int64 `json:"timestamp,int64" bigquery:"timestamp"`
8-
Value float32 `json:"value,float32" bigquery:"value"`
19+
Timestamp int64 `json:"timestamp" bigquery:"timestamp"`
20+
Value float64 `json:"value" bigquery:"value"`
21+
Counter int64 `json:"counter" bigquery:"counter"`
922
}
1023

1124
// SwitchStats represents a row of data taken from the raw DISCO export file.
1225
type SwitchStats struct {
13-
TaskFilename string `json:"task_filename,string" bigquery:"task_filename"`
14-
TestID string `json:"test_id,string" bigquery:"test_id"`
26+
TaskFilename string `json:"task_filename" bigquery:"task_filename"`
27+
TestID string `json:"test_id" bigquery:"test_id"`
1528
ParseTime time.Time `json:"parse_time" bigquery:"parse_time"`
16-
ParserVersion string `json:"parser_version,string" bigquery:"parser_version"`
17-
LogTime int64 `json:"log_time,int64" bigquery:"log_time"`
29+
ParserVersion string `json:"parser_version" bigquery:"parser_version"`
30+
LogTime time.Time `json:"log_time" bigquery:"log_time"`
1831
Sample []Sample `json:"sample" bigquery:"sample"`
1932
Metric string `json:"metric" bigquery:"metric"`
2033
Hostname string `json:"hostname" bigquery:"hostname"`
2134
Experiment string `json:"experiment" bigquery:"experiment"`
2235
}
2336

2437
// Size estimates the number of bytes in the SwitchStats object.
25-
func (s *SwitchStats) Size() int {
26-
return (len(s.TaskFilename) + len(s.TestID) + 8 +
27-
12*len(s.Sample) + len(s.Metric) + len(s.Hostname) + len(s.Experiment))
38+
func (row *SwitchStats) Size() int {
39+
return (len(row.TaskFilename) + len(row.TestID) + 8 +
40+
12*len(row.Sample) + len(row.Metric) + len(row.Hostname) + len(row.Experiment))
41+
}
42+
43+
// Schema returns the BigQuery schema for SwitchStats.
44+
func (row *SwitchStats) Schema() (bigquery.Schema, error) {
45+
sch, err := bigquery.InferSchema(row)
46+
if err != nil {
47+
return bigquery.Schema{}, err
48+
}
49+
50+
// The raw data from DISCO stores the timestamp of a sample as an integer (a
51+
// UNIX timestamp), but BigQuery represents the value as type TIMESTAMP.
52+
// TODO: DISCO should probably store the value as time.Time to avoid the
53+
// need for this.
54+
subs := map[string]bigquery.FieldSchema{
55+
"timestamp": bigquery.FieldSchema{
56+
Name: "timestamp",
57+
Description: "",
58+
Repeated: false,
59+
Required: false,
60+
Type: "TIMESTAMP"},
61+
}
62+
c := bqx.Customize(sch, subs)
63+
64+
docs := FindSchemaDocsFor(row)
65+
for _, doc := range docs {
66+
bqx.UpdateSchemaDescription(c, doc)
67+
}
68+
rr := bqx.RemoveRequired(c)
69+
70+
return rr, err
2871
}

0 commit comments

Comments
 (0)