Skip to content

Commit ecf2106

Browse files
Use SchemaDoc for NDTRow & Add basic generate_schema_docs CLI (#755)
* Add steps to prepare schema descriptions
* Add initial schema field description yaml files
* Add top level schema go:generate directive
* Add pre-generated file so "go get" works
* Enforce sources match generated bindata.go
* Use canonical NDTResultRow and NDTResultParser names
1 parent f74d2ab commit ecf2106

File tree

14 files changed

+666
-17
lines changed

14 files changed

+666
-17
lines changed

.travis.yml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ language: go
1919
go:
2020
- 1.12
2121

22+
env:
23+
- PATH=$PATH:$HOME/gopath/bin
24+
2225
before_install:
2326
- sudo apt-get install -y jq # Dependency for sync_tables_with_schema.sh.
2427
# Install javascript libraries
@@ -32,6 +35,9 @@ before_install:
3235
- go get github.com/mattn/goveralls
3336
- go get github.com/wadey/gocovmerge
3437

38+
# Tool to generate and embed binary assets.
39+
- go get github.com/go-bindata/go-bindata/go-bindata
40+
3541
- echo Branch is ${TRAVIS_BRANCH} and Tag is $TRAVIS_TAG
3642

3743
# Install gcloud, for integration tests.
@@ -53,6 +59,12 @@ cache:
5359
- "$HOME/google-cloud-sdk/"
5460

5561
script:
62+
# Enforce that the local binary assets match the generated ones.
63+
- cp schema/bindata.go /tmp/current-bindata.go
64+
- go generate ./schema
65+
- diff -q schema/bindata.go /tmp/current-bindata.go || (
66+
echo "Files do not match; run 'go generate ./schema' and commit changes" && false )
67+
5668
# Run all javascript tests.
5769
- pushd $TRAVIS_BUILD_DIR/functions
5870
- npm test
@@ -249,7 +261,7 @@ deploy:
249261
repo: m-lab/etl
250262
all_branches: true
251263
condition: $TRAVIS_BRANCH == pt-sandbox-* || $TRAVIS_BRANCH == sandbox-*
252-
264+
253265
## Service: etl-scamper-parser -- AppEngine Flexible Environment.
254266
- provider: script
255267
script:

cmd/generate_schema_docs/main.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Copyright 2019 ETL Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
//////////////////////////////////////////////////////////////////////////////
15+
16+
// generate_schema_docs uses ETL schema field descriptions to generate
17+
// documentation in various formats.
18+
package main
19+
20+
import (
21+
"bytes"
22+
"flag"
23+
"fmt"
24+
"io/ioutil"
25+
"log"
26+
"os"
27+
"path"
28+
"reflect"
29+
"strings"
30+
31+
"cloud.google.com/go/bigquery"
32+
"github.com/m-lab/go/bqx"
33+
"github.com/m-lab/go/flagx"
34+
"github.com/m-lab/go/rtx"
35+
36+
"github.com/m-lab/etl/schema"
37+
)
38+
39+
var usage = `
40+
SUMMARY
41+
Format BigQuery schema field descriptions as a Markdown table.
42+
43+
USAGE
44+
$ generate_schema_docs -doc.output ./include
45+
Writing include/schema_ndtresult.md
46+
47+
`
48+
49+
// Flags
50+
var (
51+
outputFormat string
52+
outputDirectory string
53+
)
54+
55+
func init() {
56+
log.SetFlags(0)
57+
flag.StringVar(&outputFormat, "doc.format", "md", "Format for output files.")
58+
flag.StringVar(&outputDirectory, "doc.output", ".", "Write files to given directory.")
59+
60+
flag.Usage = func() {
61+
fmt.Fprintf(os.Stderr, "%s\n", os.Args[0])
62+
fmt.Fprintf(os.Stderr, usage)
63+
fmt.Fprintln(os.Stderr, "Flags:")
64+
flag.PrintDefaults()
65+
}
66+
}
67+
68+
func generateMarkdown(schema bigquery.Schema) []byte {
69+
buf := &bytes.Buffer{}
70+
fmt.Fprintln(buf, "| Field name | Type | Description |")
71+
fmt.Fprintln(buf, "| :----------------|:----------:|:---------------|")
72+
bqx.WalkSchema(schema, func(prefix []string, field *bigquery.FieldSchema) error {
73+
var path string
74+
if len(prefix) == 1 {
75+
path = ""
76+
} else {
77+
path = strings.Join(prefix[:len(prefix)-1], ".") + "."
78+
}
79+
fmt.Fprintf(buf, "| %s**%s** | %s | %s |\n", path, prefix[len(prefix)-1], field.Type, field.Description)
80+
return nil
81+
})
82+
return buf.Bytes()
83+
}
84+
85+
// All record structs define a Schema method. This interface allows us to
86+
// process each of them easily.
87+
type schemaGenerator interface {
88+
Schema() (bigquery.Schema, error)
89+
}
90+
91+
// shortNameOf returns the short type name of the underlying schemaGenerator type.
92+
// NOTE: the generator must reference an underlying pointer type,
93+
// e.g. `&schema.NDTResultRow{}` not `schema.NDTResultRow{}`
94+
func shortNameOf(g schemaGenerator) string {
95+
return strings.ToLower(reflect.TypeOf(g).Elem().Name())
96+
}
97+
98+
func main() {
99+
flag.Parse()
100+
flagx.ArgsFromEnv(flag.CommandLine)
101+
102+
generators := []schemaGenerator{
103+
&schema.NDTResultRow{},
104+
// TODO(https://github.com/m-lab/etl/issues/745): Add additional types once
105+
// "standard columns" are resolved.
106+
}
107+
108+
for _, current := range generators {
109+
name := shortNameOf(current)
110+
schema, err := current.Schema()
111+
rtx.Must(err, "Failed to generate Schema for %s", name)
112+
113+
var b []byte
114+
switch outputFormat {
115+
case "md":
116+
b = generateMarkdown(schema)
117+
default:
118+
log.Fatalf("Unsupported output format: %q", outputFormat)
119+
}
120+
121+
file := path.Join(outputDirectory, "schema_"+name+"."+outputFormat)
122+
log.Printf("Writing %s", file)
123+
err = ioutil.WriteFile(file, b, 0644)
124+
rtx.Must(err, "Failed to write file: %q", file)
125+
}
126+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright 2019 ETL Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
//////////////////////////////////////////////////////////////////////////////
15+
16+
// generate_schema_docs uses ETL schema field descriptions to generate
17+
// documentation in various formats.
18+
package main
19+
20+
import (
21+
"io/ioutil"
22+
"os"
23+
"path"
24+
"testing"
25+
26+
"github.com/m-lab/go/rtx"
27+
)
28+
29+
func Test_main(t *testing.T) {
30+
tmpdir, err := ioutil.TempDir("", "testing")
31+
rtx.Must(err, "Failed to create temporary directory")
32+
outputDirectory = tmpdir
33+
defer os.RemoveAll(tmpdir)
34+
35+
main() // no crash == working
36+
37+
// Check for expected files in tmpdir
38+
_, err = os.Stat(path.Join(tmpdir, "schema_ndtresultrow.md"))
39+
if err != nil {
40+
t.Errorf("main() missing output file; missing schema_ndtresultrow.md")
41+
}
42+
}

cmd/update-schema/update.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ func CreateOrUpdatePT(project string, dataset string, table string) error {
4343
return CreateOrUpdate(schema, project, dataset, table)
4444
}
4545

46-
func CreateOrUpdateNDTResult(project string, dataset string, table string) error {
47-
row := schema.NDTResult{}
46+
func CreateOrUpdateNDTResultRow(project string, dataset string, table string) error {
47+
row := schema.NDTResultRow{}
4848
schema, err := row.Schema()
49-
rtx.Must(err, "NDTResult.Schema")
49+
rtx.Must(err, "NDTResultRow.Schema")
5050
return CreateOrUpdate(schema, project, dataset, table)
5151
}
5252

@@ -119,10 +119,10 @@ func main() {
119119
if err := CreateOrUpdatePT(project, "batch", "traceroute"); err != nil {
120120
errCount++
121121
}
122-
if err := CreateOrUpdateNDTResult(project, "base_tables", "ndt5"); err != nil {
122+
if err := CreateOrUpdateNDTResultRow(project, "base_tables", "ndt5"); err != nil {
123123
errCount++
124124
}
125-
if err := CreateOrUpdateNDTResult(project, "batch", "ndt5"); err != nil {
125+
if err := CreateOrUpdateNDTResultRow(project, "batch", "ndt5"); err != nil {
126126
errCount++
127127
}
128128

@@ -143,10 +143,10 @@ func main() {
143143
}
144144

145145
case "ndt5":
146-
if err := CreateOrUpdateNDTResult(project, "base_tables", "ndt5"); err != nil {
146+
if err := CreateOrUpdateNDTResultRow(project, "base_tables", "ndt5"); err != nil {
147147
errCount++
148148
}
149-
if err := CreateOrUpdateNDTResult(project, "batch", "ndt5"); err != nil {
149+
if err := CreateOrUpdateNDTResultRow(project, "batch", "ndt5"); err != nil {
150150
errCount++
151151
}
152152

parser/ndt_result.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ func (dp *NDTResultParser) IsParsable(testName string, data []byte) (string, boo
4545
return "unknown", false
4646
}
4747

48-
// NOTE: NDTResult data is a JSON object that should be pushed directly into BigQuery.
48+
// NOTE: data.NDTResult is a JSON object that should be pushed directly into BigQuery.
4949
// We read the value into a struct, for compatibility with current inserter
5050
// backend and to eventually rely on the schema inference in m-lab/go/bqx.CreateTable().
5151

52-
// ParseAndInsert decodes the NDT Result JSON data and inserts it into BQ.
52+
// ParseAndInsert decodes the data.NDTResult JSON and inserts it into BQ.
5353
func (dp *NDTResultParser) ParseAndInsert(meta map[string]bigquery.Value, testName string, test []byte) error {
5454
// TODO: derive 'ndt5' (or 'ndt7') labels from testName.
5555
metrics.WorkerState.WithLabelValues(dp.TableName(), "ndt_result").Inc()
@@ -68,7 +68,7 @@ func (dp *NDTResultParser) ParseAndInsert(meta map[string]bigquery.Value, testNa
6868
rowCount := 0
6969

7070
for dec.More() {
71-
stats := schema.NDTResult{
71+
stats := schema.NDTResultRow{
7272
TestID: testName,
7373
ParseInfo: &schema.ParseInfo{
7474
TaskFileName: meta["filename"].(string),

parser/ndt_result_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func TestNDTResultParser_ParseAndInsert(t *testing.T) {
4747
if ins.Accepted() != 1 {
4848
t.Fatalf("Failed to insert snaplog data.")
4949
}
50-
actualValues := ins.data[0].(schema.NDTResult)
50+
actualValues := ins.data[0].(schema.NDTResultRow)
5151
if actualValues.Result.Control == nil {
5252
t.Fatal("Result.Control is nil, expected value")
5353
}

0 commit comments

Comments (0)