From 7007f230313a44ecac2b920d2c74a435b9d1fb79 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 28 Aug 2019 10:17:17 -0500 Subject: [PATCH 01/40] move picsv command over from go-pilosa batch ingest branch --- cmd/picsv/.gitignore | 2 + cmd/picsv/Makefile | 3 + cmd/picsv/main.go | 323 +++++++++++++++++++++++++++++++ cmd/picsv/main_internal_test.go | 20 ++ cmd/picsv/main_test.go | 331 ++++++++++++++++++++++++++++++++ cmd/picsv/testdata/sample.csv | 8 + go.mod | 20 +- go.sum | 205 ++++++++++++++++++++ 8 files changed, 899 insertions(+), 13 deletions(-) create mode 100644 cmd/picsv/.gitignore create mode 100644 cmd/picsv/Makefile create mode 100644 cmd/picsv/main.go create mode 100644 cmd/picsv/main_internal_test.go create mode 100644 cmd/picsv/main_test.go create mode 100644 cmd/picsv/testdata/sample.csv diff --git a/cmd/picsv/.gitignore b/cmd/picsv/.gitignore new file mode 100644 index 0000000..d9f1f6e --- /dev/null +++ b/cmd/picsv/.gitignore @@ -0,0 +1,2 @@ +marketing-*.csv +config.json diff --git a/cmd/picsv/Makefile b/cmd/picsv/Makefile new file mode 100644 index 0000000..d8276e1 --- /dev/null +++ b/cmd/picsv/Makefile @@ -0,0 +1,3 @@ + +bench: + GO111MODULE=on go test -bench=. -run=ZZZ -benchtime=3x diff --git a/cmd/picsv/main.go b/cmd/picsv/main.go new file mode 100644 index 0000000..ba8b928 --- /dev/null +++ b/cmd/picsv/main.go @@ -0,0 +1,323 @@ +package main + +import ( + "encoding/csv" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strconv" + "time" + + "github.com/jaffee/commandeer" + "github.com/pilosa/go-pilosa" + "github.com/pilosa/go-pilosa/gpexp" + "github.com/pkg/errors" +) + +type Main struct { + Pilosa []string + File string + Index string + BatchSize int + ConfigFile string + + Config *Config `flag:"-"` +} + +func NewMain() *Main { + return &Main{ + Pilosa: []string{"localhost:10101"}, + File: "data.csv", + Index: "picsvtest", + BatchSize: 1000, + + Config: NewConfig(), + } +} + +func (m *Main) Run() error { + start := time.Now() + + // Load Config File (if available) + if m.ConfigFile != "" { + f, err := os.Open(m.ConfigFile) + if err != nil { + return errors.Wrap(err, "opening config file") + } + dec := json.NewDecoder(f) + err = dec.Decode(m.Config) + if err != nil { + return errors.Wrap(err, "decoding config file") + } + } + log.Printf("Flags: %+v\n", *m) + log.Printf("Config: %+v\n", *m.Config) + + f, err := os.Open(m.File) + if err != nil { + return errors.Wrap(err, "opening file") + } + defer f.Close() + reader := csv.NewReader(f) + + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + schema, err := client.Schema() + if err != nil { + return errors.Wrap(err, "getting schema") + } + opts := []pilosa.IndexOption{} + if m.Config.IDField != "" { + opts = append(opts, pilosa.OptIndexKeys(true)) + } + index := schema.Index(m.Index, opts...) + + headerRow, err := reader.Read() + if err != nil { + return errors.Wrap(err, "reading CSV header") + } + log.Println("Got Header: ", headerRow) + fields, header, getIDFn, err := processHeader(m.Config, index, headerRow) + if err != nil { + return errors.Wrap(err, "processing header") + } + + // this has a non-obvious dependence on the previous line... 
the fields are set up in the index which comes from the schema + client.SyncSchema(schema) + batch, err := gpexp.NewBatch(client, m.BatchSize, index, fields) + if err != nil { + return errors.Wrap(err, "getting new batch") + } + record := gpexp.Row{ + Values: make([]interface{}, len(header)), + } + + numRecords := uint64(0) + for row, err := reader.Read(); err == nil; row, err = reader.Read() { + record.ID = getIDFn(row, numRecords) + for _, meta := range header { + if meta.srcIndex < len(row) { + record.Values[meta.recordIndex] = meta.valGetter(row[meta.srcIndex]) + } else { + record.Values[meta.recordIndex] = nil + log.Printf("row is shorter than header: %v", row) + } + } + err := batch.Add(record) + if err == gpexp.ErrBatchNowFull { + err := batch.Import() + if err != nil { + return errors.Wrap(err, "importing") + } + } else if err != nil { + return errors.Wrap(err, "adding to batch") + } + + numRecords++ + } + + if err != io.EOF && err != nil { + return errors.Wrap(err, "reading csv") + } + err = batch.Import() + if err != nil { + return errors.Wrap(err, "final import") + } + + log.Printf("processed %d ids\n", numRecords) + log.Println("Duration: ", time.Since(start)) + return nil +} + +type valueMeta struct { + srcIndex int + recordIndex int + valGetter func(val string) interface{} +} + +type idGetter func(row []string, numRecords uint64) interface{} + +func processHeader(config *Config, index *pilosa.Index, headerRow []string) ([]*pilosa.Field, map[string]valueMeta, idGetter, error) { + fields := make([]*pilosa.Field, 0, len(headerRow)) + header := make(map[string]valueMeta) + getIDFn := func(row []string, numRecords uint64) interface{} { + return numRecords + } + for i, fieldName := range headerRow { + if fieldName == config.IDField { + idIndex := i + switch config.IDType { + case "uint64": + getIDFn = func(row []string, numRecords uint64) interface{} { + uintVal, err := strconv.ParseUint(row[idIndex], 0, 64) + if err != nil { + return nil + } + return uintVal + } + case "string": + getIDFn = func(row []string, numRecords uint64) interface{} { + return row[idIndex] + } + default: + return nil, nil, nil, errors.Errorf("unknown IDType: %s", config.IDType) + } + continue + } + + var valGetter func(val string) interface{} + srcField, ok := config.SourceFields[fieldName] + if !ok { + srcField = SourceField{ + TargetField: fieldName, + Type: "string", + } + config.SourceFields[fieldName] = srcField + } + pilosaField, ok := config.PilosaFields[srcField.TargetField] + if !ok { + pilosaField = Field{ + Type: "set", + CacheType: pilosa.CacheTypeRanked, + CacheSize: 100000, + Keys: true, + } + config.PilosaFields[fieldName] = pilosaField + } + + fieldName = srcField.TargetField + switch srcField.Type { + case "ignore": + continue + case "int": + valGetter = func(val string) interface{} { + intVal, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return nil + } + return intVal + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "float": + if srcField.Multiplier != 0 { + valGetter = func(val string) interface{} { + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + return nil + } + return int64(floatVal * srcField.Multiplier) + } + } else { + valGetter = func(val string) interface{} { + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + return nil + } + return int64(floatVal) + } + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "string": + valGetter = func(val string) interface{} 
{ + if val == "" { + return nil // ignore empty strings + } + return val + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "uint64": + valGetter = func(val string) interface{} { + uintVal, err := strconv.ParseUint(val, 0, 64) + if err != nil { + return nil + } + return uintVal + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + } + header[fieldName] = valueMeta{ + valGetter: valGetter, + srcIndex: i, + recordIndex: len(fields) - 1, + } + } + + return fields, header, getIDFn, nil +} + +func main() { + if err := commandeer.Run(NewMain()); err != nil { + log.Fatal(err) + } +} + +func NewConfig() *Config { + return &Config{ + PilosaFields: make(map[string]Field), + SourceFields: make(map[string]SourceField), + IDType: "string", + } +} + +type Config struct { + PilosaFields map[string]Field `json:"pilosa-fields"` + SourceFields map[string]SourceField `json:"source-fields"` + + // IDField denotes which field in the source should be used for Pilosa record IDs. + IDField string `json:"id-field"` + + // IDType denotes whether the ID field should be parsed as a string or uint64. + IDType string `json:"id-type"` +} + +type Field struct { + Type string `json:"type"` + Min int64 `json:"min"` + Max int64 `json:"max"` + Keys bool `json:"keys"` + CacheType pilosa.CacheType `json:"cache-type"` + CacheSize int `json:"cache-size"` + // TODO time stuff +} + +func (f Field) MakeOptions() (opts []pilosa.FieldOption) { + switch f.Type { + case "set": + opts = append(opts, pilosa.OptFieldKeys(f.Keys), pilosa.OptFieldTypeSet(f.CacheType, f.CacheSize)) + case "int": + if f.Max != 0 || f.Min != 0 { + opts = append(opts, pilosa.OptFieldTypeInt(f.Min, f.Max)) + } else { + opts = append(opts, pilosa.OptFieldTypeInt()) + } + default: + panic(fmt.Sprintf("unknown pilosa field type: %s", f.Type)) + } + return opts +} + +type SourceField struct { + // TargetField is the Pilosa field that this source field should map to. + TargetField string `json:"target-field"` + + // Type denotes how the source field should be parsed. (string, + // int, rowID, float, or ignore). rowID means that the field will + // be parsed as a uint64 and then used directly as a rowID for a + // set field. If "string", key translation must be on for that + // Pilosa field, and it must be a set field. If int or float, it + // must be a Pilosa int field. + Type string `json:"type"` + + // Multiplier is for float fields. Because Pilosa does not support + // floats natively, it is sometimes useful to store a float in + // Pilosa as an integer, but first multiplied by some constant + // factor to preserve some amount of precision. If 0 this field won't be used. + Multiplier float64 `json:"multiplier"` +} + +// TODO we should validate the Config once it is constructed. +// What are valid mappings from source fields to pilosa fields? 
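For reference, the Config above can also be populated directly in code instead of decoding a JSON file through ConfigFile; TestImportCSV later in this patch does exactly that. Below is a minimal sketch of the same pattern. It assumes it sits next to main_test.go in cmd/picsv (a main package is only reachable from its own external test package), and the CSV path, index name, and the "id"/"salary" columns are hypothetical. Any source column not listed in SourceFields falls back to a string-typed set field, per processHeader above.

package main_test

import (
	"testing"

	picsv "github.com/pilosa/pdk/cmd/picsv"
)

// TestProgrammaticConfigSketch is an illustration of configuring the importer
// in code, mirroring TestImportCSV. The file, index, and column names are
// hypothetical.
func TestProgrammaticConfigSketch(t *testing.T) {
	t.Skip("illustration only; needs a running Pilosa and a real CSV file")

	m := picsv.NewMain()
	m.Pilosa = []string{"localhost:10101"} // the default, shown for clarity
	m.File = "people.csv"
	m.Index = "people"
	m.BatchSize = 1 << 16

	// Use the "id" column as the record key; a non-empty IDField turns on
	// index keys, and IDType "string" uses the column value as-is.
	m.Config.IDField = "id"
	m.Config.IDType = "string"

	// Ingest the float "salary" column into a Pilosa int field, multiplied
	// by 100 so two decimal places survive the integer conversion.
	m.Config.SourceFields["salary"] = picsv.SourceField{TargetField: "salary", Type: "float", Multiplier: 100}
	m.Config.PilosaFields["salary"] = picsv.Field{Type: "int"}

	if err := m.Run(); err != nil {
		t.Fatalf("running import: %v", err)
	}
}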
diff --git a/cmd/picsv/main_internal_test.go b/cmd/picsv/main_internal_test.go new file mode 100644 index 0000000..bfae155 --- /dev/null +++ b/cmd/picsv/main_internal_test.go @@ -0,0 +1,20 @@ +package main + +import ( + "strings" + "testing" +) + +func TestProcessHeader(t *testing.T) { + config := NewConfig() + headerRow := []string{"a", "b", "c"} + + t.Run("invalid IDType", func(t *testing.T) { + config.IDField = "a" + config.IDType = "nope" + _, _, _, err := processHeader(config, nil, headerRow) + if err == nil || !strings.Contains(err.Error(), "unknown IDType") { + t.Fatalf("unknown IDType gave: %v", err) + } + }) +} diff --git a/cmd/picsv/main_test.go b/cmd/picsv/main_test.go new file mode 100644 index 0000000..bb10221 --- /dev/null +++ b/cmd/picsv/main_test.go @@ -0,0 +1,331 @@ +package main_test + +import ( + "fmt" + "io" + "net/http" + "os" + "testing" + + "github.com/pilosa/go-pilosa" + picsv "github.com/pilosa/pdk/cmd/picsv" + "github.com/pkg/errors" +) + +func BenchmarkImportCSV(b *testing.B) { + m := picsv.NewMain() + m.BatchSize = 1 << 20 + m.Index = "picsvbench" + m.File = "marketing-200k.csv" + getRawData(b, m.File) + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + b.Fatalf("getting client: %v", err) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + err := m.Run() + if err != nil { + b.Fatalf("running import: %v", err) + } + b.StopTimer() + err = client.DeleteIndexByName(m.Index) + if err != nil { + b.Fatalf("deleting index: %v", err) + } + b.StartTimer() + } + +} + +func getRawData(t testing.TB, file string) { + if _, err := os.Open(file); err == nil { + return + } else if !os.IsNotExist(err) { + t.Fatalf("opening %s: %v", file, err) + } + // if the file doesn't exist + f, err := os.Create(file) + if err != nil { + t.Fatalf("creating file: %v", err) + } + resp, err := http.Get(fmt.Sprintf("https://molecula-sample-data.s3.amazonaws.com/%s", file)) + if err != nil { + t.Fatalf("getting data: %v", err) + } + if resp.StatusCode > 299 { + t.Fatalf("getting data failed: %v", resp.Status) + } + _, err = io.Copy(f, resp.Body) + if err != nil { + t.Fatalf("copying data into file: %v", err) + } + + err = f.Close() + if err != nil { + t.Fatalf("closing file: %v", err) + } + +} + +func TestImportCSV(t *testing.T) { + m := picsv.NewMain() + m.BatchSize = 100000 + m.Index = "testpicsv" + m.File = "marketing-200k.csv" + m.Config.SourceFields["age"] = picsv.SourceField{TargetField: "age", Type: "float"} + m.Config.PilosaFields["age"] = picsv.Field{Type: "int"} + m.Config.IDField = "id" + getRawData(t, m.File) + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + t.Fatalf("getting client: %v", err) + } + + defer func() { + err = client.DeleteIndexByName(m.Index) + if err != nil { + t.Fatalf("deleting index: %v", err) + } + }() + err = m.Run() + if err != nil { + t.Fatalf("running ingest: %v", err) + } + + schema, err := client.Schema() + if err != nil { + t.Fatalf("getting schema: %v", err) + } + + index := schema.Index(m.Index) + marital := index.Field("marital") + converted := index.Field("converted") + age := index.Field("age") + + tests := []struct { + query *pilosa.PQLRowQuery + bash string + exp int64 + }{ + { + query: marital.Row("married"), + bash: `awk -F, '/married/ {print $1,$4}' marketing-200k.csv | sort | uniq | wc`, + exp: 125514, + }, + { + query: converted.Row("no"), + bash: `awk -F, '{print $1,$17}' marketing-200k.csv | grep "no" |sort | uniq | wc`, + exp: 199999, + }, + { + query: age.Equals(55), + bash: `awk -F, '{print $1,$2}' 
marketing-200k.csv | grep " 55.0" |sort | uniq | wc`, + exp: 3282, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + q := index.Count(test.query) + resp, err := client.Query(q) + if err != nil { + t.Fatalf("running query '%s': %v", q.Serialize(), err) + } + if resp.Result().Count() != test.exp { + t.Fatalf("Got unexpected result %d instead of %d for\nquery: %s\nbash: %s", resp.Result().Count(), test.exp, q.Serialize(), test.bash) + } + }) + } +} + +func TestSmallImport(t *testing.T) { + m := picsv.NewMain() + m.BatchSize = 1 << 20 + m.Index = "testsample" + m.File = "testdata/sample.csv" + m.ConfigFile = "config.json" + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + t.Fatalf("getting client: %v", err) + } + defer func() { + err = client.DeleteIndexByName(m.Index) + if err != nil { + t.Logf("deleting index: %v", err) + } + }() + config := `{ +"pilosa-fields": {"size": {"type": "set", "keys": true, "cache-type": "ranked", "cache-size": 100000}, + "age": {"type": "int"}, + "color": {"type": "set", "keys": true}, + "result": {"type": "int"}, + "dayofweek": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 7} + }, +"id-field": "ID", +"id-type": "string", +"source-fields": { + "Size": {"target-field": "size", "type": "string"}, + "Color": {"target-field": "color", "type": "string"}, + "Age": {"target-field": "age", "type": "int"}, + "Result": {"target-field": "result", "type": "float", "multiplier": 100000000}, + "dayofweek": {"target-field": "dayofweek", "type": "uint64"} + } +} +` + data := ` +ID,Size,Color,Age,Result,dayofweek +ABDJ,small,green,42,1.13106317,1 +HFZP,large,red,99,30.23959735,2 +HFZP,small,green,99,NA,3 +EJSK,medium,purple,22,20.23959735,1 +EJSK,large,green,35,25.13106317, +FEFF,,,,,6 +` + writeFile(t, m.ConfigFile, config) + writeFile(t, m.File, data) + + err = m.Run() + if err != nil { + t.Fatalf("running ingest: %v", err) + } + + schema, err := client.Schema() + if err != nil { + t.Fatalf("getting schema: %v", err) + } + + index := schema.Index(m.Index) + size := index.Field("size") + color := index.Field("color") + age := index.Field("age") + result := index.Field("result") + day := index.Field("dayofweek") + + tests := []struct { + query pilosa.PQLQuery + resType string + exp interface{} + }{ + { + query: index.Count(size.Row("small")), + resType: "count", + exp: int64(2), + }, + { + query: size.Row("small"), + resType: "rowKeys", + exp: []string{"ABDJ", "HFZP"}, + }, + { + query: color.Row("green"), + resType: "rowKeys", + exp: []string{"ABDJ", "HFZP", "EJSK"}, + }, + { + query: age.Equals(99), + resType: "rowKeys", + exp: []string{"HFZP"}, + }, + { + query: age.GT(0), + resType: "rowKeys", + exp: []string{"ABDJ", "HFZP", "EJSK"}, + }, + { + query: result.GT(0), + resType: "rowKeys", + exp: []string{"ABDJ", "EJSK"}, + }, + { + query: result.GT(100000), + resType: "rowKeys", + exp: []string{"ABDJ", "EJSK"}, + }, + { + query: day.Row(1), + resType: "rowKeys", + exp: []string{"ABDJ", "EJSK"}, + }, + { + query: day.Row(6), + resType: "rowKeys", + exp: []string{"FEFF"}, + }, + { + query: index.Count(day.Row(3)), + resType: "count", + exp: int64(1), + }, + { + query: index.Count(day.Row(2)), + resType: "count", + exp: int64(1), // not mutually exclusive! + }, + { + query: size.Row(`""`), // TODO... go-pilosa should probably serialize keys into PQL using quotes. 
+ resType: "rowKeys", + exp: []string{}, // empty strings are ignored rather than ingested + }, + { + query: color.Row(`""`), + resType: "rowKeys", + exp: []string{}, // empty strings are ignored rather than ingested + }, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + resp, err := client.Query(test.query) + if err != nil { + t.Fatalf("running query: %v", err) + } + res := resp.Result() + switch test.resType { + case "count": + if res.Count() != test.exp.(int64) { + t.Fatalf("unexpected count %d is not %d", res.Count(), test.exp.(int64)) + } + case "rowKeys": + got := res.Row().Keys + exp := test.exp.([]string) + if err := isPermutationOf(got, exp); err != nil { + t.Fatalf("unequal rows %v expected/got:\n%v\n%v", err, exp, got) + } + } + }) + } + +} + +func writeFile(t testing.TB, name, contents string) { + cf, err := os.Create(name) + if err != nil { + t.Fatalf("creating config file: %v", err) + } + _, err = cf.Write([]byte(contents)) + if err != nil { + t.Fatalf("writing config file: %v", err) + } +} + +func isPermutationOf(one, two []string) error { + if len(one) != len(two) { + return errors.Errorf("different lengths %d and %d", len(one), len(two)) + } +outer: + for _, vOne := range one { + for j, vTwo := range two { + if vOne == vTwo { + two = append(two[:j], two[j+1:]...) + continue outer + } + } + return errors.Errorf("%s in one but not two", vOne) + } + if len(two) != 0 { + return errors.Errorf("vals in two but not one: %v", two) + } + return nil +} diff --git a/cmd/picsv/testdata/sample.csv b/cmd/picsv/testdata/sample.csv new file mode 100644 index 0000000..2804233 --- /dev/null +++ b/cmd/picsv/testdata/sample.csv @@ -0,0 +1,8 @@ + +ID,Size,Color,Age,Result,dayofweek +ABDJ,small,green,42,1.13106317,1 +HFZP,large,red,99,30.23959735,2 +HFZP,small,green,99,NA,3 +EJSK,medium,purple,22,20.23959735,1 +EJSK,large,green,35,25.13106317, +FEFF,,,,,6 diff --git a/go.mod b/go.mod index ab818e7..c570cde 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,10 @@ module github.com/pilosa/pdk +replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa + require ( - github.com/BurntSushi/toml v0.3.1 // indirect github.com/Shopify/sarama v1.19.0 github.com/Shopify/toxiproxy v2.1.4+incompatible // indirect - github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect github.com/aws/aws-sdk-go v1.15.88 github.com/boltdb/bolt v1.3.1 github.com/bsm/sarama-cluster v2.1.15+incompatible @@ -12,27 +12,21 @@ require ( github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect github.com/eapache/queue v1.1.0 // indirect github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba - github.com/go-ole/go-ole v1.2.1 // indirect - github.com/gorilla/context v1.1.1 // indirect github.com/hashicorp/go-uuid v1.0.1 // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect - github.com/jaffee/commandeer v0.1.0 + github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 - github.com/miekg/dns v1.1.1 // indirect github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 // indirect - github.com/pilosa/go-pilosa v1.3.1-0.20190612142550-e616c1393660 - github.com/pilosa/pilosa v1.2.1-0.20190410162749-b973f8c96356 + github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6 + 
github.com/pilosa/pilosa v1.3.1 github.com/pkg/errors v0.8.1 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect - github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4 // indirect - github.com/spf13/cobra v0.0.3 + github.com/spf13/cobra v0.0.5 github.com/spf13/pflag v1.0.3 - github.com/spf13/viper v1.3.1 + github.com/spf13/viper v1.4.0 github.com/stretchr/testify v1.3.0 // indirect github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 - golang.org/x/net v0.0.0-20181201002055-351d144fa1fc // indirect gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) diff --git a/go.sum b/go.sum index 98fcdc2..98828af 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,10 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.43.0/go.mod h1:BOSR3VbTLkk6FDC/TcffxP4NF/FFBGA5ku+jvKOP7pg= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CAFxX/gcnotifier v0.0.0-20170518020117-39b0596a2da3 h1:bZrDXM2lN6jLwij+LZ7OUZvhP3VjPZp9iCDC/FG+SC0= github.com/CAFxX/gcnotifier v0.0.0-20170518020117-39b0596a2da3/go.mod h1:Rn2zM2MnHze07LwkneP48TWt6UiZhzQTwCvw6djVGfE= github.com/CAFxX/gcnotifier v0.0.0-20190112062741-224a280d589d h1:n0G4ckjMEj7bWuGYUX0i8YlBeBBJuZ+HEHvHfyBDZtI= @@ -14,25 +19,39 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWso github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIOA6tDi6QXUemppXK3P9BI7mr2hd6gx8= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/aws/aws-sdk-go v1.15.88 h1:Om0MayFrixOds/PrbBey2Cg/lkNEIyOrAF2RFXLwmnE= github.com/aws/aws-sdk-go v1.15.88/go.mod h1:es1KtYUFs7le0xQ3rOihkuoVD90z7D0fR2Qm4S00/gU= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bsm/sarama-cluster v2.1.15+incompatible h1:RkV6WiNRnqEEbp81druK8zYhmnIgdOjqSVi0+9Cnl2A= github.com/bsm/sarama-cluster v2.1.15+incompatible/go.mod h1:r7ao+4tTNXvWm+VRpRJchr2kQhqxgmAp2iEX5W96gMM= 
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/eapache/go-resiliency v1.1.0 h1:1NtRmCAqadE2FN4ZcN6g90TP3uk8cg9rn9eNK2197aU= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 h1:YEetp8/yCZMuEPMUDHG0CW/brkkEp8mzqk2+ODEitlw= @@ -43,28 +62,61 @@ github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba h1:QkK2L3uvEaZJ40i github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba/go.mod h1:3A7SOsr8WBIpkWUsqzMpR3tIQbanKqxZcis2GSl12Nk= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-ole/go-ole v1.2.1 h1:2lOsA72HgjxAuMlKpFiCbHTvu44PIVkZ5hqm3RSdI/E= github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= +github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 
github.com/gogo/protobuf v1.2.0 h1:xU6/SpYbvkNYiptHJYEDRseDLvYE7wSqhYYNy0QSUzI= github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/protobuf v0.0.0-20170427213220-18c9bb326172/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c h1:964Od4U6p2jUkFxvCydnIczKteheJEzHRToSGK3Bnlw= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/handlers v1.3.0 h1:tsg9qP3mjt1h4Roxp+M1paRjrVBfPSOpBuVclh6YluI= github.com/gorilla/handlers v1.3.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/handlers v1.4.1/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/mux v1.4.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.0 h1:tOSd0UKHQd6urX6ApfOn4XdBMY6Sh1MfxV3kmaazO+U= github.com/gorilla/mux v1.7.0/go.mod 
h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.9.4/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= @@ -81,6 +133,7 @@ github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1 github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/memberlist v0.1.3 h1:EmmoJme1matNzb+hMpDuR/0sbJSUisxyqBGG676r31M= @@ -91,20 +144,39 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMSWU= github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= +github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= +github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 
github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 h1:T+kPxsfvkFtz7x6ysgOYjki7khHjowQW6DD1rcpOS0Q= github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088/go.mod h1:vL3ODoWTPCBSeKVFgQ+lvSq0VOzTB5TcXvUX+4pU/+Q= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.1 h1:DVkblRdiScEnEr0LR9nTnEQqHYycjkXW9bOjd+2EL2o= github.com/miekg/dns v1.1.1/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.15/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.0.0/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 h1:QlTAK00UrY80KK9Da+foE04AjxhXFrgp87aZB6yfU5c= github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294/go.mod h1:oNZxQo5yWJh0eMQEP/8hwQuVx9Z9tjwFUqcTB1SmG0c= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -120,6 +192,7 @@ github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181 github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 h1:wKfigKMTgvSzBLIVvB5QaBBQI0odU6n45/UKSphjLus= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= +github.com/pilosa/demo-taxi v0.0.0-20190604185441-6b6ef983bff7/go.mod h1:DM8Umjg0r/UscmOs49RJeE0WUb8Nj4PLUj4J02vigLk= github.com/pilosa/go-pilosa v1.2.0 h1:EgokWNJt/yYRX1P09+uDy7QI3jUKa42iu6pe8hB6umE= github.com/pilosa/go-pilosa v1.2.0/go.mod h1:uli4HiTymHocSAXJ9XpDbkH6kS63P8Yc0xyWDzooouc= github.com/pilosa/go-pilosa v1.2.1-0.20190321212254-72b91a013211 h1:2NZOJBJoB2TjeSP1LkMYQfttqWyTHXRdAez+Mn4qDa4= @@ -132,6 +205,7 @@ github.com/pilosa/go-pilosa v1.3.1-0.20190503193736-ad53edf56c18 h1:uUA588w4MeX0 github.com/pilosa/go-pilosa v1.3.1-0.20190503193736-ad53edf56c18/go.mod h1:9ECbvb0EQJvjxBups5CUCzeLh8KrLgQVY9/1zSoQHQE= github.com/pilosa/go-pilosa v1.3.1-0.20190612142550-e616c1393660 h1:0UUfONtKBe4n1yIRLshVPNCJbtXdg/WKNLDqmCxu/uw= github.com/pilosa/go-pilosa v1.3.1-0.20190612142550-e616c1393660/go.mod h1:9ECbvb0EQJvjxBups5CUCzeLh8KrLgQVY9/1zSoQHQE= +github.com/pilosa/pilosa v0.0.0-20181115192138-84148d4ee6c0/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= github.com/pilosa/pilosa v0.0.0-20181130171212-dfb748ec5b01 h1:gtkt282G8/+8XN0D9+934/3zpUiEtsFINjqCs+vZs04= 
github.com/pilosa/pilosa v0.0.0-20181130171212-dfb748ec5b01/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= github.com/pilosa/pilosa v0.0.0-20190104143002-8c4b1548bc4b h1:2H/+JUxL4dv0uJ4G4i+C83S1yq/+pUrHHjsF8TEY85I= @@ -142,6 +216,9 @@ github.com/pilosa/pilosa v1.2.1-0.20190401200108-927e8b89425e h1:leJjlNm0+3Vbbp7 github.com/pilosa/pilosa v1.2.1-0.20190401200108-927e8b89425e/go.mod h1:rRLglQ1zRxKarDMyHhsLR+0XhacXWUNrNXaAs69b1LQ= github.com/pilosa/pilosa v1.2.1-0.20190410162749-b973f8c96356 h1:jDxhpV4l+CpKqVVgld73e9/EyogdCcO1ftbCvifrhSc= github.com/pilosa/pilosa v1.2.1-0.20190410162749-b973f8c96356/go.mod h1:QN7EwQwoQHNPVsd7CHXFDasPznLDA6DPswmnLr4eJ6o= +github.com/pilosa/pilosa v1.3.1 h1:rLDVqJBuRzhPtue730D+EX0YEVS4R0oDzsE4bJBwLcE= +github.com/pilosa/pilosa v1.3.1/go.mod h1:97yLL9mpUqOj9naKu5XA/b/U6JLe3JGGUlc2HOTDw+A= +github.com/pilosa/tools v0.0.0-20190810124639-ee77232ff3aa/go.mod h1:n/Od1ErfFlaIEueOaQjlbo06EzKuRhSPxUGR3xmfEqE= github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= @@ -149,9 +226,23 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rakyll/statik v0.0.0-20170410192944-89fe3459b5c8/go.mod h1:OEi9wJV/fMUAGx1eNjq75DKDsJVuEv1U0oYdX6GX8Zs= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod 
h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= @@ -162,6 +253,8 @@ github.com/shirou/gopsutil v2.18.12+incompatible h1:1eaJvGomDnH74/5cF4CTmTbLHAri github.com/shirou/gopsutil v2.18.12+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4 h1:udFKJ0aHUL60LboW/A+DfgoHVedieIzIXE8uylPue0U= github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= @@ -171,8 +264,10 @@ github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v0.0.0-20170427125145-f1d95a35e132/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -180,48 +275,158 @@ github.com/spf13/viper v1.2.1 h1:bIcUwXqLseLF3BDAZduuNfekWG87ibtFxi59Bq+oI9M= github.com/spf13/viper v1.2.1/go.mod h1:P4AexN0a+C9tGAnUFNwDMYYZv3pjFuvmeiMyKRaNVlI= github.com/spf13/viper v1.3.1 h1:5+8j8FTpnFV4nEImW/ofkzEt8VoOiLXxdYIDsB73T38= github.com/spf13/viper v1.3.1/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 h1:GnOzE5fEFN3b2zDhJJABEofdb51uMRNb8eqIVtdducs= github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/uber/jaeger-client-go v2.15.0+incompatible h1:NP3qsSqNxh8VYr956ur1N/1C1PjvOJnJykCzcD5QHbk= 
github.com/uber/jaeger-client-go v2.15.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v1.5.0 h1:OHbgr8l656Ub3Fw5k9SWnBfIEwvoHQ+W2y+Aa9D1Uyo= github.com/uber/jaeger-lib v1.5.0/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190718202018-cfdd5522f6f6/go.mod h1:JhuoJpWY28nO4Vef9tZUw9qufEGTyX1+7lmHxV5q5G4= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190703141733-d6a02ce849c9/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod 
h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181201002055-351d144fa1fc h1:a3CU5tJYVj92DY2LaA1kUkrsqD5/3mLDhx2NcNqyW+0= golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180906133057-8cf3aee42992/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35 h1:YAFjXN64LMvktoUZH9zgY4lGc/msGN7HQfoSuKCgaDU= golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a h1:1n5lsVfiQW3yfsRGu98756EH1YthsFqr/5mxHduZW2A= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools 
v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190719005602-e377ae9d6386/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190716160619-c506a9f90610/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4= gopkg.in/linkedin/goavro.v1 v1.0.5/go.mod h1:Aw5GdAbizjOEl0kAMHV9iHmA8reZzW/OKuJAl4Hb9F0= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= From 55d019f71972ad9dfaced4945eb404a7a84958ab Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 28 Aug 2019 12:37:43 -0500 Subject: [PATCH 02/40] unfortunate move+refactor of picsv batch stuff moves most of the CSV logic out of cmd/picsv and into csv package. refactor to be a bit cleaner and get ready to handle multiple sources of CSV data. --- Makefile | 1 + cmd/picsv/main.go | 313 +----------------- {cmd/picsv => csv}/.gitignore | 0 {cmd/picsv => csv}/Makefile | 0 csv/batch.go | 333 ++++++++++++++++++++ cmd/picsv/main_test.go => csv/batch_test.go | 4 +- csv/config.json | 17 + {cmd/picsv => csv}/main_internal_test.go | 10 +- {cmd/picsv => csv}/testdata/sample.csv | 0 go.sum | 12 + 10 files changed, 373 insertions(+), 317 deletions(-) rename {cmd/picsv => csv}/.gitignore (100%) rename {cmd/picsv => csv}/Makefile (100%) create mode 100644 csv/batch.go rename cmd/picsv/main_test.go => csv/batch_test.go (99%) create mode 100644 csv/config.json rename {cmd/picsv => csv}/main_internal_test.go (67%) rename {cmd/picsv => csv}/testdata/sample.csv (100%) diff --git a/Makefile b/Makefile index 7e683d2..43b4780 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,7 @@ crossbuild: install: go install $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/pdk + go install $(LDFLAGS) $(FLAGS) $(CLONE_URL)/cmd/picsv gometalinter: vendor GO111MODULE=off gometalinter --vendor --disable-all \ diff --git a/cmd/picsv/main.go b/cmd/picsv/main.go index ba8b928..803c4eb 100644 --- a/cmd/picsv/main.go +++ b/cmd/picsv/main.go @@ -1,323 +1,14 @@ package main import ( - "encoding/csv" - "encoding/json" - "fmt" - "io" "log" - "os" - "strconv" - "time" "github.com/jaffee/commandeer" - "github.com/pilosa/go-pilosa" - "github.com/pilosa/go-pilosa/gpexp" - "github.com/pkg/errors" + "github.com/pilosa/pdk/csv" ) -type Main struct { - Pilosa []string - File string - Index string - BatchSize int - ConfigFile string - - Config *Config `flag:"-"` -} - -func NewMain() *Main { - return &Main{ - Pilosa: []string{"localhost:10101"}, - File: "data.csv", - Index: "picsvtest", - BatchSize: 1000, - - Config: NewConfig(), - } -} - -func (m *Main) Run() error { - start := time.Now() - - // Load Config File (if available) - if m.ConfigFile != "" { - f, err := os.Open(m.ConfigFile) - if err != nil { - return errors.Wrap(err, "opening config file") - } - dec := json.NewDecoder(f) - err = dec.Decode(m.Config) - if err != nil { - return errors.Wrap(err, "decoding config file") - } - } - log.Printf("Flags: %+v\n", *m) - log.Printf("Config: %+v\n", *m.Config) - - f, err := os.Open(m.File) - if err != nil { - return errors.Wrap(err, "opening file") - } - defer f.Close() - reader := csv.NewReader(f) - - client, err := pilosa.NewClient(m.Pilosa) - if err != nil { - return errors.Wrap(err, "getting pilosa client") - } - schema, err := client.Schema() - if err != nil { - return errors.Wrap(err, "getting schema") - } - opts := []pilosa.IndexOption{} - if m.Config.IDField != "" { - opts = append(opts, pilosa.OptIndexKeys(true)) - } - index := 
schema.Index(m.Index, opts...) - - headerRow, err := reader.Read() - if err != nil { - return errors.Wrap(err, "reading CSV header") - } - log.Println("Got Header: ", headerRow) - fields, header, getIDFn, err := processHeader(m.Config, index, headerRow) - if err != nil { - return errors.Wrap(err, "processing header") - } - - // this has a non-obvious dependence on the previous line... the fields are set up in the index which comes from the schema - client.SyncSchema(schema) - batch, err := gpexp.NewBatch(client, m.BatchSize, index, fields) - if err != nil { - return errors.Wrap(err, "getting new batch") - } - record := gpexp.Row{ - Values: make([]interface{}, len(header)), - } - - numRecords := uint64(0) - for row, err := reader.Read(); err == nil; row, err = reader.Read() { - record.ID = getIDFn(row, numRecords) - for _, meta := range header { - if meta.srcIndex < len(row) { - record.Values[meta.recordIndex] = meta.valGetter(row[meta.srcIndex]) - } else { - record.Values[meta.recordIndex] = nil - log.Printf("row is shorter than header: %v", row) - } - } - err := batch.Add(record) - if err == gpexp.ErrBatchNowFull { - err := batch.Import() - if err != nil { - return errors.Wrap(err, "importing") - } - } else if err != nil { - return errors.Wrap(err, "adding to batch") - } - - numRecords++ - } - - if err != io.EOF && err != nil { - return errors.Wrap(err, "reading csv") - } - err = batch.Import() - if err != nil { - return errors.Wrap(err, "final import") - } - - log.Printf("processed %d ids\n", numRecords) - log.Println("Duration: ", time.Since(start)) - return nil -} - -type valueMeta struct { - srcIndex int - recordIndex int - valGetter func(val string) interface{} -} - -type idGetter func(row []string, numRecords uint64) interface{} - -func processHeader(config *Config, index *pilosa.Index, headerRow []string) ([]*pilosa.Field, map[string]valueMeta, idGetter, error) { - fields := make([]*pilosa.Field, 0, len(headerRow)) - header := make(map[string]valueMeta) - getIDFn := func(row []string, numRecords uint64) interface{} { - return numRecords - } - for i, fieldName := range headerRow { - if fieldName == config.IDField { - idIndex := i - switch config.IDType { - case "uint64": - getIDFn = func(row []string, numRecords uint64) interface{} { - uintVal, err := strconv.ParseUint(row[idIndex], 0, 64) - if err != nil { - return nil - } - return uintVal - } - case "string": - getIDFn = func(row []string, numRecords uint64) interface{} { - return row[idIndex] - } - default: - return nil, nil, nil, errors.Errorf("unknown IDType: %s", config.IDType) - } - continue - } - - var valGetter func(val string) interface{} - srcField, ok := config.SourceFields[fieldName] - if !ok { - srcField = SourceField{ - TargetField: fieldName, - Type: "string", - } - config.SourceFields[fieldName] = srcField - } - pilosaField, ok := config.PilosaFields[srcField.TargetField] - if !ok { - pilosaField = Field{ - Type: "set", - CacheType: pilosa.CacheTypeRanked, - CacheSize: 100000, - Keys: true, - } - config.PilosaFields[fieldName] = pilosaField - } - - fieldName = srcField.TargetField - switch srcField.Type { - case "ignore": - continue - case "int": - valGetter = func(val string) interface{} { - intVal, err := strconv.ParseInt(val, 10, 64) - if err != nil { - return nil - } - return intVal - } - fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) - case "float": - if srcField.Multiplier != 0 { - valGetter = func(val string) interface{} { - floatVal, err := strconv.ParseFloat(val, 64) - if err 
!= nil { - return nil - } - return int64(floatVal * srcField.Multiplier) - } - } else { - valGetter = func(val string) interface{} { - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - return nil - } - return int64(floatVal) - } - } - fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) - case "string": - valGetter = func(val string) interface{} { - if val == "" { - return nil // ignore empty strings - } - return val - } - fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) - case "uint64": - valGetter = func(val string) interface{} { - uintVal, err := strconv.ParseUint(val, 0, 64) - if err != nil { - return nil - } - return uintVal - } - fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) - } - header[fieldName] = valueMeta{ - valGetter: valGetter, - srcIndex: i, - recordIndex: len(fields) - 1, - } - } - - return fields, header, getIDFn, nil -} - func main() { - if err := commandeer.Run(NewMain()); err != nil { + if err := commandeer.Run(csv.NewMain()); err != nil { log.Fatal(err) } } - -func NewConfig() *Config { - return &Config{ - PilosaFields: make(map[string]Field), - SourceFields: make(map[string]SourceField), - IDType: "string", - } -} - -type Config struct { - PilosaFields map[string]Field `json:"pilosa-fields"` - SourceFields map[string]SourceField `json:"source-fields"` - - // IDField denotes which field in the source should be used for Pilosa record IDs. - IDField string `json:"id-field"` - - // IDType denotes whether the ID field should be parsed as a string or uint64. - IDType string `json:"id-type"` -} - -type Field struct { - Type string `json:"type"` - Min int64 `json:"min"` - Max int64 `json:"max"` - Keys bool `json:"keys"` - CacheType pilosa.CacheType `json:"cache-type"` - CacheSize int `json:"cache-size"` - // TODO time stuff -} - -func (f Field) MakeOptions() (opts []pilosa.FieldOption) { - switch f.Type { - case "set": - opts = append(opts, pilosa.OptFieldKeys(f.Keys), pilosa.OptFieldTypeSet(f.CacheType, f.CacheSize)) - case "int": - if f.Max != 0 || f.Min != 0 { - opts = append(opts, pilosa.OptFieldTypeInt(f.Min, f.Max)) - } else { - opts = append(opts, pilosa.OptFieldTypeInt()) - } - default: - panic(fmt.Sprintf("unknown pilosa field type: %s", f.Type)) - } - return opts -} - -type SourceField struct { - // TargetField is the Pilosa field that this source field should map to. - TargetField string `json:"target-field"` - - // Type denotes how the source field should be parsed. (string, - // int, rowID, float, or ignore). rowID means that the field will - // be parsed as a uint64 and then used directly as a rowID for a - // set field. If "string", key translation must be on for that - // Pilosa field, and it must be a set field. If int or float, it - // must be a Pilosa int field. - Type string `json:"type"` - - // Multiplier is for float fields. Because Pilosa does not support - // floats natively, it is sometimes useful to store a float in - // Pilosa as an integer, but first multiplied by some constant - // factor to preserve some amount of precision. If 0 this field won't be used. - Multiplier float64 `json:"multiplier"` -} - -// TODO we should validate the Config once it is constructed. -// What are valid mappings from source fields to pilosa fields? 
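With this patch the command itself is reduced to handing csv.NewMain() to commandeer, so the same ingester can also be driven directly from Go instead of through command-line flags. The following is a minimal, illustrative sketch (not part of the patch): it uses only the exported fields of csv.Main as they exist at this point in the series, and the file paths and index name are placeholders.

```
package main

import (
	"log"

	"github.com/pilosa/pdk/csv"
)

func main() {
	// Start from the defaults that csv.NewMain sets up, then override
	// the fields that would otherwise be populated from flags.
	m := csv.NewMain()
	m.Pilosa = []string{"localhost:10101"} // Pilosa cluster to ingest into
	m.File = "testdata/sample.csv"         // CSV file to read (placeholder)
	m.Index = "picsvtest"                  // index to create/use
	m.BatchSize = 1000                     // records per import batch
	m.ConfigFile = "config.json"           // optional source-field mapping (placeholder)

	if err := m.Run(); err != nil {
		log.Fatal(err)
	}
}
```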
diff --git a/cmd/picsv/.gitignore b/csv/.gitignore similarity index 100% rename from cmd/picsv/.gitignore rename to csv/.gitignore diff --git a/cmd/picsv/Makefile b/csv/Makefile similarity index 100% rename from cmd/picsv/Makefile rename to csv/Makefile diff --git a/csv/batch.go b/csv/batch.go new file mode 100644 index 0000000..b0a37a5 --- /dev/null +++ b/csv/batch.go @@ -0,0 +1,333 @@ +package csv + +import ( + "encoding/csv" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strconv" + "time" + + "github.com/pilosa/go-pilosa" + "github.com/pilosa/go-pilosa/gpexp" + "github.com/pkg/errors" +) + +type Main struct { + Pilosa []string + File string + Index string + BatchSize int + ConfigFile string + + Config *Config `flag:"-"` +} + +func NewMain() *Main { + return &Main{ + Pilosa: []string{"localhost:10101"}, + File: "data.csv", + Index: "picsvtest", + BatchSize: 1000, + + Config: NewConfig(), + } +} + +func (m *Main) Run() error { + start := time.Now() + + // Load Config File (if available) + if m.ConfigFile != "" { + f, err := os.Open(m.ConfigFile) + if err != nil { + return errors.Wrap(err, "opening config file") + } + dec := json.NewDecoder(f) + err = dec.Decode(m.Config) + if err != nil { + return errors.Wrap(err, "decoding config file") + } + } + log.Printf("Flags: %+v\n", *m) + log.Printf("Config: %+v\n", *m.Config) + + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + schema, err := client.Schema() + if err != nil { + return errors.Wrap(err, "getting schema") + } + opts := []pilosa.IndexOption{} + if m.Config.IDField != "" { + opts = append(opts, pilosa.OptIndexKeys(true)) + } + index := schema.Index(m.Index, opts...) + + /////////////////////////////////////////////////////// + // for each file to process (just one right now) + f, err := os.Open(m.File) + if err != nil { + return errors.Wrap(err, "opening file") + } + defer f.Close() + reader := csv.NewReader(f) + + batch, parseConfig, err := processHeader(m.Config, client, index, reader, m.BatchSize) + if err != nil { + return errors.Wrap(err, "processing header") + } + // this has a non-obvious dependence on processHeader which sets up fields. TODO Do this inside processHeader? + client.SyncSchema(schema) + + // TODO send actual file processing to worker pool. 
+ num, err := processFile(reader, batch, parseConfig) + if err != nil { + return errors.Wrapf(err, "processing %s", f.Name()) + } + log.Printf("Num: %d, Duration: %s", num, time.Since(start)) + return nil +} + +func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uint64, err error) { + record := gpexp.Row{ + Values: make([]interface{}, len(pc.fieldConfig)), + } + numRecords := uint64(0) + recsImported := numRecords + var row []string + for row, err = reader.Read(); err == nil; row, err = reader.Read() { + record.ID = pc.getID(row, numRecords) + numRecords++ + for _, meta := range pc.fieldConfig { + record.Values[meta.recordIndex] = meta.valGetter(row[meta.srcIndex]) + } + err := batch.Add(record) + if err == gpexp.ErrBatchNowFull { + err := batch.Import() + if err != nil { + return recsImported, errors.Wrap(err, "importing") + } + recsImported = numRecords + } else if err != nil { + return recsImported, errors.Wrap(err, "adding to batch") + } + } + + if err != io.EOF && err != nil { + return recsImported, errors.Wrapf(err, "reading csv, record %v", row) + } + err = batch.Import() + if err != nil { + return recsImported, errors.Wrap(err, "final import") + } + recsImported = numRecords + + return recsImported, nil +} + +type parseConfig struct { + getID func(row []string, numRecords uint64) interface{} + // terrible name + fieldConfig map[string]valueMeta +} + +// terrible name +type valueMeta struct { + srcIndex int + recordIndex int + // terrible name + valGetter func(val string) interface{} +} + +func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, reader *csv.Reader, batchSize int) (*gpexp.Batch, *parseConfig, error) { + headerRow, err := reader.Read() + if err != nil { + return nil, nil, errors.Wrap(err, "reading CSV header") + } + + pc := &parseConfig{ + fieldConfig: make(map[string]valueMeta), + } + pc.getID = func(row []string, numRecords uint64) interface{} { + return numRecords + } + + fields := make([]*pilosa.Field, 0, len(headerRow)) + for i, fieldName := range headerRow { + if fieldName == config.IDField { + idIndex := i + switch config.IDType { + case "uint64": + pc.getID = func(row []string, numRecords uint64) interface{} { + uintVal, err := strconv.ParseUint(row[idIndex], 0, 64) + if err != nil { + return nil + } + return uintVal + } + case "string": + pc.getID = func(row []string, numRecords uint64) interface{} { + return row[idIndex] + } + default: + return nil, nil, errors.Errorf("unknown IDType: %s", config.IDType) + } + continue + } + + var valGetter func(val string) interface{} + srcField, ok := config.SourceFields[fieldName] + if !ok { + srcField = SourceField{ + TargetField: fieldName, + Type: "string", + } + config.SourceFields[fieldName] = srcField + } + pilosaField, ok := config.PilosaFields[srcField.TargetField] + if !ok { + pilosaField = Field{ + Type: "set", + CacheType: pilosa.CacheTypeRanked, + CacheSize: 100000, + Keys: true, + } + config.PilosaFields[fieldName] = pilosaField + } + + fieldName = srcField.TargetField + switch srcField.Type { + case "ignore": + continue + case "int": + valGetter = func(val string) interface{} { + intVal, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return nil + } + return intVal + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "float": + if srcField.Multiplier != 0 { + valGetter = func(val string) interface{} { + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + return nil + } + return int64(floatVal * 
srcField.Multiplier) + } + } else { + valGetter = func(val string) interface{} { + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + return nil + } + return int64(floatVal) + } + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "string": + valGetter = func(val string) interface{} { + if val == "" { + return nil // ignore empty strings + } + return val + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "uint64": + valGetter = func(val string) interface{} { + uintVal, err := strconv.ParseUint(val, 0, 64) + if err != nil { + return nil + } + return uintVal + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + } + pc.fieldConfig[fieldName] = valueMeta{ + valGetter: valGetter, + srcIndex: i, + recordIndex: len(fields) - 1, + } + } + + batch, err := gpexp.NewBatch(client, batchSize, index, fields) + if err != nil { + return nil, nil, errors.Wrap(err, "getting new batch") + } + + return batch, pc, nil +} + +func NewConfig() *Config { + return &Config{ + PilosaFields: make(map[string]Field), + SourceFields: make(map[string]SourceField), + IDType: "string", + } +} + +type Config struct { + PilosaFields map[string]Field `json:"pilosa-fields"` + SourceFields map[string]SourceField `json:"source-fields"` + + // IDField denotes which field in the source should be used for Pilosa record IDs. + IDField string `json:"id-field"` + + // IDType denotes whether the ID field should be parsed as a string or uint64. + IDType string `json:"id-type"` +} + +type Field struct { + Type string `json:"type"` + Min int64 `json:"min"` + Max int64 `json:"max"` + Keys bool `json:"keys"` + CacheType pilosa.CacheType `json:"cache-type"` + CacheSize int `json:"cache-size"` + // TODO time stuff +} + +func (f Field) MakeOptions() (opts []pilosa.FieldOption) { + switch f.Type { + case "set": + opts = append(opts, pilosa.OptFieldKeys(f.Keys), pilosa.OptFieldTypeSet(f.CacheType, f.CacheSize)) + case "int": + if f.Max != 0 || f.Min != 0 { + opts = append(opts, pilosa.OptFieldTypeInt(f.Min, f.Max)) + } else { + opts = append(opts, pilosa.OptFieldTypeInt()) + } + default: + panic(fmt.Sprintf("unknown pilosa field type: %s", f.Type)) + } + return opts +} + +type SourceField struct { + // TargetField is the Pilosa field that this source field should map to. + TargetField string `json:"target-field"` + + // Type denotes how the source field should be parsed. (string, + // int, rowID, float, or ignore). rowID means that the field will + // be parsed as a uint64 and then used directly as a rowID for a + // set field. If "string", key translation must be on for that + // Pilosa field, and it must be a set field. If int or float, it + // must be a Pilosa int field. + Type string `json:"type"` + + // Multiplier is for float fields. Because Pilosa does not support + // floats natively, it is sometimes useful to store a float in + // Pilosa as an integer, but first multiplied by some constant + // factor to preserve some amount of precision. If 0 this field won't be used. + Multiplier float64 `json:"multiplier"` +} + +// TODO we should validate the Config once it is constructed. +// What are valid mappings from source fields to pilosa fields? 
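The core of the new csv/batch.go is the Add/Import loop in processFile. As a rough, self-contained sketch of that pattern, using only the gpexp API already exercised in this patch (NewBatch, Row, ErrBatchNowFull, Import); the rows slice is a hypothetical stand-in for parsed CSV records:

```
package example

import (
	"github.com/pilosa/go-pilosa"
	"github.com/pilosa/go-pilosa/gpexp"
	"github.com/pkg/errors"
)

// ingestRows mirrors processFile: add rows until the batch reports it is
// full, import, and flush whatever remains at the end. client, index, and
// fields are assumed to come from schema setup as in processHeader.
func ingestRows(client *pilosa.Client, index *pilosa.Index, fields []*pilosa.Field, rows []gpexp.Row, batchSize int) error {
	batch, err := gpexp.NewBatch(client, batchSize, index, fields)
	if err != nil {
		return errors.Wrap(err, "getting new batch")
	}
	for _, row := range rows {
		err := batch.Add(row)
		if err == gpexp.ErrBatchNowFull {
			// The batch is full: import it, then keep adding to the same batch.
			if err := batch.Import(); err != nil {
				return errors.Wrap(err, "importing")
			}
		} else if err != nil {
			return errors.Wrap(err, "adding to batch")
		}
	}
	// Import the final, partially filled batch.
	return errors.Wrap(batch.Import(), "final import")
}
```

As in processFile, the same batch keeps accepting Add calls after each Import, so one batch object can serve an entire file.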
diff --git a/cmd/picsv/main_test.go b/csv/batch_test.go similarity index 99% rename from cmd/picsv/main_test.go rename to csv/batch_test.go index bb10221..2a45731 100644 --- a/cmd/picsv/main_test.go +++ b/csv/batch_test.go @@ -1,4 +1,4 @@ -package main_test +package csv_test import ( "fmt" @@ -8,7 +8,7 @@ import ( "testing" "github.com/pilosa/go-pilosa" - picsv "github.com/pilosa/pdk/cmd/picsv" + picsv "github.com/pilosa/pdk/csv" "github.com/pkg/errors" ) diff --git a/csv/config.json b/csv/config.json new file mode 100644 index 0000000..fa3aece --- /dev/null +++ b/csv/config.json @@ -0,0 +1,17 @@ +{ +"pilosa-fields": {"size": {"type": "set", "keys": true, "cache-type": "ranked", "cache-size": 100000}, + "age": {"type": "int"}, + "color": {"type": "set", "keys": true}, + "result": {"type": "int"}, + "dayofweek": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 7} + }, +"id-field": "ID", +"id-type": "string", +"source-fields": { + "Size": {"target-field": "size", "type": "string"}, + "Color": {"target-field": "color", "type": "string"}, + "Age": {"target-field": "age", "type": "int"}, + "Result": {"target-field": "result", "type": "float", "multiplier": 100000000}, + "dayofweek": {"target-field": "dayofweek", "type": "uint64"} + } +} diff --git a/cmd/picsv/main_internal_test.go b/csv/main_internal_test.go similarity index 67% rename from cmd/picsv/main_internal_test.go rename to csv/main_internal_test.go index bfae155..12cf298 100644 --- a/cmd/picsv/main_internal_test.go +++ b/csv/main_internal_test.go @@ -1,18 +1,20 @@ -package main +package csv import ( + "encoding/csv" "strings" "testing" ) func TestProcessHeader(t *testing.T) { config := NewConfig() - headerRow := []string{"a", "b", "c"} - + file := `a,b,c +` + reader := csv.NewReader(strings.NewReader(file)) t.Run("invalid IDType", func(t *testing.T) { config.IDField = "a" config.IDType = "nope" - _, _, _, err := processHeader(config, nil, headerRow) + _, _, err := processHeader(config, nil, nil, reader, 10) if err == nil || !strings.Contains(err.Error(), "unknown IDType") { t.Fatalf("unknown IDType gave: %v", err) } diff --git a/cmd/picsv/testdata/sample.csv b/csv/testdata/sample.csv similarity index 100% rename from cmd/picsv/testdata/sample.csv rename to csv/testdata/sample.csv diff --git a/go.sum b/go.sum index 98828af..7acf054 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,7 @@ github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.0 h1:xU6/SpYbvkNYiptHJYEDRseDLvYE7wSqhYYNy0QSUzI= github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -92,6 +93,7 @@ github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pO github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c h1:964Od4U6p2jUkFxvCydnIczKteheJEzHRToSGK3Bnlw= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod 
h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -105,12 +107,14 @@ github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8 github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/handlers v1.3.0 h1:tsg9qP3mjt1h4Roxp+M1paRjrVBfPSOpBuVclh6YluI= github.com/gorilla/handlers v1.3.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/handlers v1.4.1 h1:BHvcRGJe/TrL+OqFxoKQGddTgeibiOjaBssV5a/N9sw= github.com/gorilla/handlers v1.4.1/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= github.com/gorilla/mux v1.4.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk= github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.0 h1:tOSd0UKHQd6urX6ApfOn4XdBMY6Sh1MfxV3kmaazO+U= github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= @@ -133,6 +137,7 @@ github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1 github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= @@ -167,6 +172,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5 github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.1 h1:DVkblRdiScEnEr0LR9nTnEQqHYycjkXW9bOjd+2EL2o= github.com/miekg/dns v1.1.1/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.15 h1:CSSIDtllwGLMoA6zjdKnaE6Tx6eVUxQ29LUgGetiDCI= github.com/miekg/dns v1.1.15/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.0.0/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= @@ -264,6 +270,7 @@ github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= 
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= @@ -276,6 +283,7 @@ github.com/spf13/viper v1.2.1/go.mod h1:P4AexN0a+C9tGAnUFNwDMYYZv3pjFuvmeiMyKRaN github.com/spf13/viper v1.3.1 h1:5+8j8FTpnFV4nEImW/ofkzEt8VoOiLXxdYIDsB73T38= github.com/spf13/viper v1.3.1/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -309,6 +317,7 @@ golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -342,6 +351,7 @@ golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -371,10 +381,12 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI= golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 1979e4f73ead1a2b2333bd86a8a4fcaa2cb94302 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 28 Aug 2019 22:14:01 -0500 Subject: [PATCH 03/40] set up for concurrent/sharded id allocation --- csv/batch.go | 55 ++++++++++++++------ csv/main_internal_test.go | 2 +- idallocator.go | 106 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 idallocator.go diff --git a/csv/batch.go b/csv/batch.go index b0a37a5..19203cf 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -12,15 +12,17 @@ import ( "github.com/pilosa/go-pilosa" "github.com/pilosa/go-pilosa/gpexp" + "github.com/pilosa/pdk" "github.com/pkg/errors" ) type Main struct { - Pilosa []string - File string - Index string - BatchSize int - ConfigFile string + Pilosa []string `help:"Comma separated list of host:port describing Pilosa cluster."` + File string + Index string `help:"Name of index to ingest data into."` + BatchSize int `help:"Number of records to put in a batch before importing to Pilosa."` + ConfigFile string `help:"JSON configuration describing source fields, and how to parse and map them to Pilosa fields."` + RangeAllocator string `help:"Designates where to retrieve unused ranged of record IDs (if generating ids). If left blank, generate locally starting from 0."` Config *Config `flag:"-"` } @@ -67,6 +69,12 @@ func (m *Main) Run() error { opts = append(opts, pilosa.OptIndexKeys(true)) } index := schema.Index(m.Index, opts...) 
+ shardWidth := index.ShardWidth() + if shardWidth == 0 { + shardWidth = pilosa.DefaultShardWidth + } + // TODO currently ignoring m.RangeAllocator + ra := pdk.NewLocalRangeAllocator(shardWidth) /////////////////////////////////////////////////////// // for each file to process (just one right now) @@ -77,7 +85,13 @@ func (m *Main) Run() error { defer f.Close() reader := csv.NewReader(f) - batch, parseConfig, err := processHeader(m.Config, client, index, reader, m.BatchSize) + nexter, err := pdk.NewRangeNexter(ra) + if err != nil { + return errors.Wrap(err, "getting nexter") + } + defer nexter.Return() + + batch, parseConfig, err := processHeader(m.Config, client, index, reader, m.BatchSize, nexter) if err != nil { return errors.Wrap(err, "processing header") } @@ -101,7 +115,10 @@ func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uin recsImported := numRecords var row []string for row, err = reader.Read(); err == nil; row, err = reader.Read() { - record.ID = pc.getID(row, numRecords) + record.ID, err = pc.getID(row) + if err != nil { + return recsImported, errors.Wrap(err, "getting record ID") + } numRecords++ for _, meta := range pc.fieldConfig { record.Values[meta.recordIndex] = meta.valGetter(row[meta.srcIndex]) @@ -131,9 +148,11 @@ func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uin } type parseConfig struct { - getID func(row []string, numRecords uint64) interface{} + getID func(row []string) (interface{}, error) // terrible name fieldConfig map[string]valueMeta + + r pdk.IDRange } // terrible name @@ -144,7 +163,7 @@ type valueMeta struct { valGetter func(val string) interface{} } -func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, reader *csv.Reader, batchSize int) (*gpexp.Batch, *parseConfig, error) { +func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, reader *csv.Reader, batchSize int, n pdk.RangeNexter) (*gpexp.Batch, *parseConfig, error) { headerRow, err := reader.Read() if err != nil { return nil, nil, errors.Wrap(err, "reading CSV header") @@ -153,8 +172,12 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r pc := &parseConfig{ fieldConfig: make(map[string]valueMeta), } - pc.getID = func(row []string, numRecords uint64) interface{} { - return numRecords + pc.getID = func(row []string) (interface{}, error) { + next, err := n.Next() + if err != nil { + return nil, err + } + return next, nil } fields := make([]*pilosa.Field, 0, len(headerRow)) @@ -163,16 +186,16 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r idIndex := i switch config.IDType { case "uint64": - pc.getID = func(row []string, numRecords uint64) interface{} { + pc.getID = func(row []string) (interface{}, error) { uintVal, err := strconv.ParseUint(row[idIndex], 0, 64) if err != nil { - return nil + return nil, nil // we don't want to stop because we couldn't parse an ID here... but really it should be up to the caller to determine whether or not it should stop. TODO fix. 
} - return uintVal + return uintVal, nil } case "string": - pc.getID = func(row []string, numRecords uint64) interface{} { - return row[idIndex] + pc.getID = func(row []string) (interface{}, error) { + return row[idIndex], nil } default: return nil, nil, errors.Errorf("unknown IDType: %s", config.IDType) diff --git a/csv/main_internal_test.go b/csv/main_internal_test.go index 12cf298..e8108a6 100644 --- a/csv/main_internal_test.go +++ b/csv/main_internal_test.go @@ -14,7 +14,7 @@ func TestProcessHeader(t *testing.T) { t.Run("invalid IDType", func(t *testing.T) { config.IDField = "a" config.IDType = "nope" - _, _, err := processHeader(config, nil, nil, reader, 10) + _, _, err := processHeader(config, nil, nil, reader, 10, nil) if err == nil || !strings.Contains(err.Error(), "unknown IDType") { t.Fatalf("unknown IDType gave: %v", err) } diff --git a/idallocator.go b/idallocator.go new file mode 100644 index 0000000..881fcd6 --- /dev/null +++ b/idallocator.go @@ -0,0 +1,106 @@ +package pdk + +import ( + "fmt" + "math/bits" + "sync" + + "github.com/pkg/errors" +) + +type RangeAllocator interface { + Get() (*IDRange, error) + Return(*IDRange) error +} + +type RangeNexter interface { + Next() (uint64, error) + Return() error +} + +type LocalRangeAllocator struct { + shardWidth uint64 + next uint64 + returned []*IDRange + mu sync.Mutex +} + +func NewLocalRangeAllocator(shardWidth uint64) RangeAllocator { + if shardWidth < 1<<16 || bits.OnesCount64(shardWidth) > 1 { + panic(fmt.Sprintf("bad shardWidth in NewRangeAllocator: %d", shardWidth)) + } + return &LocalRangeAllocator{ + shardWidth: shardWidth, + } +} + +// IDRange is inclusive at Start and exclusive at End... like slices. +type IDRange struct { + Start uint64 + End uint64 +} + +type rangeNexter struct { + a RangeAllocator + r *IDRange +} + +func NewRangeNexter(a RangeAllocator) (RangeNexter, error) { + r, err := a.Get() + if err != nil { + return nil, errors.Wrap(err, "getting range") + } + return &rangeNexter{ + a: a, + r: r, + }, nil +} + +func (n *rangeNexter) Next() (uint64, error) { + var err error + if n.r.Start == n.r.End { + n.r, err = n.a.Get() + if err != nil { + return 0, errors.Wrap(err, "getting next range") + } + } + if n.r.Start >= n.r.End { + panic("Start is greater than End") + } + n.r.Start += 1 + return n.r.Start - 1, nil +} + +func (n *rangeNexter) Return() error { + return n.a.Return(n.r) +} + +func (a *LocalRangeAllocator) Get() (*IDRange, error) { + a.mu.Lock() + defer a.mu.Unlock() + n := len(a.returned) + if n > 0 { + ret := a.returned[n-1] + a.returned = a.returned[:n-1] + return ret, nil + } + ret := &IDRange{ + Start: a.next, + End: a.next + a.shardWidth, + } + a.next += a.shardWidth + return ret, nil +} + +func (a *LocalRangeAllocator) Return(r *IDRange) error { + if r.Start == r.End { + return nil + } + if r.Start > r.End { + return errors.Errorf("attempted to return range with start > end: %v", r) + } + a.mu.Lock() + a.returned = append(a.returned, r) + a.mu.Unlock() + return nil +} From 29a794e821afdfbf856c3af9cd7525086d4a383f Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Sat, 31 Aug 2019 21:14:54 -0500 Subject: [PATCH 04/40] add taxi data test, support timestamp parsing in picsv --- cmd/picsv/README.md | 106 +++++++ csv/.gitignore | 1 + csv/batch.go | 192 ++++++++++--- ...nternal_test.go => batch_internal_test.go} | 0 csv/batch_test.go | 272 ++++++++++++++---- csv/testdata/taxi/green_tripdata_2013-08.csv | 10 + csv/testdata/taxi/green_tripdata_2013-09.csv | 10 + 
csv/testdata/taxi/green_tripdata_2013-10.csv | 10 + csv/testdata/taxi/green_tripdata_2013-11.csv | 10 + csv/testdata/taxi/green_tripdata_2013-12.csv | 10 + csv/testdata/taxi/green_tripdata_2014-01.csv | 10 + csv/testdata/taxi/green_tripdata_2014-02.csv | 10 + csv/testdata/taxi/green_tripdata_2014-03.csv | 10 + csv/testdata/taxi/green_tripdata_2014-04.csv | 10 + csv/testdata/taxi/green_tripdata_2014-05.csv | 10 + csv/testdata/taxi/green_tripdata_2014-06.csv | 10 + csv/testdata/taxi/green_tripdata_2014-07.csv | 10 + csv/testdata/taxi/green_tripdata_2014-08.csv | 10 + csv/testdata/taxi/green_tripdata_2014-09.csv | 10 + csv/testdata/taxi/green_tripdata_2014-10.csv | 10 + csv/testdata/taxi/green_tripdata_2014-11.csv | 10 + csv/testdata/taxi/green_tripdata_2014-12.csv | 10 + csv/testdata/taxi/green_tripdata_2015-01.csv | 10 + csv/testdata/taxi/green_tripdata_2015-02.csv | 10 + csv/testdata/taxi/green_tripdata_2015-03.csv | 10 + csv/testdata/taxi/green_tripdata_2015-04.csv | 10 + csv/testdata/taxi/green_tripdata_2015-05.csv | 10 + csv/testdata/taxi/green_tripdata_2015-06.csv | 10 + csv/testdata/taxi/green_tripdata_2015-07.csv | 10 + csv/testdata/taxi/green_tripdata_2015-08.csv | 10 + csv/testdata/taxi/green_tripdata_2015-09.csv | 10 + csv/testdata/taxi/green_tripdata_2015-10.csv | 10 + csv/testdata/taxi/green_tripdata_2015-11.csv | 10 + csv/testdata/taxi/green_tripdata_2015-12.csv | 10 + csv/testdata/taxi/green_tripdata_2016-01.csv | 10 + csv/testdata/taxi/green_tripdata_2016-02.csv | 10 + csv/testdata/taxi/green_tripdata_2016-03.csv | 10 + csv/testdata/taxi/green_tripdata_2016-04.csv | 10 + csv/testdata/taxi/green_tripdata_2016-05.csv | 10 + csv/testdata/taxi/green_tripdata_2016-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2009-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2010-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-10.csv | 10 + 
csv/testdata/taxi/yellow_tripdata_2011-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2011-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2012-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2013-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2014-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-06.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-07.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-08.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-09.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-10.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-11.csv | 10 + csv/testdata/taxi/yellow_tripdata_2015-12.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-01.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-02.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-03.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-04.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-05.csv | 10 + csv/testdata/taxi/yellow_tripdata_2016-06.csv | 10 + go.mod | 1 + 131 files changed, 1719 insertions(+), 103 deletions(-) create mode 100644 cmd/picsv/README.md rename csv/{main_internal_test.go => batch_internal_test.go} (100%) create mode 100644 csv/testdata/taxi/green_tripdata_2013-08.csv create mode 100644 csv/testdata/taxi/green_tripdata_2013-09.csv create mode 100644 csv/testdata/taxi/green_tripdata_2013-10.csv create mode 100644 csv/testdata/taxi/green_tripdata_2013-11.csv create mode 100644 csv/testdata/taxi/green_tripdata_2013-12.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-01.csv create mode 100644 
csv/testdata/taxi/green_tripdata_2014-02.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-03.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-04.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-05.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-06.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-07.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-08.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-09.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-10.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-11.csv create mode 100644 csv/testdata/taxi/green_tripdata_2014-12.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-01.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-02.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-03.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-04.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-05.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-06.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-07.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-08.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-09.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-10.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-11.csv create mode 100644 csv/testdata/taxi/green_tripdata_2015-12.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-01.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-02.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-03.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-04.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-05.csv create mode 100644 csv/testdata/taxi/green_tripdata_2016-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2009-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2010-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-02.csv create mode 100644 
csv/testdata/taxi/yellow_tripdata_2011-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2011-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2012-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2013-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-08.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2014-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-06.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-07.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-08.csv create mode 100644 
csv/testdata/taxi/yellow_tripdata_2015-09.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-10.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-11.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2015-12.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-01.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-02.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-03.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-04.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-05.csv create mode 100644 csv/testdata/taxi/yellow_tripdata_2016-06.csv diff --git a/cmd/picsv/README.md b/cmd/picsv/README.md new file mode 100644 index 0000000..df42c56 --- /dev/null +++ b/cmd/picsv/README.md @@ -0,0 +1,106 @@ +I've been noodling on ingest to Pilosa for quite some +time. Historically, it's either been slow, difficult, or, if you were +particularly unlucky in the tools or documentation you stumbled upon, +both. The options have been: + +- Calling "SetBit" via PQL. Which is insanely slow, even when you + batch multiple calls in the same request. +- Using the `pilosa import` tool, which requires one to massage their + data one field at a time into CSV files of a particular format + before importing. +- Using Pilosa's import endpoints. There are a few variants of these + (import-value for integers, import-roaring for sets and time, and + import for everything else). They are fast, but not well-documented, + still one field at a time, and quite complex to use. +- Using the import functionality in the client libraries. These use + Pilosa's import endpoints under the hood, but they are still + per-field, and you pay a significant performance penalty for the + simpler interface they give you. +- Using PDK tools. These give a nice interface, and can, in some cases, + hide all the gory details and allow you to ingest data straight from + Kafka or CSV files without writing any code at all. They use + go-pilosa's import stuff underneath, and put an even larger + performance hit on top of it, so unfortunately, we're back into + "fairly slow" territory. + +The latest turn of this wheel has brought us yet another tool, one +which I'm quite sure is fast, and I hope will prove easier to use. The +basic workflow is this: + +1. Using a client library, create your schema as usual. +2. Create a Batch object, passing it an ordered list of Pilosa fields + you are going to be using. +3. Call `Batch.Add` with `Row` objects. A row is an ID (Pilosa + column), and a list of values which correspond to the list of + fields you passed in when creating the Batch. +4. When the batch is full, `Add` will return `ErrBatchNowFull`, and + then it's time to call `Batch.Import` to ingest the data to + Pilosa. `Import` does any necessary key translation and then + efficiently (and concurrently) imports all the data to Pilosa. +5. Repeat 3 and 4 for as long as you have records to ingest. + +Let's walk through an example of ingesting some tabular data in a CSV +file. + +``` +ID,Size,Color,Age +1,small,green,42 +2,large,red,99 +3,small,green,NA +4,small,,31 +``` + +First, you open the file, and read in the header. Create a field in +Pilosa for each item in the header (you do need to know what type each +is at this point). If one of the fields represents the "ID" of that +row, don't create a field for that one. Now, create a Batch object, +passing in the list of Fields you just made which matches up with the +CSV header. 
Create a `Row` object with a list of `Values` of equal +length to the list of fields. So for our example, we'll have a list of fields like `["Size", "Color", "Age"]`, and our `Row` object +will have an empty value list of length 3. + +Now, read in each line of the CSV file and parse each field as needed, +then set each value in the `Values` slice to the parsed value. Set +`Row.ID` to the ID from the first field and call `Batch.Add` with the +`Row` object. For the first line in our example file, the `Row` object +will look like: + +`{ID: 1, Values: {"small", "green", 42}}` + +Currently, there is an implementation of this in [a branch of +go-pilosa](https://github.com/jaffee/go-pilosa/tree/batch-ingest/gpexp) +that has a couple of neat properties. The routine calling `Batch.Add` can +reuse the same `Row` object each time it makes the call. This reduces +memory allocations, which decreases garbage collection and improves +cache usage. `Row.Values` is a `[]interface{}`, which in Go means it's +a list of objects that can have any type. The `Batch` implementation +does type checking and supports values of various types in various +ways. + +- A `uint64` will be treated directly as a Pilosa row ID. +- A `string` will be translated to a row ID (the corresponding field + must have keys enabled). +- An `int64` will be ingested as an integer — the corresponding field + must be an int field. +- A `nil` will be ignored. + +`Row.ID` can be a `string` or `uint64` depending on whether you want +to use column key translation on the index. + +Caveats: + +The current batch implementation does not support Pilosa time fields, +or boolean or mutex fields, though that is in the works. It probably +won't be a good interface for workloads with lots of fields (hundreds +or thousands) where many of them are often nil for any given record. + +If you want to see example usage of the Batch interface, check out the +code right [here](./batch.go) in the PDK's CSV tooling. The `picsv` +tool takes in CSV files and does its best to ingest them to Pilosa +performantly with minimal supervision. It does, however, have an +optional configuration which allows one to do basic things like +specify which fields are ints vs strings, and how the CSV field names +map onto Pilosa fields. There are some examples of this in the +[tests](./batch_test.go), and be on the lookout for a more complete +writeup with documentation, examples, and benchmarks soon! diff --git a/csv/.gitignore b/csv/.gitignore index d9f1f6e..8e24785 100644 --- a/csv/.gitignore +++ b/csv/.gitignore @@ -1,2 +1,3 @@ marketing-*.csv config.json +taxiconfig.json \ No newline at end of file diff --git a/csv/batch.go b/csv/batch.go index 19203cf..9f8924b 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -8,31 +8,36 @@ import ( "log" "os" "strconv" + "strings" + "sync" "time" "github.com/pilosa/go-pilosa" "github.com/pilosa/go-pilosa/gpexp" "github.com/pilosa/pdk" "github.com/pkg/errors" + "golang.org/x/sync/errgroup" ) type Main struct { Pilosa []string `help:"Comma separated list of host:port describing Pilosa cluster."` - File string + Files []string Index string `help:"Name of index to ingest data into."` BatchSize int `help:"Number of records to put in a batch before importing to Pilosa."` ConfigFile string `help:"JSON configuration describing source fields, and how to parse and map them to Pilosa fields."` RangeAllocator string `help:"Designates where to retrieve unused ranged of record IDs (if generating ids). 
If left blank, generate locally starting from 0."` + Concurrency int `help:"Number of goroutines to run processing files."` Config *Config `flag:"-"` } func NewMain() *Main { return &Main{ - Pilosa: []string{"localhost:10101"}, - File: "data.csv", - Index: "picsvtest", - BatchSize: 1000, + Pilosa: []string{"localhost:10101"}, + Files: []string{"data.csv"}, + Index: "picsvtest", + BatchSize: 1000, + Concurrency: 4, Config: NewConfig(), } @@ -53,8 +58,8 @@ func (m *Main) Run() error { return errors.Wrap(err, "decoding config file") } } - log.Printf("Flags: %+v\n", *m) - log.Printf("Config: %+v\n", *m.Config) + // log.Printf("Flags: %+v\n", *m) + // log.Printf("Config: %+v\n", *m.Config) client, err := pilosa.NewClient(m.Pilosa) if err != nil { @@ -76,38 +81,99 @@ func (m *Main) Run() error { // TODO currently ignoring m.RangeAllocator ra := pdk.NewLocalRangeAllocator(shardWidth) + jobs := make(chan fileJob, 0) + stats := make(chan jobReport, 0) + eg := errgroup.Group{} + for i := 0; i < m.Concurrency; i++ { + eg.Go(func() error { + fileProcessor(jobs, stats) + return nil + }) + } + + totalRecords := uint64(0) + mu := &sync.Mutex{} + mu.Lock() + go func() { + for stat := range stats { + // TODO add file name to stats + log.Printf("processed %s\n", stat) + totalRecords += stat.n + } + mu.Unlock() + }() + /////////////////////////////////////////////////////// // for each file to process (just one right now) - f, err := os.Open(m.File) - if err != nil { - return errors.Wrap(err, "opening file") - } - defer f.Close() - reader := csv.NewReader(f) + for _, filename := range m.Files { + f, err := os.Open(filename) + if err != nil { + return errors.Wrap(err, "opening file") + } + defer f.Close() + reader := csv.NewReader(f) + reader.ReuseRecord = true + reader.FieldsPerRecord = -1 - nexter, err := pdk.NewRangeNexter(ra) - if err != nil { - return errors.Wrap(err, "getting nexter") + nexter, err := pdk.NewRangeNexter(ra) + if err != nil { + return errors.Wrap(err, "getting nexter") + } + + batch, parseConfig, err := processHeader(m.Config, client, index, reader, m.BatchSize, nexter) + if err != nil { + return errors.Wrap(err, "processing header") + } + // this has a non-obvious dependence on processHeader which sets up fields. TODO Do this inside processHeader? + client.SyncSchema(schema) + + jobs <- fileJob{ + reader: reader, + batch: batch, + pc: parseConfig, + } } - defer nexter.Return() + close(jobs) + eg.Wait() + close(stats) + mu.Lock() + log.Printf("Processed %d records in %v", totalRecords, time.Since(start)) + return nil +} - batch, parseConfig, err := processHeader(m.Config, client, index, reader, m.BatchSize, nexter) - if err != nil { - return errors.Wrap(err, "processing header") +type jobReport struct { + n uint64 + err error + duration time.Duration +} + +func (j jobReport) String() string { + if j.err != nil { + return fmt.Sprintf("{n:%d duration:%s err:'%s'}", j.n, j.duration, j.err) } - // this has a non-obvious dependence on processHeader which sets up fields. TODO Do this inside processHeader? - client.SyncSchema(schema) + return fmt.Sprintf("{n:%d duration:%s}", j.n, j.duration) +} - // TODO send actual file processing to worker pool. 
- num, err := processFile(reader, batch, parseConfig) - if err != nil { - return errors.Wrapf(err, "processing %s", f.Name()) +type fileJob struct { + reader *csv.Reader + batch *gpexp.Batch + pc *parseConfig +} + +func fileProcessor(jobs <-chan fileJob, stats chan<- jobReport) { + for fj := range jobs { + start := time.Now() + n, err := processFile(fj.reader, fj.batch, fj.pc) + stats <- jobReport{ + n: n, + err: err, + duration: time.Since(start), + } } - log.Printf("Num: %d, Duration: %s", num, time.Since(start)) - return nil } func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uint64, err error) { + defer pc.nexter.Return() record := gpexp.Row{ Values: make([]interface{}, len(pc.fieldConfig)), } @@ -121,6 +187,9 @@ func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uin } numRecords++ for _, meta := range pc.fieldConfig { + if meta.srcIndex > len(row)-1 { + log.Printf("row: %v\nis not long enough %d is less than %d\n", row, len(row), len(pc.fieldConfig)) + } record.Values[meta.recordIndex] = meta.valGetter(row[meta.srcIndex]) } err := batch.Add(record) @@ -143,7 +212,6 @@ func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uin return recsImported, errors.Wrap(err, "final import") } recsImported = numRecords - return recsImported, nil } @@ -152,7 +220,8 @@ type parseConfig struct { // terrible name fieldConfig map[string]valueMeta - r pdk.IDRange + r pdk.IDRange + nexter pdk.RangeNexter } // terrible name @@ -163,7 +232,7 @@ type valueMeta struct { valGetter func(val string) interface{} } -func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, reader *csv.Reader, batchSize int, n pdk.RangeNexter) (*gpexp.Batch, *parseConfig, error) { +func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, reader *csv.Reader, batchSize int, nexter pdk.RangeNexter) (*gpexp.Batch, *parseConfig, error) { headerRow, err := reader.Read() if err != nil { return nil, nil, errors.Wrap(err, "reading CSV header") @@ -171,9 +240,10 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r pc := &parseConfig{ fieldConfig: make(map[string]valueMeta), + nexter: nexter, } pc.getID = func(row []string) (interface{}, error) { - next, err := n.Next() + next, err := pc.nexter.Next() // this is kind of weird... 
wish each fileProcessor had a nexter instead TODO if err != nil { return nil, err } @@ -181,8 +251,8 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r } fields := make([]*pilosa.Field, 0, len(headerRow)) - for i, fieldName := range headerRow { - if fieldName == config.IDField { + for i, srcFieldName := range headerRow { + if srcFieldName == config.IDField { idIndex := i switch config.IDType { case "uint64": @@ -204,13 +274,21 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r } var valGetter func(val string) interface{} - srcField, ok := config.SourceFields[fieldName] + srcField, ok := config.SourceFields[srcFieldName] if !ok { - srcField = SourceField{ - TargetField: fieldName, - Type: "string", + name := strings.ToLower(srcFieldName) + name = strings.TrimSpace(name) + name = strings.ReplaceAll(name, " ", "_") + // check if there is a normalized version of this name stored + srcField, ok = config.SourceFields[name] + // if not, create a new config for it + if !ok { + srcField = SourceField{ + TargetField: name, + Type: "string", + } } - config.SourceFields[fieldName] = srcField + config.SourceFields[srcFieldName] = srcField } pilosaField, ok := config.PilosaFields[srcField.TargetField] if !ok { @@ -220,10 +298,10 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r CacheSize: 100000, Keys: true, } - config.PilosaFields[fieldName] = pilosaField + config.PilosaFields[srcField.TargetField] = pilosaField } - fieldName = srcField.TargetField + fieldName := srcField.TargetField switch srcField.Type { case "ignore": continue @@ -263,7 +341,7 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r return val } fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) - case "uint64": + case "uint64", "rowID": valGetter = func(val string) interface{} { uintVal, err := strconv.ParseUint(val, 0, 64) if err != nil { @@ -272,6 +350,21 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r return uintVal } fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + case "time": + if srcField.TimeFormat == "" { + return nil, nil, errors.Errorf("need time format for source field %s of type time", srcFieldName) + } + valGetter = func(val string) interface{} { + tim, err := time.Parse(srcField.TimeFormat, val) + if err != nil { + // TODO some kind of logging or stats around failures in here. + return nil + } + return tim.Unix() + } + fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) + default: + return nil, nil, errors.Errorf("unknown source type '%s'", srcField.Type) } pc.fieldConfig[fieldName] = valueMeta{ valGetter: valGetter, @@ -338,11 +431,15 @@ type SourceField struct { TargetField string `json:"target-field"` // Type denotes how the source field should be parsed. (string, - // int, rowID, float, or ignore). rowID means that the field will - // be parsed as a uint64 and then used directly as a rowID for a - // set field. If "string", key translation must be on for that - // Pilosa field, and it must be a set field. If int or float, it - // must be a Pilosa int field. + // int, rowID, float, time, or ignore). rowID means that the field + // will be parsed as a uint64 and then used directly as a rowID + // for a set field. If "string", key translation must be on for + // that Pilosa field, and it must be a set field. If int or float, + // it must be a Pilosa int field. 
If time, a TimeFormat should be + // provided in the Go style using the reference date + // 2006-01-02T15:04:05Z07:00, and the target field type should be + // int - time will be stored as the time since Unix epoch in + // seconds. Type string `json:"type"` // Multiplier is for float fields. Because Pilosa does not support @@ -350,6 +447,7 @@ type SourceField struct { // Pilosa as an integer, but first multiplied by some constant // factor to preserve some amount of precision. If 0 this field won't be used. Multiplier float64 `json:"multiplier"` + TimeFormat string `json:"time-format"` } // TODO we should validate the Config once it is constructed. diff --git a/csv/main_internal_test.go b/csv/batch_internal_test.go similarity index 100% rename from csv/main_internal_test.go rename to csv/batch_internal_test.go diff --git a/csv/batch_test.go b/csv/batch_test.go index 2a45731..dd13223 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -5,6 +5,7 @@ import ( "io" "net/http" "os" + "path/filepath" "testing" "github.com/pilosa/go-pilosa" @@ -16,8 +17,8 @@ func BenchmarkImportCSV(b *testing.B) { m := picsv.NewMain() m.BatchSize = 1 << 20 m.Index = "picsvbench" - m.File = "marketing-200k.csv" - getRawData(b, m.File) + m.Files = []string{"testdata/marketing-200k.csv"} + getRawData(b, m.Files[0]) client, err := pilosa.NewClient(m.Pilosa) if err != nil { b.Fatalf("getting client: %v", err) @@ -69,82 +70,224 @@ func getRawData(t testing.TB, file string) { } -func TestImportCSV(t *testing.T) { +func TestImportMarketingCSV(t *testing.T) { + cases := []struct { + name string + idField string + idType string + }{ + { + name: "stringID", + idField: "id", + idType: "string", + }, + { + name: "uint64", + idField: "id", + idType: "string", + }, + { + name: "generatedID", + idField: "", + idType: "", + }, + } + for _, tst := range cases { + t.Run(tst.name, func(t *testing.T) { + m := picsv.NewMain() + m.BatchSize = 99999 + m.Index = "testpicsv" + m.Files = []string{"marketing-200k.csv"} + m.Config.SourceFields["age"] = picsv.SourceField{TargetField: "age", Type: "float"} + m.Config.PilosaFields["age"] = picsv.Field{Type: "int"} + m.Config.IDField = tst.idField + m.Config.IDType = tst.idType + getRawData(t, m.Files[0]) + client, err := pilosa.NewClient(m.Pilosa) + if err != nil { + t.Fatalf("getting client: %v", err) + } + + defer func() { + err = client.DeleteIndexByName(m.Index) + if err != nil { + t.Fatalf("deleting index: %v", err) + } + }() + err = m.Run() + if err != nil { + t.Fatalf("running ingest: %v", err) + } + + schema, err := client.Schema() + if err != nil { + t.Fatalf("getting schema: %v", err) + } + + index := schema.Index(m.Index) + marital := index.Field("marital") + converted := index.Field("converted") + age := index.Field("age") + education := index.Field("education") + + tests := []struct { + query *pilosa.PQLRowQuery + bash string + exp int64 + }{ + { + query: marital.Row("married"), + bash: `awk -F, '/married/ {print $1,$4}' marketing-200k.csv | sort | uniq | wc`, + exp: 125514, + }, + { + query: converted.Row("no"), + bash: `awk -F, '{print $1,$17}' marketing-200k.csv | grep "no" |sort | uniq | wc`, + exp: 199999, + }, + { + query: age.Equals(55), + bash: `awk -F, '{print $1,$2}' marketing-200k.csv | grep " 55.0" |sort | uniq | wc`, + exp: 3282, + }, + { + query: education.Row("professional course"), + bash: `awk -F, '/professional course/ {print $1,$5}' marketing-200k.csv | sort | uniq | wc`, + exp: 25374, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), 
func(t *testing.T) { + q := index.Count(test.query) + resp, err := client.Query(q) + if err != nil { + t.Fatalf("running query '%s': %v", q.Serialize(), err) + } + if resp.Result().Count() != test.exp { + t.Fatalf("Got unexpected result %d instead of %d for\nquery: %s\nbash: %s", resp.Result().Count(), test.exp, q.Serialize(), test.bash) + } + }) + } + + }) + } +} + +func TestImportMultipleTaxi(t *testing.T) { + // TODO (taxi files in place via: + // for url in `grep -v fhv_tripdata ../usecase/taxi/urls.txt`; do curl -s $url | head > testdata/${url##*/}; done m := picsv.NewMain() - m.BatchSize = 100000 - m.Index = "testpicsv" - m.File = "marketing-200k.csv" - m.Config.SourceFields["age"] = picsv.SourceField{TargetField: "age", Type: "float"} - m.Config.PilosaFields["age"] = picsv.Field{Type: "int"} - m.Config.IDField = "id" - getRawData(t, m.File) + m.BatchSize = 12 + m.Index = "testdtaxi" + m.Files = getFiles(t, "./testdata/taxi/") + m.ConfigFile = "./testdata/taxiconfig.json" client, err := pilosa.NewClient(m.Pilosa) if err != nil { t.Fatalf("getting client: %v", err) } - defer func() { err = client.DeleteIndexByName(m.Index) if err != nil { - t.Fatalf("deleting index: %v", err) + t.Logf("deleting index: %v", err) } }() + + config := `{ +"pilosa-fields": { + "cab_type": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 3}, + "pickup_time": {"type": "int"}, + "dropoff_time": {"type": "int"}, + "passenger_count": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 50}, + "trip_distance": {"type": "int"}, + "pickup_longitude": {"type": "int"}, + "pickup_latitude": {"type": "int"}, + "dropoff_longitude": {"type": "int"}, + "dropoff_latitude": {"type": "int"}, + "store_and_fwd_flag": {"type": "set", "keys": true}, + "rate_code": {"type": "set", "keys": true}, + "fare_amount": {"type": "int"}, + "extra": {"type": "int"}, + "mta_tax": {"type": "int"}, + "tip_amount": {"type": "int"}, + "tolls_amount": {"type": "int"}, + "total_amount": {"type": "int"}, + "improvement_surcharge": {"type": "int"}, + "ehail_fee": {"type": "int"}, + "payment_type": {"type": "set", "keys": false} + }, +"id-field": "", +"id-type": "", +"source-fields": { + "VendorID": {"target-field": "cab_type", "type": "rowID"}, + "vendor_id": {"target-field": "cab_type", "type": "rowID"}, + "vendor_name": {"target-field": "cab_type", "type": "rowID"}, + "lpep_pickup_datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "tpep_pickup_datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "pickup_datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "Trip_Pickup_Datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "Lpep_dropoff_datetime": {"target-field": "dropoff_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "tpep_dropoff_datetime": {"target-field": "dropoff_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "dropoff_datetime": {"target-field": "dropoff_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "Trip_Dropoff_Datetime": {"target-field": "dropoff_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, + "passenger_count": {"target-field": "passenger_count", "type": "rowID"}, + "Passenger_count": {"target-field": "passenger_count", "type": "rowID"}, + "Passenger_Count": {"target-field": "passenger_count", "type": "rowID"}, + "trip_distance": {"target-field": 
"trip_distance", "type": "float", "multiplier": 100}, + "Trip_distance": {"target-field": "trip_distance", "type": "float", "multiplier": 100}, + "trip_Distance": {"target-field": "trip_distance", "type": "float", "multiplier": 100}, + "pickup_longitude": {"target-field": "pickup_longitude", "type": "float", "multiplier": 10000}, + "Pickup_longitude": {"target-field": "pickup_longitude", "type": "float", "multiplier": 10000}, + "dropoff_longitude": {"target-field": "dropoff_longitude", "type": "float", "multiplier": 10000}, + "Dropoff_longitude": {"target-field": "dropoff_longitude", "type": "float", "multiplier": 10000}, + "pickup_latitude": {"target-field": "pickup_latitude", "type": "float", "multiplier": 10000}, + "Pickup_latitude": {"target-field": "pickup_latitude", "type": "float", "multiplier": 10000}, + "dropoff_latitude": {"target-field": "dropoff_latitude", "type": "float", "multiplier": 10000}, + "Dropoff_latitude": {"target-field": "dropoff_latitude", "type": "float", "multiplier": 10000}, + "store_and_fwd_flag": {"target-field": "store_and_fwd_flag", "type": "string"}, + "Store_and_fwd_flag": {"target-field": "store_and_fwd_flag", "type": "string"}, + "store_and_fwd": {"target-field": "store_and_fwd_flag", "type": "string"}, + "rate_code": {"target-field": "rate_code", "type": "string"}, + "Rate_Code": {"target-field": "rate_code", "type": "string"}, + "RateCodeID": {"target-field": "rate_code", "type": "string"}, + "Fare_Amt": {"target-field": "fare_amount", "type": "float", "multiplier": 100}, + "fare_amount": {"target-field": "fare_amount", "type": "float", "multiplier": 100}, + "Tip_Amt": {"target-field": "tip_amount", "type": "float", "multiplier": 100}, + "tip_amount": {"target-field": "tip_amount", "type": "float", "multiplier": 100}, + "Tolls_Amt": {"target-field": "tolls_amount", "type": "float", "multiplier": 100}, + "tolls_amount": {"target-field": "tolls_amount", "type": "float", "multiplier": 100}, + "improvement_surcharge": {"target-field": "improvement_surcharge", "type": "float", "multiplier": 100}, + "surcharge": {"target-field": "improvement_surcharge", "type": "float", "multiplier": 100}, + "mta_tax": {"target-field": "mta_tax", "type": "float", "multiplier": 100}, + "Total_Amt": {"target-field": "total_amount", "type": "float", "multiplier": 100}, + "total_amount": {"target-field": "total_amount", "type": "float", "multiplier": 100}, + "ehail_fee": {"target-field": "ehail_fee", "type": "float", "multiplier": 100}, + "payment_type": {"target-field": "payment_type", "type": "rowID"}, + "extra": {"target-field": "extra", "type": "float", "multiplier": 100} + } +}` + + writeFile(t, m.ConfigFile, config) + err = m.Run() if err != nil { t.Fatalf("running ingest: %v", err) } - schema, err := client.Schema() - if err != nil { - t.Fatalf("getting schema: %v", err) - } - - index := schema.Index(m.Index) - marital := index.Field("marital") - converted := index.Field("converted") - age := index.Field("age") + // schema, err := client.Schema() + // if err != nil { + // t.Fatalf("getting schema: %v", err) + // } - tests := []struct { - query *pilosa.PQLRowQuery - bash string - exp int64 - }{ - { - query: marital.Row("married"), - bash: `awk -F, '/married/ {print $1,$4}' marketing-200k.csv | sort | uniq | wc`, - exp: 125514, - }, - { - query: converted.Row("no"), - bash: `awk -F, '{print $1,$17}' marketing-200k.csv | grep "no" |sort | uniq | wc`, - exp: 199999, - }, - { - query: age.Equals(55), - bash: `awk -F, '{print $1,$2}' marketing-200k.csv | grep " 55.0" |sort | 
uniq | wc`, - exp: 3282, - }, - } + // index := schema.Index(m.Index) - for i, test := range tests { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - q := index.Count(test.query) - resp, err := client.Query(q) - if err != nil { - t.Fatalf("running query '%s': %v", q.Serialize(), err) - } - if resp.Result().Count() != test.exp { - t.Fatalf("Got unexpected result %d instead of %d for\nquery: %s\nbash: %s", resp.Result().Count(), test.exp, q.Serialize(), test.bash) - } - }) - } } func TestSmallImport(t *testing.T) { m := picsv.NewMain() m.BatchSize = 1 << 20 m.Index = "testsample" - m.File = "testdata/sample.csv" + m.Files = []string{"testdata/sample.csv"} m.ConfigFile = "config.json" client, err := pilosa.NewClient(m.Pilosa) if err != nil { @@ -184,7 +327,7 @@ EJSK,large,green,35,25.13106317, FEFF,,,,,6 ` writeFile(t, m.ConfigFile, config) - writeFile(t, m.File, data) + writeFile(t, m.Files[0], data) err = m.Run() if err != nil { @@ -329,3 +472,20 @@ outer: } return nil } + +func getFiles(t testing.TB, dir string) []string { + f, err := os.Open(dir) + if err != nil { + t.Fatalf("opening %s: %v", dir, err) + } + fis, err := f.Readdirnames(0) + if err != nil { + t.Fatalf(": %v", err) + } + + for i, name := range fis { + fis[i] = filepath.Join(dir, name) + } + + return fis +} diff --git a/csv/testdata/taxi/green_tripdata_2013-08.csv b/csv/testdata/taxi/green_tripdata_2013-08.csv new file mode 100644 index 0000000..84eb947 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2013-08.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + +2,2013-08-01 08:14:37,2013-08-01 09:09:06,N,1,0,0,0,0,1,.00,21.25,0,0,0,0,,21.25,2,,, +2,2013-08-01 09:13:00,2013-08-01 11:38:00,N,1,0,0,0,0,2,.00,74.5,0,0.5,0,0,,75,2,,, +2,2013-08-01 09:48:00,2013-08-01 09:49:00,N,5,0,0,0,0,1,.00,1,0.1,0,0,1,,2.1,2,,, +2,2013-08-01 10:38:35,2013-08-01 10:38:51,N,1,0,0,0,0,1,.00,3.25,0,0,0,0,,3.25,2,,, +2,2013-08-01 11:51:45,2013-08-01 12:03:52,N,1,0,0,0,0,1,.00,8.5,0,0.5,0,0,,9,2,,, +2,2013-08-01 14:33:39,2013-08-01 15:49:00,N,1,0,0,0,0,1,.00,9,0,0.5,0,0,,9.5,2,,, +2,2013-08-01 17:19:00,2013-08-01 17:19:00,N,1,0,0,0,0,1,.00,2.5,1,0.5,0,0,,4,2,,, +2,2013-08-01 17:22:00,2013-08-01 17:22:00,N,1,-73.937767028808594,40.758480072021484,-73.937767028808594,40.758480072021484,1,.00,2.5,1,0.5,0,5.33,,9.33,2,,, diff --git a/csv/testdata/taxi/green_tripdata_2013-09.csv b/csv/testdata/taxi/green_tripdata_2013-09.csv new file mode 100644 index 0000000..4c94b55 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2013-09.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + +2,2013-09-01 00:02:00,2013-09-01 00:54:51,N,1,-73.952407836914062,40.810726165771484,-73.983940124511719,40.676284790039063,5,14.35,50.5,0.5,0.5,10.3,0,,61.8,1,,, +2,2013-09-01 00:02:34,2013-09-01 00:20:59,N,1,-73.963020324707031,40.711833953857422,-73.966644287109375,40.681690216064453,1,3.24,15,0.5,0.5,0,0,,16,2,,, +2,2013-09-01 00:03:06,2013-09-01 00:28:03,N,1,-73.843460083007813,40.755950927734375,-73.989212036132812,40.740528106689453,1,11.27,34,0.5,0.5,8.07,5.33,,48.4,1,,, 
+2,2013-09-01 00:03:30,2013-09-01 00:23:02,N,1,-73.924812316894531,40.754245758056641,-73.978736877441406,40.721504211425781,1,6.63,22,0.5,0.5,5.75,0,,28.75,1,,, +2,2013-09-01 00:05:12,2013-09-01 00:30:55,N,1,-73.92950439453125,40.756450653076172,-73.856742858886719,40.697036743164063,1,12.84,37,0.5,0.5,0,0,,38,1,,, +2,2013-09-01 00:05:18,2013-09-01 00:23:31,N,1,-73.925041198730469,40.761703491210938,-73.878608703613281,40.736282348632813,1,3.14,14.5,0.5,0.5,0,0,,15.5,2,,, +2,2013-09-01 00:05:51,2013-09-01 00:13:32,N,1,-73.903205871582031,40.745704650878906,-73.919609069824219,40.758182525634766,1,1.43,7.5,0.5,0.5,0,0,,8.5,2,,, +2,2013-09-01 00:07:01,2013-09-01 00:13:56,N,1,-73.844253540039062,40.721340179443359,-73.834030151367188,40.706809997558594,1,1.61,7.5,0.5,0.5,0,0,,8.5,2,,, diff --git a/csv/testdata/taxi/green_tripdata_2013-10.csv b/csv/testdata/taxi/green_tripdata_2013-10.csv new file mode 100644 index 0000000..003af7e --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2013-10.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + +2,2013-10-01 00:00:00,2013-10-01 15:33:36,N,1,0,0,-73.903465270996094,40.845088958740234,1,.19,42,0,0.5,0,0,,42.5,2,1,, +2,2013-10-01 00:00:00,2013-10-01 09:01:53,N,1,0,0,-73.937408447265625,40.758129119873047,1,.00,2.5,0,0.5,0,0,,3,2,1,, +2,2013-10-01 00:00:00,2013-10-01 16:20:05,N,1,0,0,0,0,1,.00,5.5,1,0.5,0,0,,7,2,1,, +2,2013-10-01 00:00:00,2013-10-01 13:26:24,N,1,0,0,-73.901992797851563,40.763801574707031,1,3.27,10.5,0,0.5,0,0,,11,2,1,, +2,2013-10-01 00:00:00,2013-10-01 18:21:16,N,1,0,0,-73.937629699707031,40.758113861083984,2,.00,6.5,1,0.5,0,0,,8,2,1,, +2,2013-10-01 00:00:00,2013-10-01 09:05:05,N,1,0,0,0,0,1,.00,1.5,0,0.5,0,0,,2,1,,, +2,2013-10-01 00:00:54,2013-10-01 00:41:52,N,1,-73.938819885253906,40.749687194824219,-74.028953552246094,40.635749816894531,1,12.76,42,0.5,0.5,0,0,,43,2,,, +2,2013-10-01 00:03:08,2013-10-01 00:05:39,N,1,-73.903106689453125,40.745418548583984,-73.91400146484375,40.747730255126953,1,.89,4.5,0.5,0.5,0,0,,5.5,1,1,, diff --git a/csv/testdata/taxi/green_tripdata_2013-11.csv b/csv/testdata/taxi/green_tripdata_2013-11.csv new file mode 100644 index 0000000..4365b94 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2013-11.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + +2,2013-11-01 00:00:00,2013-11-01 13:48:03,N,1,0,0,-73.937103271484375,40.760646820068359,2,.00,3.5,0,0.5,0,0,,4,2,,, +2,2013-11-01 00:00:00,2013-11-01 19:54:27,N,1,0,0,-73.9652099609375,40.757942199707031,1,5.25,19,1,0.5,3.5,0,,24,1,,, +2,2013-11-01 00:00:00,2013-11-01 12:06:42,N,1,0,0,-73.96002197265625,40.819927215576172,6,2.03,9.5,0,0.5,0,0,,10,2,,, +2,2013-11-01 00:00:00,2013-11-01 14:51:53,N,1,0,0,-73.860992431640625,40.838615417480469,1,7.09,24,0,0.5,0,0,,24.5,2,,, +2,2013-11-01 00:00:00,2013-11-01 16:04:00,N,1,0,0,-73.956520080566406,40.803035736083984,1,2.28,11.5,0,0.5,0,0,,12,2,,, +2,2013-11-01 00:00:00,2013-11-01 17:29:03,N,1,0,0,-73.922470092773438,40.834331512451172,1,1.12,7,1,0.5,0,0,,8.5,2,,, +2,2013-11-01 00:00:00,2013-11-01 
14:41:08,N,1,0,0,0,0,2,.00,12.5,0,0.5,0,0,,13,2,,, +2,2013-11-01 00:00:11,2013-11-01 00:25:26,N,1,-73.940032958984375,40.84088134765625,-73.995292663574219,40.750003814697266,1,8.02,27,0,0.5,0,0,,27.5,1,,, diff --git a/csv/testdata/taxi/green_tripdata_2013-12.csv b/csv/testdata/taxi/green_tripdata_2013-12.csv new file mode 100644 index 0000000..6f38963 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2013-12.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + +2,2013-12-01 00:00:00,2013-12-01 20:44:23,N,1,0,0,-73.957260131835937,40.742355346679687,1,4.00,13,0.5,0.5,0,0,,14,2,,, +2,2013-12-01 00:00:00,2013-12-01 02:53:23,N,1,0,0,0,0,1,3.17,12,0.5,0.5,0,0,,13,2,,, +2,2013-12-01 00:00:00,2013-12-01 02:17:35,N,1,0,0,-73.991531372070313,40.729755401611328,1,3.61,13,0.5,0.5,0,0,,14,2,,, +2,2013-12-01 00:00:00,2013-12-01 12:57:31,N,1,0,0,-73.768447875976562,40.758571624755859,1,4.09,19,0,0.5,2,0,,21.5,1,,, +2,2013-12-01 00:00:00,2013-12-01 18:08:31,N,1,0,0,-73.994026184082031,40.681480407714844,5,12.07,37,0,0.5,0,0,,37.5,2,,, +2,2013-12-01 00:00:00,2013-12-01 12:56:18,N,1,0,0,-73.9508056640625,40.686077117919922,1,2.97,12.5,0,0.5,0,0,,13,2,,, +2,2013-12-01 00:00:00,2013-12-01 19:30:39,N,1,0,0,-73.945266723632812,40.750850677490234,1,1.17,8,0,0.5,0,0,,8.5,2,,, +2,2013-12-01 00:00:00,2013-12-01 17:16:16,N,1,0,0,-73.783378601074219,40.649665832519531,1,13.89,40.5,0,0.5,4,0,,45,1,,, diff --git a/csv/testdata/taxi/green_tripdata_2014-01.csv b/csv/testdata/taxi/green_tripdata_2014-01.csv new file mode 100644 index 0000000..10e5cf9 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-01.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-01-01 00:00:00,2014-01-01 01:08:06,N,1,0,0,-73.865043640136719,40.872306823730469,1,6.47,20,0.5,0.5,0,0,,21,1,,, +2,2014-01-01 00:00:00,2014-01-01 06:03:57,N,2,0,0,-73.7763671875,40.645488739013672,1,20.12,52,0,0.5,0,5.33,,57.83,1,,, +2,2014-01-01 00:00:00,2014-01-01 18:22:44,N,1,0,0,-73.932647705078125,40.852573394775391,2,.81,5,0.5,0.5,0,0,,6,1,,, +2,2014-01-01 00:00:00,2014-01-01 00:52:03,N,1,0,0,-73.99407958984375,40.749092102050781,1,9.55,33.5,0.5,0.5,2.17,5.33,,42,1,,, +2,2014-01-01 00:00:00,2014-01-01 00:49:25,N,1,0,0,-73.936065673828125,40.734725952148437,1,1.22,7,0.5,0.5,2,0,,10,1,,, +2,2014-01-01 00:00:00,2014-01-01 00:01:15,N,1,0,0,-73.912155151367188,40.684059143066406,2,4.27,17,0.5,0.5,0,0,,18,2,,, +2,2014-01-01 00:00:00,2014-01-01 02:37:20,N,1,0,0,-73.935317993164063,40.737010955810547,1,7.50,40,0.5,0.5,0,0,,41,2,,, diff --git a/csv/testdata/taxi/green_tripdata_2014-02.csv b/csv/testdata/taxi/green_tripdata_2014-02.csv new file mode 100644 index 0000000..c9b8e9b --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-02.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-02-01 00:00:00,2014-02-01 
23:25:08,N,2,0,0,-73.912254333496094,40.753227233886719,2,.89,52,0,0.5,0,0,,52.5,2,,, +2,2014-02-01 00:00:00,2014-02-01 20:17:35,N,1,0,0,-73.960289001464844,40.761558532714844,1,2.72,11,0.5,0.5,0.75,0,,12.75,1,,, +2,2014-02-01 00:00:00,2014-02-01 14:27:37,N,1,0,0,-73.937232971191406,40.758316040039063,1,.00,2.5,0,0.5,0,0,,3,1,,, +2,2014-02-01 00:00:00,2014-02-01 01:07:26,N,1,0,0,-73.947052001953125,40.683628082275391,1,3.21,12.5,0.5,0.5,3.9,0,,17.4,1,,, +2,2014-02-01 00:00:00,2014-02-01 04:04:40,N,1,0,0,-73.976493835449219,40.788700103759766,1,9.71,31,0.5,0.5,6.3,0,,38.3,1,,, +2,2014-02-01 00:00:00,2014-02-01 11:24:18,N,1,0,0,0,0,1,3.05,13.5,0,0.5,2.7,0,,16.7,1,,, +2,2014-02-01 00:00:00,2014-02-01 20:43:09,N,1,0,0,-73.995849609375,40.764453887939453,1,4.36,15,0.5,0.5,2,0,,18,1,,, diff --git a/csv/testdata/taxi/green_tripdata_2014-03.csv b/csv/testdata/taxi/green_tripdata_2014-03.csv new file mode 100644 index 0000000..b04b894 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-03.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-03-01 00:00:00,2014-03-01 19:18:34,N,1,0,0,-73.872024536132813,40.678714752197266,6,7.02,28.5,0,0.5,0,0,,29,2,1,, +2,2014-03-01 00:00:00,2014-03-01 13:10:37,N,1,0,0,-73.917839050292969,40.757766723632812,1,5.43,23.5,0,0.5,5.88,0,,29.88,1,1,, +2,2014-03-01 00:00:00,2014-03-01 14:36:16,N,1,0,0,-73.882896423339844,40.870456695556641,1,.84,5,0,0.5,0,0,,5.5,1,1,, +2,2014-03-01 00:00:00,2014-03-01 02:51:03,N,1,0,0,0,0,1,8.98,26.5,0.5,0.5,5.4,0,,32.9,1,1,, +2,2014-03-01 00:00:00,2014-03-01 03:13:09,N,1,0,0,0,0,1,.91,5.5,0.5,0.5,0,0,,6.5,2,1,, +2,2014-03-01 00:00:00,2014-03-01 14:12:18,N,1,0,0,0,0,1,2.88,13,0,0.5,2.6,0,,16.1,1,1,, +2,2014-03-01 00:00:00,2014-03-01 19:37:31,N,1,0,0,0,0,1,2.04,9,0,0.5,0,0,,9.5,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-04.csv b/csv/testdata/taxi/green_tripdata_2014-04.csv new file mode 100644 index 0000000..5ec3845 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-04.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-04-01 00:00:00,2014-04-01 14:24:20,N,1,0,0,0,0,1,7.45,23,0,0.5,0,0,,23.5,2,1,, +2,2014-04-01 00:00:00,2014-04-01 17:21:33,N,1,0,0,-73.987663269042969,40.780872344970703,1,8.95,31,1,0.5,0,0,,32.5,2,1,, +2,2014-04-01 00:00:00,2014-04-01 15:06:18,N,1,0,0,-73.946922302246094,40.831764221191406,1,1.32,6.5,0,0.5,0,0,,7,2,1,, +2,2014-04-01 00:00:00,2014-04-01 08:09:27,N,1,0,0,-73.947669982910156,40.808650970458984,5,.10,3,0,0.5,0,0,,3.5,2,1,, +2,2014-04-01 00:00:00,2014-04-01 16:15:13,N,1,0,0,0,0,1,7.09,23.5,0,0.5,4.7,0,,28.7,1,1,, +2,2014-04-01 00:00:00,2014-04-01 16:31:57,N,1,0,0,-73.950538635253906,40.786632537841797,1,5.20,17,0,0.5,0,0,,17.5,2,1,, +2,2014-04-01 00:00:00,2014-04-01 10:59:14,N,1,0,0,0,0,1,8.96,38.5,0,0.5,4,0,,43,1,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-05.csv b/csv/testdata/taxi/green_tripdata_2014-05.csv new file mode 100644 index 0000000..0b56e19 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-05.csv @@ -0,0 +1,10 @@ 
+VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-05-01 00:00:00,2014-05-01 22:05:36,N,1,0,0,-73.977714538574219,40.687541961669922,1,.95,6.5,0.5,0.5,1.4,0,,8.9,1,1,, +2,2014-05-01 00:00:00,2014-05-01 07:52:17,N,1,0,0,0,0,1,1.95,9,0,0.5,2.25,0,,11.75,1,1,, +2,2014-05-01 00:00:00,2014-05-01 10:50:16,N,1,0,0,0,0,1,5.65,26.5,0,0.5,3,0,,30,1,1,, +2,2014-05-01 00:00:00,2014-05-01 20:50:04,N,1,0,0,0,0,1,6.88,24,0.5,0.5,0,0,,25,2,1,, +2,2014-05-01 00:00:00,2014-05-01 10:35:50,N,1,0,0,0,0,1,4.46,21,0,0.5,3,0,,24.5,1,1,, +2,2014-05-01 00:00:00,2014-05-01 11:05:11,N,1,0,0,-73.821548461914062,40.701930999755859,3,7.01,22,0,0.5,0,0,,22.5,2,1,, +2,2014-05-01 00:00:00,2014-05-01 14:58:31,N,1,0,0,-73.853340148925781,40.72711181640625,1,.93,5,0,0.5,0,0,,5.5,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-06.csv b/csv/testdata/taxi/green_tripdata_2014-06.csv new file mode 100644 index 0000000..81604cd --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-06.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-06-01 00:00:00,2014-06-01 07:40:36,N,1,0,0,-73.865570068359375,40.770862579345703,1,8.00,24.5,0,0.5,0,7.5,,32.5,1,1,, +2,2014-06-01 00:00:00,2014-06-01 15:25:06,N,5,0,0,-73.928314208984375,40.815761566162109,1,.94,7,0,0,0,0,,7,2,1,, +2,2014-06-01 00:00:00,2014-06-01 02:06:27,N,1,0,0,-73.92926025390625,40.855411529541016,1,1.12,6.5,0.5,0.5,0,0,,7.5,2,1,, +2,2014-06-01 00:00:00,2014-06-01 23:33:03,N,1,0,0,-73.997047424316406,40.749828338623047,1,6.97,29.5,0.5,0.5,0,0,,30.5,2,1,, +2,2014-06-01 00:00:00,2014-06-01 13:05:02,N,1,0,0,-73.948982238769531,40.797092437744141,1,1.04,7,0,0.5,0,0,,7.5,2,1,, +2,2014-06-01 00:00:00,2014-06-01 14:14:55,N,2,0,0,-73.782569885253906,40.64410400390625,1,18.16,52,0,0.5,0,5.33,,57.83,2,1,, +2,2014-06-01 00:00:00,2014-06-01 23:03:03,N,1,0,0,-73.988029479980469,40.755943298339844,1,6.41,20,0.5,0.5,0,0,,21,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-07.csv b/csv/testdata/taxi/green_tripdata_2014-07.csv new file mode 100644 index 0000000..8b12508 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-07.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-07-01 00:00:00,2014-07-01 22:33:05,N,1,0,0,-73.952804565429687,40.799091339111328,1,.76,4.5,0.5,0.5,0,0,,5.5,2,1,, +2,2014-07-01 00:00:00,2014-07-01 22:12:19,N,1,0,0,-73.925300598144531,40.806041717529297,1,5.06,17,0.5,0.5,0,0,,18,2,1,, +2,2014-07-01 00:00:00,2014-07-01 10:33:40,N,1,0,0,-73.931709289550781,40.797145843505859,5,.85,5,0,0.5,1,0,,6.5,1,1,, +2,2014-07-01 00:00:00,2014-07-01 08:54:25,N,1,0,0,-73.915695190429687,40.772075653076172,2,3.40,13,0,0.5,0,0,,13.5,2,1,, +2,2014-07-01 00:00:00,2014-07-01 08:42:46,N,1,0,0,-73.91033935546875,40.744735717773437,1,1.09,6.5,0,0.5,0,0,,7,2,1,, +2,2014-07-01 00:00:00,2014-07-01 
15:05:02,N,1,0,0,-73.973533630371094,40.764797210693359,1,4.38,19,0,0.5,0,0,,19.5,2,1,, +2,2014-07-01 00:00:00,2014-07-01 14:50:30,N,1,0,0,-73.939483642578125,40.820865631103516,6,1.76,9,0,0.5,0,0,,9.5,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-08.csv b/csv/testdata/taxi/green_tripdata_2014-08.csv new file mode 100644 index 0000000..127338d --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-08.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-08-01 00:00:00,2014-08-01 14:26:18,N,1,0,0,-73.95330810546875,40.802398681640625,1,2.60,12.5,0,0.5,0,0,,13,2,1,, +2,2014-08-01 00:00:00,2014-08-01 11:03:10,N,1,0,0,-73.882423400878906,40.847560882568359,1,6.50,23,0,0.5,0,0,,23.5,2,1,, +2,2014-08-01 00:00:00,2014-08-01 20:38:16,N,1,0,0,-74.003044128417969,40.734298706054687,1,6.49,24.5,1,0.5,6.38,0,,32.38,1,1,, +2,2014-08-01 00:00:00,2014-08-01 21:23:08,N,1,0,0,0,0,5,3.10,11,0.5,0.5,2.3,0,,14.3,1,1,, +2,2014-08-01 00:00:00,2014-08-01 04:31:25,N,1,0,0,-73.941558837890625,40.798782348632812,1,2.30,9.5,0.5,0.5,0,0,,10.5,2,1,, +2,2014-08-01 00:00:00,2014-08-01 13:50:11,N,1,0,0,-73.994659423828125,40.750362396240234,3,9.75,40.5,0,0.5,5,7.5,,53.5,1,1,, +2,2014-08-01 00:00:00,2014-08-01 16:39:37,N,1,0,0,-73.921806335449219,40.867523193359375,1,21.22,97,0,0.5,0,0,,97.5,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-09.csv b/csv/testdata/taxi/green_tripdata_2014-09.csv new file mode 100644 index 0000000..ebc5ab0 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-09.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-09-01 00:00:00,2014-09-01 00:02:47,N,1,-73.950965881347656,40.723640441894531,-73.94049072265625,40.724655151367188,1,.65,4,0.5,0.5,1.12,0,,6.12,1,1,, +2,2014-09-01 00:00:00,2014-09-01 00:02:35,N,1,-73.918632507324219,40.75927734375,-73.913055419921875,40.768936157226563,5,.82,4.5,0.5,0.5,1,0,,6.5,1,1,, +2,2014-09-01 00:00:05,2014-09-01 00:18:08,N,1,-73.986015319824219,40.669368743896484,-74.004341125488281,40.721630096435547,4,4.25,16.5,0.5,0.5,3.4,0,,20.9,1,1,, +2,2014-09-01 00:00:06,2014-09-01 00:13:38,N,1,-73.949317932128906,40.713729858398438,-73.978584289550781,40.681247711181641,1,4.10,15,0.5,0.5,3.1,0,,19.1,1,1,, +2,2014-09-01 00:00:10,2014-09-01 00:07:52,N,1,-73.845001220703125,40.754974365234375,-73.834320068359375,40.759044647216797,1,1.72,8,0.5,0.5,0,0,,9,2,1,, +2,2014-09-01 00:00:10,2014-09-01 00:08:35,N,1,-73.9727783203125,40.647693634033203,-74.00970458984375,40.678062438964844,1,3.77,12.5,0.5,0.5,2.6,0,,16.1,1,1,, +2,2014-09-01 00:00:10,2014-09-01 00:09:48,N,1,-73.953346252441406,40.680717468261719,-73.967147827148438,40.713779449462891,1,2.65,10,0.5,0.5,0,0,,11,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-10.csv b/csv/testdata/taxi/green_tripdata_2014-10.csv new file mode 100644 index 0000000..6342f07 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-10.csv @@ -0,0 +1,10 @@ 
+VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-10-01 00:00:00,2014-10-01 00:12:37,N,1,-73.957656860351563,40.717777252197266,-73.966835021972656,40.710319519042969,1,1.02,9,0.5,0.5,0,0,,10,2,1,, +2,2014-10-01 00:00:01,2014-10-01 00:11:59,N,1,-73.922706604003906,40.817630767822266,-73.887168884277344,40.827022552490234,1,2.16,10.5,0.5,0.5,0,0,,11.5,2,1,, +1,2014-10-01 00:00:07,2014-10-01 00:16:44,N,1,-73.945610046386719,40.807975769042969,-73.954940795898438,40.804958343505859,1,5.00,17,0.5,0.5,0,0,,18,2,1,, +2,2014-10-01 00:00:13,2014-10-01 00:00:15,N,1,-73.940475463867187,40.679836273193359,-73.940467834472656,40.679836273193359,1,.00,2.5,0.5,0.5,0,0,,3.5,2,1,, +2,2014-10-01 00:00:18,2014-10-01 00:18:27,N,1,-73.9501953125,40.826389312744141,-73.90655517578125,40.851665496826172,1,3.64,15.5,0.5,0.5,0,0,,16.5,2,1,, +2,2014-10-01 00:00:19,2014-10-01 00:12:44,N,1,-73.829856872558594,40.759693145751953,-73.818092346191406,40.739154815673828,1,2.84,12,0.5,0.5,2.5,0,,15.5,1,1,, +2,2014-10-01 00:00:20,2014-10-01 00:09:25,N,1,-73.951286315917969,40.809993743896484,-73.91961669921875,40.82373046875,1,2.46,10,0.5,0.5,0,0,,11,1,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-11.csv b/csv/testdata/taxi/green_tripdata_2014-11.csv new file mode 100644 index 0000000..f566538 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-11.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +1,2014-11-01 00:00:00,2014-11-01 00:08:17,N,1,-73.964836120605469,40.806598663330078,-73.94891357421875,40.836437225341797,1,2.70,10,0.5,0,2.6,0,,13.1,1,1,, +2,2014-11-01 00:00:00,2014-11-01 00:00:00,N,1,-73.944717407226563,40.705356597900391,-73.8980712890625,40.706253051757813,5,3.36,14.5,0.5,0.5,0,0,,15.5,2,1,, +2,2014-11-01 00:00:01,2014-11-01 00:07:41,N,1,-73.958663940429688,40.815158843994141,-73.945915222167969,40.834522247314453,1,1.55,8,0.5,0.5,1.7,0,,10.7,1,1,, +2,2014-11-01 00:00:02,2014-11-01 00:18:21,N,1,-73.990150451660156,40.702285766601563,-73.937026977539063,40.705757141113281,6,5.03,18.5,0.5,0.5,3.8,0,,23.3,1,1,, +2,2014-11-01 00:00:02,2014-11-01 00:05:04,N,1,-73.86077880859375,40.833702087402344,-73.848953247070312,40.841697692871094,1,1.23,6,0.5,0.5,0,0,,7,2,1,, +2,2014-11-01 00:00:02,2014-11-01 00:05:27,N,1,-73.958580017089844,40.815601348876953,-73.971900939941406,40.794872283935547,1,1.62,7.5,0,0.5,0,0,,8,2,1,, +2,2014-11-01 00:00:03,2014-11-01 00:14:16,N,1,-73.958976745605469,40.716705322265625,-73.937057495117187,40.698490142822266,1,2.19,11,0.5,0.5,0,0,,12,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2014-12.csv b/csv/testdata/taxi/green_tripdata_2014-12.csv new file mode 100644 index 0000000..914b963 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2014-12.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,Total_amount,Payment_type,Trip_type + + +2,2014-12-01 00:00:04,2014-12-01 
00:08:45,N,1,-73.807624816894531,40.700252532958984,-73.79937744140625,40.710411071777344,1,1.39,7.5,0.5,0.5,1.6,0,,10.1,1,1,, +1,2014-12-01 00:00:04,2014-12-01 00:06:19,N,1,-73.950874328613281,40.662948608398438,-73.947784423828125,40.633140563964844,1,2.10,8.5,0.5,0.5,0,0,,9.5,2,1,, +2,2014-12-01 00:00:12,2014-12-01 00:08:06,N,1,-73.925254821777344,40.761951446533203,-73.913589477539062,40.74530029296875,1,1.77,8,0.5,0.5,0,0,,9,2,1,, +2,2014-12-01 00:00:13,2014-12-01 00:14:24,N,1,-73.994659423828125,40.703788757324219,-74.013725280761719,40.708454132080078,1,3.31,13,0.5,0.5,0,0,,14,2,1,, +1,2014-12-01 00:00:15,2014-12-01 00:19:31,N,1,-73.923118591308594,40.759323120117188,-73.987457275390625,40.763748168945313,1,4.50,17,0.5,0.5,3.6,0,,21.6,1,1,, +2,2014-12-01 00:00:16,2014-12-01 00:23:35,N,1,-73.954421997070312,40.685997009277344,-73.950706481933594,40.785835266113281,1,9.83,29.5,0.5,0.5,1,0,,31.5,1,1,, +2,2014-12-01 00:00:18,2014-12-01 00:09:37,N,1,-73.888587951660156,40.74725341796875,-73.887649536132812,40.728588104248047,1,1.87,8.5,0.5,0.5,0,0,,9.5,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-01.csv b/csv/testdata/taxi/green_tripdata_2015-01.csv new file mode 100644 index 0000000..c7f299d --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-01.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-01-01 00:34:42,2015-01-01 00:38:34,N,1,-73.922592163085938,40.754528045654297,-73.91363525390625,40.765522003173828,1,.88,5,0.5,0.5,0,0,,0.3,6.3,2,1,, +2,2015-01-01 00:34:46,2015-01-01 00:47:23,N,1,-73.952751159667969,40.677711486816406,-73.981529235839844,40.658977508544922,1,3.08,12,0.5,0.5,0,0,,0.3,13.3,2,1,, +1,2015-01-01 00:34:44,2015-01-01 00:38:15,N,1,-73.843009948730469,40.71905517578125,-73.846580505371094,40.711566925048828,1,.90,5,0.5,0.5,1.8,0,,0,7.8,1,1,, +2,2015-01-01 00:34:48,2015-01-01 00:38:08,N,1,-73.860824584960938,40.757793426513672,-73.854042053222656,40.749820709228516,1,.85,5,0.5,0.5,0,0,,0.3,6.3,2,1,, +2,2015-01-01 00:34:53,2015-01-01 01:09:10,N,1,-73.945182800292969,40.783321380615234,-73.9896240234375,40.765449523925781,1,4.91,24.5,0.5,0.5,0,0,,0.3,25.8,2,1,, +1,2015-01-01 00:34:55,2015-01-01 00:40:58,N,1,-73.966812133789063,40.714675903320313,-73.949409484863281,40.718437194824219,4,1.20,6.5,0.5,0.5,0,0,,0.3,7.8,2,1,, +1,2015-01-01 00:34:49,2015-01-01 00:53:10,N,1,-73.930488586425781,40.850131988525391,-73.978057861328125,40.789058685302734,1,6.60,22,0.5,0.5,0,0,,0.3,23.3,2,1,, +2,2015-01-01 00:35:03,2015-01-01 00:35:08,N,5,-73.863899230957031,40.895439147949219,-73.86187744140625,40.894779205322266,1,.13,15,0,0,0,0,,0,15,1,2,, +2,2015-01-01 00:35:13,2015-01-01 00:41:04,N,1,-73.917129516601562,40.764888763427734,-73.927978515625,40.761470794677734,5,1.18,6.5,0.5,0.5,1.4,0,,0.3,9.2,1,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-02.csv b/csv/testdata/taxi/green_tripdata_2015-02.csv new file mode 100644 index 0000000..a52ce1a --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-02.csv @@ -0,0 +1,10 @@ 
+VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-02-01 00:00:01,2015-02-01 00:18:27,N,1,-73.961990356445313,40.715862274169922,-74.012161254882813,40.716781616210938,1,5.72,20,0.5,0.5,4.1,0,,0.3,25.4,1,1,, +2,2015-02-01 00:00:06,2015-02-01 00:05:26,N,1,-73.945648193359375,40.807811737060547,-73.952201843261719,40.813961029052734,1,.92,5.5,0.5,0.5,0,0,,0.3,6.8,1,1,, +2,2015-02-01 00:00:09,2015-02-01 00:25:24,N,1,-73.954818725585937,40.820701599121094,-73.998832702636719,40.729896545410156,1,8.76,28.5,0.5,0.5,5.8,0,,0.3,35.6,1,1,, +2,2015-02-01 00:00:21,2015-02-01 00:07:10,N,1,-73.916412353515625,40.761032104492188,-73.908439636230469,40.774578094482422,3,1.22,6.5,0.5,0.5,1.4,0,,0.3,9.2,1,1,, +2,2015-02-01 00:00:13,2015-02-01 00:09:51,N,5,-73.914131164550781,40.836639404296875,-73.912620544433594,40.823764801025391,1,1.41,7,0,0,0,0,,0,7,2,2,, +2,2015-02-01 00:00:12,2015-02-01 00:09:35,N,1,-73.923561096191406,40.764064788818359,-73.916923522949219,40.780269622802734,1,1.50,8.5,0.5,0.5,2.45,0,,0.3,12.25,1,1,, +2,2015-02-01 00:00:17,2015-02-01 00:13:37,N,1,-73.97601318359375,40.684051513671875,-73.928504943847656,40.691013336181641,1,3.61,13.5,0.5,0.5,0,0,,0.3,14.8,1,1,, +2,2015-02-01 00:00:37,2015-02-01 00:10:47,N,1,-73.9205322265625,40.763736724853516,-73.924827575683594,40.76861572265625,1,1.13,8,0.5,0.5,0,0,,0.3,9.3,2,1,, +2,2015-02-01 00:00:25,2015-02-01 00:18:03,N,1,-73.955009460449219,40.734226226806641,-73.936843872070313,40.697780609130859,5,2.89,13.5,0.5,0.5,3.7,0,,0.3,18.5,1,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-03.csv b/csv/testdata/taxi/green_tripdata_2015-03.csv new file mode 100644 index 0000000..6bf7a83 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-03.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +1,2015-03-01 00:02:05,2015-03-01 00:08:01,N,1,-73.961479187011719,40.801872253417969,-73.980720520019531,40.775054931640625,1,2.10,8,0.5,0.5,0,0,,0.3,9.3,1,1,, +1,2015-03-01 00:02:11,2015-03-01 00:27:09,N,1,-73.979759216308594,40.677467346191406,-74.017097473144531,40.710929870605469,1,4.70,20,0.5,0.5,4,0,,0.3,25.3,1,1,, +2,2015-03-01 00:02:07,2015-03-01 00:10:31,N,1,-73.941375732421875,40.833633422851563,-73.926689147949219,40.863410949707031,1,3.09,11,0.5,0.5,0,0,,0.3,12.3,2,1,, +1,2015-03-01 00:02:13,2015-03-01 00:09:09,N,1,-73.939674377441406,40.821151733398438,-73.931854248046875,40.846458435058594,1,2.20,8.5,0.5,0.5,0,0,,0.3,9.8,4,1,, +2,2015-03-01 00:02:18,2015-03-01 00:07:52,N,1,-73.957344055175781,40.712959289550781,-73.954795837402344,40.719650268554688,6,.75,5.5,0.5,0.5,1,0,,0.3,7.8,1,1,, +2,2015-03-01 00:02:20,2015-03-01 00:08:54,N,1,-73.844253540039062,40.720935821533203,-73.863471984863281,40.737400054931641,1,1.97,8,0.5,0.5,2.32,0,,0.3,11.62,1,1,, +2,2015-03-01 00:02:16,2015-03-01 00:05:39,N,1,-73.949958801269531,40.722354888916016,-73.946456909179688,40.715774536132813,1,.56,4.5,0.5,0.5,0,0,,0.3,5.8,1,1,, +1,2015-03-01 00:02:33,2015-03-01 
00:19:04,N,1,-73.869071960449219,40.749305725097656,-73.919036865234375,40.769260406494141,1,4.30,16.5,0.5,0.5,0,0,,0.3,17.8,2,1,, +2,2015-03-01 00:02:26,2015-03-01 00:12:27,N,1,-73.844352722167969,40.721359252929688,-73.832145690917969,40.706222534179688,1,1.93,9.5,0.5,0.5,0,0,,0.3,10.8,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-04.csv b/csv/testdata/taxi/green_tripdata_2015-04.csv new file mode 100644 index 0000000..e7497a5 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-04.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-04-01 00:00:00,2015-04-01 00:08:15,N,1,-73.958816528320312,40.716823577880859,-73.98297119140625,40.696006774902344,1,3.08,11,0.5,0.5,2.46,0,,0.3,14.76,1,1,, +2,2015-04-01 00:00:04,2015-04-01 00:07:33,N,1,-73.937530517578125,40.804500579833984,-73.946800231933594,40.815998077392578,1,1.21,7,0.5,0.5,0,0,,0.3,8.3,2,1,, +2,2015-04-01 00:00:09,2015-04-01 00:02:10,N,1,-73.955818176269531,40.68115234375,-73.955406188964844,40.687145233154297,1,.64,4,0.5,0.5,0.7,0,,0.3,6,1,1,, +2,2015-04-01 00:00:23,2015-04-01 00:05:16,N,1,-73.830551147460938,40.75946044921875,-73.825363159179687,40.741588592529297,1,1.34,6.5,0.5,0.5,0,0,,0.3,7.8,2,1,, +2,2015-04-01 00:00:25,2015-04-01 00:05:20,N,1,-73.912086486816406,40.775356292724609,-73.915473937988281,40.764091491699219,1,.98,6,0.5,0.5,0,0,,0.3,7.3,2,1,, +2,2015-04-01 00:00:19,2015-04-01 00:17:21,N,1,-73.960014343261719,40.718284606933594,-73.954673767089844,40.769523620605469,1,5.46,18.5,0.5,0.5,5.94,0,,0.3,25.74,1,1,, +1,2015-04-01 00:00:13,2015-04-01 00:02:49,N,1,-73.911949157714844,40.775199890136719,-73.913124084472656,40.781280517578125,1,.60,4,0.5,0.5,0,0,,0.3,5.3,2,1,, +2,2015-04-01 00:00:37,2015-04-01 00:05:32,N,1,-73.964958190917969,40.683097839355469,-73.958351135253906,40.669677734375,1,1.12,6,0.5,0.5,2.19,0,,0.3,9.49,1,1,, +2,2015-04-01 00:00:45,2015-04-01 00:06:21,N,1,-73.806365966796875,40.699153900146484,-73.806381225585938,40.671768188476563,1,2.59,9,0.5,0.5,0,0,,0.3,10.3,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-05.csv b/csv/testdata/taxi/green_tripdata_2015-05.csv new file mode 100644 index 0000000..690df79 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-05.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-05-01 00:01:02,2015-05-01 00:06:09,N,1,-73.955619812011719,40.680755615234375,-73.961456298828125,40.6661376953125,1,1.18,6,0.5,0.5,0,0,,0.3,7.3,2,1,, +2,2015-05-01 00:01:13,2015-05-01 00:03:37,N,1,-73.991470336914063,40.691696166992188,-73.992523193359375,40.689407348632813,1,.17,3.5,0.5,0.5,0,0,,0.3,4.8,2,1,, +2,2015-05-01 00:01:09,2015-05-01 00:09:23,N,1,-73.830673217773438,40.759552001953125,-73.862449645996094,40.753837585449219,1,2.36,9.5,0.5,0.5,0,0,,0.3,10.8,2,1,, +2,2015-05-01 00:01:27,2015-05-01 00:05:32,N,1,-73.947647094726563,40.687606811523438,-73.946250915527344,40.695117950439453,1,.75,5,0.5,0.5,0,0,,0.3,6.3,1,1,, +2,2015-05-01 00:01:32,2015-05-01 
00:07:05,N,1,-73.944877624511719,40.808528900146484,-73.936790466308594,40.812446594238281,1,.70,5,0.5,0.5,0,0,,0.3,6.3,1,1,, +2,2015-05-01 00:01:19,2015-05-01 00:08:19,N,1,-73.952507019042969,40.810878753662109,-73.946479797363281,40.826526641845703,1,1.42,7.5,0.5,0.5,0,0,,0.3,8.8,2,1,, +2,2015-05-01 00:01:23,2015-05-01 00:52:51,N,1,-73.917572021484375,40.82391357421875,-73.97137451171875,40.685111999511719,5,16.07,51.5,0.5,0.5,0,0,,0.3,52.8,2,1,, +2,2015-05-01 00:02:08,2015-05-01 00:07:57,N,1,-73.954368591308594,40.820835113525391,-73.9415283203125,40.833320617675781,1,1.19,6.5,0.5,0.5,0,0,,0.3,7.8,2,1,, +2,2015-05-01 00:02:16,2015-05-01 00:08:28,N,1,-73.949478149414063,40.71453857421875,-73.944984436035156,40.707778930664063,1,.90,6,0.5,0.5,0,0,,0.3,7.3,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-06.csv b/csv/testdata/taxi/green_tripdata_2015-06.csv new file mode 100644 index 0000000..392cc22 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-06.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-06-01 00:00:00,2015-06-01 00:09:32,N,1,-73.878700256347656,40.881328582763672,-73.838386535644531,40.884838104248047,1,2.64,10.5,0.5,0.5,0,0,,0.3,11.8,2,1,, +2,2015-06-01 00:00:05,2015-06-01 00:12:41,N,1,-73.906356811523438,40.876182556152344,-73.944488525390625,40.830490112304688,1,4.79,16,0.5,0.5,0,0,,0.3,17.3,2,1,, +2,2015-06-01 00:00:09,2015-06-01 00:11:29,N,1,-73.887863159179688,40.747196197509766,-73.888786315917969,40.738815307617188,1,1.45,9,0.5,0.5,0,0,,0.3,10.3,2,1,, +2,2015-06-01 00:00:26,2015-06-01 00:03:51,N,1,-73.917800903320312,40.770065307617187,-73.907890319824219,40.766143798828125,1,.74,4.5,0.5,0.5,0,0,,0.3,5.8,2,1,, +1,2015-06-01 00:00:18,2015-06-01 00:04:31,N,1,-73.956329345703125,40.717121124267578,-73.950599670410156,40.723434448242187,1,.80,5,0.5,0.5,1.25,0,,0.3,7.55,1,1,, +2,2015-06-01 00:00:16,2015-06-01 00:10:29,N,1,-73.939163208007812,40.816555023193359,-73.938468933105469,40.796218872070313,1,1.94,9.5,0.5,0.5,0,0,,0.3,10.8,2,1,, +2,2015-06-01 00:00:29,2015-06-01 00:26:47,N,1,-73.941329956054687,40.813583374023438,-73.918571472167969,40.811511993408203,1,6.26,22.5,0.5,0.5,0,0,,0.3,23.8,2,1,, +2,2015-06-01 00:01:15,2015-06-01 00:04:11,N,1,-73.997383117675781,40.674507141113281,-73.98590087890625,40.67755126953125,1,.90,5,0.5,0.5,1.26,0,,0.3,7.56,1,1,, +2,2015-06-01 00:00:39,2015-06-01 00:06:35,N,1,-73.891006469726563,40.746994018554687,-73.880416870117187,40.749176025390625,1,.71,5.5,0.5,0.5,0,0,,0.3,6.8,2,1,, diff --git a/csv/testdata/taxi/green_tripdata_2015-07.csv b/csv/testdata/taxi/green_tripdata_2015-07.csv new file mode 100644 index 0000000..384b6a9 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-07.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-07-01 00:01:10,2015-07-01 00:19:04,N,1,-73.940628051757813,40.71502685546875,-73.912315368652344,40.760379791259766,1,4.33,15.5,0.5,0.5,0,0,,0.3,16.8,2,1 +2,2015-07-01 00:05:35,2015-07-01 
00:17:42,N,1,-73.951133728027344,40.804946899414063,-73.867218017578125,40.818988800048828,1,6.11,18,0.5,0.5,0,0,,0.3,19.3,1,1 +2,2015-07-01 00:00:27,2015-07-01 00:03:40,N,1,-73.873878479003906,40.742408752441406,-73.889274597167969,40.739227294921875,1,.96,5,0.5,0.5,0.03,0,,0.3,6.33,1,1 +2,2015-07-01 00:00:29,2015-07-01 00:05:30,N,1,-73.903488159179687,40.745758056640625,-73.905784606933594,40.737575531005859,2,1.04,5.5,0.5,0.5,0,0,,0.3,6.8,2,1 +2,2015-07-01 00:00:22,2015-07-01 00:10:43,N,1,-73.845809936523438,40.720569610595703,-73.810676574707031,40.701686859130859,1,2.48,10.5,0.5,0.5,0,0,,0.3,11.8,2,1 +2,2015-07-01 00:00:24,2015-07-01 00:44:06,N,1,-73.830268859863281,40.713668823242188,-73.941886901855469,40.757053375244141,1,11.33,40.5,0.5,0.5,0,0,,0.3,41.8,2,1 +2,2015-07-01 00:13:40,2015-07-01 00:24:35,N,1,-73.976097106933594,40.686973571777344,-73.929664611816406,40.68414306640625,1,2.47,10.5,0.5,0.5,0,0,,0.3,11.8,2,1 +2,2015-07-01 00:32:36,2015-07-01 23:47:38,N,1,-73.967430114746094,40.634902954101563,-73.9365234375,40.663185119628906,1,3.63,14,0.5,0.5,0,0,,0.3,15.3,2,1 +2,2015-07-01 00:00:27,2015-07-01 00:02:22,N,1,-73.957649230957031,40.717945098876953,-73.951423645019531,40.723564147949219,1,.48,3.5,0.5,0.5,0,0,,0.3,4.8,2,1 diff --git a/csv/testdata/taxi/green_tripdata_2015-08.csv b/csv/testdata/taxi/green_tripdata_2015-08.csv new file mode 100644 index 0000000..dab05a1 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-08.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-08-01 00:00:03,2015-08-01 00:00:07,N,5,-73.865013122558594,40.826099395751953,-73.864990234375,40.826099395751953,1,.00,7,0,0,0,0,,0,7,1,2 +2,2015-08-01 00:01:57,2015-08-01 00:02:00,N,2,-73.987335205078125,40.692123413085938,-73.987327575683594,40.692123413085938,1,.00,52,0,0.5,0,0,,0.3,52.8,2,1 +2,2015-08-01 00:00:30,2015-08-01 00:01:31,N,1,-73.949958801269531,40.827182769775391,-73.952377319335938,40.82403564453125,1,.25,3,0.5,0.5,0,0,,0.3,4.3,1,1 +2,2015-08-01 00:01:08,2015-08-01 00:01:40,N,1,-73.944976806640625,40.808582305908203,-73.9459228515625,40.807315826416016,1,.04,2.5,0.5,0.5,0,0,,0.3,3.8,2,1 +2,2015-08-01 00:00:38,2015-08-01 00:02:00,N,1,-73.951560974121094,40.812110900878906,-73.952171325683594,40.812458038330078,1,.26,3,0.5,0.5,0,0,,0.3,4.3,2,1 +2,2015-08-01 00:00:38,2015-08-01 00:03:11,N,1,-73.952079772949219,40.790130615234375,-73.947639465332031,40.795391082763672,1,.51,4,0.5,0.5,0,0,,0.3,5.3,2,1 +2,2015-08-01 00:02:23,2015-08-01 00:03:32,N,1,-73.913894653320312,40.683006286621094,-73.913887023925781,40.684761047363281,5,.26,3,0.5,0.5,0,0,,0.3,4.3,2,1 +2,2015-08-01 00:01:06,2015-08-01 00:04:08,N,1,-73.951545715332031,40.713802337646484,-73.958465576171875,40.713428497314453,1,.41,4,0.5,0.5,1.32,0,,0.3,6.62,1,1 +2,2015-08-01 00:02:26,2015-08-01 00:04:11,N,5,-73.853218078613281,40.741741180419922,-73.853225708007812,40.741733551025391,1,.00,11,0,0,0,0,,0,11,1,2 diff --git a/csv/testdata/taxi/green_tripdata_2015-09.csv b/csv/testdata/taxi/green_tripdata_2015-09.csv new file mode 100644 index 0000000..ca93cfc --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-09.csv @@ -0,0 +1,10 @@ 
+VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-09-01 00:02:34,2015-09-01 00:02:38,N,5,-73.979484558105469,40.684955596923828,-73.97943115234375,40.685020446777344,1,.00,7.8,0,0,1.95,0,,0,9.75,1,2 +2,2015-09-01 00:04:20,2015-09-01 00:04:24,N,5,-74.010795593261719,40.912216186523438,-74.010780334472656,40.912212371826172,1,.00,45,0,0,0,0,,0,45,1,2 +2,2015-09-01 00:01:50,2015-09-01 00:04:24,N,1,-73.921409606933594,40.766708374023438,-73.914413452148438,40.764686584472656,1,.59,4,0.5,0.5,0.5,0,,0.3,5.8,1,1 +2,2015-09-01 00:02:36,2015-09-01 00:06:42,N,1,-73.92138671875,40.766677856445313,-73.931427001953125,40.771583557128906,1,.74,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2015-09-01 00:00:14,2015-09-01 00:04:20,N,1,-73.955482482910156,40.714046478271484,-73.944412231445313,40.714729309082031,1,.61,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2015-09-01 00:00:39,2015-09-01 00:05:20,N,1,-73.945297241210938,40.808185577392578,-73.937667846679688,40.821197509765625,1,1.07,5.5,0.5,0.5,1.36,0,,0.3,8.16,1,1 +2,2015-09-01 00:00:52,2015-09-01 00:05:50,N,1,-73.890876770019531,40.746425628662109,-73.876922607421875,40.756305694580078,1,1.43,6.5,0.5,0.5,0,0,,0.3,7.8,1,1 +2,2015-09-01 00:02:15,2015-09-01 00:05:34,N,1,-73.946701049804687,40.797321319580078,-73.937644958496094,40.804515838623047,1,.90,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2015-09-01 00:02:36,2015-09-01 00:07:20,N,1,-73.963150024414063,40.693828582763672,-73.956787109375,40.680530548095703,1,1.33,6,0.5,0.5,1.46,0,,0.3,8.76,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2015-10.csv b/csv/testdata/taxi/green_tripdata_2015-10.csv new file mode 100644 index 0000000..b9a6c0b --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-10.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-10-01 00:38:23,2015-10-01 00:56:03,N,1,-73.937789916992188,40.748725891113281,-73.912384033203125,40.773838043212891,5,2.29,12.5,1,0.5,2.86,0,,0.3,17.16,1,1 +2,2015-10-01 00:38:55,2015-10-01 23:46:51,N,1,-73.987640380859375,40.692001342773437,-73.958122253417969,40.694393157958984,1,1.65,7.5,0.5,0.5,0,0,,0.3,8.8,2,1 +2,2015-10-01 00:01:10,2015-10-01 00:05:59,N,1,-73.9490966796875,40.680374145507812,-73.938522338867188,40.685501098632813,1,1.19,6,0.5,0.5,1.46,0,,0.3,8.76,1,1 +2,2015-10-01 00:03:04,2015-10-01 00:09:15,N,1,-73.844352722167969,40.721626281738281,-73.859039306640625,40.727054595947266,1,1.06,6,0.5,0.5,1.82,0,,0.3,9.12,1,1 +2,2015-10-01 00:00:41,2015-10-01 00:05:00,N,1,-73.987045288085938,40.693759918212891,-73.976585388183594,40.695919036865234,5,.65,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2015-10-01 00:00:45,2015-10-01 00:20:16,N,1,-73.94720458984375,40.723033905029297,-73.978462219238281,40.745033264160156,1,5.48,19,0.5,0.5,4.06,0,,0.3,24.36,1,1 +2,2015-10-01 00:01:13,2015-10-01 00:01:18,N,1,-73.93994140625,40.852157592773438,-73.939529418945313,40.853485107421875,5,.00,2.5,0.5,0.5,0,0,,0.3,3.8,2,1 +2,2015-10-01 00:00:10,2015-10-01 00:05:31,N,1,-73.961563110351562,40.714168548583984,-73.94842529296875,40.718326568603516,3,1.29,6,0.5,0.5,1.46,0,,0.3,8.76,1,1 
+2,2015-10-01 00:00:16,2015-10-01 00:16:05,N,1,-73.992050170898438,40.690158843994141,-73.959625244140625,40.642547607421875,1,3.98,15.5,0.5,0.5,2.2,0,,0.3,19,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2015-11.csv b/csv/testdata/taxi/green_tripdata_2015-11.csv new file mode 100644 index 0000000..e30ece1 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-11.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-11-01 00:57:34,2015-11-01 23:57:45,N,5,-73.955085754394531,40.663722991943359,-73.958984375,40.663448333740234,1,.09,15,0,0,0,0,,0,15,2,2 +2,2015-11-01 00:57:34,2015-11-01 23:57:45,N,5,-73.955085754394531,40.663722991943359,-73.958984375,40.663448333740234,1,.09,-15,0,0,0,0,,0,-15,3,2 +2,2015-11-01 00:01:58,2015-11-01 00:02:16,N,1,-73.913337707519531,40.871875762939453,-73.915885925292969,40.869903564453125,1,.07,2.5,0.5,0.5,0,0,,0.3,3.8,2,1 +2,2015-11-01 00:00:15,2015-11-01 00:02:13,N,1,-73.94207763671875,40.838085174560547,-73.947608947753906,40.830501556396484,1,.59,4,0.5,0.5,0,0,,0.3,5.3,2,1 +2,2015-11-01 00:01:37,2015-11-01 00:02:42,N,1,-73.950157165527344,40.826278686523438,-73.944129943847656,40.823734283447266,1,.37,3,0.5,0.5,0.86,0,,0.3,5.16,1,1 +2,2015-11-01 00:00:36,2015-11-01 00:02:02,N,1,-73.935264587402344,40.714305877685547,-73.933685302734375,40.714008331298828,1,.11,3,0.5,0.5,0,0,,0.3,4.3,2,1 +2,2015-11-01 00:53:39,2015-11-02 00:02:50,N,1,-73.943611145019531,40.830173492431641,-73.952423095703125,40.808151245117188,1,1.82,9,0.5,0.5,1.5,0,,0.3,11.8,1,1 +2,2015-11-01 00:00:08,2015-11-01 00:03:14,N,1,-73.9056396484375,40.887531280517578,-73.909507751464844,40.892841339111328,1,.68,4.5,0.5,0.5,0,0,,0.3,5.8,2,1 +2,2015-11-01 00:00:31,2015-11-01 00:03:35,N,1,-73.952301025390625,40.706470489501953,-73.957771301269531,40.708499908447266,1,.34,4,0.5,0.5,1.06,0,,0.3,6.36,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2015-12.csv b/csv/testdata/taxi/green_tripdata_2015-12.csv new file mode 100644 index 0000000..b07b4a9 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2015-12.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type +2,2015-12-01 00:12:54,2015-12-01 00:18:18,N,1,-73.844680786132813,40.721508026123047,-73.836334228515625,40.708877563476562,1,1.27,6.5,0.5,0.5,1.56,0,,0.3,9.36,1,1 +2,2015-12-01 00:48:19,2015-12-01 00:59:31,N,1,-73.807029724121094,40.699657440185547,-73.863670349121094,40.691143035888672,1,3.57,12.5,0.5,0.5,2,0,,0.3,15.8,1,1 +2,2015-12-01 00:06:13,2015-12-01 00:20:40,N,1,-73.961814880371094,40.805641174316406,-73.925979614257812,40.824123382568359,2,3.51,13.5,0.5,0.5,4.44,0,,0.3,19.24,1,1 +2,2015-12-01 00:43:38,2015-12-01 00:59:37,N,1,-73.945220947265625,40.808383941650391,-73.959587097167969,40.801357269287109,1,2.43,12.5,0.5,0.5,2.76,0,,0.3,16.56,1,1 +2,2015-12-01 00:04:50,2015-12-01 00:09:40,N,1,-73.939018249511719,40.8055419921875,-73.943977355957031,40.813739776611328,5,.89,5.5,0.5,0.5,1,0,,0.3,7.8,1,1 +2,2015-12-01 00:38:47,2015-12-01 
00:43:47,N,1,-73.941574096679688,40.806148529052734,-73.953437805175781,40.809429168701172,5,.74,5.5,0.5,0.5,2.04,0,,0.3,8.84,1,1 +2,2015-12-01 00:56:09,2015-12-01 01:09:53,N,1,-73.949653625488281,40.802230834960938,-73.97869873046875,40.745998382568359,5,4.56,15,0.5,0.5,0,0,,0.3,16.3,2,1 +2,2015-12-01 00:00:52,2015-12-01 00:13:31,N,1,-73.962203979492188,40.716083526611328,-73.997581481933594,40.678768157958984,1,5.43,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,1 +2,2015-12-01 00:07:44,2015-12-01 00:15:14,N,1,-73.903549194335938,40.745418548583984,-73.893211364746094,40.73486328125,5,1.24,7,0.5,0.5,0,0,,0.3,8.3,2,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-01.csv b/csv/testdata/taxi/green_tripdata_2016-01.csv new file mode 100644 index 0000000..23afb8b --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-01.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-01-01 00:29:24,2016-01-01 00:39:36,N,1,-73.928642272949219,40.680610656738281,-73.924278259277344,40.698043823242188,1,1.46,8,0.5,0.5,1.86,0,,0.3,11.16,1,1 +2,2016-01-01 00:19:39,2016-01-01 00:39:18,N,1,-73.952674865722656,40.723175048828125,-73.923919677734375,40.761379241943359,1,3.56,15.5,0.5,0.5,0,0,,0.3,16.8,2,1 +2,2016-01-01 00:19:33,2016-01-01 00:39:48,N,1,-73.971611022949219,40.676105499267578,-74.013160705566406,40.646072387695313,1,3.79,16.5,0.5,0.5,4.45,0,,0.3,22.25,1,1 +2,2016-01-01 00:22:12,2016-01-01 00:38:32,N,1,-73.989501953125,40.669578552246094,-74.000648498535156,40.689033508300781,1,3.01,13.5,0.5,0.5,0,0,,0.3,14.8,2,1 +2,2016-01-01 00:24:01,2016-01-01 00:39:22,N,1,-73.964729309082031,40.682853698730469,-73.940719604492188,40.663013458251953,1,2.55,12,0.5,0.5,0,0,,0.3,13.3,2,1 +2,2016-01-01 00:32:59,2016-01-01 00:39:35,N,1,-73.891143798828125,40.746456146240234,-73.867744445800781,40.742111206054688,1,1.37,7,0.5,0.5,0,0,,0.3,8.3,2,1 +2,2016-01-01 00:34:42,2016-01-01 00:39:21,N,1,-73.896675109863281,40.746196746826172,-73.886192321777344,40.745689392089844,1,.57,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2016-01-01 00:31:23,2016-01-01 00:39:36,N,1,-73.953353881835937,40.803558349609375,-73.949150085449219,40.794120788574219,1,1.01,7,0.5,0.5,0,0,,0.3,8.3,2,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-02.csv b/csv/testdata/taxi/green_tripdata_2016-02.csv new file mode 100644 index 0000000..2c8a358 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-02.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-02-01 00:00:01,2016-02-01 00:10:06,N,1,-73.939018249511719,40.805213928222656,-73.9725341796875,40.785884857177734,1,2.86,10.5,0.5,0.5,0,0,,0.3,11.8,2,1 +2,2016-02-01 00:01:33,2016-02-01 00:20:13,N,1,-73.891494750976563,40.746650695800781,-73.890876770019531,40.743896484375,1,3.35,13,0.5,0.5,0,0,,0.3,14.3,2,1 +2,2016-02-01 00:03:46,2016-02-01 00:21:04,N,1,-73.983779907226562,40.676132202148438,-73.956977844238281,40.718326568603516,1,4.70,17.5,0.5,0.5,3.76,0,,0.3,22.56,1,1 +2,2016-02-01 00:00:05,2016-02-01 
00:06:48,N,1,-73.807518005371094,40.700374603271484,-73.831657409667969,40.705978393554687,1,2.11,8,0.5,0.5,0,0,,0.3,9.3,2,1 +2,2016-02-01 00:06:20,2016-02-01 00:08:47,N,1,-73.903961181640625,40.74493408203125,-73.900009155273438,40.733600616455078,5,.98,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2016-02-01 00:00:42,2016-02-01 00:27:25,N,1,-73.978614807128906,40.670421600341797,-73.986892700195313,40.748573303222656,1,6.00,22.5,0.5,0.5,4.76,0,,0.3,28.56,1,1 +2,2016-02-01 00:00:59,2016-02-01 00:07:25,N,1,-73.829826354980469,40.759658813476563,-73.809982299804688,40.75115966796875,5,1.46,7,0.5,0.5,0,0,,0.3,8.3,2,1 +2,2016-02-01 00:00:56,2016-02-01 00:15:31,N,1,-73.95501708984375,40.735298156738281,-73.939865112304688,40.793724060058594,3,5.96,17.5,0.5,0.5,4.7,0,,0.3,23.5,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-03.csv b/csv/testdata/taxi/green_tripdata_2016-03.csv new file mode 100644 index 0000000..83ff76d --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-03.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-03-01 00:00:00,2016-03-01 00:01:39,N,1,-73.944755554199219,40.819049835205078,-73.949485778808594,40.812614440917969,1,.49,3.5,0.5,0.5,0,0,,0.3,4.8,2,1 +2,2016-03-01 00:00:23,2016-03-01 00:03:20,N,1,-73.8697509765625,40.733612060546875,-73.859085083007813,40.738811492919922,1,.71,4.5,0.5,0.5,0,0,,0.3,5.8,2,1 +2,2016-03-01 00:00:29,2016-03-01 00:04:03,N,1,-73.903434753417969,40.745346069335938,-73.899635314941406,40.735000610351563,5,.90,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2016-03-01 00:02:46,2016-03-01 00:04:29,N,1,-73.91485595703125,40.764007568359375,-73.919029235839844,40.7657470703125,1,.25,3.5,0.5,0.5,1.2,0,,0.3,6,1,1 +2,2016-03-01 00:01:09,2016-03-01 00:04:52,N,1,-73.95074462890625,40.810585021972656,-73.940521240234375,40.824665069580078,1,1.13,5.5,0.5,0.5,0,0,,0.3,6.8,2,1 +2,2016-03-01 00:01:04,2016-03-01 00:04:55,N,1,-73.911727905273438,40.775428771972656,-73.914604187011719,40.764446258544922,1,.82,4.5,0.5,0.5,0,0,,0.3,5.8,2,1 +2,2016-03-01 00:02:40,2016-03-01 00:05:08,N,1,-73.979026794433594,40.678535461425781,-73.974342346191406,40.681133270263672,2,.44,4,0.5,0.5,1.06,0,,0.3,6.36,1,1 +2,2016-03-01 00:00:42,2016-03-01 00:05:30,N,1,-73.84405517578125,40.721141815185547,-73.859611511230469,40.719505310058594,1,1.00,5.5,0.5,0.5,1.7,0,,0.3,8.5,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-04.csv b/csv/testdata/taxi/green_tripdata_2016-04.csv new file mode 100644 index 0000000..9586d1e --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-04.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-04-01 00:02:03,2016-04-01 00:05:53,N,1,-73.991180419921875,40.68560791015625,-73.984115600585938,40.695980072021484,1,.94,5,0.5,0.5,1.26,0,,0.3,7.56,1,1 +2,2016-04-01 00:01:31,2016-04-01 00:05:55,N,1,-73.844291687011719,40.721431732177734,-73.850440979003906,40.724143981933594,1,.68,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2016-04-01 00:00:57,2016-04-01 
00:07:36,N,1,-73.944007873535156,40.71453857421875,-73.938705444335938,40.724925994873047,1,1.53,7,0.5,0.5,1.66,0,,0.3,9.96,1,1 +2,2016-04-01 00:01:22,2016-04-01 00:06:12,N,1,-73.952789306640625,40.810749053955078,-73.963508605957031,40.796485900878906,1,1.16,6,0.5,0.5,0,0,,0.3,7.3,1,1 +2,2016-04-01 00:00:56,2016-04-01 00:05:25,N,1,-73.991249084472656,40.691432952880859,-73.988761901855469,40.683597564697266,3,.75,5,0.5,0.5,0,0,,0.3,6.3,2,1 +2,2016-04-01 00:00:47,2016-04-01 00:14:49,N,1,-73.968704223632812,40.6778564453125,-73.935035705566406,40.651569366455078,1,3.26,13,0.5,0.5,0,0,,0.3,14.3,2,1 +2,2016-04-01 00:00:07,2016-04-01 00:03:41,N,1,-73.957878112792969,40.711040496826172,-73.955886840820313,40.707653045654297,1,.54,4.5,0.5,0.5,0,0,,0.3,5.8,2,1 +2,2016-04-01 00:00:13,2016-04-01 00:18:43,N,1,-73.960647583007813,40.719345092773438,-73.917854309082031,40.781211853027344,1,5.59,18.5,0.5,0.5,4.95,0,,0.3,24.75,1,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-05.csv b/csv/testdata/taxi/green_tripdata_2016-05.csv new file mode 100644 index 0000000..15166c7 --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-05.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-05-01 00:15:25,2016-05-01 00:21:53,N,1,-73.861953735351563,40.750041961669922,-73.852859497070312,40.740276336669922,5,1.04,6,0.5,0.5,0,0,,0.3,7.3,1,1 +2,2016-05-01 00:33:12,2016-05-01 00:51:45,N,1,-73.858810424804688,40.750919342041016,-73.903373718261719,40.703739166259766,5,6.60,21,0.5,0.5,0,0,,0.3,22.3,2,1 +2,2016-05-01 00:38:38,2016-05-01 00:49:47,N,1,-73.966285705566406,40.710605621337891,-73.943603515625,40.717494964599609,2,1.97,9.5,0.5,0.5,1,0,,0.3,11.8,1,1 +2,2016-05-01 00:04:10,2016-05-01 00:34:39,N,1,-73.954132080078125,40.787097930908203,-73.953399658203125,40.724899291992187,2,6.66,25.5,0.5,0.5,5.36,0,,0.3,32.16,1,1 +2,2016-05-01 00:37:43,2016-05-01 00:45:28,N,1,-73.956298828125,40.723331451416016,-73.966049194335937,40.712184906005859,2,.93,6.5,0.5,0.5,3,0,,0.3,10.8,1,1 +2,2016-05-01 00:47:36,2016-05-01 01:01:50,N,1,-73.966041564941406,40.712162017822266,-73.98333740234375,40.722137451171875,2,2.18,11.5,0.5,0.5,2,0,,0.3,14.8,1,1 +2,2016-05-01 00:10:57,2016-05-01 00:30:04,N,1,-73.954071044921875,40.729564666748047,-73.896575927734375,40.703746795654297,6,4.38,16.5,0.5,0.5,0,0,,0.3,17.8,2,1 +2,2016-05-01 00:35:09,2016-05-01 00:50:47,N,1,-73.910980224609375,40.699596405029297,-73.891258239746094,40.662296295166016,6,4.06,15,0.5,0.5,0,0,,0.3,16.3,2,1 diff --git a/csv/testdata/taxi/green_tripdata_2016-06.csv b/csv/testdata/taxi/green_tripdata_2016-06.csv new file mode 100644 index 0000000..91a591f --- /dev/null +++ b/csv/testdata/taxi/green_tripdata_2016-06.csv @@ -0,0 +1,10 @@ +VendorID,lpep_pickup_datetime,Lpep_dropoff_datetime,Store_and_fwd_flag,RateCodeID,Pickup_longitude,Pickup_latitude,Dropoff_longitude,Dropoff_latitude,Passenger_count,Trip_distance,Fare_amount,Extra,MTA_tax,Tip_amount,Tolls_amount,Ehail_fee,improvement_surcharge,Total_amount,Payment_type,Trip_type + +2,2016-06-01 02:46:38,2016-06-01 03:06:40,N,1,-73.930580139160156,40.695178985595703,-74.000053405761719,40.729045867919922,1,5.24,19.5,0.5,0.5,6.24,0,,0.3,27.04,1,1 +2,2016-06-01 02:55:26,2016-06-01 
03:06:52,N,1,-73.946929931640625,40.792552947998047,-73.951568603515625,40.825160980224609,1,3.14,11.5,0.5,0.5,2.56,0,,0.3,15.36,1,1 +2,2016-06-01 02:50:36,2016-06-01 03:08:39,N,1,-73.944534301757813,40.823955535888672,-73.994659423828125,40.750423431396484,1,7.50,23.5,0.5,0.5,2,0,,0.3,26.8,1,1 +2,2016-06-01 02:57:04,2016-06-01 03:07:52,N,1,-73.95220947265625,40.823871612548828,-73.914360046386719,40.814697265625,1,2.27,10.5,0.5,0.5,0,0,,0.3,11.8,2,1 +2,2016-06-01 02:52:03,2016-06-01 03:08:12,N,1,-73.957977294921875,40.717826843261719,-73.954017639160156,40.655120849609375,3,4.90,16.5,0.5,0.5,0,0,,0.3,17.8,1,1 +2,2016-06-01 02:59:03,2016-06-01 03:09:25,N,1,-73.965324401855469,40.711032867431641,-73.98968505859375,40.714168548583984,1,2.76,11,0.5,0.5,2.46,0,,0.3,14.76,1,1 +2,2016-06-01 02:58:43,2016-06-01 03:03:00,N,1,-73.921333312988281,40.766643524169922,-73.925422668457031,40.761703491210938,1,1.04,5.5,0.5,0.5,0,0,,0.3,6.8,2,1 +2,2016-06-01 02:37:36,2016-06-02 00:00:00,N,1,-73.949203491210938,40.682666778564453,-73.98358154296875,40.725902557373047,1,6.82,25.5,0.5,0.5,0,0,,0.3,26.8,2,1 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-01.csv b/csv/testdata/taxi/yellow_tripdata_2009-01.csv new file mode 100644 index 0000000..aa46f34 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-01.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-01-04 02:52:00,2009-01-04 03:02:00,1,2.6299999999999999,-73.991956999999999,40.721567,,,-73.993803,40.695922000000003,CASH,8.9000000000000004,0.5,,0,0,9.4000000000000004 +VTS,2009-01-04 03:31:00,2009-01-04 03:38:00,3,4.5499999999999998,-73.982101999999998,40.736289999999997,,,-73.955849999999998,40.768030000000003,Credit,12.1,0.5,,2,0,14.6 +VTS,2009-01-03 15:43:00,2009-01-03 15:57:00,5,10.35,-74.002587000000005,40.739747999999999,,,-73.869983000000005,40.770225000000003,Credit,23.699999999999999,0,,4.7400000000000002,0,28.440000000000001 +DDS,2009-01-01 20:52:58,2009-01-01 21:14:00,1,5,-73.974266999999998,40.790954999999997,,,-73.996557999999993,40.731848999999997,CREDIT,14.9,0.5,,3.0499999999999998,0,18.449999999999999 +DDS,2009-01-24 16:18:23,2009-01-24 16:24:56,1,0.40000000000000002,-74.001580000000004,40.719382000000003,,,-74.008377999999993,40.720350000000003,CASH,3.7000000000000002,0,,0,0,3.7000000000000002 +DDS,2009-01-16 22:35:59,2009-01-16 22:43:35,2,1.2,-73.989806000000002,40.735005999999998,,,-73.985021000000003,40.724494,CASH,6.0999999999999996,0.5,,0,0,6.5999999999999996 +DDS,2009-01-21 08:55:57,2009-01-21 09:05:42,1,0.40000000000000002,-73.984049999999996,40.743544,,,-73.980260000000001,40.748925999999997,CREDIT,5.7000000000000002,0,,1,0,6.7000000000000002 +VTS,2009-01-04 04:31:00,2009-01-04 04:36:00,1,1.72,-73.992635000000007,40.748362,,,-73.995585000000005,40.728307000000001,CASH,6.0999999999999996,0.5,,0,0,6.5999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-02.csv b/csv/testdata/taxi/yellow_tripdata_2009-02.csv new file mode 100644 index 0000000..5186de9 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-02.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +DDS,2009-02-03 08:25:00,2009-02-03 
08:33:39,1,1.6000000000000001,-73.992767999999998,40.758324999999999,,,-73.994709999999998,40.739722999999998,CASH,6.9000000000000004,0,,0,0,6.9000000000000004 +VTS,2009-02-28 00:26:00,2009-02-28 00:40:00,5,3.0499999999999998,0,0,,,0,0,CASH,10.5,0.5,,0,0,11 +DDS,2009-02-22 00:39:23,2009-02-22 00:45:52,1,1.5,-73.137393000000003,41.366137999999999,,,-73.137393000000003,41.366137999999999,CASH,5.7000000000000002,0.5,,0,0,6.2000000000000002 +VTS,2009-02-28 12:47:00,2009-02-28 12:54:00,1,1.4199999999999999,-73.980457999999999,40.748444999999997,,,-73.996103000000005,40.737094999999997,CASH,6.0999999999999996,0,,0,0,6.0999999999999996 +DDS,2009-02-05 18:34:35,2009-02-05 18:43:26,1,1.5,-73.137393000000003,41.366137999999999,,,-73.137393000000003,41.366137999999999,CASH,6.9000000000000004,1,,0,0,7.9000000000000004 +VTS,2009-02-28 09:36:00,2009-02-28 09:43:00,1,1.6399999999999999,-73.972161999999997,40.745573,,,-73.972117999999995,40.762552999999997,CASH,6.9000000000000004,0,,0,0,6.9000000000000004 +DDS,2009-02-07 11:09:42,2009-02-07 11:18:15,1,1.8,-73.989546000000004,40.739077999999999,,,-73.974858999999995,40.758862999999998,CASH,7.2999999999999998,0,,0,0,7.2999999999999998 +DDS,2009-02-03 09:54:30,2009-02-03 09:58:34,1,0.5,-73.990943999999999,40.728529999999999,,,-73.997338999999997,40.725158,CASH,3.7000000000000002,0,,0,0,3.7000000000000002 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-03.csv b/csv/testdata/taxi/yellow_tripdata_2009-03.csv new file mode 100644 index 0000000..fa150c9 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-03.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +CMT,2009-03-26 15:30:14,2009-03-26 15:33:45,1,0.29999999999999999,-73.970708999999999,40.796382000000001,,0,-73.973602,40.792057999999997,Cash,4.0999999999999996,0,,0,0,4.0999999999999996 +CMT,2009-03-07 00:09:04,2009-03-07 00:16:06,1,1.6000000000000001,-74.007315000000006,40.739964000000001,,,-74.004716999999999,40.751879000000002,Cash,7,0,,0,0,7 +DDS,2009-03-11 19:49:43,2009-03-11 20:00:51,1,2,-73.976375000000004,40.756729,,,-73.954438999999994,40.767204999999997,CASH,8.0999999999999996,0.5,,0,0,8.5999999999999996 +CMT,2009-03-06 22:09:53,2009-03-06 22:16:55,3,1.6000000000000001,-73.990624999999994,40.751452999999998,,,-74.008426999999998,40.746578999999997,Cash,7,0,,0,0,7 +CMT,2009-03-06 22:55:45,2009-03-06 23:11:35,2,3.7999999999999998,-73.993797999999998,40.734164,,,-73.958811999999995,40.769911,Cash,12.6,0,,0,0,12.6 +CMT,2009-03-07 01:15:45,2009-03-07 01:19:42,1,0.69999999999999996,-73.967421000000002,40.780917000000002,,,-73.968086999999997,40.771853,Cash,5.4000000000000004,0,,0,0,5.4000000000000004 +CMT,2009-03-07 22:21:28,2009-03-07 22:21:35,1,0,-73.961619999999996,40.756548000000002,,,-73.961616000000006,40.756565999999999,No Charge,3,0,,0,0,3 +CMT,2009-03-01 07:27:29,2009-03-01 07:31:13,1,0.90000000000000002,-73.933819,40.854028,,0,-73.939632000000003,40.841354000000003,Cash,4.5,0,,0,0,4.5 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-04.csv b/csv/testdata/taxi/yellow_tripdata_2009-04.csv new file mode 100644 index 0000000..163bbcc --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-04.csv @@ -0,0 +1,10 @@ 
+vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-04-08 12:19:00,2009-04-08 12:24:00,1,0.48999999999999999,-73.974467000000004,40.760793,,,-73.966769999999997,40.757057000000003,CASH,4.0999999999999996,0,,0,0,4.0999999999999996 +VTS,2009-04-08 15:20:00,2009-04-08 15:28:00,1,1.05,-73.978527999999997,40.753472000000002,,,-73.981245000000001,40.76549,Credit,6.0999999999999996,0,,0.90000000000000002,0,7 +VTS,2009-04-06 21:31:00,2009-04-06 21:35:00,1,1.1399999999999999,-73.995800000000003,40.738933000000003,,,-74.002082999999999,40.747672000000001,Credit,5.2999999999999998,0.5,,1.5,0,7.2999999999999998 +VTS,2009-04-06 19:02:00,2009-04-06 19:41:00,1,19.780000000000001,-73.863343,40.769942,,,-74.040087,40.619439999999997,Credit,43.700000000000003,1,,8.9399999999999995,0,53.640000000000001 +VTS,2009-04-08 09:37:00,2009-04-08 09:51:00,1,5.2800000000000002,-73.992262999999994,40.715288000000001,,,-73.962441999999996,40.767997000000001,CASH,14.5,0,,0,0,14.5 +VTS,2009-04-06 23:26:00,2009-04-06 23:41:00,5,3.3300000000000001,-73.982922000000002,40.757682000000003,,,-73.977716999999998,40.721342,CASH,11.300000000000001,0.5,,0,0,11.800000000000001 +VTS,2009-04-08 16:14:00,2009-04-08 16:37:00,1,3.6800000000000002,-73.949174999999997,40.773432,,,-73.990696999999997,40.751466999999998,CASH,14.5,1,,0,0,15.5 +VTS,2009-04-08 09:33:00,2009-04-08 09:35:00,1,0.39000000000000001,-73.906378000000004,40.773167999999998,,,-73.911742000000004,40.775435000000002,CASH,3.2999999999999998,0,,0,0,3.2999999999999998 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-05.csv b/csv/testdata/taxi/yellow_tripdata_2009-05.csv new file mode 100644 index 0000000..9e60fa0 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-05.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +CMT,2009-05-27 07:41:05,2009-05-27 07:42:28,1,0.29999999999999999,-73.974104999999994,40.742891999999998,,0,-73.973769000000004,40.746405000000003,Credit,3.2999999999999998,0,,1,0,4.2999999999999998 +CMT,2009-05-27 07:51:06,2009-05-27 07:58:43,1,1.8999999999999999,-74.008148000000006,40.738854000000003,,0,-74.015637999999996,40.714536000000003,Cash,7.2999999999999998,0,,0,0,7.2999999999999998 +DDS,2009-05-15 15:22:02,2009-05-15 15:30:15,3,1.3999999999999999,-73.973343,40.764046999999998,,,-73.952663000000001,40.769894999999998,CREDIT,6.0999999999999996,0,,1,0,7.0999999999999996 +CMT,2009-05-26 22:06:37,2009-05-26 22:15:33,1,2.2000000000000002,-74.005256000000003,40.719729999999998,,0,-74.005522999999997,40.745584999999998,Cash,7.7000000000000002,0,,1.23,0,8.9299999999999997 +CMT,2009-05-27 12:51:39,2009-05-27 13:00:46,1,0.90000000000000002,-73.997714000000002,40.741363,,0,-73.994257000000005,40.751049999999999,Cash,6.5,0,,0,0,6.5 +CMT,2009-05-28 10:29:18,2009-05-28 10:34:42,1,1.2,0,0,,0,0,0,Credit,5.7000000000000002,0,,1.1399999999999999,0,6.8399999999999999 +CMT,2009-05-26 22:42:39,2009-05-26 22:58:15,1,4.4000000000000004,-74.008335000000002,40.711829999999999,,0,-73.982074999999995,40.667095000000003,Credit,13.699999999999999,0,,2.8399999999999999,0,16.539999999999999 +CMT,2009-05-26 09:06:13,2009-05-26 
09:18:35,1,2,-73.963102000000006,40.799137000000002,,0,-73.981999000000002,40.773294,Dispute,8.9000000000000004,0,,0,0,8.9000000000000004 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-06.csv b/csv/testdata/taxi/yellow_tripdata_2009-06.csv new file mode 100644 index 0000000..00d86d3 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-06.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-06-14 23:23:00,2009-06-14 23:48:00,1,17.52,-73.787441999999999,40.641525000000001,,,-73.980072000000007,40.742963000000003,Credit,45,0,,9,4.1500000000000004,58.149999999999999 +VTS,2009-06-18 17:35:00,2009-06-18 17:43:00,1,1.5600000000000001,-74.009766999999997,40.722065000000001,,,-74.005697999999995,40.740186999999999,Credit,6.5,1,,1,0,8.5 +VTS,2009-06-10 18:08:00,2009-06-10 18:27:00,5,3.3700000000000001,-73.983037999999993,40.761944999999997,,,-74.004745,40.718043000000002,Credit,12.5,1,,2,0,15.5 +VTS,2009-06-14 23:54:00,2009-06-14 23:58:00,1,1.1100000000000001,-73.992247000000006,40.749802000000003,,,-73.985232999999994,40.739637000000002,CASH,4.9000000000000004,0.5,,0,0,5.4000000000000004 +VTS,2009-06-13 13:01:00,2009-06-13 13:23:00,1,11.09,-73.949233000000007,40.776825000000002,,,-73.852693000000002,40.730032000000001,CASH,25.699999999999999,0,,0,4.1500000000000004,29.850000000000001 +VTS,2009-06-10 19:43:00,2009-06-10 19:52:00,2,2.1000000000000001,-73.953652000000005,40.790582000000001,,,-73.976860000000002,40.777537000000002,Credit,7.2999999999999998,1,,2,0,10.300000000000001 +VTS,2009-06-10 20:06:00,2009-06-10 20:09:00,1,0.40000000000000002,-73.966408000000001,40.767147000000001,,,-73.962125,40.770277,Credit,3.7000000000000002,0.5,,1,0,5.2000000000000002 +VTS,2009-06-14 20:57:00,2009-06-14 21:08:00,2,2.2400000000000002,-73.977772999999999,40.761749999999999,,,-73.951464999999999,40.774042999999999,CASH,8.0999999999999996,0.5,,0,0,8.5999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-07.csv b/csv/testdata/taxi/yellow_tripdata_2009-07.csv new file mode 100644 index 0000000..de4fe66 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-07.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-07-15 17:39:00,2009-07-15 17:46:00,1,1.3200000000000001,-73.999132000000003,40.726542000000002,,,-73.984907000000007,40.736347000000002,Credit,6.0999999999999996,1,,1,0,8.0999999999999996 +VTS,2009-07-15 19:22:00,2009-07-15 19:27:00,1,1.03,-73.972234999999998,40.782170000000001,,,-73.958948000000007,40.778083000000002,CASH,5.2999999999999998,1,,0,0,6.2999999999999998 +VTS,2009-07-15 13:22:00,2009-07-15 13:35:00,1,1.53,-73.973804999999999,40.756858000000001,,,-73.98733,40.738387000000003,CASH,8.5,0,,0,0,8.5 +VTS,2009-07-15 17:22:00,2009-07-15 17:39:00,1,5.3799999999999999,-73.953952000000001,40.766579999999998,,,-74.000536999999994,40.727347000000002,CASH,15.699999999999999,1,,0,0,16.699999999999999 +VTS,2009-07-15 17:59:00,2009-07-15 18:06:00,1,0.93999999999999995,-73.958444999999998,40.716431999999998,,,-73.964671999999993,40.727494999999998,Credit,5.2999999999999998,1,,0.69999999999999996,0,7 +VTS,2009-07-15 12:18:00,2009-07-15 
12:47:00,1,3.4900000000000002,-73.995487999999995,40.767453000000003,,,-73.995487999999995,40.767453000000003,Credit,16.100000000000001,0,,3.2200000000000002,0,19.32 +VTS,2009-07-14 21:49:00,2009-07-14 21:52:00,2,0.72999999999999998,-73.959590000000006,40.762872999999999,,,-73.952884999999995,40.772185,CASH,3.7000000000000002,0.5,,0,0,4.2000000000000002 +VTS,2009-07-15 17:28:00,2009-07-15 17:39:00,5,1.3999999999999999,-73.992126999999996,40.754035000000002,,,-73.973602,40.754843000000001,Credit,7.2999999999999998,1,,3,0,11.300000000000001 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-08.csv b/csv/testdata/taxi/yellow_tripdata_2009-08.csv new file mode 100644 index 0000000..b9cd9d2 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-08.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-08-12 07:28:00,2009-08-12 07:36:00,1,1.8,0,0,,,0,0,CASH,6.9000000000000004,0,,0,0,6.9000000000000004 +VTS,2009-08-12 12:03:00,2009-08-12 12:06:00,2,0.28000000000000003,-73.964567000000002,40.772906999999996,,,-73.962715000000003,40.770712000000003,CASH,3.2999999999999998,0,,0,0,3.2999999999999998 +VTS,2009-08-12 17:42:00,2009-08-12 17:56:00,1,2.2400000000000002,-73.949727999999993,40.784520000000001,,,-73.968270000000004,40.759954999999998,CASH,9.6999999999999993,1,,0,0,10.699999999999999 +VTS,2009-08-11 09:34:00,2009-08-11 09:51:00,2,1.8899999999999999,-73.955884999999995,40.782271999999999,,,-73.971737000000005,40.762816999999998,CASH,10.5,0,,0,0,10.5 +VTS,2009-08-12 13:56:00,2009-08-12 14:12:00,1,3.0099999999999998,-74.005328000000006,40.728048000000001,,,-73.990672000000004,40.757548,CASH,11.699999999999999,0,,0,0,11.699999999999999 +VTS,2009-08-12 08:54:00,2009-08-12 09:08:00,2,3.0099999999999998,-73.989572999999993,40.720452999999999,,,-73.968487999999994,40.757505000000002,Credit,10.9,0,,1,0,11.9 +VTS,2009-08-12 13:34:00,2009-08-12 13:48:00,2,1.1499999999999999,-73.978093000000001,40.761691999999996,,,-73.976327999999995,40.750678000000001,CASH,8.5,0,,0,0,8.5 +VTS,2009-08-12 08:54:00,2009-08-12 09:08:00,1,2.6600000000000001,-73.988335000000006,40.759554999999999,,,-73.979163,40.790238000000002,CASH,10.1,0,,0,0,10.1 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-09.csv b/csv/testdata/taxi/yellow_tripdata_2009-09.csv new file mode 100644 index 0000000..d32c742 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-09.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-09-24 09:00:00,2009-09-24 09:29:00,1,10.23,-73.978967999999995,40.766173000000002,,,-73.872268000000005,40.774529999999999,CASH,26.5,0,,0,4.5700000000000003,31.07 +VTS,2009-09-26 13:40:00,2009-09-26 13:43:00,2,0.62,-73.993183000000002,40.727843,,,-73.989653000000004,40.734628000000001,CASH,4.0999999999999996,0,,0,0,4.0999999999999996 +VTS,2009-09-26 14:19:00,2009-09-26 14:22:00,2,0.95999999999999996,-73.97748,40.750365000000002,,,-73.981227000000004,40.739615000000001,CASH,4.5,0,,0,0,4.5 +VTS,2009-09-07 18:15:00,2009-09-07 18:28:00,1,4.25,-73.947469999999996,40.799334999999999,,,-73.989005000000006,40.756641999999999,CASH,12.1,0,,0,0,12.1 +VTS,2009-09-23 17:58:00,2009-09-23 
18:05:00,2,0.98999999999999999,-73.990527,40.733853000000003,,,-73.985361999999995,40.724559999999997,CASH,5.2999999999999998,1,,0,0,6.2999999999999998 +VTS,2009-09-26 11:59:00,2009-09-26 12:07:00,1,1.78,-74.006361999999996,40.733471999999999,,,-73.991356999999994,40.754668000000002,CASH,7.2999999999999998,0,,0,0,7.2999999999999998 +VTS,2009-09-18 12:14:00,2009-09-18 12:25:00,1,2.1600000000000001,-73.974112000000005,40.751558000000003,,,-73.951082999999997,40.771680000000003,Credit,8.5,0,,3,0,11.5 +VTS,2009-09-18 11:03:00,2009-09-18 11:19:00,1,3.3799999999999999,-73.984650000000002,40.742626999999999,,,-74.015235000000004,40.711415000000002,Credit,11.699999999999999,0,,2,0,13.699999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-10.csv b/csv/testdata/taxi/yellow_tripdata_2009-10.csv new file mode 100644 index 0000000..469f179 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-10.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-10-26 13:06:00,2009-10-26 13:17:00,5,2.02,-73.967241999999999,40.803224999999998,,,-73.957516999999996,40.783532999999998,CASH,8.0999999999999996,0,,0,0,8.0999999999999996 +VTS,2009-10-27 22:12:00,2009-10-27 22:39:00,1,16.699999999999999,-73.783803000000006,40.648707999999999,,,-73.967056999999997,40.613641999999999,Credit,36.899999999999999,0.5,,7.4800000000000004,0,44.880000000000003 +VTS,2009-10-26 11:30:00,2009-10-26 11:41:00,2,1.79,-73.969984999999994,40.752457,,,-73.989717999999996,40.746265000000001,CASH,7.7000000000000002,0,,0,0,7.7000000000000002 +VTS,2009-10-26 14:41:00,2009-10-26 14:51:00,2,1.3400000000000001,-73.979862999999995,40.784092999999999,,,-73.960651999999996,40.775570000000002,CASH,6.9000000000000004,0,,0,0,6.9000000000000004 +VTS,2009-10-26 12:34:00,2009-10-26 12:51:00,2,9.2100000000000009,-73.976872,40.749082000000001,,,-73.862058000000005,40.768635000000003,Credit,22.100000000000001,0,,5.5,0,27.600000000000001 +VTS,2009-10-26 08:27:00,2009-10-26 08:34:00,1,0.72999999999999998,-73.997095000000002,40.722482999999997,,,-73.987342999999996,40.724668000000001,Credit,5.7000000000000002,0,,1,0,6.7000000000000002 +VTS,2009-10-26 11:18:00,2009-10-26 11:25:00,1,1.24,-74.005435000000006,40.728242999999999,,,-73.997487000000007,40.714255000000001,CASH,6.0999999999999996,0,,0,0,6.0999999999999996 +VTS,2009-10-26 11:24:00,2009-10-26 11:52:00,1,12.5,-73.874534999999995,40.774101999999999,,,-73.992823000000001,40.756914999999999,CASH,42.100000000000001,0,,0,4.5700000000000003,46.670000000000002 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-11.csv b/csv/testdata/taxi/yellow_tripdata_2009-11.csv new file mode 100644 index 0000000..51bc8ad --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-11.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-11-23 16:19:00,2009-11-23 16:23:00,1,0.76000000000000001,-73.975326999999993,40.751015000000002,,,-73.983356999999998,40.745057000000003,CASH,4.5,1,0.5,0,0,6 +VTS,2009-11-05 16:48:00,2009-11-05 16:56:00,5,1.6000000000000001,-73.992660000000001,40.753630000000001,,,-74.002908000000005,40.728558,CASH,6.5,1,0.5,0,0,8 +VTS,2009-11-17 08:24:00,2009-11-17 
08:31:00,1,1.28,-74.007536999999999,40.733597000000003,,,-73.993205000000003,40.742265000000003,CASH,6.0999999999999996,0,0.5,0,0,6.5999999999999996 +VTS,2009-11-01 14:34:00,2009-11-01 14:39:00,1,1.1399999999999999,-73.985597999999996,40.745660000000001,,,-73.988213000000002,40.762141999999997,CASH,5.2999999999999998,0,,0,0,5.7999999999999998 +VTS,2009-11-17 13:49:00,2009-11-17 13:57:00,1,0.79000000000000004,-73.978116999999997,40.760762999999997,,,-73.983258000000006,40.756568000000001,CASH,6.0999999999999996,0,0.5,0,0,6.5999999999999996 +VTS,2009-11-17 09:25:00,2009-11-17 09:32:00,2,1.53,-73.985086999999993,40.741616999999998,,,-73.972747999999996,40.759833,Credit,6.5,0,0.5,1,0,8 +VTS,2009-11-22 05:13:00,2009-11-22 05:22:00,1,3.6899999999999999,-73.987593000000004,40.748201999999999,,,-73.967496999999995,40.787708000000002,CASH,10.1,0.5,0.5,0,0,11.1 +VTS,2009-11-01 15:15:00,2009-11-01 15:28:00,1,2.6299999999999999,-73.987970000000004,40.769762999999998,,,-73.981982000000002,40.743414999999999,Credit,9.6999999999999993,0,,1,0,11.199999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2009-12.csv b/csv/testdata/taxi/yellow_tripdata_2009-12.csv new file mode 100644 index 0000000..fcf527e --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2009-12.csv @@ -0,0 +1,10 @@ +vendor_name,Trip_Pickup_DateTime,Trip_Dropoff_DateTime,Passenger_Count,Trip_Distance,Start_Lon,Start_Lat,Rate_Code,store_and_forward,End_Lon,End_Lat,Payment_Type,Fare_Amt,surcharge,mta_tax,Tip_Amt,Tolls_Amt,Total_Amt + +VTS,2009-12-17 07:35:00,2009-12-17 07:40:00,1,0.11,-73.987927999999997,40.737884999999999,,,-73.990335000000002,40.748449999999998,Credit,4.9000000000000004,0,0.5,1,0,6.4000000000000004 +VTS,2009-12-21 14:19:00,2009-12-21 14:24:00,1,1.0700000000000001,-73.956007999999997,40.779558000000002,,,-73.967303000000001,40.787832999999999,CASH,4.9000000000000004,0,0.5,0,0,5.4000000000000004 +VTS,2009-12-18 03:09:00,2009-12-18 03:34:00,1,8.9800000000000004,-73.955744999999993,40.689503000000002,,,-73.937730000000002,40.737462999999998,CASH,23.699999999999999,0.5,0.5,0,0,24.699999999999999 +VTS,2009-12-14 21:24:00,2009-12-14 21:33:00,2,1.6599999999999999,-73.983985000000004,40.754644999999996,,,-73.986194999999995,40.737609999999997,Credit,6.9000000000000004,0.5,0.5,3,0,10.9 +VTS,2009-12-18 08:17:00,2009-12-18 08:29:00,1,1.55,-73.959131999999997,40.769264999999997,,,-73.976267000000007,40.760615000000001,CASH,7.7000000000000002,0,0.5,0,0,8.1999999999999993 +VTS,2009-12-17 12:33:00,2009-12-17 12:36:00,1,0.56999999999999995,-73.982212000000004,40.783087999999999,,,-73.988546999999997,40.778616999999997,CASH,4.0999999999999996,0,0.5,0,0,4.5999999999999996 +VTS,2009-12-14 15:02:00,2009-12-14 15:13:00,1,1.52,0,0,,,0,0,CASH,7.2999999999999998,0,0.5,0,0,7.7999999999999998 +VTS,2009-12-21 09:02:00,2009-12-21 09:15:00,2,1.74,-73.982119999999995,40.776826999999997,,,-73.960858000000002,40.774900000000002,CASH,8.5,0,0.5,0,0,9 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-01.csv b/csv/testdata/taxi/yellow_tripdata_2010-01.csv new file mode 100644 index 0000000..a432e24 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-01.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2010-01-26 07:41:00,2010-01-26 
07:45:00,1,0.75,-73.956778,40.767749999999999,1,,-73.965957000000003,40.765231999999997,CAS,4.5,0,0.5,0,0,5 +DDS,2010-01-30 23:31:00,2010-01-30 23:46:12,1,5.9000000000000004,-73.996117999999996,40.763931999999997,1,,-73.981511999999995,40.741193000000003,CAS,15.300000000000001,0.5,0.5,0,0,16.300000000000001 +DDS,2010-01-18 20:22:20,2010-01-18 20:38:12,1,4,-73.979673000000005,40.783790000000003,1,,-73.917851999999996,40.87856,CAS,11.699999999999999,0.5,0.5,0,0,12.699999999999999 +VTS,2010-01-09 01:18:00,2010-01-09 01:35:00,2,4.7000000000000002,-73.977922000000007,40.763997000000003,1,,-73.923907999999997,40.759725000000003,CAS,13.300000000000001,0.5,0.5,0,0,14.300000000000001 +CMT,2010-01-18 19:10:14,2010-01-18 19:17:07,1,0.59999999999999998,-73.990924000000007,40.734681999999999,1,0,-73.995510999999993,40.739088000000002,Cre,5.2999999999999998,0,0.5,0.87,0,6.6699999999999999 +DDS,2010-01-23 18:40:25,2010-01-23 18:54:51,1,3.2999999999999998,0,0,1,,0,0,CRE,10.5,0,0.5,1,0,12 +VTS,2010-01-17 09:18:00,2010-01-17 09:25:00,1,1.3300000000000001,-73.993746999999999,40.754916999999999,1,,-73.984714999999994,40.755927,CAS,6.0999999999999996,0,0.5,0,0,6.5999999999999996 +VTS,2010-01-09 13:49:00,2010-01-09 13:56:00,1,1.8300000000000001,-73.971029999999999,40.751306999999997,1,,-73.990560000000002,40.734923000000002,CAS,6.9000000000000004,0,0.5,0,0,7.4000000000000004 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-02.csv b/csv/testdata/taxi/yellow_tripdata_2010-02.csv new file mode 100644 index 0000000..26d1294 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-02.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-02-13 01:46:36,2010-02-13 01:50:06,1,0.5,-73.967575999999994,40.756357000000001,1,0,-73.961301000000006,40.760596,CRE,4.0999999999999996,0.5,0.5,1,0,6.0999999999999996 +CMT,2010-02-13 21:50:04,2010-02-13 21:54:16,2,0.59999999999999998,-73.970890999999995,40.757593999999997,1,0,-73.962648000000002,40.761484000000003,CAS,4.5,0.5,0.5,0,0,5.5 +CMT,2010-02-12 23:32:17,2010-02-12 23:51:48,1,5.7999999999999998,-73.940613999999997,40.711773999999998,1,0,-73.983735999999993,40.672331,NA ,16.899999999999999,0.5,0.5,0,0,17.899999999999999 +CMT,2010-02-13 05:09:37,2010-02-13 05:14:39,1,1.2,-74.001726000000005,40.741343999999998,1,0,-73.988581999999994,40.731487999999999,NA ,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +CMT,2010-02-13 16:11:05,2010-02-13 16:21:00,1,2.2000000000000002,-73.995935000000003,40.734535000000001,1,0,-74.001671999999999,40.756475999999999,CRE,8.0999999999999996,0,0.5,1.29,0,9.8900000000000006 +CMT,2010-02-12 22:50:33,2010-02-12 23:06:34,1,4.7000000000000002,-73.978648000000007,40.745128000000001,1,0,-73.951515000000001,40.728746000000001,CAS,13.699999999999999,0.5,0.5,0,0,14.699999999999999 +CMT,2010-02-12 08:39:35,2010-02-12 08:42:53,1,0.5,-74.000048000000007,40.727128999999998,1,0,-74.001868000000002,40.721617000000002,CAS,3.7000000000000002,0,0.5,0,0,4.2000000000000002 +CMT,2010-02-13 11:08:17,2010-02-13 11:12:25,1,0.59999999999999998,-73.958504000000005,40.772226000000003,1,0,-73.961523,40.764803999999998,CAS,4.0999999999999996,0,0.5,0,0,4.5999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-03.csv b/csv/testdata/taxi/yellow_tripdata_2010-03.csv new file mode 100644 index 0000000..416a820 --- /dev/null +++ 
b/csv/testdata/taxi/yellow_tripdata_2010-03.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-03-22 17:05:03,2010-03-22 17:22:51,1,3.2999999999999998,-74.007548999999997,40.743324999999999,1,0,-73.967719000000002,40.760955000000003,Cas,12.1,1,0.5,0,0,13.6 +CMT,2010-03-22 19:24:29,2010-03-22 19:40:13,1,4.2999999999999998,-73.997759000000002,40.720905000000002,1,0,-73.984845000000007,40.768424000000003,Cas,12.5,1,0.5,0,0,14 +CMT,2010-03-22 23:09:18,2010-03-22 23:15:44,1,1.3,-73.992647000000005,40.693818,1,0,-73.973204999999993,40.693209000000003,Cas,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 +CMT,2010-03-22 20:06:44,2010-03-22 20:16:57,3,2.1000000000000001,-73.973624000000001,40.763188999999997,1,0,-73.977140000000006,40.785209999999999,Cas,7.7000000000000002,0,0.5,0,0,8.1999999999999993 +CMT,2010-03-22 17:42:53,2010-03-22 18:12:35,1,4.4000000000000004,-73.966922999999994,40.762537999999999,1,0,-74.007288000000003,40.717841,Cas,17.699999999999999,1,0.5,0,0,19.199999999999999 +CMT,2010-03-22 20:10:42,2010-03-22 20:15:30,1,0.59999999999999998,-73.980403999999993,40.775415000000002,1,0,-73.981444999999994,40.766098,Cre,4.5,0.5,0.5,1,0,6.5 +CMT,2010-03-22 22:08:15,2010-03-22 22:16:49,1,2.3999999999999999,-73.976048000000006,40.741906999999998,1,0,-73.960986000000005,40.768172999999997,Cre,8.0999999999999996,0.5,0.5,2,0,11.1 +CMT,2010-03-22 22:03:18,2010-03-22 22:10:13,1,2.5,-73.973483000000002,40.743940000000002,1,0,-73.949330000000003,40.773080999999998,Cas,7.7000000000000002,0.5,0.5,0,0,8.6999999999999993 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-04.csv b/csv/testdata/taxi/yellow_tripdata_2010-04.csv new file mode 100644 index 0000000..1b43154 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-04.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2010-04-11 20:09:00,2010-04-11 20:15:00,5,1.0900000000000001,-73.970226999999994,40.750458000000002,1,,-73.983491999999998,40.757097999999999,CAS,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +CMT,2010-04-13 11:39:22,2010-04-13 11:52:45,4,1.8,-73.968677999999997,40.764240999999998,1,0,-73.984857000000005,40.747708000000003,Cre,8.5,0,0.5,2.5800000000000001,0,11.58 +VTS,2010-04-16 17:28:00,2010-04-16 17:32:00,4,0.33000000000000002,0,0,1,,0,0,Cre,3.7000000000000002,1,0.5,0.40999999999999998,0,5.6100000000000003 +VTS,2010-04-15 00:39:00,2010-04-15 00:44:00,1,1.4399999999999999,-73.952811999999994,40.776443,1,,-73.938433000000003,40.791907000000002,CAS,5.7000000000000002,0.5,0.5,0,0,6.7000000000000002 +VTS,2010-04-01 06:47:00,2010-04-01 06:50:00,1,0.71999999999999997,-74.000799999999998,40.725847000000002,1,,-73.999395000000007,40.733801999999997,CAS,3.7000000000000002,0,0.5,0,0,4.2000000000000002 +VTS,2010-04-19 23:51:00,2010-04-19 23:58:00,5,1.5900000000000001,-73.983042999999995,40.771807000000003,1,,-73.971648000000002,40.757105000000003,CAS,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 +CMT,2010-04-30 13:32:15,2010-04-30 13:40:51,1,1.2,-73.980817000000002,40.765293999999997,1,0,-73.991759999999999,40.749476000000001,Cas,6.5,0,0.5,0,0,7 +CMT,2010-04-09 
19:08:30,2010-04-09 19:16:29,1,1.7,-74.006474999999995,40.733719999999998,1,0,-73.988414000000006,40.746298000000003,Cas,6.5,1,0.5,0,0,8 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-05.csv b/csv/testdata/taxi/yellow_tripdata_2010-05.csv new file mode 100644 index 0000000..8771467 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-05.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-05-28 21:09:20,2010-05-28 21:30:50,2,4.5999999999999996,-74.001684999999995,40.721344999999999,1,0,-73.958202999999997,40.769568,Cas,15.300000000000001,0.5,0.5,0,0,16.300000000000001 +CMT,2010-05-28 15:58:09,2010-05-28 16:01:31,1,0.69999999999999996,-73.955654999999993,40.776583000000002,1,0,-73.947615999999996,40.782789999999999,Cas,4.0999999999999996,1,0.5,0,0,5.5999999999999996 +CMT,2010-05-28 10:42:44,2010-05-28 10:46:14,1,0.40000000000000002,-73.987707999999998,40.775210000000001,1,0,-73.982101,40.775250999999997,Cas,3.7000000000000002,0,0.5,0,0,4.2000000000000002 +CMT,2010-05-27 23:14:35,2010-05-27 23:22:23,1,2.1000000000000001,-73.972201999999996,40.755898999999999,1,0,-73.986386999999993,40.730299000000002,Cas,7.2999999999999998,0.5,0.5,0,0,8.3000000000000007 +CMT,2010-05-28 00:10:10,2010-05-28 00:12:18,1,0.5,-73.992769999999993,40.748280999999999,1,0,-74.000422,40.747920999999998,Cas,3.7000000000000002,0.5,0.5,0,0,4.7000000000000002 +CMT,2010-05-27 21:46:54,2010-05-27 21:55:53,1,2.1000000000000001,-73.981346000000002,40.743934000000003,1,0,-74.004014999999995,40.723495999999997,Cas,7.7000000000000002,0.5,0.5,0,0,8.6999999999999993 +CMT,2010-05-27 08:56:52,2010-05-27 09:01:13,1,0.69999999999999996,-73.976590000000002,40.759918999999996,1,0,-73.985714999999999,40.759675999999999,Cre,4.5,0,0.5,1,0,6 +CMT,2010-05-31 01:07:52,2010-05-31 01:10:16,1,0.80000000000000004,-74.001617999999993,40.730907999999999,1,0,-74.001120999999998,40.740212999999997,Cre,4.0999999999999996,0.5,0.5,0.76000000000000001,0,5.8600000000000003 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-06.csv b/csv/testdata/taxi/yellow_tripdata_2010-06.csv new file mode 100644 index 0000000..4186121 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-06.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-06-18 10:43:46,2010-06-18 11:22:12,4,20,-73.972893999999997,40.792810000000003,2,0,-73.776308,40.645527999999999,Cas,45,0,0.5,0,4.5700000000000003,50.07 +CMT,2010-06-26 15:02:57,2010-06-26 15:07:15,1,0.69999999999999996,-73.987814999999998,40.748480000000001,1,0,-73.977535000000003,40.753880000000002,Cas,4.5,0,0.5,0,0,5 +CMT,2010-06-24 07:36:37,2010-06-24 07:43:29,1,1,-74.006496999999996,40.732920999999997,1,0,-73.998121999999995,40.725982000000002,Cas,5.7000000000000002,0,0.5,0,0,6.2000000000000002 +CMT,2010-06-23 18:42:21,2010-06-23 18:55:41,2,3.5,-73.987533999999997,40.733396999999997,1,0,-73.951097000000004,40.770543000000004,Cre,10.9,1,0.5,1,0,13.4 +CMT,2010-06-23 14:10:43,2010-06-23 14:17:02,1,1.2,-73.955832000000001,40.779476000000003,1,0,-73.968117000000007,40.765118000000001,Cas,5.7000000000000002,0,0.5,0,0,6.2000000000000002 +CMT,2010-06-23 
09:47:52,2010-06-23 10:01:10,4,4.5999999999999996,-73.981108000000006,40.737856999999998,1,0,-74.017213999999996,40.704726999999998,Cas,13.300000000000001,0,0.5,0,0,13.800000000000001 +CMT,2010-06-22 20:55:25,2010-06-22 21:03:29,3,1,-73.986598999999998,40.726505000000003,1,0,-74.000411999999997,40.730207,Cas,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 +CMT,2010-06-22 17:25:42,2010-06-22 17:51:11,1,4.5,-73.98827,40.743158999999999,4,0,-73.99127,40.688231000000002,Cas,15.300000000000001,1,0.5,0,0,16.800000000000001 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-07.csv b/csv/testdata/taxi/yellow_tripdata_2010-07.csv new file mode 100644 index 0000000..ab7e174 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-07.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2010-07-22 09:44:00,2010-07-22 09:55:00,1,1.04,-73.974005000000005,40.754170000000002,1,,-73.986396999999997,40.757852999999997,CAS,6.9000000000000004,0,0.5,0,0,7.4000000000000004 +VTS,2010-07-08 15:15:00,2010-07-08 15:33:00,1,5.1100000000000003,-73.995807999999997,40.761200000000002,1,,-73.995807999999997,40.761200000000002,Cre,14.1,0,0.5,3,0,17.600000000000001 +VTS,2010-07-30 11:36:00,2010-07-30 11:53:00,1,3.98,-73.967287999999996,40.772407999999999,1,,-73.937271999999993,40.740600000000001,Cre,12.9,0,0.5,2,0,15.4 +VTS,2010-07-26 22:23:00,2010-07-26 22:32:00,2,2.7999999999999998,-73.996951999999993,40.745311999999998,1,,-74.005979999999994,40.721842000000002,CAS,8.5,0.5,0.5,0,0,9.5 +VTS,2010-07-23 12:28:00,2010-07-23 12:53:00,1,3.0600000000000001,-74.006163000000001,40.733620000000002,1,,-73.980149999999995,40.762565000000002,CAS,13.699999999999999,0,0.5,0,0,14.199999999999999 +VTS,2010-07-28 15:36:00,2010-07-28 16:02:00,1,2.3599999999999999,-73.969440000000006,40.757623000000002,1,,-73.947235000000006,40.787979999999997,CAS,14.5,0,0.5,0,0,15 +VTS,2010-07-18 21:17:00,2010-07-18 21:21:00,1,1.29,-73.965452999999997,40.759250000000002,1,,-73.978089999999995,40.745697,CAS,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +VTS,2010-07-18 01:18:00,2010-07-18 01:24:00,2,1.46,-73.977971999999994,40.746201999999997,1,,-73.988425000000007,40.734807000000004,CAS,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-08.csv b/csv/testdata/taxi/yellow_tripdata_2010-08.csv new file mode 100644 index 0000000..bf80bcc --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-08.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-08-25 16:55:54,2010-08-25 17:02:51,1,0.90000000000000002,-73.990554000000003,40.750067000000001,1,N,-73.984792999999996,40.748424,CSH,5.2999999999999998,1,0.5,0,0,6.7999999999999998 +CMT,2010-08-25 16:49:27,2010-08-25 17:02:15,2,1.8,-73.960153000000005,40.766382,1,N,-73.980570999999998,40.765318000000001,CSH,8.5,1,0.5,0,0,10 +CMT,2010-08-25 18:56:44,2010-08-25 19:03:56,2,1.8,-73.960911999999993,40.764876000000001,1,N,-73.971935999999999,40.743986999999997,CSH,7.2999999999999998,1,0.5,0,0,8.8000000000000007 +CMT,2010-08-25 17:42:53,2010-08-25 
17:52:00,1,1.6000000000000001,-73.978348999999994,40.753754999999998,1,N,-73.994618000000003,40.734712999999999,CSH,6.9000000000000004,1,0.5,0,0,8.4000000000000004 +CMT,2010-08-25 18:33:36,2010-08-25 18:46:43,1,5.9000000000000004,-74.011604000000005,40.707754000000001,1,N,-73.964280000000002,40.756410000000002,CSH,15.300000000000001,1,0.5,0,0,16.800000000000001 +CMT,2010-08-25 18:26:02,2010-08-25 18:33:01,1,1.5,0,0,1,N,0,0,CSH,6.0999999999999996,1,0.5,0,0,7.5999999999999996 +CMT,2010-08-25 09:39:42,2010-08-25 09:48:56,1,1.6000000000000001,-73.970817999999994,40.788291000000001,1,N,-73.952251000000004,40.789791000000001,CSH,7.2999999999999998,0,0.5,0,0,7.7999999999999998 +CMT,2010-08-25 07:10:51,2010-08-25 07:17:42,1,1.6000000000000001,-73.999172999999999,40.732382000000001,1,N,-74.010951000000006,40.716006,CRD,6.5,0,0.5,1,0,8 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-09.csv b/csv/testdata/taxi/yellow_tripdata_2010-09.csv new file mode 100644 index 0000000..b82b4a5 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-09.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2010-09-15 06:52:00,2010-09-15 06:56:00,1,0.39000000000000001,-73.981724999999997,40.763627,1,,-73.975279999999998,40.761040000000001,CSH,3.7000000000000002,0,0.5,0,0,4.2000000000000002 +VTS,2010-09-13 10:21:00,2010-09-13 10:34:00,1,1.48,-73.981769999999997,40.745294999999999,1,,-73.992272999999997,40.764130000000002,CSH,8.0999999999999996,0,0.5,0,0,8.5999999999999996 +VTS,2010-09-13 11:50:00,2010-09-13 11:59:00,1,1.3700000000000001,-73.974164999999999,40.750149999999998,1,,-73.948047000000003,40.781815000000002,CSH,6.9000000000000004,0,0.5,0,0,7.4000000000000004 +VTS,2010-09-13 13:35:00,2010-09-13 14:07:00,1,11.109999999999999,-73.985448000000005,40.717913000000003,1,,-74.015867999999998,40.714947000000002,CSH,27.300000000000001,0,0.5,0,4.5700000000000003,32.369999999999997 +VTS,2010-09-13 14:54:00,2010-09-13 15:13:00,3,3.52,-73.979482000000004,40.746462999999999,1,,-73.967770000000002,40.760060000000003,CSH,12.5,0,0.5,0,0,13 +VTS,2010-09-15 11:09:00,2010-09-15 11:13:00,1,0.73999999999999999,-73.994225,40.746032999999997,1,,-73.961472999999998,40.774293,CSH,4.5,0,0.5,0,0,5 +VTS,2010-09-15 06:48:00,2010-09-15 06:53:00,1,1.5700000000000001,-73.989033000000006,40.756712,1,,-73.918188000000001,40.868605000000002,CRD,6.0999999999999996,0,0.5,1,0,7.5999999999999996 +VTS,2010-09-13 07:40:00,2010-09-13 07:47:00,5,1.6899999999999999,-73.992891999999998,40.748672999999997,1,,-73.949302000000003,40.770997999999999,CSH,6.5,0,0.5,0,0,7 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-10.csv b/csv/testdata/taxi/yellow_tripdata_2010-10.csv new file mode 100644 index 0000000..4dfe4a3 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-10.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2010-10-01 04:04:02,2010-10-01 04:08:31,1,0.59999999999999998,-73.981667999999999,40.763660999999999,1,N,-73.991517999999999,40.760421999999998,CSH,4.5,0.5,0.5,0,0,5.5 +CMT,2010-10-01 02:46:21,2010-10-01 
02:54:31,1,2.5,-74.004524000000004,40.722785000000002,1,N,-73.989923000000005,40.751826999999999,CSH,8.0999999999999996,0.5,0.5,0,0,9.0999999999999996 +CMT,2010-10-01 04:55:37,2010-10-01 04:55:43,1,0,-73.994984000000002,40.761136,1,N,-73.994984000000002,40.761136,CSH,2.5,0.5,0.5,0,0,3.5 +CMT,2010-10-01 03:09:08,2010-10-01 03:11:47,1,0.40000000000000002,-73.834215,40.766077000000003,1,N,-73.833483000000001,40.769151999999998,CSH,3.2999999999999998,0.5,0.5,0,0,4.2999999999999998 +CMT,2010-10-01 09:05:52,2010-10-01 09:36:22,1,10,-73.862780999999998,40.676825999999998,1,N,-73.909848999999994,40.698855999999999,CSH,24.5,0,0.5,0,0,25 +CMT,2010-10-01 02:28:57,2010-10-01 02:42:55,1,4.5,-74.004981999999998,40.729624000000001,1,N,-73.959531999999996,40.767451999999999,CSH,12.1,0.5,0.5,0,0,13.1 +CMT,2010-10-01 02:17:29,2010-10-01 02:21:48,1,0.90000000000000002,-74.002319,40.734668999999997,1,N,-73.995852999999997,40.725206999999997,CSH,4.5,0.5,0.5,0,0,5.5 +CMT,2010-10-01 08:36:09,2010-10-01 09:34:29,1,10.800000000000001,-73.776714999999996,40.645924999999998,1,N,-73.823617999999996,40.724772999999999,CSH,36.899999999999999,0,0.5,0,0,37.399999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-11.csv b/csv/testdata/taxi/yellow_tripdata_2010-11.csv new file mode 100644 index 0000000..0fbc5b4 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-11.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2010-11-01 05:13:00,2010-11-01 05:13:00,1,0,-73.980031999999994,40.734887000000001,1,,-73.980000000000004,40.734915000000001,CSH,2.5,0.5,0.5,0,0,3.5 +VTS,2010-11-05 00:44:00,2010-11-05 00:59:00,1,3.9199999999999999,-73.990290000000002,40.719571999999999,1,,-73.988017999999997,40.763367000000002,CRD,11.699999999999999,0.5,0.5,4,0,16.699999999999999 +VTS,2010-11-05 19:01:00,2010-11-05 19:28:00,5,3.8999999999999999,-73.959298000000004,40.781056999999997,1,,-73.995517000000007,40.733393,CRD,15.300000000000001,1,0.5,3,0,19.800000000000001 +VTS,2010-11-05 19:13:00,2010-11-05 19:17:00,1,0.92000000000000004,-73.955623000000003,40.782274999999998,1,,-73.955178000000004,40.773293000000002,CSH,4.5,1,0.5,0,0,6 +VTS,2010-11-05 19:09:00,2010-11-05 19:22:00,2,3.02,-73.948437999999996,40.77816,1,,-73.964399999999998,40.807197000000002,CRD,10.1,1,0.5,1.75,0,13.35 +VTS,2010-11-05 19:06:00,2010-11-05 19:19:00,5,1.28,-73.959232,40.770038,1,,-73.973038000000003,40.758895000000003,CRD,7.7000000000000002,1,0.5,0,0,9.1999999999999993 +CMT,2010-11-07 01:15:30,2010-11-07 01:38:09,1,6.2999999999999998,-73.995527999999993,40.743989999999997,1,N,-73.909718999999996,40.742291000000002,CSH,18.100000000000001,0.5,0.5,0,0,19.100000000000001 +CMT,2010-11-07 01:13:06,2010-11-07 01:26:42,1,3.2999999999999998,-73.956503999999995,40.771298999999999,1,N,-73.992829999999998,40.744424000000002,CSH,10.9,0.5,0.5,0,0,11.9 diff --git a/csv/testdata/taxi/yellow_tripdata_2010-12.csv b/csv/testdata/taxi/yellow_tripdata_2010-12.csv new file mode 100644 index 0000000..004dae8 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2010-12.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + 
+VTS,2010-12-03 00:22:00,2010-12-03 00:30:00,1,1.99,-73.982186999999996,40.768090000000001,1,,-73.996224999999995,40.74306,CSH,7.2999999999999998,0.5,0.5,0,0,8.3000000000000007 +VTS,2010-12-05 01:37:00,2010-12-05 01:41:00,1,0.78000000000000003,-73.986397999999994,40.729621999999999,1,,-73.981219999999993,40.736362,CRD,4.0999999999999996,0.5,0.5,1,0,6.0999999999999996 +VTS,2010-12-01 12:14:00,2010-12-01 12:36:00,2,2.0600000000000001,-73.996944999999997,40.737186999999999,1,,-73.975430000000003,40.757272,CSH,11.699999999999999,0,0.5,0,0,12.199999999999999 +VTS,2010-12-05 06:44:00,2010-12-05 06:58:00,1,5.0300000000000002,-73.989324999999994,40.748199999999997,1,,-73.921611999999996,40.743971999999999,CSH,13.300000000000001,0,0.5,0,0,13.800000000000001 +VTS,2010-12-03 08:28:00,2010-12-03 08:51:00,1,2.6099999999999999,-73.95514,40.765327999999997,1,,-73.984570000000005,40.744515,CSH,12.5,0,0.5,0,0,13 +VTS,2010-12-05 03:23:00,2010-12-05 03:32:00,1,2.3399999999999999,-74.006887000000006,40.744214999999997,1,,-73.973889999999997,40.743403000000001,CRD,8.0999999999999996,0.5,0.5,0,0,9.0999999999999996 +VTS,2010-12-02 12:54:00,2010-12-02 13:22:00,1,11.720000000000001,-73.865575000000007,40.770968000000003,1,,-73.987840000000006,40.743962000000003,CRD,28.5,0,0.5,0,4.5700000000000003,33.57 +VTS,2010-12-01 15:15:00,2010-12-01 15:33:00,1,3.46,-73.969836999999998,40.759912,1,,-73.987880000000004,40.719197000000001,CSH,12.1,0,0.5,0,0,12.6 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-01.csv b/csv/testdata/taxi/yellow_tripdata_2011-01.csv new file mode 100644 index 0000000..bd79528 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-01.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-01-29 02:38:35,2011-01-29 02:47:07,1,1.2,-74.005253999999994,40.729084,1,N,-73.988697000000002,40.727127000000003,CSH,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 +CMT,2011-01-28 10:38:19,2011-01-28 10:42:18,1,0.40000000000000002,-73.968585000000004,40.759171000000002,1,N,-73.964336000000003,40.764665000000001,CSH,4.0999999999999996,0,0.5,0,0,4.5999999999999996 +CMT,2011-01-28 23:49:58,2011-01-28 23:57:44,3,1.2,-73.980710000000002,40.74239,1,N,-73.987027999999995,40.729531999999999,CSH,6.0999999999999996,0.5,0.5,0,0,7.0999999999999996 +CMT,2011-01-28 23:52:09,2011-01-28 23:59:21,3,0.80000000000000004,-73.993773000000004,40.747329000000001,1,N,-73.991377999999997,40.750050000000002,CSH,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +CMT,2011-01-28 10:34:39,2011-01-28 11:25:50,1,5.2999999999999998,-73.991474999999994,40.749935999999998,1,N,-73.950237000000001,40.775626000000003,CSH,25.300000000000001,0,0.5,0,0,25.800000000000001 +CMT,2011-01-28 23:50:00,2011-01-28 23:58:11,2,1.2,-73.950346999999994,40.806409000000002,1,N,-73.960708999999994,40.818275999999997,CSH,6.5,0.5,0.5,0,0,7.5 +CMT,2011-01-29 02:38:48,2011-01-29 02:50:37,1,2.7000000000000002,-73.951971999999998,40.777428,1,N,-73.982152999999997,40.761342999999997,CSH,9.3000000000000007,0.5,0.5,0,0,10.300000000000001 +CMT,2011-01-29 02:41:16,2011-01-29 02:45:45,2,0.90000000000000002,-73.944978000000006,40.718231000000003,1,N,-73.956905000000006,40.712722999999997,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-02.csv b/csv/testdata/taxi/yellow_tripdata_2011-02.csv 
new file mode 100644 index 0000000..d4799b9 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-02.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2011-02-04 15:07:00,2011-02-04 15:21:00,1,1.71,-73.991254999999995,40.770252999999997,1,,-73.965774999999994,40.766098,CSH,9.3000000000000007,0,0.5,0,0,9.8000000000000007 +VTS,2011-02-04 14:40:00,2011-02-04 14:51:00,5,1.6399999999999999,-73.991849999999999,40.759388000000001,1,,-73.974812,40.761136999999998,CRD,7.7000000000000002,0,0.5,0.80000000000000004,0,9 +VTS,2011-02-04 14:08:00,2011-02-04 14:21:00,1,1.3100000000000001,-73.976042000000007,40.791378000000002,1,,-73.988321999999997,40.774658000000002,CSH,8.0999999999999996,0,0.5,0,0,8.5999999999999996 +VTS,2011-02-04 14:35:00,2011-02-04 14:46:00,5,1.04,-73.958737999999997,40.778301999999996,1,,-73.960970000000003,40.766607,CRD,7.2999999999999998,0,0.5,1,0,8.8000000000000007 +VTS,2011-02-04 14:59:00,2011-02-04 15:08:00,2,1,0,0,1,,0,0,CSH,6.0999999999999996,0,0.5,0,0,6.5999999999999996 +VTS,2011-02-04 15:12:00,2011-02-04 15:15:00,1,0.40000000000000002,-73.985968,40.777588000000002,1,,-73.980701999999994,40.778215000000003,CSH,3.7000000000000002,0,0.5,0,0,4.2000000000000002 +VTS,2011-02-04 14:53:00,2011-02-04 15:23:00,1,2.9700000000000002,-73.995782000000005,40.726208,1,,-73.985776999999999,40.758110000000002,CSH,15.699999999999999,0,0.5,0,0,16.199999999999999 +VTS,2011-02-04 14:42:00,2011-02-04 14:47:00,1,0.51000000000000001,-73.948915,40.777692000000002,1,,-73.949185,40.781481999999997,CSH,4.0999999999999996,0,0.5,0,0,4.5999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-03.csv b/csv/testdata/taxi/yellow_tripdata_2011-03.csv new file mode 100644 index 0000000..f045877 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-03.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-03-12 23:13:52,2011-03-12 23:18:15,3,0.80000000000000004,-73.996350000000007,40.744394999999997,1,N,-73.990414000000001,40.736776999999996,CSH,4.5,0.5,0.5,0,0,5.5 +CMT,2011-03-12 23:07:00,2011-03-12 23:14:06,1,0,-74.001082307513997,40.731597151667003,1,N,-73.984950314171002,40.728027452848998,CSH,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +CMT,2011-03-12 20:53:37,2011-03-12 21:22:09,1,0,-73.999968939398002,40.761392476975999,1,N,-74.036639021181998,40.624900827201003,CSH,28.899999999999999,0.5,0.5,0,4.7999999999999998,34.700000000000003 +CMT,2011-03-12 23:12:50,2011-03-12 23:15:48,1,0.5,-73.955231999999995,40.773479000000002,1,N,-73.955467999999996,40.779432999999997,CSH,4.0999999999999996,0.5,0.5,0,0,5.0999999999999996 +CMT,2011-03-12 23:14:23,2011-03-12 23:20:15,2,0.80000000000000004,-73.967439999999996,40.766710000000003,1,N,-73.963418000000004,40.774498999999999,CSH,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +CMT,2011-03-12 23:05:58,2011-03-12 23:18:08,4,3.8999999999999999,-74.002377999999993,40.750335,1,N,-73.960070000000002,40.776102999999999,CSH,11.300000000000001,0.5,0.5,0,0,12.300000000000001 +CMT,2011-03-13 05:14:51,2011-03-13 
05:19:13,1,0.90000000000000002,-74.008899999999997,40.713960999999998,1,N,-74.00197,40.708129999999997,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 +CMT,2011-03-13 05:07:13,2011-03-13 05:28:08,1,0,-73.974693114291995,40.754522152116003,1,N,-75.412146107061005,40.725892808826998,CSH,20.100000000000001,0.5,0.5,0,0,21.100000000000001 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-04.csv b/csv/testdata/taxi/yellow_tripdata_2011-04.csv new file mode 100644 index 0000000..90c7690 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-04.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2011-04-29 05:50:00,2011-04-29 05:55:00,1,2.1499999999999999,-73.940545,40.834910000000001,1,,-73.942723000000001,40.811692000000001,CSH,6.9000000000000004,0.5,0.5,0,0,7.9000000000000004 +VTS,2011-04-25 09:13:00,2011-04-25 09:25:00,1,2.3900000000000001,-73.957881999999998,40.773792999999998,1,,-73.981958000000006,40.76717,CRD,8.9000000000000004,0,0.5,1,0,10.4 +VTS,2011-04-29 04:01:00,2011-04-29 04:07:00,1,1.45,-73.983530000000002,40.749833000000002,1,,-73.987070000000003,40.763482000000003,CRD,5.7000000000000002,0.5,0.5,2,0,8.6999999999999993 +VTS,2011-04-29 05:32:00,2011-04-29 05:44:00,1,2.3599999999999999,-73.959041999999997,40.814886999999999,1,,-73.953469999999996,40.790835000000001,CRD,9.3000000000000007,0.5,0.5,2,0,12.300000000000001 +VTS,2011-04-29 05:05:00,2011-04-29 05:22:00,1,6.1799999999999997,-73.983778000000001,40.729765,1,,-73.954515000000001,40.801600000000001,CRD,16.100000000000001,0.5,0.5,2,0,19.100000000000001 +VTS,2011-04-29 05:12:00,2011-04-29 05:17:00,3,1.3899999999999999,-73.951014999999998,40.771721999999997,1,,-73.971857999999997,40.782195000000002,CSH,5.7000000000000002,0.5,0.5,0,0,6.7000000000000002 +VTS,2011-04-25 08:50:00,2011-04-25 09:05:00,1,2.0800000000000001,-73.996487999999999,40.731771999999999,1,,-73.983256999999995,40.756614999999996,CSH,9.3000000000000007,0,0.5,0,0,9.8000000000000007 +VTS,2011-04-28 18:49:00,2011-04-28 19:01:00,5,1.29,-73.964641999999998,40.764834999999998,1,,-73.982527000000005,40.772494999999999,CSH,7.7000000000000002,1,0.5,0,0,9.1999999999999993 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-05.csv b/csv/testdata/taxi/yellow_tripdata_2011-05.csv new file mode 100644 index 0000000..91d446d --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-05.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-05-13 20:48:53,2011-05-13 20:59:28,1,0,-73.974574670875,40.750450184117,1,N,-73.942594948377007,39.610568217374997,CRD,9.3000000000000007,0.5,0.5,2.0499999999999998,0,12.35 +CMT,2011-05-13 20:51:55,2011-05-13 20:54:49,2,0.40000000000000002,-73.998858999999996,40.739082000000003,1,N,-73.998960999999994,40.734786,CRD,3.7000000000000002,0.5,0.5,0.93999999999999995,0,5.6399999999999997 +CMT,2011-05-13 20:44:59,2011-05-13 20:53:51,1,1.3999999999999999,-73.991577000000007,40.726658999999998,1,N,-73.977538999999993,40.743837999999997,CRD,6.9000000000000004,0.5,0.5,1.5800000000000001,0,9.4800000000000004 +VTS,2011-05-15 21:57:00,2011-05-15 
22:01:00,1,1.0800000000000001,-73.993246999999997,40.757387999999999,1,,-73.999832999999995,40.745122000000002,CRD,4.9000000000000004,0.5,0.5,1,0,6.9000000000000004 +VTS,2011-05-15 21:57:00,2011-05-15 22:06:00,1,1.55,-73.974459999999993,40.753822,1,,-73.998261999999997,40.760972000000002,CRD,6.9000000000000004,0.5,0.5,1.6000000000000001,0,9.5 +CMT,2011-05-13 14:36:49,2011-05-13 14:44:26,1,1,-73.999003000000002,40.734312000000003,1,N,-74.008066999999997,40.739255,CRD,5.7000000000000002,0,0.5,1.8600000000000001,0,8.0600000000000005 +VTS,2011-05-15 22:03:00,2011-05-15 22:10:00,5,1.8500000000000001,-73.982898000000006,40.760210000000001,1,,-73.981717000000003,40.741407000000002,CRD,6.9000000000000004,0.5,0.5,2,0,9.9000000000000004 +VTS,2011-05-15 21:50:00,2011-05-15 22:07:00,1,6.0999999999999996,-73.959811999999999,40.762777999999997,1,,-74.011242999999993,40.701768000000001,CRD,16.899999999999999,0.5,0.5,2,0,19.899999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-06.csv b/csv/testdata/taxi/yellow_tripdata_2011-06.csv new file mode 100644 index 0000000..98a2894 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-06.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-06-15 17:50:28,2011-06-15 18:11:32,1,1.1000000000000001,-73.967978000000002,40.753256,1,N,-73.983118000000005,40.756233000000002,CSH,10.9,1,0.5,0,0,12.4 +CMT,2011-06-16 15:24:41,2011-06-16 15:28:41,1,0.69999999999999996,-73.999369000000002,40.739030999999997,1,N,-74.001597000000004,40.729320000000001,CSH,4.5,0,0.5,0,0,5 +CMT,2011-06-15 22:46:41,2011-06-15 22:49:09,0,0.59999999999999998,-74.004099999999994,40.747799999999998,1,N,-73.997799999999998,40.756500000000003,CSH,4.0999999999999996,0.5,0.5,0,0,5.0999999999999996 +CMT,2011-06-15 22:58:38,2011-06-15 23:00:52,1,0.69999999999999996,-73.969471999999996,40.761364999999998,1,N,-73.973388,40.750588999999998,CSH,3.7000000000000002,0.5,0.5,0,0,4.7000000000000002 +CMT,2011-06-15 17:47:20,2011-06-15 17:53:07,1,1.3,-73.986307999999994,40.745759999999997,1,N,-74.000755999999996,40.733894999999997,CSH,5.7000000000000002,1,0.5,0,0,7.2000000000000002 +CMT,2011-06-15 22:51:53,2011-06-15 22:56:55,1,0.69999999999999996,-73.980958999999999,40.760334999999998,1,N,-73.969233000000003,40.761307000000002,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 +CMT,2011-06-15 23:00:45,2011-06-15 23:54:47,1,5.9000000000000004,-73.982721999999995,40.749411000000002,1,N,-74.000946999999996,40.736780000000003,CSH,16.5,0.5,0.5,0,0,17.5 +CMT,2011-06-15 22:55:45,2011-06-15 23:21:57,0,6.7999999999999998,-74.0077,40.742800000000003,1,N,-73.960599999999999,40.659500000000001,CSH,20.899999999999999,0.5,0.5,0,0,21.899999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-07.csv b/csv/testdata/taxi/yellow_tripdata_2011-07.csv new file mode 100644 index 0000000..f9bc506 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-07.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-07-12 07:12:05,2011-07-12 
07:17:59,1,2,-73.980599999999995,40.733699999999999,1,N,-73.968900000000005,40.7577,CRD,6.9000000000000004,0,0.5,1.8500000000000001,0,9.25 +CMT,2011-07-12 10:28:50,2011-07-12 10:47:54,1,7.7000000000000002,-73.970352000000005,40.761975,1,N,-74.014159000000006,40.715893999999999,CRD,20.5,0,0.5,4.2000000000000002,0,25.199999999999999 +CMT,2011-07-10 22:33:29,2011-07-10 22:43:53,1,2.2000000000000002,-74.003992999999994,40.712986000000001,1,N,-74.008950999999996,40.738114000000003,CRD,8.5,0.5,0.5,1.8999999999999999,0,11.4 +CMT,2011-07-11 09:04:16,2011-07-11 09:13:24,1,1.8999999999999999,-73.959316000000001,40.777574000000001,1,N,-73.979723000000007,40.765959000000002,CRD,7.7000000000000002,0,0.5,1.8,0,10 +CMT,2011-07-11 13:11:05,2011-07-11 13:25:10,1,1.3,-73.974349000000004,40.756590000000003,1,N,-73.979543000000007,40.762355999999997,CRD,8.5,0,0.5,0,0,9 +CMT,2011-07-10 19:32:03,2011-07-10 19:46:20,1,3.2000000000000002,-73.961394999999996,40.780107999999998,1,N,-73.981240999999997,40.741917000000001,CRD,10.5,0,0.5,1,0,12 +CMT,2011-07-12 09:14:39,2011-07-12 09:30:46,1,2.2000000000000002,-73.954931999999999,40.789098000000003,1,N,-73.978204000000005,40.76473,CRD,10.1,0,0.5,1.3999999999999999,0,12 +CMT,2011-07-12 06:31:26,2011-07-12 06:37:13,1,1.8999999999999999,-73.998309000000006,40.735509999999998,1,N,-73.985090999999997,40.759205000000001,CRD,6.5,0,0.5,2,0,9 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-08.csv b/csv/testdata/taxi/yellow_tripdata_2011-08.csv new file mode 100644 index 0000000..e37104c --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-08.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2011-08-03 21:31:00,2011-08-03 21:36:00,1,0.87,-73.991191999999998,40.754370000000002,1,,-73.978027999999995,40.751404999999998,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 +VTS,2011-08-03 21:11:00,2011-08-03 21:35:00,2,5.0099999999999998,-73.994882000000004,40.728074999999997,1,,-73.978035000000006,40.773437999999999,CSH,15.699999999999999,0.5,0.5,0,0,16.699999999999999 +CMT,2011-08-03 14:20:41,2011-08-03 14:28:44,1,1.3,-74.000567000000004,40.762041000000004,1,N,-73.983307999999994,40.762193000000003,CSH,6.5,0,0.5,0,0,7 +CMT,2011-08-03 14:20:58,2011-08-03 14:28:43,1,1.3999999999999999,-73.972966999999997,40.748390999999998,1,N,-73.970027999999999,40.761490999999999,CSH,6.5,0,0.5,0,0,7 +VTS,2011-08-03 21:24:00,2011-08-03 21:28:00,5,0.94999999999999996,-73.959680000000006,40.770218,1,,-73.962153000000001,40.779111999999998,CSH,4.5,0.5,0.5,0,0,5.5 +VTS,2011-08-03 21:14:00,2011-08-03 21:28:00,2,2.7000000000000002,-74.013514999999998,40.705311999999999,1,,-73.992932999999994,40.742592999999999,CRD,9.6999999999999993,0.5,0.5,3,0,13.699999999999999 +VTS,2011-08-03 21:26:00,2011-08-03 21:34:00,1,1.71,-73.954734999999999,40.783327,1,,-73.965405000000004,40.778002000000001,CSH,6.9000000000000004,0.5,0.5,0,0,7.9000000000000004 +CMT,2011-08-04 22:51:55,2011-08-04 23:03:44,4,3.1000000000000001,-73.968590000000006,40.767429,1,N,-74.001448999999994,40.739291000000001,CSH,10.1,0.5,0.5,0,0,11.1 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-09.csv b/csv/testdata/taxi/yellow_tripdata_2011-09.csv new file mode 100644 index 0000000..9bcc516 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-09.csv @@ -0,0 +1,10 @@ 
+vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-09-20 09:37:49,2011-09-20 10:28:24,1,6.2999999999999998,-73.934798000000001,40.801074999999997,1,N,-73.997643999999994,40.733114999999998,CRD,26.5,0,0.5,6.75,0,33.75 +CMT,2011-09-20 09:41:28,2011-09-20 09:54:18,3,2.1000000000000001,-74.007786999999993,40.732129999999998,1,N,-74.001367000000002,40.756369999999997,CRD,8.9000000000000004,0,0.5,1.8799999999999999,0,11.279999999999999 +CMT,2011-09-19 09:07:10,2011-09-19 09:23:50,1,2.2999999999999998,-73.955490999999995,40.788496000000002,1,N,-73.968012999999999,40.760489,CRD,10.1,0,0.5,2.6499999999999999,0,13.25 +CMT,2011-09-19 09:04:34,2011-09-19 09:18:12,1,1.1000000000000001,-73.978339000000005,40.762189999999997,1,N,-73.984183999999999,40.748840999999999,CRD,8.0999999999999996,0,0.5,2.1499999999999999,0,10.75 +CMT,2011-09-21 01:20:40,2011-09-21 01:30:14,1,2.7000000000000002,-73.968104999999994,40.758743000000003,1,N,-74.001411000000004,40.745494999999998,CRD,8.9000000000000004,0.5,0.5,1.98,0,11.880000000000001 +CMT,2011-09-20 09:34:17,2011-09-20 10:25:38,1,3.7999999999999998,-74.011115000000004,40.715592999999998,1,N,-73.975230999999994,40.755113999999999,CRD,24.100000000000001,0,0.5,2,0,26.600000000000001 +CMT,2011-09-21 00:49:10,2011-09-21 00:55:18,1,1.2,-73.975882999999996,40.754621999999998,1,N,-73.962494000000007,40.763359000000001,CRD,5.7000000000000002,0.5,0.5,0.80000000000000004,0,7.5 +CMT,2011-09-21 01:01:15,2011-09-21 01:05:47,1,1.3,-73.986999999999995,40.729399999999998,1,N,-73.977800000000002,40.746000000000002,CRD,5.2999999999999998,0.5,0.5,2,0,8.3000000000000007 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-10.csv b/csv/testdata/taxi/yellow_tripdata_2011-10.csv new file mode 100644 index 0000000..40cf432 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-10.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-10-12 22:46:46,2011-10-12 22:55:27,1,1.1000000000000001,-74.009445999999997,40.723418000000002,1,N,-74.010463999999999,40.709606000000001,CSH,6.5,0.5,0.5,0,0,7.5 +CMT,2011-10-13 14:47:29,2011-10-13 14:55:11,1,1,-73.967309999999998,40.769387000000002,1,N,-73.972823000000005,40.760818999999998,CRD,5.7000000000000002,0,0.5,1.24,0,7.4400000000000004 +CMT,2011-10-12 22:41:43,2011-10-12 23:07:32,1,5,-73.985799999999998,40.727699999999999,1,N,-73.955100000000002,40.678400000000003,CSH,17.300000000000001,0.5,0.5,0,0,18.300000000000001 +VTS,2011-10-03 07:30:00,2011-10-03 07:31:00,1,0.34999999999999998,-73.958442000000005,40.760252000000001,1,,-73.956102000000001,40.763457000000002,CSH,3.2999999999999998,0,0.5,0,0,3.7999999999999998 +VTS,2011-10-03 07:22:00,2011-10-03 07:34:00,1,4.6100000000000003,-73.975920000000002,40.748685000000002,1,,-74.005823000000007,40.706223000000001,CRD,12.9,0,0.5,2,0,15.4 +CMT,2011-10-12 22:39:39,2011-10-12 22:44:07,1,0.40000000000000002,-73.995909999999995,40.724550999999998,1,N,-73.999875000000003,40.721491999999998,CSH,4.0999999999999996,0.5,0.5,0,0,5.0999999999999996 +CMT,2011-10-12 22:30:42,2011-10-12 
22:39:01,1,1.8,-73.970600000000005,40.761299999999999,1,N,-73.991600000000005,40.749600000000001,CSH,6.9000000000000004,0.5,0.5,0,0,7.9000000000000004 +CMT,2011-10-12 22:54:17,2011-10-12 22:58:38,1,0.90000000000000002,-73.982923,40.726776999999998,1,N,-73.974829999999997,40.732948,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-11.csv b/csv/testdata/taxi/yellow_tripdata_2011-11.csv new file mode 100644 index 0000000..01c10f3 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-11.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2011-11-29 11:31:00,2011-11-29 11:47:00,1,2.0299999999999998,-73.977753000000007,40.751753000000001,1,,-73.960970000000003,40.779032999999998,CSH,9.3000000000000007,0,0.5,0,0,9.8000000000000007 +VTS,2011-11-29 11:25:00,2011-11-29 11:44:00,2,2.8199999999999998,-73.982969999999995,40.768172999999997,1,,-73.948734999999999,40.773671999999998,CRD,11.699999999999999,0,0.5,2,0,14.199999999999999 +VTS,2011-11-29 10:54:00,2011-11-29 11:39:00,1,4.4699999999999998,-74.002497000000005,40.714087999999997,1,,-73.971833000000004,40.754126999999997,CRD,22.899999999999999,0,0.5,0,0,23.399999999999999 +VTS,2011-11-29 11:18:00,2011-11-29 11:41:00,1,6.9400000000000004,-73.939773000000002,40.841298000000002,1,,-73.953732000000002,40.766244999999998,CRD,19.300000000000001,0,0.5,4.8200000000000003,0,24.620000000000001 +VTS,2011-11-29 11:33:00,2011-11-29 11:47:00,1,0.77000000000000002,-73.990565000000004,40.756078000000002,1,,-73.981416999999993,40.762571999999999,CRD,7.7000000000000002,0,0.5,2,0,10.199999999999999 +VTS,2011-11-29 20:50:00,2011-11-29 21:02:00,1,3.27,-73.975324999999998,40.733027,1,,-74.005988000000002,40.710712000000001,CRD,10.1,0.5,0.5,2,0,13.1 +VTS,2011-11-29 20:41:00,2011-11-29 20:51:00,3,6.6100000000000003,-73.976967000000002,40.785066999999998,1,,-73.932230000000004,40.866244999999999,CRD,15.699999999999999,0.5,0.5,4,0,20.699999999999999 +VTS,2011-11-29 20:44:00,2011-11-29 20:52:00,1,2.1499999999999999,-73.992457000000002,40.730241999999997,1,,-73.987369999999999,40.751171999999997,CRD,7.2999999999999998,0.5,0.5,0.66000000000000003,0,8.9600000000000009 diff --git a/csv/testdata/taxi/yellow_tripdata_2011-12.csv b/csv/testdata/taxi/yellow_tripdata_2011-12.csv new file mode 100644 index 0000000..3019f6f --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2011-12.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2011-12-02 03:53:38,2011-12-02 03:57:56,1,0.80000000000000004,-73.971326000000005,40.762956000000003,1,N,-73.981921,40.765996999999999,CRD,4.9000000000000004,0.5,0.5,1.1799999999999999,0,7.0800000000000001 +CMT,2011-12-02 04:16:44,2011-12-02 04:32:39,1,8.0999999999999996,-73.979975999999994,40.775965999999997,1,N,-73.919149000000004,40.833218000000002,CRD,20.100000000000001,0.5,0.5,2,0,23.100000000000001 +CMT,2011-12-02 05:24:22,2011-12-02 05:33:33,1,2.3999999999999999,-73.982003000000006,40.728216000000003,1,N,-74.01003,40.721162,CRD,8.5,0.5,0.5,1,0,10.5 +CMT,2011-12-02 02:57:24,2011-12-02 
03:08:51,0,2.6000000000000001,-74.005399999999995,40.751399999999997,1,N,-73.976200000000006,40.7286,CRD,9.3000000000000007,0.5,0.5,2.5499999999999998,0,12.85 +CMT,2011-12-02 04:16:52,2011-12-02 04:26:23,1,3,-73.962315000000004,40.759070999999999,1,N,-73.996195,40.743485999999997,CRD,9.3000000000000007,0.5,0.5,2.0600000000000001,0,12.359999999999999 +CMT,2011-12-02 04:03:34,2011-12-02 04:09:03,2,1,-74.001839000000004,40.737817999999997,1,N,-74.002262999999999,40.747818000000002,CRD,5.2999999999999998,0.5,0.5,1.26,0,7.5599999999999996 +CMT,2011-12-02 04:10:42,2011-12-02 04:11:44,1,0.20000000000000001,-74.002720999999994,40.749783000000001,1,N,-74.003567000000004,40.752329000000003,CRD,2.8999999999999999,0.5,0.5,0.78000000000000003,0,4.6799999999999997 +CMT,2011-12-02 05:29:19,2011-12-02 05:41:41,1,3.2999999999999998,-73.978272000000004,40.744891000000003,1,N,-74.013896000000003,40.713884,CRD,10.9,0.5,0.5,2,0,13.9 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-01.csv b/csv/testdata/taxi/yellow_tripdata_2012-01.csv new file mode 100644 index 0000000..137dba0 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-01.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-01-10 23:55:50,2012-01-11 00:03:39,1,1.7,-73.994692999999998,40.725031000000001,1,N,-73.975950999999995,40.730781,CRD,6.9000000000000004,0.5,0.5,1,0,8.9000000000000004 +CMT,2012-01-11 19:18:25,2012-01-11 19:26:10,1,1.1000000000000001,-73.987954999999999,40.752946999999999,1,N,-73.994532000000007,40.761040000000001,CSH,6.0999999999999996,1,0.5,0,0,7.5999999999999996 +CMT,2012-01-11 19:19:19,2012-01-11 19:48:15,2,18,-73.783092999999994,40.64855,2,N,-73.996133999999998,40.747624000000002,CRD,45,0,0.5,10.06,4.7999999999999998,60.359999999999999 +CMT,2012-01-11 19:19:21,2012-01-11 19:27:00,1,1.7,-73.967515000000006,40.758454,1,N,-73.956582999999995,40.779902999999997,CRD,6.9000000000000004,1,0.5,1,0,9.4000000000000004 +CMT,2012-01-11 14:38:15,2012-01-11 14:43:51,1,1.2,-74.011314999999996,40.711449000000002,1,N,-74.002871999999996,40.728130999999998,CSH,5.7000000000000002,0,0.5,0,0,6.2000000000000002 +VTS,2012-01-09 19:14:00,2012-01-09 19:20:00,1,1.25,-73.993335000000002,40.727716999999998,1,,-73.981566999999998,40.739244999999997,CSH,6.0999999999999996,1,0.5,0,0,7.5999999999999996 +CMT,2012-01-11 14:39:08,2012-01-11 15:24:45,1,13.800000000000001,-73.781899999999993,40.644799999999996,1,N,-73.986999999999995,40.700000000000003,CSH,36.5,0,0.5,0,0,37 +VTS,2012-01-09 19:12:00,2012-01-09 19:20:00,2,1.3700000000000001,-73.975662999999997,40.75038,1,,-73.991624999999999,40.73827,CSH,6.5,1,0.5,0,0,8 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-02.csv b/csv/testdata/taxi/yellow_tripdata_2012-02.csv new file mode 100644 index 0000000..d8f18b2 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-02.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-02-09 14:48:06,2012-02-09 15:06:39,1,2.8999999999999999,-73.982767999999993,40.769174,1,N,-73.958792000000003,40.777842,CSH,12.1,0,0.5,0,0,12.6 +CMT,2012-02-09 11:27:42,2012-02-09 
11:48:16,0,3.6000000000000001,-73.960832999999994,40.771875000000001,1,N,-74.002697999999995,40.744864999999997,CRD,13.300000000000001,0,0.5,2,0,15.800000000000001 +CMT,2012-02-09 11:29:01,2012-02-09 11:33:32,1,0.90000000000000002,-73.981459999999998,40.780616000000002,1,N,-73.973337999999998,40.791958000000001,CRD,5.2999999999999998,0,0.5,1.1599999999999999,0,6.96 +CMT,2012-02-09 13:38:44,2012-02-09 13:53:56,2,2,-73.960631000000006,40.781241999999999,1,N,-73.978149000000002,40.755747999999997,CSH,9.6999999999999993,0,0.5,0,0,10.199999999999999 +CMT,2012-02-09 13:36:47,2012-02-09 13:42:14,1,0.90000000000000002,-73.994675999999998,40.740338000000001,1,N,-74.001846999999998,40.745854999999999,CSH,5.2999999999999998,0,0.5,0,0,5.7999999999999998 +CMT,2012-02-09 13:37:41,2012-02-09 13:51:55,1,1.3,-74.006270000000001,40.740447000000003,1,N,-73.988003000000006,40.740020000000001,CSH,8.9000000000000004,0,0.5,0,0,9.4000000000000004 +CMT,2012-02-09 13:36:40,2012-02-09 13:42:34,1,1.3999999999999999,-73.978943000000001,40.756827000000001,1,N,-73.991191999999998,40.739396999999997,CSH,5.7000000000000002,0,0.5,0,0,6.2000000000000002 +CMT,2012-02-09 13:37:57,2012-02-09 13:48:30,1,1.5,-73.992192000000003,40.754910000000002,1,N,-73.972853000000001,40.761738999999999,CSH,7.2999999999999998,0,0.5,0,0,7.7999999999999998 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-03.csv b/csv/testdata/taxi/yellow_tripdata_2012-03.csv new file mode 100644 index 0000000..a703cec --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-03.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-03-02 10:27:58,2012-03-02 10:48:01,1,2.7000000000000002,-73.999938999999998,40.728507,1,N,-73.979733999999993,40.761961999999997,CRD,12.1,0,0.5,1.5,0,14.1 +CMT,2012-03-02 05:15:19,2012-03-02 05:19:07,1,1.7,-73.956940000000003,40.766632000000001,1,N,-73.970665999999994,40.753307,CRD,5.7000000000000002,0.5,0.5,1.6699999999999999,0,8.3699999999999992 +CMT,2012-03-01 10:39:15,2012-03-01 10:57:24,1,2.3999999999999999,-74.008015,40.705443000000002,1,N,-73.986817000000002,40.701487,CRD,11.300000000000001,0,0.5,1,0,12.800000000000001 +CMT,2012-03-01 12:27:19,2012-03-01 12:34:50,1,1.5,-73.961033,40.760739999999998,1,N,-73.946200000000005,40.780549000000001,CRD,6.5,0,0.5,1.75,0,8.75 +CMT,2012-03-02 12:06:59,2012-03-02 12:18:04,2,1.3,-73.992621999999997,40.733879000000002,1,Y,-73.981229999999996,40.748027,CRD,7.2999999999999998,0,0.5,1,0,8.8000000000000007 +CMT,2012-03-01 12:32:26,2012-03-01 12:43:47,1,2,-73.976855,40.743451,1,N,-73.996306000000004,40.720370000000003,CRD,8.5,0,0.5,1.8,0,10.800000000000001 +CMT,2012-03-01 09:06:19,2012-03-01 09:17:56,1,1.1000000000000001,0,0,1,N,0,0,CRD,7.7000000000000002,0,0.5,1.5,0,9.6999999999999993 +CMT,2012-03-01 08:16:00,2012-03-01 08:25:15,1,0.80000000000000004,-73.973616000000007,40.755347,1,N,-73.985821000000001,40.752249999999997,CRD,6.0999999999999996,0,0.5,1.3200000000000001,0,7.9199999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-04.csv b/csv/testdata/taxi/yellow_tripdata_2012-04.csv new file mode 100644 index 0000000..2c4f897 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-04.csv @@ -0,0 +1,10 @@ 
+vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-04-30 08:36:20,2012-04-30 08:50:46,1,1.7,-73.991636,40.744104999999998,1,N,-73.974041999999997,40.760353000000002,CRD,9.3000000000000007,0,0.5,1.96,0,11.76 +CMT,2012-04-30 08:27:28,2012-04-30 08:40:49,1,3.8999999999999999,-73.995558000000003,40.759816000000001,1,N,-74.009069999999994,40.713225999999999,CRD,12.1,0,0.5,2.52,0,15.119999999999999 +CMT,2012-04-30 08:36:44,2012-04-30 08:59:17,1,6.0999999999999996,-73.978087000000002,40.748812999999998,1,N,-73.987418000000005,40.693491000000002,CRD,17.699999999999999,0,0.5,3.6400000000000001,0,21.84 +CMT,2012-04-30 08:44:59,2012-04-30 09:00:22,1,4.7999999999999998,-73.990232000000006,40.771807000000003,1,N,-74.015032000000005,40.711284999999997,CRD,13.699999999999999,0,0.5,2.7999999999999998,0,17 +CMT,2012-04-30 08:43:55,2012-04-30 08:51:53,1,1.2,-73.964602999999997,40.807288,1,N,-73.974615999999997,40.790985999999997,CRD,6.5,0,0.5,1.3999999999999999,0,8.4000000000000004 +CMT,2012-04-30 08:45:35,2012-04-30 08:50:59,1,0.90000000000000002,-73.992290999999994,40.748196,1,N,-73.985444999999999,40.739848000000002,CRD,4.9000000000000004,0,0.5,1.3500000000000001,0,6.75 +CMT,2012-04-30 08:31:39,2012-04-30 08:38:05,1,1.2,-73.978452000000004,40.773048000000003,1,N,-73.964555000000004,40.773178000000001,CRD,5.7000000000000002,0,0.5,1.8600000000000001,0,8.0600000000000005 +CMT,2012-04-30 08:35:59,2012-04-30 08:43:10,1,1.1000000000000001,-73.994320999999999,40.761037000000002,1,N,-73.975995999999995,40.754942999999997,CRD,5.7000000000000002,0,0.5,1.55,0,7.75 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-05.csv b/csv/testdata/taxi/yellow_tripdata_2012-05.csv new file mode 100644 index 0000000..b7665b0 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-05.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2012-05-07 10:32:00,2012-05-07 10:43:00,2,3.1499999999999999,-73.987179999999995,40.733179999999997,1,,-73.956118000000004,40.763562,CSH,10.1,0,0.5,0,0,10.6 +VTS,2012-05-07 09:28:00,2012-05-07 09:40:00,1,1.9199999999999999,-73.995107000000004,40.749859999999998,1,,-73.970237999999995,40.753973000000002,CRD,8.0999999999999996,0,0.5,1.6200000000000001,0,10.220000000000001 +VTS,2012-05-08 13:27:00,2012-05-08 13:33:00,5,0.87,-73.995737000000005,40.74897,1,,-73.998587000000001,40.755446999999997,CSH,4.9000000000000004,0,0.5,0,0,5.4000000000000004 +VTS,2012-05-08 13:17:00,2012-05-08 13:37:00,1,2.04,-74.007422000000005,40.733002999999997,1,,-73.981191999999993,40.745212000000002,CRD,11.300000000000001,0,0.5,2.2599999999999998,0,14.06 +VTS,2012-05-08 13:21:00,2012-05-08 13:39:00,1,1.4299999999999999,-73.977922000000007,40.745964999999998,1,,-73.980620000000002,40.760502000000002,CRD,10.1,0,0.5,2.02,0,12.619999999999999 +VTS,2012-05-11 21:39:00,2012-05-11 21:49:00,1,1.96,-73.984938,40.741695,1,,-73.973353000000003,40.760938000000003,CSH,8.0999999999999996,0.5,0.5,0,0,9.0999999999999996 +VTS,2012-05-07 09:27:00,2012-05-07 09:31:00,1,0.87,-73.992457000000002,40.743651999999997,1,,-73.987764999999996,40.753892,CSH,4.5,0,0.5,0,0,5 +VTS,2012-05-07 09:03:00,2012-05-07 
09:28:00,1,2.1000000000000001,-73.973133000000004,40.755277,1,,-73.995424999999997,40.769362000000001,CRD,13.300000000000001,0,0.5,2,0,15.800000000000001 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-06.csv b/csv/testdata/taxi/yellow_tripdata_2012-06.csv new file mode 100644 index 0000000..c664fa6 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-06.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2012-06-12 09:44:00,2012-06-12 09:45:00,1,0.17000000000000001,-73.977356999999998,40.761192000000001,1,,-73.978863000000004,40.762155,CSH,2.8999999999999999,0,0.5,0,0,3.3999999999999999 +CMT,2012-06-12 11:50:25,2012-06-12 12:18:53,1,2.7999999999999998,-73.978159000000005,40.751348,1,N,-73.945857000000004,40.775246000000003,CSH,15.300000000000001,0,0.5,0,0,15.800000000000001 +CMT,2012-06-12 11:29:12,2012-06-12 11:46:59,1,3.7000000000000002,-73.993988000000002,40.761406999999998,1,N,-74.007420999999994,40.726520000000001,CRD,12.5,0,0.5,2.6000000000000001,0,15.6 +VTS,2012-06-12 11:29:00,2012-06-12 12:03:00,1,10,-73.873410000000007,40.774011999999999,1,,-74.002347,40.740631999999998,CRD,27.699999999999999,0,0.5,6.9199999999999999,4.7999999999999998,39.920000000000002 +VTS,2012-06-12 11:30:00,2012-06-12 11:38:00,1,0.85999999999999999,-73.975733000000005,40.759506999999999,1,,-73.969686999999993,40.753442,CRD,5.2999999999999998,0,0.5,1,0,6.7999999999999998 +CMT,2012-06-12 07:43:42,2012-06-12 07:58:04,1,3.6000000000000001,-74.010270000000006,40.713372,1,N,-73.979577000000006,40.757593999999997,CRD,11.300000000000001,0,0.5,1,0,12.800000000000001 +VTS,2012-06-12 07:44:00,2012-06-12 07:57:00,1,1.77,-74.007694999999998,40.740738,1,,-74.000397000000007,40.720616999999997,CRD,8.5,0,0.5,7.0000000000000007E-2,0,9.0700000000000003 +CMT,2012-06-12 12:24:59,2012-06-12 12:37:42,1,1.2,-73.995805000000004,40.748888000000001,1,N,-73.980024999999998,40.753239999999998,CSH,8.0999999999999996,0,0.5,0,0,8.5999999999999996 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-07.csv b/csv/testdata/taxi/yellow_tripdata_2012-07.csv new file mode 100644 index 0000000..78feaa3 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-07.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-07-01 03:32:51,2012-07-01 03:44:22,1,3.2000000000000002,-73.966130000000007,40.761308999999997,1,N,-73.986273999999995,40.722112000000003,CSH,10.1,0.5,0.5,0,0,11.1 +VTS,2012-07-01 00:00:00,2012-07-01 00:05:00,2,1.8999999999999999,-73.866037000000006,40.834670000000003,1,,-73.866392000000005,40.832757000000001,CRD,6.0999999999999996,0.5,0.5,1.6499999999999999,0,8.75 +VTS,2012-07-01 03:33:00,2012-07-01 03:54:00,2,10.550000000000001,-73.981165000000004,40.76708,1,,-73.840485000000001,40.699652,CSH,25.300000000000001,0.5,0.5,0,0,26.300000000000001 +CMT,2012-07-01 03:32:40,2012-07-01 03:56:31,1,12,-73.950839000000002,40.785786999999999,1,N,-73.850864000000001,40.875776000000002,CSH,28.899999999999999,0.5,0.5,0,0,29.899999999999999 +CMT,2012-07-01 03:33:15,2012-07-01 
03:56:08,2,14.9,-73.991607999999999,40.764887999999999,1,N,-73.830372999999994,40.847396000000003,CRD,32.899999999999999,0.5,0.5,6.7800000000000002,0,40.68 +VTS,2012-07-01 03:33:00,2012-07-01 03:34:00,1,0.44,-73.969915,40.797525,1,,-73.968187,40.800714999999997,CSH,3.2999999999999998,0.5,0.5,0,0,4.2999999999999998 +VTS,2012-07-01 03:34:00,2012-07-01 03:36:00,2,0.53000000000000003,-74.003912,40.742082000000003,1,,-73.996915000000001,40.739154999999997,CRD,3.2999999999999998,0.5,0.5,0.76000000000000001,0,5.0599999999999996 +VTS,2012-07-01 00:00:00,2012-07-01 00:16:00,1,4,0,0,1,,0,0,CRD,12.1,0.5,0.5,1,0,14.1 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-08.csv b/csv/testdata/taxi/yellow_tripdata_2012-08.csv new file mode 100644 index 0000000..1af1189 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-08.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2012-08-01 01:09:00,2012-08-01 01:25:00,1,3.7999999999999998,-74.005480000000006,40.738596999999999,1,,-73.965235000000007,40.759346999999998,CRD,12.1,0.5,0.5,2,0,15.1 +CMT,2012-08-01 01:10:24,2012-08-01 01:17:28,1,0.90000000000000002,-74.000296000000006,40.734026,1,N,-74.003331000000003,40.727401,CSH,5.7000000000000002,0.5,0.5,0,0,6.7000000000000002 +CMT,2012-08-01 03:52:44,2012-08-01 04:05:14,1,5.0999999999999996,-74.005610000000004,40.726261999999998,1,N,-73.953412,40.776890000000002,CRD,12.9,0.5,0.5,2,0,15.9 +VTS,2012-08-01 07:20:00,2012-08-01 07:32:00,1,2.52,-73.986971999999994,40.720714999999998,1,,-73.984369999999998,40.74586,CSH,9.3000000000000007,0,0.5,0,0,9.8000000000000007 +CMT,2012-08-01 07:20:45,2012-08-01 07:25:19,1,0.90000000000000002,-73.992422000000005,40.750588,1,N,-74.004858999999996,40.751953,CRD,4.9000000000000004,0,0.5,1.3500000000000001,0,6.75 +CMT,2012-08-01 03:51:46,2012-08-01 04:00:55,1,3,-73.955420000000004,40.719802999999999,1,N,-73.983041999999998,40.726607999999999,CRD,9.6999999999999993,0.5,0.5,1,0,11.699999999999999 +VTS,2012-08-01 03:55:00,2012-08-01 03:58:00,1,1.2,-73.980457000000001,40.751669999999997,1,,-73.967403000000004,40.7575,CRD,4.9000000000000004,0.5,0.5,0.5,0,6.4000000000000004 +CMT,2012-08-01 01:09:23,2012-08-01 01:25:33,1,4.4000000000000004,-73.949870000000004,40.784483000000002,1,N,-73.898957999999993,40.823734999999999,CSH,13.699999999999999,0.5,0.5,0,0,14.699999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-09.csv b/csv/testdata/taxi/yellow_tripdata_2012-09.csv new file mode 100644 index 0000000..8ec5cf6 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-09.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2012-09-01 05:35:00,2012-09-01 05:41:00,1,2.27,-73.995642000000004,40.725271999999997,1,,-73.992367000000002,40.749377000000003,CRD,7.2999999999999998,0.5,0.5,1.5600000000000001,0,9.8599999999999994 +VTS,2012-09-01 05:31:00,2012-09-01 05:41:00,1,3.9399999999999999,-73.973276999999996,40.792907999999997,1,,-73.976046999999994,40.750342000000003,CRD,10.9,0.5,0.5,2.2799999999999998,0,14.18 +VTS,2012-09-01 05:16:00,2012-09-01 
05:40:00,2,16.75,-73.937562999999997,40.801259999999999,2,,-73.783299999999997,40.643875000000001,CSH,45,0,0.5,0,0,45.5 +VTS,2012-09-01 05:41:00,2012-09-01 05:44:00,1,1.23,-73.987702999999996,40.765295000000002,1,,-73.994442000000006,40.753075000000003,CSH,4.9000000000000004,0.5,0.5,0,0,5.9000000000000004 +VTS,2012-09-01 05:27:00,2012-09-01 05:39:00,2,3.9100000000000001,-73.954577,40.720492,1,,-73.993452000000005,40.727375000000002,CSH,11.300000000000001,0.5,0.5,0,0,12.300000000000001 +VTS,2012-09-01 05:37:00,2012-09-01 05:43:00,5,1.1299999999999999,-74.001412000000002,40.731161999999998,1,,-73.987508000000005,40.728577000000001,CSH,5.2999999999999998,0.5,0.5,0,0,6.2999999999999998 +VTS,2012-09-01 05:32:00,2012-09-01 05:46:00,3,11.359999999999999,-73.783207000000004,40.649172,1,,-73.786767999999995,40.740561999999997,CSH,25.300000000000001,0.5,0.5,0,0,26.300000000000001 +VTS,2012-09-01 05:33:00,2012-09-01 05:43:00,1,5.7199999999999998,-74.075087999999994,40.595325000000003,1,,-74.075087999999994,40.595325000000003,CRD,14.5,0.5,0.5,1,0,16.5 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-10.csv b/csv/testdata/taxi/yellow_tripdata_2012-10.csv new file mode 100644 index 0000000..0893385 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-10.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2012-10-01 07:57:00,2012-10-01 08:09:00,1,3.0099999999999998,-73.994532000000007,40.755887000000001,1,,-74.009990000000002,40.721494999999997,CRD,12.5,0,0.5,2,0,15 +VTS,2012-10-01 07:57:00,2012-10-01 08:08:00,1,1.5600000000000001,-73.956346999999994,40.767488,1,,-73.954065,40.784652000000001,CSH,9,0,0.5,0,0,9.5 +VTS,2012-10-01 07:57:00,2012-10-01 08:08:00,1,1.23,-73.962950000000006,40.772382,1,,-73.946950000000001,40.771946999999997,CSH,9,0,0.5,0,0,9.5 +VTS,2012-10-01 07:57:00,2012-10-01 08:10:00,1,0.27000000000000002,-74.007671999999999,40.732267,1,,-73.979212000000004,40.752642000000002,CSH,12,0,0.5,0,0,12.5 +VTS,2012-10-01 07:57:00,2012-10-01 08:25:00,1,2.9500000000000002,-73.966341999999997,40.793354999999998,1,,-73.954012000000006,40.766401999999999,CSH,18,0,0.5,0,0,18.5 +CMT,2012-10-01 07:57:08,2012-10-01 08:10:03,1,2.2000000000000002,-73.976149000000007,40.788201999999998,1,N,-73.972713999999996,40.761926000000003,CRD,11,0,0.5,1,0,12.5 +CMT,2012-10-01 07:57:09,2012-10-01 08:03:49,1,1.5,-73.958729000000005,40.781035000000003,1,N,-73.968886999999995,40.764451000000001,CRD,7.5,0,0.5,1.2,0,9.1999999999999993 +CMT,2012-10-01 07:57:12,2012-10-01 08:14:33,1,3,-73.967502999999994,40.757398000000002,1,N,-74.002775,40.743156999999997,CSH,13.5,0,0.5,0,0,14 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-11.csv b/csv/testdata/taxi/yellow_tripdata_2012-11.csv new file mode 100644 index 0000000..1a6aa0d --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-11.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-11-01 01:18:26,2012-11-01 01:35:06,1,5.4000000000000004,-73.984519000000006,40.779775999999998,1,N,-73.947342000000006,40.764681000000003,CRD,18,0.5,0.5,3.7999999999999998,0,22.800000000000001 +CMT,2012-11-01 01:18:27,2012-11-01 
01:28:20,1,2.3999999999999999,-73.996082000000001,40.753301999999998,1,N,-73.985782999999998,40.727865000000001,CSH,10,0.5,0.5,0,0,11 +CMT,2012-11-01 01:18:45,2012-11-01 02:16:18,1,5.4000000000000004,-73.970534999999998,40.799143999999998,1,N,-73.957025999999999,40.770164000000001,CSH,35.5,0.5,0.5,0,0,36.5 +CMT,2012-11-01 01:18:49,2012-11-01 01:22:38,1,1.1000000000000001,-73.956559999999996,40.771124,1,N,-73.960993999999999,40.757342999999999,CRD,5.5,0.5,0.5,1.6200000000000001,0,8.1199999999999992 +CMT,2012-11-01 01:18:55,2012-11-01 01:27:53,1,2.7000000000000002,-73.959062000000003,40.771721999999997,1,N,-73.967997999999994,40.800170000000001,CRD,10.5,0.5,0.5,2.8500000000000001,0,14.35 +CMT,2012-11-01 01:18:57,2012-11-01 01:31:34,1,3.5,-73.956458999999995,40.777968000000001,1,N,-73.991489000000001,40.746352000000002,CRD,12.5,0.5,0.5,1,0,14.5 +CMT,2012-11-01 01:18:59,2012-11-01 01:30:29,1,3.2000000000000002,-73.978982999999999,40.787686000000001,1,N,-73.997304,40.753697000000003,CRD,12.5,0.5,0.5,1.5,0,15 +VTS,2012-11-01 01:19:00,2012-11-01 01:23:00,4,0.90000000000000002,-73.965787000000006,40.712837,1,,-73.957710000000006,40.719816999999999,CRD,5,0.5,0.5,1,0,7 diff --git a/csv/testdata/taxi/yellow_tripdata_2012-12.csv b/csv/testdata/taxi/yellow_tripdata_2012-12.csv new file mode 100644 index 0000000..ffe1e5b --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2012-12.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2012-12-13 08:17:27,2012-12-13 08:38:40,1,1.8999999999999999,-73.964110000000005,40.757821,1,N,-73.983536000000001,40.754967000000001,CRD,14.5,0,0.5,3,0,18 +CMT,2012-12-13 08:35:37,2012-12-13 08:54:25,1,3.3999999999999999,-73.968500000000006,40.798955999999997,1,N,-73.984139999999996,40.759404000000004,CRD,15,0,0.5,3.1000000000000001,0,18.600000000000001 +CMT,2012-12-13 08:38:59,2012-12-13 08:43:43,1,0.69999999999999996,-74.006574999999998,40.731895999999999,1,N,-73.998295999999996,40.736336999999999,CRD,5.5,0,0.5,1,0,7 +CMT,2012-12-13 08:56:37,2012-12-13 09:00:01,1,0.59999999999999998,-73.975471999999996,40.785339999999998,1,N,-73.971739999999997,40.791868999999998,CRD,4.5,0,0.5,1,0,6 +CMT,2012-12-13 07:43:04,2012-12-13 07:47:53,1,0.69999999999999996,-73.984874000000005,40.763159000000002,1,N,-73.974123000000006,40.759363,CRD,5,0,0.5,1.1000000000000001,0,6.5999999999999996 +CMT,2012-12-13 08:55:17,2012-12-13 09:04:04,1,2.2999999999999998,-73.974767,40.782949000000002,1,N,-73.958414000000005,40.810063,CRD,9.5,0,0.5,2.5,0,12.5 +CMT,2012-12-13 06:03:31,2012-12-13 06:07:48,1,1.7,-73.993222000000003,40.736654000000001,1,N,-73.975375999999997,40.749668,CRD,6.5,0,0.5,1,0,8 +CMT,2012-12-13 08:50:52,2012-12-13 09:13:11,1,6.2999999999999998,-74.012248999999997,40.701582000000002,1,N,-73.968947999999997,40.763981999999999,CRD,22.5,0,0.5,2,0,25 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-01.csv b/csv/testdata/taxi/yellow_tripdata_2013-01.csv new file mode 100644 index 0000000..7e6b6bb --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-01.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-01-01 15:11:48,2013-01-01 
15:18:10,4,1,-73.978165000000004,40.757976999999997,1,N,-73.989840000000001,40.751173000000001,CSH,6.5,0,0.5,0,0,7 +CMT,2013-01-06 00:18:35,2013-01-06 00:22:54,1,1.5,-74.006680000000003,40.731780999999998,1,N,-73.994499000000005,40.750658999999999,CSH,6,0.5,0.5,0,0,7 +CMT,2013-01-05 18:49:41,2013-01-05 18:54:23,1,1.1000000000000001,-74.004711,40.737769999999998,1,N,-74.009831000000005,40.725999999999999,CSH,5.5,1,0.5,0,0,7 +CMT,2013-01-07 23:54:15,2013-01-07 23:58:20,2,0.69999999999999996,-73.974599999999995,40.759945000000002,1,N,-73.984736999999996,40.759388000000001,CSH,5,0.5,0.5,0,0,6 +CMT,2013-01-07 23:25:03,2013-01-07 23:34:24,1,2.1000000000000001,-73.976252000000002,40.748528,1,N,-74.002583000000001,40.747866999999999,CSH,9.5,0.5,0.5,0,0,10.5 +CMT,2013-01-07 15:27:48,2013-01-07 15:38:37,1,1.7,-73.966742999999994,40.764251999999999,1,N,-73.983322000000001,40.743761999999997,CSH,9.5,0,0.5,0,0,10 +CMT,2013-01-08 11:01:15,2013-01-08 11:08:14,1,0.80000000000000004,-73.995801,40.743977999999998,1,N,-74.007418000000001,40.744343999999998,CSH,6,0,0.5,0,0,6.5 +CMT,2013-01-07 12:39:18,2013-01-07 13:10:56,3,10.699999999999999,-73.989936999999998,40.756773000000003,1,N,-73.865247999999994,40.770629999999997,CSH,34,0,0.5,0,4.7999999999999998,39.299999999999997 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-02.csv b/csv/testdata/taxi/yellow_tripdata_2013-02.csv new file mode 100644 index 0000000..6c37a49 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-02.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-02-08 23:35:14,2013-02-08 23:42:58,1,0.80000000000000004,-73.992438000000007,40.724488000000001,1,N,-73.984423000000007,40.718901000000002,CRD,6,0.5,0.5,0,0,7 +CMT,2013-02-07 12:20:16,2013-02-07 12:50:27,4,3.1000000000000001,-73.989492999999996,40.769590000000001,1,N,-73.990302999999997,40.737347999999997,CRD,20,0,0.5,3,0,23.5 +CMT,2013-02-08 08:56:54,2013-02-08 08:59:43,1,1,-73.963032999999996,40.799142000000003,1,N,-73.972168999999994,40.786445999999998,CRD,5,0,0.5,1.1000000000000001,0,6.5999999999999996 +CMT,2013-02-08 09:37:02,2013-02-08 09:50:50,1,2.1000000000000001,-73.987952000000007,40.728763000000001,1,N,-74.007114999999999,40.705399,CRD,11,0,0.5,2.2999999999999998,0,13.800000000000001 +CMT,2013-02-08 19:31:25,2013-02-08 19:46:23,1,3.2999999999999998,-73.987279999999998,40.743043,1,N,-74.010287000000005,40.703963000000002,CRD,13,1,0.5,1.5,0,16 +CMT,2013-02-08 23:10:01,2013-02-08 23:46:15,4,7.5999999999999996,-73.993005999999994,40.720153000000003,1,N,-73.959748000000005,40.808542000000003,CRD,26.5,0.5,0.5,5.5,0,33 +CMT,2013-02-07 22:02:30,2013-02-07 22:06:18,1,0.69999999999999996,-73.979967000000002,40.780544999999996,1,N,-73.971485999999999,40.782407999999997,CRD,5,0.5,0.5,1,0,7 +CMT,2013-02-06 22:25:26,2013-02-06 22:41:32,1,3.7999999999999998,-73.978510999999997,40.759912,1,N,-74.003901999999997,40.715969999999999,CRD,14,0.5,0.5,3,0,18 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-03.csv b/csv/testdata/taxi/yellow_tripdata_2013-03.csv new file mode 100644 index 0000000..c1ad6d4 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-03.csv @@ -0,0 +1,10 @@ 
+vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-03-01 00:00:04,2013-03-01 00:19:03,1,14.300000000000001,-73.776700000000005,40.645162999999997,1,N,-73.913921999999999,40.772612000000002,CRD,39.5,0.5,0.5,10.1,0,50.600000000000001 +CMT,2013-03-01 00:00:18,2013-03-01 00:12:11,1,3.2999999999999998,-73.987345000000005,40.752057999999998,1,N,-73.965335999999994,40.792718999999998,CRD,13,0.5,0.5,2.1000000000000001,0,16.100000000000001 +CMT,2013-03-01 00:00:18,2013-03-01 00:14:24,1,4.5,-73.991490999999996,40.726444999999998,1,N,-73.980863999999997,40.778368,CRD,15,0.5,0.5,3.2000000000000002,0,19.199999999999999 +CMT,2013-03-01 00:00:23,2013-03-01 00:14:56,1,9,-73.873110999999994,40.774092000000003,1,N,-73.960930000000005,40.769534999999998,CRD,25.5,0.5,0.5,6.2599999999999998,4.7999999999999998,37.560000000000002 +CMT,2013-03-01 00:00:35,2013-03-01 00:05:44,1,1.3,-73.990561,40.750928000000002,1,N,-73.982881000000006,40.739133000000002,CRD,6.5,0.5,0.5,1,0,8.5 +CMT,2013-03-01 00:00:39,2013-03-01 00:16:09,1,4.0999999999999996,-73.981033999999994,40.759202999999999,1,N,-73.921645999999996,40.738354000000001,CRD,15,0.5,0.5,2,0,18 +CMT,2013-03-01 00:00:39,2013-03-01 00:10:14,1,2.3999999999999999,-74.000095999999999,40.734696999999997,1,N,-73.974517000000006,40.753489999999999,CRD,10,0.5,0.5,2,0,13 +CMT,2013-03-01 00:00:51,2013-03-01 00:11:40,1,2.7999999999999998,-73.977484000000004,40.738221000000003,1,N,-73.988910000000004,40.719062999999998,CRD,11.5,0.5,0.5,3.75,0,16.25 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-04.csv b/csv/testdata/taxi/yellow_tripdata_2013-04.csv new file mode 100644 index 0000000..13abce4 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-04.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-04-04 18:47:45,2013-04-04 19:00:25,1,2.5,-73.957854999999995,40.765320000000003,1,N,-73.976274000000004,40.785646999999997,CRD,11,1,0.5,2.5,0,15 +CMT,2013-04-05 07:08:34,2013-04-05 07:17:34,1,1.6000000000000001,0,0,1,N,0,0,CRD,8.5,0,0.5,1.8,0,10.800000000000001 +CMT,2013-04-04 17:59:50,2013-04-04 18:21:48,1,3.6000000000000001,-73.982876000000005,40.754989000000002,1,N,-74.009181999999996,40.715375000000002,CRD,16.5,1,0.5,3.6000000000000001,0,21.600000000000001 +CMT,2013-04-04 18:12:01,2013-04-04 18:25:24,1,1.8999999999999999,-73.978116999999997,40.763449000000001,1,N,-73.955669,40.776640999999998,CRD,10,1,0.5,3.4500000000000002,0,14.949999999999999 +CMT,2013-04-04 20:12:57,2013-04-04 20:29:55,1,3.6000000000000001,-74.006369000000007,40.744756000000002,1,N,-73.961659999999995,40.76108,CRD,15,0.5,0.5,3.2000000000000002,0,19.199999999999999 +CMT,2013-04-05 02:48:11,2013-04-05 02:51:21,2,0.69999999999999996,-73.985195000000004,40.754933000000001,1,N,-73.990778000000006,40.747996000000001,CRD,4.5,0.5,0.5,1.1000000000000001,0,6.5999999999999996 +CMT,2013-04-05 06:16:10,2013-04-05 06:22:05,1,1.2,-73.985167000000004,40.763421000000001,1,N,-73.978869000000003,40.751142999999999,CRD,6.5,0,0.5,1,0,8 +CMT,2013-04-05 06:20:10,2013-04-05 
06:27:43,1,1.6000000000000001,-73.980028000000004,40.745786000000003,1,N,-74.002744000000007,40.756166999999998,CRD,8,0,0.5,2.1000000000000001,0,10.6 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-05.csv b/csv/testdata/taxi/yellow_tripdata_2013-05.csv new file mode 100644 index 0000000..99460ce --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-05.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2013-05-01 00:04:00,2013-05-01 00:12:00,1,1.3400000000000001,-73.982286999999999,40.772815000000001,1,,-73.98621,40.758741999999998,CSH,7,0.5,0.5,0,0,8 +VTS,2013-05-01 00:03:00,2013-05-01 00:10:00,5,2.6000000000000001,-73.963009999999997,40.7119,1,,-73.991874999999993,40.721916999999998,CRD,9.5,0.5,0.5,2,0,12.5 +VTS,2013-05-01 00:04:00,2013-05-01 00:10:00,2,1.3100000000000001,-73.981780000000001,40.724352000000003,1,,-73.973754999999997,40.736891999999997,CRD,6.5,0.5,0.5,1,0,8.5 +VTS,2013-05-01 00:05:00,2013-05-01 00:09:00,1,0.81999999999999995,-73.964016999999998,40.709691999999997,1,,-73.950895000000003,40.710971999999998,CSH,5.5,0.5,0.5,0,0,6.5 +VTS,2013-05-01 00:05:00,2013-05-01 00:14:00,1,1.6499999999999999,-73.973917,40.752786999999998,1,,-73.996202999999994,40.755867000000002,CRD,8.5,0.5,0.5,1.8,0,11.300000000000001 +VTS,2013-05-01 00:00:00,2013-05-01 00:12:00,5,2.4100000000000001,-74.002354999999994,40.750324999999997,1,,-73.972881999999998,40.756096999999997,CRD,11,0.5,0.5,2.2999999999999998,0,14.300000000000001 +VTS,2013-05-01 00:01:00,2013-05-01 00:10:00,1,2.4399999999999999,-73.950114999999997,40.771766999999997,1,,-73.977317999999997,40.759239999999998,CRD,10,0.5,0.5,1.5,0,12.5 +VTS,2013-05-01 00:05:00,2013-05-01 00:12:00,1,2.4199999999999999,-74.009294999999995,40.724732000000003,1,,-73.998671999999999,40.754931999999997,CRD,9,0.5,0.5,1,0,11 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-06.csv b/csv/testdata/taxi/yellow_tripdata_2013-06.csv new file mode 100644 index 0000000..5e252b6 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-06.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-06-03 00:02:12,2013-06-03 00:10:07,1,1.3,-73.981579999999994,40.773530000000001,1,N,-73.981823000000006,40.782124000000003,CRD,7.5,0.5,0.5,2.1200000000000001,0,10.619999999999999 +CMT,2013-06-03 00:03:03,2013-06-03 00:19:27,1,4.9000000000000004,-73.999562999999995,40.728364999999997,1,N,-73.952929999999995,40.729545000000002,CRD,17,0.5,0.5,3.6000000000000001,0,21.600000000000001 +CMT,2013-06-03 00:01:30,2013-06-03 00:28:11,1,17.699999999999999,-73.788441000000006,40.641151000000001,2,N,-73.98545,40.744194999999998,CRD,52,0,0.5,5,5.3300000000000001,62.829999999999998 +CMT,2013-06-03 00:04:14,2013-06-03 00:27:50,1,12.1,-73.862817000000007,40.768875000000001,1,N,-74.008797000000001,40.738841999999998,CRD,34.5,0.5,0.5,7.0999999999999996,0,42.600000000000001 +CMT,2013-06-03 00:04:53,2013-06-03 00:10:46,1,1.1000000000000001,-73.964900999999998,40.80688,1,N,-73.962348000000006,40.794986999999999,CRD,6.5,0.5,0.5,1.5,0,9 +CMT,2013-06-03 00:04:27,2013-06-03 
00:19:17,2,3.2999999999999998,-73.987380000000002,40.719738,1,N,-73.969223,40.690044999999998,CRD,13.5,0.5,0.5,2.8999999999999999,0,17.399999999999999 +CMT,2013-06-03 00:02:50,2013-06-03 00:10:53,1,1.2,-73.993216000000004,40.752107000000002,1,N,-73.978149000000002,40.741736000000003,CRD,7.5,0.5,0.5,1.5,0,10 +CMT,2013-06-03 00:04:15,2013-06-03 00:14:31,1,2.8999999999999999,-73.988444999999999,40.721733,1,N,-73.972899999999996,40.755670000000002,CRD,11,0.5,0.5,1.8,0,13.800000000000001 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-07.csv b/csv/testdata/taxi/yellow_tripdata_2013-07.csv new file mode 100644 index 0000000..100eb57 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-07.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2013-07-01 01:47:00,2013-07-01 01:52:00,1,1.1699999999999999,-74.013227000000001,40.714717,1,,-74.003493000000006,40.725285,CRD,6,0.5,0.5,1.3,0,8.3000000000000007 +VTS,2013-07-01 01:39:00,2013-07-01 01:52:00,1,4.1699999999999999,-73.980862000000002,40.763947000000002,1,,-73.933881999999997,40.767676999999999,CRD,14,0.5,0.5,2.8999999999999999,0,17.899999999999999 +VTS,2013-07-01 01:36:00,2013-07-01 01:50:00,1,2.98,-74.007565,40.740896999999997,1,,-73.980114999999998,40.713987000000003,CRD,12,0.5,0.5,1.5,0,14.5 +VTS,2013-07-01 01:53:00,2013-07-01 01:57:00,1,0.75,-73.985467,40.718342,1,,-73.982842000000005,40.727016999999996,CRD,5,0.5,0.5,1.1000000000000001,0,7.0999999999999996 +VTS,2013-07-01 01:47:00,2013-07-01 01:59:00,1,1.76,-74.005632000000006,40.726447,1,,-74.003906999999998,40.742032000000002,CRD,9.5,0.5,0.5,2,0,12.5 +VTS,2013-07-01 01:54:00,2013-07-01 01:58:00,1,1.54,-74.006495000000001,40.737937000000002,1,,-74.004987,40.752164999999998,CSH,7,0.5,0.5,0,0,8 +VTS,2013-07-01 01:56:00,2013-07-01 02:02:00,5,1.8200000000000001,-73.996082000000001,40.759725000000003,1,,-74.007452000000001,40.748444999999997,CSH,7.5,0.5,0.5,0,0,8.5 +VTS,2013-07-01 01:57:00,2013-07-01 02:01:00,1,1.25,-73.991794999999996,40.759712,1,,-74.001921999999993,40.750770000000003,CSH,6,0.5,0.5,0,0,7 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-08.csv b/csv/testdata/taxi/yellow_tripdata_2013-08.csv new file mode 100644 index 0000000..ccfda60 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-08.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-08-26 15:33:22,2013-08-26 15:44:47,1,1,-73.989877000000007,40.734282,1,N,-73.999419000000003,40.731369999999998,CSH,8.5,0,0.5,0,0,9 +CMT,2013-08-29 19:08:04,2013-08-29 19:36:56,1,6.7000000000000002,-73.979861,40.760016,1,N,-73.941869999999994,40.807037999999999,CSH,24.5,1,0.5,0,0,26 +CMT,2013-08-26 13:54:27,2013-08-26 14:08:44,1,3,-74.010441999999998,40.710102999999997,1,N,-73.987447000000003,40.749763999999999,CSH,12.5,0,0.5,0,0,13 +CMT,2013-08-30 08:48:17,2013-08-30 09:30:51,2,5.2000000000000002,-73.927998000000002,40.763857000000002,1,N,-73.990953000000005,40.756045,CSH,27,0,0.5,0,0,27.5 +CMT,2013-08-26 14:38:38,2013-08-26 14:57:43,1,4.2000000000000002,-73.990083999999996,40.742317999999997,1,N,-74.014300000000006,40.704144999999997,CSH,17,0,0.5,0,0,17.5 +CMT,2013-08-31 
02:18:59,2013-08-31 02:22:14,1,1.1000000000000001,-73.968052,40.759396000000002,1,N,-73.955765999999997,40.768182000000003,CSH,5.5,0.5,0.5,0,0,6.5 +CMT,2013-08-26 23:24:54,2013-08-26 23:31:51,2,1.6000000000000001,-73.981102000000007,40.758696999999998,1,Y,-73.99718,40.744726,CSH,7.5,0.5,0.5,0,0,8.5 +CMT,2013-08-26 19:20:32,2013-08-26 19:24:54,1,1.3999999999999999,-73.959671,40.790207000000002,1,N,-73.977851000000001,40.782913000000001,CSH,6.5,1,0.5,0,0,8 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-09.csv b/csv/testdata/taxi/yellow_tripdata_2013-09.csv new file mode 100644 index 0000000..bc98582 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-09.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-09-01 16:35:05,2013-09-01 16:47:53,2,2.6000000000000001,-73.987902000000005,40.724077999999999,1,N,-73.994600000000005,40.750579999999999,CRD,11.5,0,0.5,2.3999999999999999,0,14.4 +CMT,2013-09-01 17:44:05,2013-09-01 17:58:37,1,3.5,-74.007867000000005,40.710233000000002,1,N,-74.003977000000006,40.756570000000004,CRD,14.5,0,0.5,3.75,0,18.75 +CMT,2013-09-01 16:36:20,2013-09-01 16:50:53,1,3.1000000000000001,-74.016724999999994,40.709240000000001,1,N,-74.000437000000005,40.752535000000002,CRD,14,0,0.5,1,0,15.5 +CMT,2013-09-01 07:54:47,2013-09-01 08:09:17,1,9.5999999999999996,-73.984881999999999,40.736415000000001,1,N,-73.861418,40.768180000000001,CRD,27,0,0.5,5,5.3300000000000001,37.829999999999998 +CMT,2013-09-02 22:50:34,2013-09-02 22:58:22,1,1.5,-73.983377000000004,40.770254000000001,1,N,-73.976156000000003,40.789186999999998,CRD,8,0.5,0.5,2,0,11 +CMT,2013-09-02 23:39:36,2013-09-02 23:58:10,1,8.3000000000000007,-73.970512999999997,40.752357000000003,1,Y,-73.876568000000006,40.738854000000003,CRD,25.5,0.5,0.5,5.2999999999999998,0,31.800000000000001 +CMT,2013-09-02 20:56:24,2013-09-02 21:13:10,1,9.9000000000000004,-73.874452000000005,40.774062000000001,1,N,-73.984712000000002,40.703598,CRD,28,0.5,0.5,7.25,0,36.25 +CMT,2013-09-02 23:07:43,2013-09-02 23:20:50,2,3.8999999999999999,-74.008255000000005,40.721904000000002,1,N,-73.966477999999995,40.753864,CRD,13.5,0.5,0.5,2,0,16.5 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-10.csv b/csv/testdata/taxi/yellow_tripdata_2013-10.csv new file mode 100644 index 0000000..b53112f --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-10.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-10-09 14:46:30,2013-10-09 14:56:02,1,1.1000000000000001,-73.986868000000001,40.756239999999998,1,N,-73.980560999999994,40.764797000000002,CRD,8,0,0.5,2.1200000000000001,0,10.619999999999999 +CMT,2013-10-09 17:32:08,2013-10-09 17:45:40,1,2.8999999999999999,-74.001103000000001,40.721381999999998,1,N,-73.952635999999998,40.712133999999999,CRD,12.5,1,0.5,4.2000000000000002,0,18.199999999999999 +CMT,2013-10-07 11:47:37,2013-10-07 11:55:42,1,1.2,-73.973727999999994,40.782853000000003,1,N,-73.984120000000004,40.768515999999998,CRD,7.5,0,0.5,1.6000000000000001,0,9.5999999999999996 +CMT,2013-10-12 04:51:00,2013-10-12 
05:00:36,1,2.3999999999999999,-73.910289000000006,40.776280999999997,1,N,-73.961743999999996,40.76538,CRD,10,0.5,0.5,2.2000000000000002,0,13.199999999999999 +CMT,2013-10-13 22:56:01,2013-10-13 23:09:43,1,2.7000000000000002,-73.980677,40.784765,1,N,-73.958752000000004,40.760542999999998,CRD,12,0.5,0.5,2,0,15 +CMT,2013-10-07 08:09:41,2013-10-07 08:19:32,2,1.6000000000000001,-73.975803999999997,40.736713000000002,1,N,-73.989098999999996,40.729996999999997,CRD,8.5,0,0.5,1.8,0,10.800000000000001 +CMT,2013-10-07 09:31:36,2013-10-07 10:06:52,1,3.6000000000000001,-73.945240999999996,40.774487999999998,1,N,-73.988981999999993,40.751381000000002,CRD,22.5,0,0.5,4.5,0,27.5 +CMT,2013-10-07 21:15:36,2013-10-07 21:24:28,1,2.6000000000000001,-73.957014999999998,40.783144999999998,1,N,-73.983204999999998,40.76802,CRD,9.5,0.5,0.5,2.6000000000000001,0,13.1 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-11.csv b/csv/testdata/taxi/yellow_tripdata_2013-11.csv new file mode 100644 index 0000000..af6327c --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-11.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +CMT,2013-11-25 15:53:33,2013-11-25 16:00:51,1,0.59999999999999998,-73.978104000000002,40.752966000000001,1,N,-73.985755999999995,40.762684999999998,CRD,6,1,0.5,1,0,8.5 +CMT,2013-11-25 15:24:41,2013-11-25 15:30:18,1,0.5,-73.982313000000005,40.764826999999997,1,N,-73.982129,40.758889000000003,CRD,5.5,0,0.5,3,0,9 +CMT,2013-11-25 09:43:42,2013-11-25 10:02:57,1,3.2999999999999998,-73.982012999999995,40.762506999999999,1,N,-74.006854000000004,40.719582000000003,CRD,15,0,0.5,2,0,17.5 +CMT,2013-11-25 06:49:58,2013-11-25 07:04:22,1,3.7999999999999998,-73.976005000000001,40.744481,1,N,-74.016063000000003,40.717298,CRD,14,0,0.5,2.8999999999999999,0,17.399999999999999 +CMT,2013-11-25 10:02:12,2013-11-25 10:17:15,1,2.2000000000000002,-73.952624999999998,40.780962000000002,1,N,-73.981629999999996,40.777977999999997,CRD,12,0,0.5,2,0,14.5 +CMT,2013-11-25 15:18:07,2013-11-25 15:33:25,1,1,-73.992423000000002,40.749516999999997,1,N,-73.988159999999993,40.746557000000003,CRD,10,0,0.5,2.2200000000000002,0,12.720000000000001 +CMT,2013-11-25 21:20:50,2013-11-25 21:26:22,1,1.1000000000000001,-73.946370999999999,40.775368999999998,1,N,-73.953090000000003,40.785102999999999,CRD,6.5,0.5,0.5,1.5,0,9 +CMT,2013-11-25 07:00:55,2013-11-25 07:04:37,1,1.2,-73.983356999999998,40.767192999999999,1,N,-73.978393999999994,40.755580000000002,CRD,5.5,0,0.5,1,0,7 diff --git a/csv/testdata/taxi/yellow_tripdata_2013-12.csv b/csv/testdata/taxi/yellow_tripdata_2013-12.csv new file mode 100644 index 0000000..d32499d --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2013-12.csv @@ -0,0 +1,10 @@ +vendor_id,pickup_datetime,dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,rate_code,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,surcharge,mta_tax,tip_amount,tolls_amount,total_amount + +VTS,2013-12-01 00:13:00,2013-12-01 00:31:00,1,3.8999999999999999,-73.979342000000003,40.776651999999999,1,,-73.981866999999994,40.734279999999998,CRD,15.5,0.5,0.5,3,0,19.5 +VTS,2013-12-01 00:40:00,2013-12-01 00:48:00,6,3.2000000000000002,-73.939672000000002,40.726154999999999,1,,-73.985579999999999,40.718074999999999,CSH,11.5,0.5,0.5,0,0,12.5 +VTS,2013-12-01 02:21:00,2013-12-01 
02:30:00,5,3.2799999999999998,-73.958752000000004,40.768076999999998,1,,-73.958752000000004,40.768076999999998,CRD,11,0.5,0.5,2.2999999999999998,0,14.300000000000001 +VTS,2013-12-01 02:14:00,2013-12-01 02:22:00,1,1.8400000000000001,-73.978835000000004,40.724195000000002,1,,-73.979740000000007,40.743411999999999,CSH,8.5,0.5,0.5,0,0,9.5 +VTS,2013-12-01 04:45:00,2013-12-01 04:50:00,1,1.02,-73.991361999999995,40.735072000000002,1,,-73.978944999999996,40.7346,CSH,6,0.5,0.5,0,0,7 +VTS,2013-12-01 04:45:00,2013-12-01 04:50:00,3,0.98999999999999999,-73.987174999999993,40.760762,1,,-73.993797999999998,40.765517000000003,CSH,6,0.5,0.5,0,0,7 +VTS,2013-12-01 06:16:00,2013-12-01 06:35:00,1,22.039999999999999,-73.973236999999997,40.755699999999997,2,,-73.783687,40.643762000000002,CSH,52,0,0.5,0,5.3300000000000001,57.829999999999998 +VTS,2013-12-01 08:35:00,2013-12-01 08:45:00,6,3.0499999999999998,0,0,1,,0,0,CSH,11,0,0.5,0,0,11.5 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-01.csv b/csv/testdata/taxi/yellow_tripdata_2014-01.csv new file mode 100644 index 0000000..3cc78a4 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-01.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-01-09 20:45:25,2014-01-09 20:52:31,1,0.69999999999999996,-73.994770000000003,40.736828000000003,1,N,-73.982226999999995,40.731789999999997,CRD,6.5,0.5,0.5,1.3999999999999999,0,8.9000000000000004 +CMT,2014-01-09 20:46:12,2014-01-09 20:55:12,1,1.3999999999999999,-73.982392000000004,40.773381999999998,1,N,-73.960448999999997,40.763995000000001,CRD,8.5,0.5,0.5,1.8999999999999999,0,11.4 +CMT,2014-01-09 20:44:47,2014-01-09 20:59:46,2,2.2999999999999998,-73.988569999999996,40.739406000000002,1,N,-73.986626000000001,40.765217,CRD,11.5,0.5,0.5,1.5,0,14 +CMT,2014-01-09 20:44:57,2014-01-09 20:51:40,1,1.7,-73.960212999999996,40.770463999999997,1,N,-73.979862999999995,40.777050000000003,CRD,7.5,0.5,0.5,1.7,0,10.199999999999999 +CMT,2014-01-09 20:47:09,2014-01-09 20:53:32,1,0.90000000000000002,-73.995371000000006,40.717247999999998,1,N,-73.984367000000006,40.720523999999997,CRD,6,0.5,0.5,1.75,0,8.75 +CMT,2014-01-09 20:45:07,2014-01-09 20:51:01,1,0.90000000000000002,-73.983811000000003,40.749654999999997,1,N,-73.989746999999994,40.756574999999998,CRD,6,0.5,0.5,1.3999999999999999,0,8.4000000000000004 +CMT,2014-01-09 20:44:04,2014-01-09 21:05:45,1,3.6000000000000001,-73.984138000000002,40.726317000000002,1,N,-73.962868999999998,40.758443,CRD,16.5,0.5,0.5,5.25,0,22.75 +CMT,2014-01-09 20:43:23,2014-01-09 20:52:07,1,2.1000000000000001,-73.979906,40.745849999999997,1,N,-73.959090000000003,40.773639000000003,CRD,9,0.5,0.5,2,0,12 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-02.csv b/csv/testdata/taxi/yellow_tripdata_2014-02.csv new file mode 100644 index 0000000..d2ab721 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-02.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +VTS,2014-02-09 14:51:00,2014-02-09 14:58:00,1,1.0600000000000001,-73.982370000000003,40.768287000000001,1,,-73.977575000000002,40.758175000000001,CSH,6.5,0,0.5,0,0,7 
+VTS,2014-02-06 14:27:00,2014-02-06 14:44:00,2,1.48,-73.983069999999998,40.764429999999997,1,,-73.993750000000006,40.750329999999998,CRD,11,0,0.5,1,0,12.5 +VTS,2014-02-06 14:22:00,2014-02-06 14:45:00,1,2.8399999999999999,-74.002667000000002,40.723770000000002,1,,-73.982271999999995,40.757964999999999,CRD,15.5,0,0.5,3.1000000000000001,0,19.100000000000001 +VTS,2014-02-06 13:24:00,2014-02-06 13:37:00,1,1.1100000000000001,-73.965050000000005,40.791181999999999,1,,-73.949822999999995,40.785311999999998,CSH,9.5,0,0.5,0,0,10 +VTS,2014-02-06 14:30:00,2014-02-06 14:43:00,1,0.71999999999999997,-73.980109999999996,40.760939999999998,1,,-73.971112000000005,40.757671999999999,CSH,9,0,0.5,0,0,9.5 +VTS,2014-02-06 14:12:00,2014-02-06 14:42:00,2,9.3000000000000007,-73.865390000000005,40.771470000000001,1,,-73.979290000000006,40.750450000000001,CRD,31,0,0.5,10,5.3300000000000001,46.829999999999998 +VTS,2014-02-06 14:35:00,2014-02-06 14:42:00,1,1.29,-73.960059999999999,40.776527000000002,1,,-73.975542000000004,40.776736999999997,CSH,7.5,0,0.5,0,0,8 +VTS,2014-02-06 14:27:00,2014-02-06 14:44:00,1,3.5899999999999999,-73.969481999999999,40.753585000000001,1,,-73.988146999999998,40.718795,CSH,15.5,0,0.5,0,0,16 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-03.csv b/csv/testdata/taxi/yellow_tripdata_2014-03.csv new file mode 100644 index 0000000..99a21a5 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-03.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-03-01 01:07:38,2014-03-01 01:16:26,1,2,-73.951504,40.714112999999998,1,N,-73.944277999999997,40.690677000000001,CRD,9,0.5,0.5,2,0,12 +CMT,2014-03-01 01:08:03,2014-03-01 01:12:51,2,1.2,-74.004734999999997,40.742173000000001,1,N,-73.992227999999997,40.74821,CRD,6,0.5,0.5,1,0,8 +CMT,2014-03-01 01:08:51,2014-03-01 01:13:18,3,0.5,-73.949302000000003,40.707568999999999,1,N,-73.951654000000005,40.71425,CRD,5,0.5,0.5,1.2,0,7.2000000000000002 +CMT,2014-03-01 01:09:20,2014-03-01 01:24:18,3,3.5,-73.993538999999998,40.721299999999999,1,N,-73.961437000000004,40.760412000000002,CRD,14,0.5,0.5,3,0,18 +CMT,2014-03-01 01:09:46,2014-03-01 01:22:34,1,1.8,-73.987881000000002,40.744579000000002,1,N,-74.004384000000002,40.722774999999999,CRD,10.5,0.5,0.5,1,0,12.5 +CMT,2014-03-01 01:12:41,2014-03-01 01:15:38,1,0.5,-74.002110000000002,40.724483999999997,1,N,-73.998613000000006,40.720010000000002,CRD,4,0.5,0.5,0.5,0,5.5 +CMT,2014-03-01 01:12:11,2014-03-01 01:27:38,2,3.7000000000000002,-73.971920999999995,40.762605999999998,1,N,-74.005544,40.726855,CRD,14.5,0.5,0.5,3.1000000000000001,0,18.600000000000001 +CMT,2014-03-01 01:13:55,2014-03-01 01:34:54,1,5.4000000000000004,-73.991225999999997,40.749797000000001,1,N,-73.976973999999998,40.681621999999997,CRD,20,0.5,0.5,3,0,24 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-04.csv b/csv/testdata/taxi/yellow_tripdata_2014-04.csv new file mode 100644 index 0000000..ab90d64 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-04.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-04-08 08:59:39,2014-04-08 
09:28:57,1,2.5,-73.958848000000003,40.763584999999999,1,N,-73.986283999999998,40.752034000000002,CRD,18,0,0.5,3.7000000000000002,0,22.199999999999999 +CMT,2014-04-08 14:59:22,2014-04-08 15:04:52,1,0.90000000000000002,0,0,1,N,0,0,CRD,6,0,0.5,1.3,0,7.7999999999999998 +CMT,2014-04-08 08:45:28,2014-04-08 08:50:41,2,0.59999999999999998,-73.992232999999999,40.729135999999997,1,N,-73.991387000000003,40.735185999999999,CRD,5.5,0,0.5,1.2,0,7.2000000000000002 +CMT,2014-04-08 08:00:20,2014-04-08 08:11:31,2,0.5,-73.973725999999999,40.750095000000002,1,N,-73.976889,40.755623,CRD,8,0,0.5,1.7,0,10.199999999999999 +CMT,2014-04-08 08:38:36,2014-04-08 08:44:37,1,0.40000000000000002,-73.973501999999996,40.755479000000001,1,Y,-73.979197999999997,40.758619000000003,CRD,5.5,0,0.5,1.2,0,7.2000000000000002 +CMT,2014-04-08 07:52:53,2014-04-08 07:59:12,1,0.80000000000000004,-73.963002000000003,40.766354999999997,1,N,-73.953767999999997,40.778865000000003,CRD,6,0,0.5,1.3,0,7.7999999999999998 +CMT,2014-04-08 16:08:16,2014-04-08 16:12:38,1,1,-73.989555999999993,40.741782999999998,1,N,-74.002437999999998,40.739806999999999,CRD,5.5,1,0.5,1.3999999999999999,0,8.4000000000000004 +CMT,2014-04-08 12:04:09,2014-04-08 12:14:30,1,1,-73.975847999999999,40.757125000000002,1,N,-73.972513000000006,40.750821999999999,CRD,8,0,0.5,1.7,0,10.199999999999999 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-05.csv b/csv/testdata/taxi/yellow_tripdata_2014-05.csv new file mode 100644 index 0000000..89edf4d --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-05.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +VTS,2014-05-31 17:17:00,2014-05-31 17:17:00,2,0,0,0,1,,0,0,CSH,2.5,0,0.5,0,0,3 +VTS,2014-05-31 17:07:00,2014-05-31 17:19:00,2,2.2200000000000002,-73.987039999999993,40.760120000000001,1,,-74.005549999999999,40.745609999999999,CSH,10.5,0,0.5,0,0,11 +VTS,2014-05-31 16:29:00,2014-05-31 17:17:00,1,17.109999999999999,0,0,2,,-73.982200000000006,40.750920000000001,CRD,52,0,0.5,11.470000000000001,5.3300000000000001,69.299999999999997 +VTS,2014-05-29 13:49:00,2014-05-29 14:03:00,1,0.81999999999999995,-73.983311999999998,40.734602000000002,1,,-73.995767000000001,40.736919999999998,CRD,9.5,0,0.5,1.8999999999999999,0,11.9 +VTS,2014-05-29 13:56:00,2014-05-29 14:06:00,1,0.85999999999999999,-73.989924999999999,40.745145000000001,1,,-73.985974999999996,40.754944999999999,CRD,8,0,0.5,1.6000000000000001,0,10.1 +VTS,2014-05-30 11:53:00,2014-05-30 12:10:00,2,3.3799999999999999,-73.98563,40.735587000000002,1,,-73.956062000000003,40.772126999999998,CSH,14,0,0.5,0,0,14.5 +VTS,2014-05-31 16:58:00,2014-05-31 17:09:00,6,1.04,-73.987915000000001,40.750152,1,,-73.977990000000005,40.761547,CSH,8.5,0,0.5,0,0,9 +VTS,2014-05-31 16:52:00,2014-05-31 17:07:00,1,1.5,-73.993160000000003,40.747881999999997,1,,-73.984994999999998,40.763485000000003,CSH,10.5,0,0.5,0,0,11 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-06.csv b/csv/testdata/taxi/yellow_tripdata_2014-06.csv new file mode 100644 index 0000000..98bdbfa --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-06.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, 
tip_amount, tolls_amount, total_amount + +CMT,2014-06-30 00:10:16,2014-06-30 00:25:10,1,4.4000000000000004,-73.994076000000007,40.720134000000002,1,N,-73.957111999999995,40.680574999999997,CRD,15.5,0.5,0.5,2.5,0,19 +CMT,2014-06-30 02:09:23,2014-06-30 02:20:36,2,3,-74.007391999999996,40.741115999999998,1,N,-73.987470999999999,40.765773000000003,CRD,12,0.5,0.5,2.6000000000000001,0,15.6 +CMT,2014-06-30 03:53:38,2014-06-30 04:08:15,2,4.7000000000000002,-74.005937000000003,40.740136,1,N,-73.954162999999994,40.778651000000004,CRD,16,0.5,0.5,3.3999999999999999,0,20.399999999999999 +CMT,2014-06-30 00:51:46,2014-06-30 01:08:46,1,4.5999999999999996,-73.986054999999993,40.756408,1,N,-73.959327000000002,40.811957,CRD,16,0.5,0.5,1,0,18 +CMT,2014-06-30 01:04:22,2014-06-30 01:13:40,1,2.5,-73.948076999999998,40.706921000000001,1,N,-73.959046999999998,40.683061000000002,CRD,9.5,0.5,0.5,2.1000000000000001,0,12.6 +CMT,2014-06-30 00:06:59,2014-06-30 00:30:26,1,5.9000000000000004,-73.998451000000003,40.750132999999998,5,N,-74.027158,40.749732999999999,CRD,60,0,0,0,9,69 +CMT,2014-06-30 00:53:12,2014-06-30 01:09:19,1,8,-73.988838999999999,40.763556999999999,1,N,-73.930570000000003,40.853625999999998,CRD,25,0.5,0.5,1,0,27 +CMT,2014-06-30 02:10:18,2014-06-30 02:15:35,2,0.69999999999999996,-74.012542999999994,40.714992000000002,1,N,-74.007222999999996,40.708393999999998,CRD,5.5,0.5,0.5,1,0,7.5 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-07.csv b/csv/testdata/taxi/yellow_tripdata_2014-07.csv new file mode 100644 index 0000000..4f25214 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-07.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-07-10 10:07:55,2014-07-10 10:16:19,1,1.2,-73.981240999999997,40.781829000000002,1,N,-73.974051000000003,40.787320000000001,CSH,7.5,0,0.5,0,0,8 +CMT,2014-07-10 09:06:52,2014-07-10 09:36:36,1,3.3999999999999999,-74.000952999999996,40.733972000000001,1,N,-73.970544000000004,40.764482000000001,CSH,19,0,0.5,0,0,19.5 +CMT,2014-07-10 10:37:55,2014-07-10 10:49:25,1,1,-74.006416999999999,40.733199999999997,1,N,-73.999509000000003,40.723267,CSH,8.5,0,0.5,0,0,9 +CMT,2014-07-10 12:43:30,2014-07-10 12:47:15,1,0.69999999999999996,-73.981444999999994,40.780760999999998,1,N,-73.974930999999998,40.788041999999997,CSH,5,0,0.5,0,0,5.5 +CMT,2014-07-10 07:51:07,2014-07-10 07:57:56,1,1.1000000000000001,-74.013998000000001,40.713855000000002,1,N,-74.012623000000005,40.702553000000002,CSH,6.5,0,0.5,0,0,7 +CMT,2014-07-10 10:23:15,2014-07-10 10:30:20,1,0.69999999999999996,-73.979968,40.749549000000002,1,N,-73.98357,40.756798000000003,CSH,6,0,0.5,0,0,6.5 +CMT,2014-07-10 05:08:31,2014-07-10 05:23:07,1,5,-73.989069000000001,40.758426999999998,1,N,-73.917637999999997,40.739390999999998,CSH,17,0.5,0.5,0,0,18 +CMT,2014-07-10 06:28:46,2014-07-10 06:32:26,1,0.5,-74.000192999999996,40.738841999999998,1,N,-74.008368000000004,40.742964000000001,CSH,4.5,0,0.5,0,0,5 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-08.csv b/csv/testdata/taxi/yellow_tripdata_2014-08.csv new file mode 100644 index 0000000..3272e4e --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-08.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, 
dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-08-16 14:58:49,2014-08-16 15:15:59,1,2.7000000000000002,-73.946537000000006,40.776812999999997,1,N,-73.976192999999995,40.755625000000002,CSH,14,0,0.5,0,0,14.5 +CMT,2014-08-16 08:10:48,2014-08-16 08:58:16,3,20.399999999999999,-73.776857000000007,40.645099000000002,1,Y,-73.916248999999993,40.837356999999997,CSH,58.5,0,0.5,0,5.3300000000000001,64.329999999999998 +CMT,2014-08-16 09:44:07,2014-08-16 09:54:37,1,2.1000000000000001,-73.986585000000005,40.725847999999999,1,N,-73.977157000000005,40.751961000000001,CSH,9.5,0,0.5,0,0,10 +CMT,2014-08-16 10:46:13,2014-08-16 10:51:25,1,1.3,-73.976290000000006,40.765231,1,N,-73.961484999999996,40.777889000000002,CSH,6,0,0.5,0,0,6.5 +CMT,2014-08-16 09:27:23,2014-08-16 09:39:37,2,1.7,-73.995248000000004,40.754646000000001,1,Y,-73.995902999999998,40.769201000000002,CSH,10.5,0,0.5,0,0,11 +CMT,2014-08-16 14:14:16,2014-08-16 14:25:33,2,1.7,-73.991534999999999,40.759863000000003,1,N,-74.005722000000006,40.737558,CSH,10,0,0.5,0,0,10.5 +CMT,2014-08-16 15:55:16,2014-08-16 16:00:10,1,1,-73.972307000000001,40.794075999999997,1,N,-73.963864999999998,40.807858000000003,CSH,6,0,0.5,0,0,6.5 +CMT,2014-08-16 14:08:29,2014-08-16 14:32:03,1,9.1999999999999993,-73.967337999999998,40.766008999999997,1,N,-73.872972000000004,40.774487000000001,CSH,28.5,0,0.5,0,0,29 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-09.csv b/csv/testdata/taxi/yellow_tripdata_2014-09.csv new file mode 100644 index 0000000..671db2c --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-09.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +VTS,2014-09-10 12:19:00,2014-09-10 12:39:00,2,1.53,-73.962270000000004,40.779057000000002,1,,-73.976247000000001,40.759452000000003,CSH,13.5,0,0.5,0,0,14 +VTS,2014-09-10 12:33:00,2014-09-10 12:41:00,1,0.5,-73.974029999999999,40.762791999999997,1,,-73.978341999999998,40.756787000000003,UNK,6,0,0.5,1.2,0,7.7000000000000002 +VTS,2014-09-10 12:31:00,2014-09-10 12:39:00,5,1.25,-73.980519999999999,40.738287,1,,-73.994564999999994,40.729317000000002,CSH,7,0,0.5,0,0,7.5 +VTS,2014-09-10 12:33:00,2014-09-10 12:42:00,1,0.87,-73.97072,40.764769999999999,1,,-73.964609999999993,40.757930000000002,CRD,6.5,0,0.5,1.3,0,8.3000000000000007 +VTS,2014-09-10 12:30:00,2014-09-10 12:41:00,6,1.7,-73.978070000000002,40.737490000000001,1,,-73.962320000000005,40.759219999999999,CRD,9.5,0,0.5,2.3799999999999999,0,12.380000000000001 +VTS,2014-09-10 12:27:00,2014-09-10 12:38:00,1,1.2,-73.961129999999997,40.774876999999996,1,,-73.972142000000005,40.760317000000001,CRD,8.5,0,0.5,2.1200000000000001,0,11.119999999999999 +VTS,2014-09-10 12:37:00,2014-09-10 12:41:00,5,0.77000000000000002,-73.992985000000004,40.758040000000001,1,,-73.989607000000007,40.765430000000002,CRD,5,0,0.5,1,0,6.5 +VTS,2014-09-10 12:40:00,2014-09-10 12:44:00,1,0.45000000000000001,-74.000349999999997,40.737895000000002,1,,-74.007352999999995,40.740949999999998,CRD,4.5,0,0.5,0.75,0,5.75 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-10.csv b/csv/testdata/taxi/yellow_tripdata_2014-10.csv new file mode 100644 index 0000000..7a59ef3 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-10.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, 
passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +CMT,2014-10-01 08:55:07,2014-10-01 09:11:03,1,2.2000000000000002,-74.005866999999995,40.737569999999998,1,Y,-74.015534000000002,40.708277000000002,CRD,12,0,0.5,1,0,13.5 +CMT,2014-10-01 10:51:17,2014-10-01 11:26:11,1,15.699999999999999,-73.873193000000001,40.774056000000002,1,Y,-73.999846000000005,40.631132000000001,CRD,45.5,0,0.5,9.1999999999999993,0,55.200000000000003 +CMT,2014-10-01 02:03:03,2014-10-01 02:06:55,1,1,0,0,1,N,0,0,CRD,5,0.5,0.5,1,0,7 +CMT,2014-10-01 00:06:35,2014-10-01 00:17:05,2,2.5,-73.987150999999997,40.732922000000002,1,N,-73.991831000000005,40.758147999999998,CRD,10,0.5,0.5,2.2000000000000002,0,13.199999999999999 +CMT,2014-10-01 01:34:13,2014-10-01 01:47:02,1,4.2000000000000002,-73.983266999999998,40.726576999999999,1,N,-73.937556000000001,40.716380000000001,CRD,15,0.5,0.5,3.2000000000000002,0,19.199999999999999 +CMT,2014-10-01 01:28:24,2014-10-01 01:50:25,1,6,-74.006640000000004,40.744301999999998,1,N,-74.015866000000003,40.705061000000001,CRD,21.5,0.5,0.5,4.5,0,27 +CMT,2014-10-01 00:50:49,2014-10-01 01:01:35,1,4.2999999999999998,-73.983412000000001,40.765830999999999,1,N,-73.955209999999994,40.804502999999997,CRD,14,0.5,0.5,3,0,18 +CMT,2014-10-01 01:44:33,2014-10-01 01:50:22,1,0.90000000000000002,-74.007599999999996,40.740855000000003,1,N,-73.993703999999994,40.737439999999999,CRD,6,0.5,0.5,1.3999999999999999,0,8.4000000000000004 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-11.csv b/csv/testdata/taxi/yellow_tripdata_2014-11.csv new file mode 100644 index 0000000..ef529fa --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-11.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +VTS,2014-11-01 00:47:00,2014-11-01 01:00:00,5,2.71,-73.96096,40.818779999999997,1,,-73.92362,40.808399999999999,CRD,11.5,0.5,0.5,2.3999999999999999,0,14.9 +VTS,2014-11-01 00:28:00,2014-11-01 01:00:00,5,4.6699999999999999,-73.990148000000005,40.729315,1,,-73.954932999999997,40.769334999999998,CRD,23.5,0.5,0.5,6,0,30.5 +VTS,2014-11-01 00:39:00,2014-11-01 01:00:00,2,4.5199999999999996,-73.980121999999994,40.742846999999998,1,,-73.977007,40.784256999999997,CRD,17,0.5,0.5,4.3799999999999999,0,22.379999999999999 +VTS,2014-11-01 00:46:00,2014-11-01 00:56:00,1,1.7,-73.932772999999997,40.703381999999998,1,,-73.951693000000006,40.691347,CRD,8.5,0.5,0.5,2.25,0,11.75 +VTS,2014-11-01 00:06:00,2014-11-01 00:59:00,3,8.1799999999999997,-73.997955000000005,40.729407000000002,1,,-73.884077000000005,40.748452,CSH,37.5,0.5,0.5,0,0,38.5 +VTS,2014-11-01 00:53:00,2014-11-01 00:58:00,1,0.68999999999999995,-73.992345,40.743206999999998,1,,-73.991607000000002,40.749679999999998,CSH,5,0.5,0.5,0,0,6 +VTS,2014-11-01 00:23:00,2014-11-01 00:59:00,1,9.7300000000000004,-73.989564999999999,40.723077000000004,1,,-73.868944999999997,40.762999999999998,CSH,34,0.5,0.5,0,0,35 +VTS,2014-11-01 00:54:00,2014-11-01 00:59:00,1,1.1699999999999999,-73.956559999999996,40.775232000000003,1,,-73.953597000000002,40.788007,CRD,6,0.5,0.5,1.3,0,8.3000000000000007 diff --git a/csv/testdata/taxi/yellow_tripdata_2014-12.csv b/csv/testdata/taxi/yellow_tripdata_2014-12.csv new file 
mode 100644 index 0000000..41d69f8 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2014-12.csv @@ -0,0 +1,10 @@ +vendor_id, pickup_datetime, dropoff_datetime, passenger_count, trip_distance, pickup_longitude, pickup_latitude, rate_code, store_and_fwd_flag, dropoff_longitude, dropoff_latitude, payment_type, fare_amount, surcharge, mta_tax, tip_amount, tolls_amount, total_amount + +VTS,2014-12-12 18:16:00,2014-12-12 18:35:00,3,4.0300000000000002,-74.014049999999997,40.711706999999997,1,,-73.995626999999999,40.759461999999999,CSH,16,1,0.5,0,0,17.5 +VTS,2014-12-12 18:18:00,2014-12-12 18:36:00,1,4.0999999999999996,-73.945876999999996,40.780524999999997,1,,-73.972558000000006,40.740457999999997,CRD,15,1,0.5,4,0,20.5 +VTS,2014-12-12 18:31:00,2014-12-12 18:35:00,1,0.95999999999999996,-73.961449999999999,40.796259999999997,1,,-73.955629999999999,40.787759999999999,CSH,5.5,1,0.5,0,0,7 +VTS,2014-12-08 01:53:00,2014-12-08 01:55:00,5,0.76000000000000001,-73.955280000000002,40.768675000000002,1,,-73.948976999999999,40.777361999999997,CRD,4.5,0.5,0.5,1,0,6.5 +VTS,2014-12-12 17:58:00,2014-12-12 18:34:00,1,11.19,-73.862690000000001,40.76896,1,,-73.745810000000006,40.7667,CSH,35,1,0.5,0,0,36.5 +VTS,2014-12-08 01:33:00,2014-12-08 01:55:00,2,6.9299999999999997,-73.988190000000003,40.759399999999999,1,,-73.879840000000002,40.750230000000002,CRD,22.5,0.5,0.5,6.9000000000000004,0,30.399999999999999 +VTS,2014-12-08 01:46:00,2014-12-08 01:53:00,1,2.1200000000000001,-73.986540000000005,40.753320000000002,1,,-74.005080000000007,40.735689999999998,CSH,8.5,0.5,0.5,0,0,9.5 +VTS,2014-12-08 01:57:00,2014-12-08 02:05:00,1,2.8799999999999999,-74.000405000000001,40.730127000000003,1,,-73.978155000000001,40.764485000000001,CSH,10,0.5,0.5,0,0,11 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-01.csv b/csv/testdata/taxi/yellow_tripdata_2015-01.csv new file mode 100644 index 0000000..27209a8 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-01.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-01-15 19:05:39,2015-01-15 19:23:42,1,1.59,-73.993896484375,40.750110626220703,1,N,-73.974784851074219,40.750617980957031,1,12,1,0.5,3.25,0,0.3,17.05 +1,2015-01-10 20:33:38,2015-01-10 20:53:28,1,3.30,-74.00164794921875,40.7242431640625,1,N,-73.994415283203125,40.759109497070313,1,14.5,0.5,0.5,2,0,0.3,17.8 +1,2015-01-10 20:33:38,2015-01-10 20:43:41,1,1.80,-73.963340759277344,40.802787780761719,1,N,-73.951820373535156,40.824413299560547,2,9.5,0.5,0.5,0,0,0.3,10.8 +1,2015-01-10 20:33:39,2015-01-10 20:35:31,1,.50,-74.009086608886719,40.713817596435547,1,N,-74.004325866699219,40.719985961914063,2,3.5,0.5,0.5,0,0,0.3,4.8 +1,2015-01-10 20:33:39,2015-01-10 20:52:58,1,3.00,-73.971176147460938,40.762428283691406,1,N,-74.004180908203125,40.742652893066406,2,15,0.5,0.5,0,0,0.3,16.3 +1,2015-01-10 20:33:39,2015-01-10 20:53:52,1,9.00,-73.874374389648438,40.7740478515625,1,N,-73.986976623535156,40.758193969726563,1,27,0.5,0.5,6.7,5.33,0.3,40.33 +1,2015-01-10 20:33:39,2015-01-10 20:58:31,1,2.20,-73.9832763671875,40.726009368896484,1,N,-73.992469787597656,40.7496337890625,2,14,0.5,0.5,0,0,0.3,15.3 +1,2015-01-10 20:33:39,2015-01-10 20:42:20,3,.80,-74.002662658691406,40.734142303466797,1,N,-73.995010375976563,40.726325988769531,1,7,0.5,0.5,1.66,0,0.3,9.96 +1,2015-01-10 20:33:39,2015-01-10 
21:11:35,3,18.20,-73.783042907714844,40.644355773925781,2,N,-73.987594604492187,40.759357452392578,2,52,0,0.5,0,5.33,0.3,58.13 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-02.csv b/csv/testdata/taxi/yellow_tripdata_2015-02.csv new file mode 100644 index 0000000..5903d0e --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-02.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-02-08 11:33:46,2015-02-08 11:37:45,1,.56,-73.949905395507813,40.717437744140625,1,N,-73.95001220703125,40.724010467529297,1,4.5,0,0.5,1.06,0,0.3,6.36 +2,2015-02-28 23:52:39,2015-03-01 00:00:03,1,1.24,-73.983497619628906,40.756103515625,1,N,-73.989540100097656,40.767860412597656,1,6.5,0.5,0.5,1.56,0,0.3,9.36 +2,2015-02-28 23:52:39,2015-03-01 00:03:25,1,2.07,-74.003189086914063,40.733058929443359,1,N,-73.992362976074219,40.715099334716797,1,10,0.5,0.5,2.26,0,0.3,13.56 +2,2015-02-28 23:52:39,2015-03-01 00:04:58,1,2.29,-73.958549499511719,40.760478973388672,1,N,-73.979248046875,40.736660003662109,1,10.5,0.5,0.5,2.36,0,0.3,14.16 +2,2015-02-28 23:52:39,2015-03-01 00:00:16,1,1.36,-73.974052429199219,40.751335144042969,1,N,-73.981788635253906,40.763149261474609,1,7,0.5,0.5,1,0,0.3,9.3 +2,2015-02-28 23:52:40,2015-02-28 23:57:53,1,.96,-73.981010437011719,40.729751586914063,1,N,-73.975822448730469,40.74066162109375,1,6,0.5,0.5,1.46,0,0.3,8.76 +2,2015-02-28 23:52:40,2015-03-01 00:02:30,1,2.58,-73.9912109375,40.740573883056641,1,N,-73.982345581054687,40.768836975097656,2,10,0.5,0.5,0,0,0.3,11.3 +2,2015-02-28 23:52:40,2015-02-28 23:58:46,6,1.21,-73.980613708496094,40.733860015869141,1,N,-73.992637634277344,40.744529724121094,1,6,0.5,0.5,1.46,0,0.3,8.76 +2,2015-02-28 23:52:41,2015-03-01 00:07:55,1,4.95,-73.946861267089844,40.785118103027344,1,N,-73.986915588378906,40.732448577880859,2,17,0.5,0.5,0,0,0.3,18.3 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-03.csv b/csv/testdata/taxi/yellow_tripdata_2015-03.csv new file mode 100644 index 0000000..fa787a2 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-03.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2015-03-06 08:02:31,2015-03-06 08:09:55,1,1.20,-73.990211486816406,40.750968933105469,1,N,-73.987892150878906,40.738037109375,2,7,0,0.5,0,0,0.3,7.8 +1,2015-03-06 08:02:31,2015-03-06 08:15:23,1,3.20,-73.935188293457031,40.80072021484375,1,N,-73.952552795410156,40.765373229980469,2,11.5,0,0.5,0,0,0.3,12.3 +1,2015-03-06 08:02:31,2015-03-06 08:12:27,1,1.10,-73.963752746582031,40.767936706542969,1,N,-73.956947326660156,40.780269622802734,2,8,0,0.5,0,0,0.3,8.8 +1,2015-03-06 08:02:31,2015-03-06 08:09:09,1,.80,-73.997177124023438,40.742168426513672,1,N,-74.008064270019531,40.739280700683594,1,6,0,0.5,1,0,0.3,7.8 +1,2015-03-06 08:02:32,2015-03-06 08:19:37,1,2.70,-74.006843566894531,40.730266571044922,1,N,-73.976860046386719,40.75067138671875,1,13,0,0.5,2.75,0,0.3,16.55 +1,2015-03-06 08:02:32,2015-03-06 08:14:35,1,2.90,-73.954597473144531,40.765613555908203,1,N,-73.976516723632813,40.739612579345703,1,12.5,0,0.5,2.65,0,0.3,15.95 +1,2015-03-06 08:02:33,2015-03-06 
08:16:40,1,6.10,-73.95404052734375,40.764015197753906,1,N,-74.012603759765625,40.701835632324219,1,19,0,0.5,0,0,0.3,19.8 +1,2015-03-06 08:02:34,2015-03-06 08:16:16,3,2.20,-73.959190368652344,40.814949035644531,1,N,-73.975868225097656,40.786441802978516,1,11,0,0.5,2.36,0,0.3,14.16 +1,2015-03-06 08:02:34,2015-03-06 08:14:45,1,3.80,-73.889595031738281,40.747310638427734,1,N,-73.861953735351563,40.768550872802734,2,14,0,0.5,0,0,0.3,14.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-04.csv b/csv/testdata/taxi/yellow_tripdata_2015-04.csv new file mode 100644 index 0000000..8b394b4 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-04.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-04-27 15:16:29,2015-04-27 15:26:14,1,1.05,-73.988868713378906,40.758396148681641,1,N,-73.984779357910156,40.748271942138672,2,7.5,0,0.5,0,0,0.3,8.3 +2,2015-04-27 15:16:29,2015-04-27 15:25:03,1,2.67,-74.008567810058594,40.704479217529297,1,N,-73.978271484375,40.720542907714844,1,10.5,0,0.5,1,0,0.3,12.3 +2,2015-04-27 15:16:29,2015-04-27 15:31:48,1,2.89,-73.994117736816406,40.756431579589844,1,N,-73.993263244628906,40.724441528320313,1,12.5,0,0.5,3.32,0,0.3,16.62 +2,2015-04-27 15:16:29,2015-04-27 15:25:26,1,.94,-73.974388122558594,40.761665344238281,1,N,-73.985542297363281,40.756168365478516,2,7.5,0,0.5,0,0,0.3,8.3 +2,2015-04-27 15:16:29,2015-04-27 15:45:38,1,2.30,-73.972183227539063,40.762725830078125,1,N,-73.9908447265625,40.75067138671875,2,17.5,0,0.5,0,0,0.3,18.3 +2,2015-04-27 15:16:30,2015-04-27 15:34:43,5,2.50,-73.972091674804688,40.763172149658203,1,N,-73.948951721191406,40.794448852539063,1,13,0,0.5,3,0,0.3,16.8 +2,2015-04-27 15:16:30,2015-04-27 15:53:44,1,9.57,-73.863868713378906,40.770084381103516,1,N,-73.986579895019531,40.758308410644531,1,34,0,0.5,10.08,5.54,0.3,50.42 +2,2015-04-27 15:16:30,2015-04-27 15:39:29,1,8.39,-73.870979309082031,40.773769378662109,1,N,-73.972610473632812,40.749820709228516,1,26,0,0.5,5,5.54,0.3,37.34 +2,2015-04-27 15:16:30,2015-04-27 15:26:35,6,1.27,-74.002853393554687,40.760486602783203,1,N,-73.9854736328125,40.759105682373047,1,8,0,0.5,1.76,0,0.3,10.56 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-05.csv b/csv/testdata/taxi/yellow_tripdata_2015-05.csv new file mode 100644 index 0000000..e00657a --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-05.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2015-05-05 23:37:40,2015-05-05 23:45:41,1,2.00,-74.001678466796875,40.739311218261719,1,N,-73.978294372558594,40.752109527587891,2,8.5,0.5,0.5,0,0,0.3,9.8 +2,2015-05-05 23:37:40,2015-05-05 23:40:36,1,.54,-73.930839538574219,40.744789123535156,1,N,-73.937515258789062,40.749359130859375,2,4.5,0.5,0.5,0,0,0.3,5.8 +2,2015-05-05 23:37:40,2015-05-05 23:44:03,3,2.10,-74.001411437988281,40.731086730957031,1,N,-73.981674194335937,40.758281707763672,2,8,0.5,0.5,0,0,0.3,9.3 +2,2015-05-05 23:37:40,2015-05-06 00:14:01,6,10.93,-73.970672607421875,40.758560180664063,1,N,-73.933761596679688,40.670543670654297,1,36,0.5,0.5,9.32,0,0.3,46.62 +2,2015-05-05 23:37:40,2015-05-05 
23:46:03,5,.93,-73.986732482910156,40.755878448486328,1,N,-73.990959167480469,40.749980926513672,1,7,0.5,0.5,2.49,0,0.3,10.79 +1,2015-05-05 23:37:41,2015-05-05 23:50:34,3,2.40,-73.989326477050781,40.756599426269531,1,N,-73.979583740234375,40.735363006591797,2,11,0.5,0.5,0,0,0.3,12.3 +1,2015-05-05 23:37:42,2015-05-05 23:41:51,1,1.30,-73.955543518066406,40.776721954345703,1,N,-73.941337585449219,40.788120269775391,2,6,0.5,0.5,0,0,0.3,7.3 +2,2015-05-05 23:37:42,2015-05-05 23:53:25,1,3.70,-73.992210388183594,40.72918701171875,1,N,-73.960395812988281,40.775630950927734,1,15,0.5,0.5,3.26,0,0.3,19.56 +2,2015-05-05 23:37:42,2015-05-05 23:52:53,2,7.84,-73.987739562988281,40.740859985351563,1,N,-73.8731689453125,40.723091125488281,1,23.5,0.5,0.5,6.07,5.54,0.3,36.41 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-06.csv b/csv/testdata/taxi/yellow_tripdata_2015-06.csv new file mode 100644 index 0000000..ae2dda7 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-06.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RateCodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-06-02 11:19:29,2015-06-02 11:47:52,1,1.63,-73.954429626464844,40.764141082763672,1,N,-73.974754333496094,40.754093170166016,2,17,0,0.5,0,0,0.3,17.8 +2,2015-06-02 11:19:30,2015-06-02 11:27:56,1,.46,-73.971443176269531,40.758941650390625,1,N,-73.978538513183594,40.761909484863281,1,6.5,0,0.5,1,0,0.3,8.3 +2,2015-06-02 11:19:31,2015-06-02 11:30:30,1,.87,-73.978111267089844,40.738433837890625,1,N,-73.990272521972656,40.745437622070313,1,8,0,0.5,2.2,0,0.3,11 +2,2015-06-02 11:19:31,2015-06-02 11:39:02,1,2.13,-73.945892333984375,40.773529052734375,1,N,-73.971527099609375,40.760330200195312,1,13.5,0,0.5,2.86,0,0.3,17.16 +1,2015-06-02 11:19:32,2015-06-02 11:32:49,1,1.40,-73.979087829589844,40.776771545410156,1,N,-73.982162475585937,40.758998870849609,2,9.5,0,0.5,0,0,0.3,10.3 +1,2015-06-02 11:19:33,2015-06-02 11:28:48,1,1.40,-73.94464111328125,40.779464721679688,1,N,-73.96136474609375,40.771560668945313,1,8,0,0.5,1.75,0,0.3,10.55 +1,2015-06-02 11:19:34,2015-06-02 11:38:46,1,1.80,-73.992866516113281,40.748210906982422,1,N,-73.969772338867187,40.748458862304687,1,12.5,0,0.5,3,0,0.3,16.3 +1,2015-06-02 11:19:35,2015-06-02 12:36:46,4,11.90,-73.863075256347656,40.769252777099609,1,N,-73.986709594726563,40.761306762695313,1,52.5,0,0.5,15,5.54,0.3,73.84 +2,2015-06-02 11:19:36,2015-06-02 11:45:19,1,1.27,-73.991432189941406,40.749305725097656,1,N,-73.985061645507813,40.759525299072266,2,15,0,0.5,0,0,0.3,15.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-07.csv b/csv/testdata/taxi/yellow_tripdata_2015-07.csv new file mode 100644 index 0000000..d392719 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-07.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2015-07-01 00:00:00,2015-07-01 00:15:26,1,3.50,-73.994155883789063,40.751125335693359,1,N,-73.976821899414063,40.788566589355469,2,14,0.5,0.5,0,0,0.3,15.3 +1,2015-07-01 00:00:00,2015-07-01 00:22:22,1,3.90,-73.984657287597656,40.768486022949219,1,N,-74.000129699707031,40.734897613525391,2,17,0.5,0.5,0,0,0.3,18.3 +1,2015-07-01 
00:00:00,2015-07-01 00:07:42,1,2.30,-73.978889465332031,40.762287139892578,1,N,-74.004219055175781,40.752532958984375,2,9,0.5,0.5,0,0,0.3,10.3 +1,2015-07-01 00:00:00,2015-07-01 00:39:37,1,9.20,-73.992790222167969,40.742759704589844,1,N,-73.971511840820313,40.637153625488281,1,33,0.5,0.5,8.55,0,0.3,42.85 +1,2015-07-01 00:00:00,2015-07-01 00:05:34,1,1.10,-73.912429809570313,40.769809722900391,1,N,-73.920333862304688,40.757442474365234,1,6,0.5,0.5,2,0,0.3,9.3 +1,2015-07-01 00:00:00,2015-07-01 00:06:46,2,1.00,-73.959159851074219,40.773429870605469,1,N,-73.969352722167969,40.769245147705078,2,6.5,0.5,0.5,0,0,0.3,7.8 +2,2015-07-01 00:00:00,2015-07-01 00:36:57,2,19.12,-73.789459228515625,40.647258758544922,1,N,-73.974937438964844,40.649429321289063,1,54.5,0.5,0.5,13.95,0,0.3,69.75 +2,2015-07-01 00:00:00,2015-07-01 06:30:15,1,.00,0,0,1,N,-73.9859619140625,40.766399383544922,2,2.5,0,0.5,0,0,0,3 +2,2015-07-01 00:00:00,2015-07-01 11:27:07,1,2.58,-73.998931884765625,40.744678497314453,1,N,-73.982215881347656,40.776885986328125,1,15,0.3,0.5,1,0,0.3,17.1 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-08.csv b/csv/testdata/taxi/yellow_tripdata_2015-08.csv new file mode 100644 index 0000000..a9efdc0 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-08.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-08-01 00:00:15,2015-08-01 00:36:21,1,7.22,-73.999809265136719,40.743339538574219,1,N,-73.942848205566406,40.806621551513672,2,29.5,0.5,0.5,0,0,0.3,30.8 +1,2015-08-01 00:00:16,2015-08-01 00:14:52,1,2.30,-73.977043151855469,40.77490234375,1,N,-73.978256225585937,40.749862670898438,1,12,0.5,0.5,2.93,0,0.3,16.23 +1,2015-08-01 00:00:16,2015-08-01 00:06:30,1,1.50,-73.959121704101563,40.775127410888672,1,N,-73.980392456054688,40.782314300537109,1,7,0.5,0.5,1.65,0,0.3,9.95 +1,2015-08-01 00:00:16,2015-08-01 00:06:18,1,.90,-73.97662353515625,40.780746459960937,1,N,-73.970558166503906,40.788845062255859,1,6,0.5,0.5,1.45,0,0.3,8.75 +2,2015-08-01 00:00:16,2015-08-01 00:16:28,1,2.44,-73.978591918945313,40.785919189453125,1,N,-73.997352600097656,40.756301879882813,1,13,0.5,0.5,2,0,0.3,16.3 +2,2015-08-01 00:00:16,2015-08-01 00:13:17,1,3.36,-73.97637939453125,40.785888671875,1,N,-73.942413330078125,40.822090148925781,1,13,0.5,0.5,3.58,0,0.3,17.88 +2,2015-08-01 00:00:16,2015-08-01 00:14:00,2,2.34,-73.986213684082031,40.760871887207031,1,N,-73.956924438476563,40.771560668945313,1,11.5,0.5,0.5,1,0,0.3,13.8 +2,2015-08-01 00:00:16,2015-08-01 00:25:25,1,10.19,-73.78997802734375,40.644058227539063,1,N,-73.931221008300781,40.675880432128906,2,31.5,0.5,0.5,0,0,0.3,32.8 +1,2015-08-01 00:00:17,2015-08-01 00:26:59,2,3.30,-73.993743896484375,40.727382659912109,1,N,-73.998161315917969,40.76409912109375,1,18,0.5,0.5,2,0,0.3,21.3 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-09.csv b/csv/testdata/taxi/yellow_tripdata_2015-09.csv new file mode 100644 index 0000000..8e64faa --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-09.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-09-01 00:05:55,2015-09-01 
00:31:02,1,17.45,-73.791351318359375,40.646690368652344,1,N,-73.857437133789063,40.848262786865234,1,47.5,0.5,0.5,5,5.54,0.3,59.34 +1,2015-09-01 00:05:56,2015-09-01 00:07:42,1,.40,-73.978935241699219,40.752853393554687,1,N,-73.986061096191406,40.755397796630859,2,3.5,0.5,0.5,0,0,0.3,4.8 +1,2015-09-01 00:05:57,2015-09-01 00:16:48,1,1.50,-73.990890502929688,40.723972320556641,1,N,-74.009559631347656,40.728916168212891,2,9,0.5,0.5,0,0,0.3,10.3 +1,2015-09-01 00:05:57,2015-09-01 00:05:57,1,.00,-73.932655334472656,40.803768157958984,1,N,0,0,2,4,0.5,0.5,0,0,0.3,5.3 +1,2015-09-01 00:05:57,2015-09-01 00:30:32,1,7.50,-73.987777709960937,40.738193511962891,1,N,-73.944755554199219,40.828166961669922,1,23.5,0.5,0.5,4.95,0,0.3,29.75 +2,2015-09-01 00:05:57,2015-09-01 00:12:41,3,1.61,-73.990257263183594,40.737289428710938,1,N,-73.995086669921875,40.744850158691406,2,7,0.5,0.5,0,0,0.3,8.3 +2,2015-09-01 00:05:57,2015-09-01 00:08:56,2,.69,-73.943656921386719,40.820594787597656,1,N,-73.937637329101563,40.828693389892578,2,4.5,0.5,0.5,0,0,0.3,5.8 +1,2015-09-01 00:05:58,2015-09-01 00:10:23,1,.80,-74.00408935546875,40.751827239990234,1,N,-74.003807067871094,40.742019653320312,1,5.5,0.5,0.5,1,0,0.3,7.8 +1,2015-09-01 00:05:58,2015-09-01 00:32:10,3,5.80,-73.987030029296875,40.766490936279297,1,N,-73.910835266113281,40.771129608154297,1,22,0.5,0.5,4.65,0,0.3,27.95 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-10.csv b/csv/testdata/taxi/yellow_tripdata_2015-10.csv new file mode 100644 index 0000000..af823a9 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-10.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2015-10-01 00:00:00,2015-10-01 00:05:48,1,1.10,-73.935516357421875,40.761238098144531,1,N,-73.944351196289063,40.75457763671875,2,6,0.5,0.5,0,0,0.3,7.3 +2,2015-10-01 00:00:00,2015-10-01 00:00:00,1,7.68,-73.989936828613281,40.743438720703125,1,N,-73.986686706542969,40.689128875732422,2,27.5,0,0.5,0,0,0.3,28.3 +2,2015-10-01 00:00:00,2015-10-01 00:00:00,2,2.53,-73.987327575683594,40.720020294189453,1,N,-73.99908447265625,40.744380950927734,1,12.5,0,0.5,2,0,0.3,15.3 +2,2015-10-01 00:00:00,2015-10-01 00:00:00,0,1.20,-73.953758239746094,40.743385314941406,5,N,-73.930007934570313,40.736621856689453,2,25.26,0,0.5,0,0,0.3,26.06 +1,2015-10-01 00:00:01,2015-10-01 00:16:19,1,3.80,-73.984016418457031,40.755222320556641,1,N,-73.959869384765625,40.801322937011719,1,15.5,0.5,0.5,3,0,0.3,19.8 +1,2015-10-01 00:00:01,2015-10-01 00:13:41,1,3.10,-73.975296020507812,40.751396179199219,1,N,-73.970924377441406,40.785984039306641,1,12.5,0.5,0.5,1,0,0.3,14.8 +1,2015-10-01 00:00:01,2015-10-01 00:21:23,1,4.50,-73.997077941894531,40.7222900390625,1,N,-73.960472106933594,40.761516571044922,2,16.5,0.5,0.5,0,0,0.3,17.8 +2,2015-10-01 00:00:01,2015-10-01 00:05:50,1,.84,-73.998710632324219,40.734756469726563,1,N,-74.003082275390625,40.742546081542969,1,6,0.5,0.5,0.7,0,0.3,8 +2,2015-10-01 00:00:01,2015-10-01 00:14:39,6,8.02,-73.982109069824219,40.773761749267578,1,N,-73.924179077148438,40.864158630371094,1,23.5,0.5,0.5,5.2,0,0.3,30 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-11.csv b/csv/testdata/taxi/yellow_tripdata_2015-11.csv new file mode 100644 index 0000000..0a513c1 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-11.csv @@ -0,0 +1,10 @@ 
+VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2015-11-01 00:00:00,2015-11-01 00:26:00,1,2.90,-73.983993530273437,40.729499816894531,1,N,-73.984550476074219,40.759170532226563,2,17.5,0.5,0.5,0,0,0.3,18.8 +1,2015-11-01 00:00:00,2015-11-01 00:08:24,1,.80,-73.9840087890625,40.765289306640625,1,N,-73.990432739257813,40.759326934814453,1,7,0.5,0.5,1.65,0,0.3,9.95 +2,2015-11-01 00:00:00,2015-11-01 00:12:17,1,.74,-74.002616882324219,40.734058380126953,1,N,-74.004364013671875,40.7406005859375,2,8.5,0.5,0.5,0,0,0.3,9.8 +2,2015-11-01 00:00:00,2015-11-01 00:25:08,1,2.89,-73.987312316894531,40.718711853027344,1,N,-73.9915771484375,40.749118804931641,1,16.5,0.5,0.5,0,0,0.3,17.8 +2,2015-11-01 00:00:00,2015-11-01 00:19:44,1,5.07,-73.949928283691406,40.772190093994141,1,N,-73.988121032714844,40.722919464111328,2,18,0.5,0.5,0,0,0.3,19.3 +2,2015-11-01 00:00:00,2015-11-01 00:26:30,1,5.21,-73.948051452636719,40.778541564941406,1,N,-73.983207702636719,40.726520538330078,2,22,0.5,0.5,0,0,0.3,23.3 +2,2015-11-01 00:00:00,2015-11-01 00:00:00,1,14.83,-73.957572937011719,40.774200439453125,1,N,-73.939620971679687,40.751338958740234,1,46,0.5,0.5,7.93,5.54,0.3,60.77 +1,2015-11-01 00:00:01,2015-11-01 00:18:47,1,1.50,-73.989532470703125,40.762493133544922,1,N,-74.003860473632813,40.742885589599609,1,12.5,0.5,0.5,0,0,0.3,13.8 +1,2015-11-01 00:00:01,2015-11-01 00:42:36,2,5.90,-73.981918334960938,40.743419647216797,1,N,-73.97198486328125,40.683635711669922,1,28.5,0.5,0.5,4,0,0.3,33.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2015-12.csv b/csv/testdata/taxi/yellow_tripdata_2015-12.csv new file mode 100644 index 0000000..558038a --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2015-12.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2015-12-01 00:00:00,2015-12-01 00:05:16,5,.96,-73.979942321777344,40.765380859375,1,N,-73.96630859375,40.763088226318359,1,5.5,0.5,0.5,1,0,0.3,7.8 +2,2015-12-01 00:00:00,2015-12-01 00:00:00,2,2.69,-73.972335815429687,40.762378692626953,1,N,-73.993629455566406,40.745998382568359,1,21.5,0,0.5,3.34,0,0.3,25.64 +2,2015-12-01 00:00:00,2015-12-01 00:00:00,1,2.62,-73.968849182128906,40.764530181884766,1,N,-73.97454833984375,40.791641235351563,1,17,0,0.5,3.56,0,0.3,21.36 +1,2015-12-01 00:00:01,2015-12-01 00:05:56,1,1.20,-73.993934631347656,40.741683959960937,1,N,-73.997665405273438,40.747467041015625,1,6.5,0.5,0.5,0.2,0,0.3,8 +1,2015-12-01 00:00:01,2015-12-01 00:09:28,2,3.00,-73.988922119140625,40.72698974609375,1,N,-73.975593566894531,40.696868896484375,2,11,0.5,0.5,0,0,0.3,12.3 +1,2015-12-01 00:00:02,2015-12-01 00:16:12,1,6.30,-73.974082946777344,40.762912750244141,1,N,-74.012802124023438,40.70220947265625,1,20.5,0.5,0.5,4.35,0,0.3,26.15 +2,2015-12-01 00:00:02,2015-12-01 00:02:49,6,.63,-73.968315124511719,40.755329132080078,1,N,-73.962081909179688,40.758914947509766,1,4,0.5,0.5,1.06,0,0.3,6.36 +2,2015-12-01 00:00:02,2015-12-01 00:08:06,2,1.91,-73.994209289550781,40.746101379394531,1,N,-74.004249572753906,40.721809387207031,1,8,0.5,0.5,1.86,0,0.3,11.16 +2,2015-12-01 00:00:02,2015-12-01 
00:17:11,1,4.50,-74.006759643554687,40.718906402587891,1,N,-73.989692687988281,40.772853851318359,1,16.5,0.5,0.5,3.56,0,0.3,21.36 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-01.csv b/csv/testdata/taxi/yellow_tripdata_2016-01.csv new file mode 100644 index 0000000..24650fc --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-01.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2016-01-01 00:00:00,2016-01-01 00:00:00,2,1.10,-73.990371704101563,40.734695434570313,1,N,-73.981842041015625,40.732406616210937,2,7.5,0.5,0.5,0,0,0.3,8.8 +2,2016-01-01 00:00:00,2016-01-01 00:00:00,5,4.90,-73.980781555175781,40.729911804199219,1,N,-73.944473266601563,40.716678619384766,1,18,0.5,0.5,0,0,0.3,19.3 +2,2016-01-01 00:00:00,2016-01-01 00:00:00,1,10.54,-73.984550476074219,40.6795654296875,1,N,-73.950271606445313,40.788925170898438,1,33,0.5,0.5,0,0,0.3,34.3 +2,2016-01-01 00:00:00,2016-01-01 00:00:00,1,4.75,-73.99346923828125,40.718990325927734,1,N,-73.962242126464844,40.657333374023437,2,16.5,0,0.5,0,0,0.3,17.3 +2,2016-01-01 00:00:00,2016-01-01 00:00:00,3,1.76,-73.960624694824219,40.781330108642578,1,N,-73.977264404296875,40.758514404296875,2,8,0,0.5,0,0,0.3,8.8 +2,2016-01-01 00:00:00,2016-01-01 00:18:30,2,5.52,-73.980117797851563,40.743049621582031,1,N,-73.913490295410156,40.763141632080078,2,19,0.5,0.5,0,0,0.3,20.3 +2,2016-01-01 00:00:00,2016-01-01 00:26:45,2,7.45,-73.994056701660156,40.719989776611328,1,N,-73.966361999511719,40.789871215820313,2,26,0.5,0.5,0,0,0.3,27.3 +1,2016-01-01 00:00:01,2016-01-01 00:11:55,1,1.20,-73.979423522949219,40.744613647460938,1,N,-73.992034912109375,40.753944396972656,2,9,0.5,0.5,0,0,0.3,10.3 +1,2016-01-01 00:00:02,2016-01-01 00:11:14,1,6.00,-73.947151184082031,40.791046142578125,1,N,-73.920768737792969,40.865577697753906,2,18,0.5,0.5,0,0,0.3,19.3 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-02.csv b/csv/testdata/taxi/yellow_tripdata_2016-02.csv new file mode 100644 index 0000000..590bc74 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-02.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2016-02-25 17:24:20,2016-02-25 17:27:20,2,.70,-73.947250366210937,40.763771057128906,1,N,-73.992012023925781,40.735389709472656,2,5,0,0.5,0,0,0.3,5.8 +2,2016-02-25 23:10:50,2016-02-25 23:31:50,2,5.52,-73.983016967773437,40.750991821289063,1,N,-73.98858642578125,40.758838653564453,2,20,0.5,0.5,0,0,0.3,21.3 +2,2016-02-01 00:00:01,2016-02-01 00:10:52,6,1.99,-73.992340087890625,40.758201599121094,1,N,-73.96435546875,40.757976531982422,1,9.5,0.5,0.5,0.7,0,0.3,11.5 +1,2016-02-01 00:00:04,2016-02-01 00:05:16,1,1.50,-73.981452941894531,40.749721527099609,1,N,-73.982322692871094,40.763984680175781,2,6.5,0.5,0.5,0,0,0.3,7.8 +2,2016-02-01 00:00:05,2016-02-01 00:20:59,1,5.60,-74.000602722167969,40.729755401611328,1,N,-73.951324462890625,40.669834136962891,1,20,0.5,0.5,4,0,0.3,25.3 +2,2016-02-01 00:00:06,2016-02-01 00:15:01,1,4.69,-74.005104064941406,40.719005584716797,1,N,-73.947090148925781,40.688884735107422,2,16,0.5,0.5,0,0,0.3,17.3 +2,2016-02-01 00:00:09,2016-02-01 
00:05:35,1,1.45,-73.986000061035156,40.76214599609375,1,N,-73.990608215332031,40.746208190917969,1,6.5,0.5,0.5,1.56,0,0.3,9.36 +2,2016-02-01 00:00:19,2016-02-01 00:06:35,1,1.10,-73.966331481933594,40.773422241210938,1,N,-73.956108093261719,40.781421661376953,2,6.5,0.5,0.5,0,0,0.3,7.8 +2,2016-02-01 00:00:21,2016-02-01 00:06:32,1,1.96,-73.976554870605469,40.765697479248047,1,N,-73.978973388671875,40.744380950927734,1,7.5,0.5,0.5,1,0,0.3,9.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-03.csv b/csv/testdata/taxi/yellow_tripdata_2016-03.csv new file mode 100644 index 0000000..47bcc87 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-03.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2016-03-01 00:00:00,2016-03-01 00:07:55,1,2.50,-73.97674560546875,40.765151977539062,1,N,-74.004264831542969,40.746128082275391,1,9,0.5,0.5,2.05,0,0.3,12.35 +1,2016-03-01 00:00:00,2016-03-01 00:11:06,1,2.90,-73.983482360839844,40.767925262451172,1,N,-74.005943298339844,40.733165740966797,1,11,0.5,0.5,3.05,0,0.3,15.35 +2,2016-03-01 00:00:00,2016-03-01 00:31:06,2,19.98,-73.782020568847656,40.644809722900391,1,N,-73.974540710449219,40.675769805908203,1,54.5,0.5,0.5,8,0,0.3,63.8 +2,2016-03-01 00:00:00,2016-03-01 00:00:00,3,10.78,-73.863418579101562,40.769813537597656,1,N,-73.969650268554688,40.757766723632812,1,31.5,0,0.5,3.78,5.54,0.3,41.62 +2,2016-03-01 00:00:00,2016-03-01 00:00:00,5,30.43,-73.97174072265625,40.792182922363281,3,N,-74.177169799804688,40.695053100585937,1,98,0,0,0,15.5,0.3,113.8 +2,2016-03-01 00:00:00,2016-03-01 00:00:00,5,5.92,-74.017196655273438,40.70538330078125,1,N,-73.978073120117187,40.755786895751953,1,23.5,1,0.5,5.06,0,0.3,30.36 +2,2016-03-01 00:00:00,2016-03-01 00:00:00,6,5.72,-73.994583129882813,40.727848052978516,1,N,0,0,2,23,0.5,0.5,0,0,0.3,24.3 +1,2016-03-01 00:00:01,2016-03-01 00:16:04,1,6.20,-73.788772583007812,40.647758483886719,1,N,-73.829208374023438,40.712345123291016,3,20.5,0.5,0.5,0,0,0.3,21.8 +1,2016-03-01 00:00:01,2016-03-01 00:05:00,1,.70,-73.958221435546875,40.764640808105469,1,N,-73.9678955078125,40.762901306152344,1,5.5,0.5,0.5,2,0,0.3,8.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-04.csv b/csv/testdata/taxi/yellow_tripdata_2016-04.csv new file mode 100644 index 0000000..c70ea7e --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-04.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2016-04-01 00:00:00,2016-04-01 00:01:59,1,.50,-73.976882934570313,40.758495330810547,1,N,-73.977668762207031,40.753902435302734,2,3.5,0.5,0.5,0,0,0.3,4.8 +1,2016-04-01 00:00:00,2016-04-01 00:12:07,2,2.20,-73.985206604003906,40.757293701171875,1,N,-73.989288330078125,40.732658386230469,1,10,0.5,0.5,2.25,0,0.3,13.55 +2,2016-04-01 00:00:00,2016-04-01 00:10:41,2,.96,-73.979202270507812,40.758869171142578,1,N,-73.990676879882813,40.751319885253906,2,8.5,0.5,0.5,0,0,0.3,9.8 +2,2016-04-01 00:00:00,2016-04-01 00:10:30,5,1.54,-73.984855651855469,40.767723083496094,1,N,-73.990829467773437,40.751186370849609,1,8.5,0.5,0.5,1.96,0,0.3,11.76 +2,2016-04-01 00:00:00,2016-04-01 
00:00:00,2,10.45,-73.863739013671875,40.76947021484375,1,N,-73.976814270019531,40.775283813476563,1,34,0,0.5,8.07,5.54,0.3,48.41 +1,2016-04-01 00:00:01,2016-04-01 00:15:04,1,3.50,-73.973373413085937,40.757076263427734,1,N,-73.9334716796875,40.766304016113281,1,14,0.5,0.5,3,0,0.3,18.3 +1,2016-04-01 00:00:01,2016-04-01 00:08:10,1,4.40,-73.790092468261719,40.647083282470703,1,N,-73.793914794921875,40.667373657226563,2,13.5,0.5,0.5,0,0,0.3,14.8 +1,2016-04-01 00:00:01,2016-04-01 00:03:46,1,.60,-73.988899230957031,40.745426177978516,1,N,-73.9918212890625,40.738445281982422,1,4.5,0.5,0.5,1.15,0,0.3,6.95 +2,2016-04-01 00:00:01,2016-04-01 00:03:27,2,.81,-73.985275268554688,40.747364044189453,1,N,-73.98565673828125,40.755081176757813,1,4.5,0.5,0.5,1,0,0.3,6.8 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-05.csv b/csv/testdata/taxi/yellow_tripdata_2016-05.csv new file mode 100644 index 0000000..c0c5995 --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-05.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +1,2016-05-01 00:00:00,2016-05-01 00:17:31,1,3.60,-73.98590087890625,40.768039703369141,1,N,-73.983985900878906,40.730098724365234,1,15,0.5,0.5,1.5,0,0.3,17.8 +2,2016-05-01 00:00:00,2016-05-01 00:07:31,1,1.68,-73.9915771484375,40.7447509765625,1,N,-73.975700378417969,40.765468597412109,1,7.5,0.5,0.5,0.88,0,0.3,9.68 +2,2016-05-01 00:00:00,2016-05-01 00:07:01,6,1.09,-73.993072509765625,40.741573333740234,1,N,-73.980995178222656,40.744632720947266,1,6.5,0.5,0.5,1.56,0,0.3,9.36 +2,2016-05-01 00:00:00,2016-05-01 00:19:47,1,4.21,-73.991943359375,40.684600830078125,1,N,-74.00225830078125,40.733001708984375,1,17,0.5,0.5,3.66,0,0.3,21.96 +2,2016-05-01 00:00:00,2016-05-01 00:06:39,1,.56,-74.005279541015625,40.740192413330078,1,N,-73.99749755859375,40.737564086914063,1,6,0.5,0.5,1.46,0,0.3,8.76 +2,2016-05-01 00:00:00,2016-05-01 00:05:19,2,.63,-73.979293823242187,40.755764007568359,1,N,-73.988014221191406,40.758468627929687,1,5,0.5,0.5,0,0,0.3,6.3 +2,2016-05-01 00:00:00,2016-05-01 00:15:43,1,6.68,-73.981544494628906,40.780738830566406,1,N,-73.93780517578125,40.855342864990234,1,20.5,0.5,0.5,2,0,0.3,23.8 +2,2016-05-01 00:00:00,2016-05-01 00:06:31,1,1.48,-73.987197875976563,40.750507354736328,1,N,-73.980796813964844,40.767826080322266,1,7,0.5,0.5,1.66,0,0.3,9.96 +2,2016-05-01 00:00:00,2016-05-01 00:00:00,4,1.59,-74.001609802246094,40.740989685058594,1,N,-73.9810791015625,40.730674743652344,1,8,0.5,0.5,0,0,0.3,9.3 diff --git a/csv/testdata/taxi/yellow_tripdata_2016-06.csv b/csv/testdata/taxi/yellow_tripdata_2016-06.csv new file mode 100644 index 0000000..4988b2e --- /dev/null +++ b/csv/testdata/taxi/yellow_tripdata_2016-06.csv @@ -0,0 +1,10 @@ +VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount +2,2016-06-09 21:06:36,2016-06-09 21:13:08,2,.79,-73.983360290527344,40.760936737060547,1,N,-73.977462768554688,40.753978729248047,2,6,0.5,0.5,0,0,0.3,7.3 +2,2016-06-09 21:06:36,2016-06-09 21:35:11,1,5.22,-73.981719970703125,40.736667633056641,1,N,-73.981636047363281,40.670242309570313,1,22,0.5,0.5,4,0,0.3,27.3 +2,2016-06-09 
21:06:36,2016-06-09 21:13:10,1,1.26,-73.994316101074219,40.751071929931641,1,N,-74.004234313964844,40.742168426513672,1,6.5,0.5,0.5,1.56,0,0.3,9.36 +2,2016-06-09 21:06:36,2016-06-09 21:36:10,1,7.39,-73.98236083984375,40.773891448974609,1,N,-73.929466247558594,40.851539611816406,1,26,0.5,0.5,1,0,0.3,28.3 +2,2016-06-09 21:06:36,2016-06-09 21:23:23,1,3.10,-73.987106323242187,40.733173370361328,1,N,-73.985908508300781,40.766445159912109,1,13.5,0.5,0.5,2.96,0,0.3,17.76 +2,2016-06-09 21:06:36,2016-06-09 21:19:21,1,2.17,-73.995201110839844,40.739490509033203,1,N,-73.993202209472656,40.762641906738281,1,10.5,0.5,0.5,2.36,0,0.3,14.16 +2,2016-06-09 21:06:36,2016-06-09 21:30:13,5,6.02,-73.980476379394531,40.741680145263672,1,N,-73.902961730957031,40.748741149902344,2,21.5,0.5,0.5,0,0,0.3,22.8 +1,2016-06-09 21:06:37,2016-06-09 21:16:47,1,1.40,-73.985702514648437,40.746742248535156,1,N,-73.982505798339844,40.762771606445313,1,8.5,0.5,0.5,1.95,0,0.3,11.75 +1,2016-06-09 21:06:37,2016-06-09 21:15:44,1,1.20,-73.984275817871094,40.775009155273438,1,N,-73.968498229980469,40.766429901123047,2,8,0.5,0.5,0,0,0.3,9.3 diff --git a/go.mod b/go.mod index c570cde..0f2db0c 100644 --- a/go.mod +++ b/go.mod @@ -28,5 +28,6 @@ require ( github.com/spf13/viper v1.4.0 github.com/stretchr/testify v1.3.0 // indirect github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 + golang.org/x/sync v0.0.0-20190423024810-112230192c58 gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) From 3fa0161216f8a9fce3df2246e6723b63227e77bb Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Sat, 31 Aug 2019 21:20:44 -0500 Subject: [PATCH 05/40] update deps --- cmd/picsv/README.md | 8 ++++---- go.mod | 2 +- go.sum | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/picsv/README.md b/cmd/picsv/README.md index df42c56..df82ef8 100644 --- a/cmd/picsv/README.md +++ b/cmd/picsv/README.md @@ -96,10 +96,10 @@ won't be a good interface for workloads with lots of fields (hundreds or thousands) where many of them are often nil for any given record. If you want to see example usage of the Batch interface, check out the -code right [here](./batch.go) in the PDK's CSV tooling. The `picsv` -tool takes in CSV files and does it's best to ingest them to Pilosa -performantly with minimal supervision. It does, however, have an -optional configuration which allows one to do basic things like +code right [here](../../csv/batch.go) in the PDK's CSV tooling. The +`picsv` tool takes in CSV files and does it's best to ingest them to +Pilosa performantly with minimal supervision. It does, however, have +an optional configuration which allows one to do basic things like specify which fields are ints vs strings, and how the CSV field names map on to Pilosa fields. 
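To make the optional configuration concrete, here is a minimal sketch of expressing the same mapping from Go instead of a JSON config file. The index name, input file, and column name are placeholders; `Config`, `SourceField`, and `Field` are the csv package types exercised by the tests referenced below, and the import path is assumed from the repository layout.

```go
package main

import (
	"log"

	picsv "github.com/pilosa/pdk/csv"
)

func main() {
	m := picsv.NewMain()
	m.Index = "taxi"                // hypothetical index name
	m.Files = []string{"trips.csv"} // hypothetical input file

	// Read the CSV column "fare_amount" as a float, scale it by 100,
	// and store it in a Pilosa int field of the same name.
	m.Config.SourceFields["fare_amount"] = picsv.SourceField{
		TargetField: "fare_amount",
		Type:        "float",
		Multiplier:  100,
	}
	m.Config.PilosaFields["fare_amount"] = picsv.Field{Type: "int"}

	if err := m.Run(); err != nil {
		log.Fatal(err)
	}
}
```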
There are some examples of this in the [tests](./batch_test.go), and be on the look out for a more complete diff --git a/go.mod b/go.mod index 0f2db0c..792c568 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa +replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 require ( github.com/Shopify/sarama v1.19.0 diff --git a/go.sum b/go.sum index 7acf054..dd4a370 100644 --- a/go.sum +++ b/go.sum @@ -151,6 +151,8 @@ github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMS github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= +github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 h1:DyezXZFZgOssV5VH/a7+XH3iXNtj8SFX8FKiEZbu3iY= +github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= From 8faaad46ed7748905894952e04c0c93b8449ae72 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 4 Sep 2019 17:16:18 -0500 Subject: [PATCH 06/40] fixes and test cases for csv taxi test --- csv/batch.go | 36 +++++++++++++++++--- csv/batch_test.go | 86 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 105 insertions(+), 17 deletions(-) diff --git a/csv/batch.go b/csv/batch.go index 9f8924b..57945f5 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -96,7 +96,6 @@ func (m *Main) Run() error { mu.Lock() go func() { for stat := range stats { - // TODO add file name to stats log.Printf("processed %s\n", stat) totalRecords += stat.n } @@ -104,7 +103,7 @@ func (m *Main) Run() error { }() /////////////////////////////////////////////////////// - // for each file to process (just one right now) + // for each file to process for _, filename := range m.Files { f, err := os.Open(filename) if err != nil { @@ -128,6 +127,7 @@ func (m *Main) Run() error { client.SyncSchema(schema) jobs <- fileJob{ + name: filename, reader: reader, batch: batch, pc: parseConfig, @@ -143,18 +143,25 @@ func (m *Main) Run() error { type jobReport struct { n uint64 - err error duration time.Duration + name string + err error } func (j jobReport) String() string { + s := fmt.Sprintf("{name:%s n:%d duration:%s", j.name, j.n, j.duration) if j.err != nil { - return fmt.Sprintf("{n:%d duration:%s err:'%s'}", j.n, j.duration, j.err) + s += fmt.Sprintf(" err:'%s'}", j.err) + } else { + s += "}" } + + return s return fmt.Sprintf("{n:%d duration:%s}", j.n, j.duration) } type fileJob struct { + name string reader *csv.Reader batch *gpexp.Batch pc *parseConfig @@ -165,6 +172,7 @@ func fileProcessor(jobs <-chan fileJob, stats chan<- jobReport) { start := time.Now() n, err := processFile(fj.reader, fj.batch, fj.pc) stats <- jobReport{ + name: fj.name, n: n, err: err, duration: time.Since(start), @@ -307,8 +315,12 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r 
continue case "int": valGetter = func(val string) interface{} { + if val == "" { + return nil + } intVal, err := strconv.ParseInt(val, 10, 64) if err != nil { + log.Printf("parsing '%s' for %s as int: %v\n", val, fieldName, err) return nil } return intVal @@ -317,14 +329,21 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r case "float": if srcField.Multiplier != 0 { valGetter = func(val string) interface{} { + if val == "" { + return nil + } floatVal, err := strconv.ParseFloat(val, 64) if err != nil { + log.Printf("parsing '%s' for %s as float: %v\n", val, fieldName, err) return nil } return int64(floatVal * srcField.Multiplier) } } else { valGetter = func(val string) interface{} { + if val == "" { + return nil + } floatVal, err := strconv.ParseFloat(val, 64) if err != nil { return nil @@ -343,8 +362,12 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r fields = append(fields, index.Field(fieldName, pilosaField.MakeOptions()...)) case "uint64", "rowID": valGetter = func(val string) interface{} { + if val == "" { + return nil + } uintVal, err := strconv.ParseUint(val, 0, 64) if err != nil { + log.Printf("parsing '%s' for %s as rowID: %v\n", val, fieldName, err) return nil } return uintVal @@ -355,9 +378,12 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r return nil, nil, errors.Errorf("need time format for source field %s of type time", srcFieldName) } valGetter = func(val string) interface{} { + if val == "" { + return nil + } tim, err := time.Parse(srcField.TimeFormat, val) if err != nil { - // TODO some kind of logging or stats around failures in here. + log.Printf("parsing '%s' for %s as time w/ format '%s': %v\n", val, fieldName, srcField.TimeFormat, err) return nil } return tim.Unix() diff --git a/csv/batch_test.go b/csv/batch_test.go index dd13223..7c6ce8f 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -175,7 +175,6 @@ func TestImportMarketingCSV(t *testing.T) { } func TestImportMultipleTaxi(t *testing.T) { - // TODO (taxi files in place via: // for url in `grep -v fhv_tripdata ../usecase/taxi/urls.txt`; do curl -s $url | head > testdata/${url##*/}; done m := picsv.NewMain() m.BatchSize = 12 @@ -195,7 +194,7 @@ func TestImportMultipleTaxi(t *testing.T) { config := `{ "pilosa-fields": { - "cab_type": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 3}, + "cab_type": {"type": "set", "keys": true, "cache-type": "ranked", "cache-size": 3}, "pickup_time": {"type": "int"}, "dropoff_time": {"type": "int"}, "passenger_count": {"type": "set", "keys": false, "cache-type": "ranked", "cache-size": 50}, @@ -214,14 +213,14 @@ func TestImportMultipleTaxi(t *testing.T) { "total_amount": {"type": "int"}, "improvement_surcharge": {"type": "int"}, "ehail_fee": {"type": "int"}, - "payment_type": {"type": "set", "keys": false} + "payment_type": {"type": "set", "keys": true} }, "id-field": "", "id-type": "", "source-fields": { - "VendorID": {"target-field": "cab_type", "type": "rowID"}, - "vendor_id": {"target-field": "cab_type", "type": "rowID"}, - "vendor_name": {"target-field": "cab_type", "type": "rowID"}, + "VendorID": {"target-field": "cab_type", "type": "string"}, + "vendor_id": {"target-field": "cab_type", "type": "string"}, + "vendor_name": {"target-field": "cab_type", "type": "string"}, "lpep_pickup_datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, "tpep_pickup_datetime": {"target-field": "pickup_time", "type": 
"time", "time-format": "2006-01-02 15:04:05"}, "pickup_datetime": {"target-field": "pickup_time", "type": "time", "time-format": "2006-01-02 15:04:05"}, @@ -262,7 +261,7 @@ func TestImportMultipleTaxi(t *testing.T) { "Total_Amt": {"target-field": "total_amount", "type": "float", "multiplier": 100}, "total_amount": {"target-field": "total_amount", "type": "float", "multiplier": 100}, "ehail_fee": {"target-field": "ehail_fee", "type": "float", "multiplier": 100}, - "payment_type": {"target-field": "payment_type", "type": "rowID"}, + "payment_type": {"target-field": "payment_type", "type": "string"}, "extra": {"target-field": "extra", "type": "float", "multiplier": 100} } }` @@ -274,12 +273,75 @@ func TestImportMultipleTaxi(t *testing.T) { t.Fatalf("running ingest: %v", err) } - // schema, err := client.Schema() - // if err != nil { - // t.Fatalf("getting schema: %v", err) - // } + schema, err := client.Schema() + if err != nil { + t.Fatalf("getting schema: %v", err) + } - // index := schema.Index(m.Index) + index := schema.Index(m.Index) + cabType := index.Field("cab_type") + drop_long := index.Field("dropoff_longitude") + pick_long := index.Field("pickup_longitude") + + tests := []struct { + query *pilosa.PQLRowQuery + bash string + exp int64 + }{ + { + query: cabType.Row("1"), + bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, + exp: 79, + }, + { + query: cabType.Row("2"), + bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, + exp: 363, + }, + { + query: cabType.Row("CMT"), + bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, + exp: 318, + }, + { + query: cabType.Row("DDS"), + bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, + exp: 17, + }, + { + query: cabType.Row("VTS"), + bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, + exp: 249, + }, + { + query: drop_long.Equals(-738996), + bash: `cat * | grep '73\.8996'`, + exp: 1, + }, + { + query: drop_long.Equals(-738996), + bash: `cat * | grep '73\.8996'`, + exp: 1, + }, + { + query: index.Union(drop_long.Between(-739449, -739440), pick_long.Between(-739449, -739440)), + bash: `cat * | grep '73\.944' | wc`, + exp: 16, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + q := index.Count(test.query) + resp, err := client.Query(q) + if err != nil { + t.Fatalf("running query '%s': %v", q.Serialize(), err) + } + if resp.Result().Count() != test.exp { + t.Fatalf("Got unexpected result %d instead of %d for\nquery: %s\nbash: %s", resp.Result().Count(), test.exp, q.Serialize(), test.bash) + } + }) + } } From 4b588ab64d2fdf5b61fba6a1a1c1c573e367fab5 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 5 Sep 2019 16:53:27 -0500 Subject: [PATCH 07/40] open files or urls --- csv/batch.go | 38 ++++++++++++++++++++++++++++++-------- csv/batch_test.go | 6 +++--- go.mod | 2 ++ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/csv/batch.go b/csv/batch.go index 57945f5..bf50ab9 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "log" + "net/http" "os" "strconv" "strings" @@ -21,12 +22,12 @@ import ( type Main struct { Pilosa []string `help:"Comma separated list of host:port describing Pilosa cluster."` - Files []string - Index string `help:"Name of index to ingest data into."` - BatchSize int `help:"Number of records to put in a batch before importing to Pilosa."` - ConfigFile string `help:"JSON configuration describing source fields, and how to parse and map them to 
Pilosa fields."` - RangeAllocator string `help:"Designates where to retrieve unused ranged of record IDs (if generating ids). If left blank, generate locally starting from 0."` - Concurrency int `help:"Number of goroutines to run processing files."` + Files []string `help:"File names or URLs to read."` + Index string `help:"Name of index to ingest data into."` + BatchSize int `help:"Number of records to put in a batch before importing to Pilosa."` + ConfigFile string `help:"JSON configuration describing source fields, and how to parse and map them to Pilosa fields."` + RangeAllocator string `help:"Designates where to retrieve unused ranged of record IDs (if generating ids). If left blank, generate locally starting from 0."` + Concurrency int `help:"Number of goroutines to run processing files."` Config *Config `flag:"-"` } @@ -105,9 +106,9 @@ func (m *Main) Run() error { /////////////////////////////////////////////////////// // for each file to process for _, filename := range m.Files { - f, err := os.Open(filename) + f, err := openFileOrURL(filename) if err != nil { - return errors.Wrap(err, "opening file") + return errors.Wrapf(err, "opening %s", filename) } defer f.Close() reader := csv.NewReader(f) @@ -407,6 +408,27 @@ func processHeader(config *Config, client *pilosa.Client, index *pilosa.Index, r return batch, pc, nil } +func openFileOrURL(name string) (io.ReadCloser, error) { + var content io.ReadCloser + if strings.HasPrefix(name, "http") { + resp, err := http.Get(name) + if err != nil { + return nil, errors.Wrap(err, "getting via http") + } + if resp.StatusCode > 299 { + return nil, errors.Errorf("got status %d via http.Get", resp.StatusCode) + } + content = resp.Body + } else { + f, err := os.Open(name) + if err != nil { + return nil, errors.Wrap(err, "opening file") + } + content = f + } + return content, nil +} + func NewConfig() *Config { return &Config{ PilosaFields: make(map[string]Field), diff --git a/csv/batch_test.go b/csv/batch_test.go index 7c6ce8f..406fe93 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -301,17 +301,17 @@ func TestImportMultipleTaxi(t *testing.T) { { query: cabType.Row("CMT"), bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, - exp: 318, + exp: 317, }, { query: cabType.Row("DDS"), bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, - exp: 17, + exp: 14, }, { query: cabType.Row("VTS"), bash: `cat ./testdata/taxi/* | awk -F, '{print $1}' | sort | uniq -c`, - exp: 249, + exp: 245, }, { query: drop_long.Equals(-738996), diff --git a/go.mod b/go.mod index 792c568..34a7612 100644 --- a/go.mod +++ b/go.mod @@ -31,3 +31,5 @@ require ( golang.org/x/sync v0.0.0-20190423024810-112230192c58 gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) + +go 1.13 From b80e433bda08696f29d24f7bfc87431c7bff8ad1 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 5 Sep 2019 17:26:26 -0500 Subject: [PATCH 08/40] add option to pass urls/files in as a file --- csv/batch.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/csv/batch.go b/csv/batch.go index bf50ab9..4855628 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -1,6 +1,7 @@ package csv import ( + "bufio" "encoding/csv" "encoding/json" "fmt" @@ -23,6 +24,7 @@ import ( type Main struct { Pilosa []string `help:"Comma separated list of host:port describing Pilosa cluster."` Files []string `help:"File names or URLs to read."` + URLFile string `help:"Filename or URL containing line separated file names or urls to read data from. 
These will be appended to Files."` Index string `help:"Name of index to ingest data into."` BatchSize int `help:"Number of records to put in a batch before importing to Pilosa."` ConfigFile string `help:"JSON configuration describing source fields, and how to parse and map them to Pilosa fields."` @@ -35,7 +37,7 @@ type Main struct { func NewMain() *Main { return &Main{ Pilosa: []string{"localhost:10101"}, - Files: []string{"data.csv"}, + Files: []string{}, Index: "picsvtest", BatchSize: 1000, Concurrency: 4, @@ -59,6 +61,21 @@ func (m *Main) Run() error { return errors.Wrap(err, "decoding config file") } } + + if m.URLFile != "" { + f, err := openFileOrURL(m.URLFile) + if err != nil { + return errors.Wrap(err, "opening url file") + } + scan := bufio.NewScanner(f) + for scan.Scan() { + line := scan.Text() + m.Files = append(m.Files, line) + } + if err := scan.Err(); err != nil { + return errors.Wrap(err, "scanning URL file") + } + } // log.Printf("Flags: %+v\n", *m) // log.Printf("Config: %+v\n", *m.Config) From 9818944bbaf9b71a22ce81164b5e504aa1b5e6c1 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 5 Sep 2019 20:41:12 -0500 Subject: [PATCH 09/40] change go version... not sure how this works --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 34a7612..84184cf 100644 --- a/go.mod +++ b/go.mod @@ -32,4 +32,4 @@ require ( gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) -go 1.13 +go 1.12 From 513aa2493de5849975171c21b5a814704dab8559 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 6 Sep 2019 07:39:16 -0500 Subject: [PATCH 10/40] update circleCI go versions --- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1bf79da..87553e7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2 defaults: &defaults working_directory: /go/src/github.com/pilosa/pdk docker: - - image: circleci/golang:1.11 + - image: circleci/golang:1.12 environment: GO111MODULE: "on" fast-checkout: &fast-checkout @@ -30,15 +30,15 @@ jobs: - *fast-checkout - run: make install-gometalinter - run: make gometalinter - test-golang-1.11: &base-test + test-golang-1.12: &base-test <<: *defaults steps: - *fast-checkout - run: make test - test-golang-1.12-rc: + test-golang-1.13: <<: *base-test docker: - - image: circleci/golang:1.12-rc + - image: circleci/golang:1.13 workflows: version: 2 test: @@ -47,9 +47,9 @@ workflows: - linter: requires: - build - - test-golang-1.11: + - test-golang-1.12: requires: - build - - test-golang-1.12-rc: + - test-golang-1.13: requires: - build From 39d8f0dba0642e33eab7916b0ab76a1078c30c31 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 25 Sep 2019 08:58:48 -0500 Subject: [PATCH 11/40] fixup csv/batch tests and check syncschema errors --- csv/batch.go | 5 ++++- csv/batch_test.go | 12 ++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/csv/batch.go b/csv/batch.go index 4855628..a8afd03 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -142,7 +142,10 @@ func (m *Main) Run() error { return errors.Wrap(err, "processing header") } // this has a non-obvious dependence on processHeader which sets up fields. TODO Do this inside processHeader? 
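The URL-file option added in PATCH 08 above is simplest to see in use. A minimal sketch, assuming a hypothetical index and a local file that lists one CSV path or URL per line; each line is appended to `Files` and processed by the worker pool:

```go
package main

import (
	"log"

	picsv "github.com/pilosa/pdk/csv"
)

func main() {
	m := picsv.NewMain()
	m.Index = "taxi"            // hypothetical index name
	m.URLFile = "taxi-urls.txt" // hypothetical file: one CSV path or URL per line
	m.Concurrency = 2           // number of goroutines processing files

	if err := m.Run(); err != nil {
		log.Fatal(err)
	}
}
```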
- client.SyncSchema(schema) + err = client.SyncSchema(schema) + if err != nil { + return errors.Wrap(err, "syncing schema") + } jobs <- fileJob{ name: filename, diff --git a/csv/batch_test.go b/csv/batch_test.go index 406fe93..3917385 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -17,7 +17,7 @@ func BenchmarkImportCSV(b *testing.B) { m := picsv.NewMain() m.BatchSize = 1 << 20 m.Index = "picsvbench" - m.Files = []string{"testdata/marketing-200k.csv"} + m.Files = []string{"marketing-200k.csv"} getRawData(b, m.Files[0]) client, err := pilosa.NewClient(m.Pilosa) if err != nil { @@ -84,7 +84,7 @@ func TestImportMarketingCSV(t *testing.T) { { name: "uint64", idField: "id", - idType: "string", + idType: "uint64", }, { name: "generatedID", @@ -99,7 +99,15 @@ func TestImportMarketingCSV(t *testing.T) { m.Index = "testpicsv" m.Files = []string{"marketing-200k.csv"} m.Config.SourceFields["age"] = picsv.SourceField{TargetField: "age", Type: "float"} + m.Config.SourceFields["pdays"] = picsv.SourceField{TargetField: "pdays", Type: "float"} + m.Config.SourceFields["campaign"] = picsv.SourceField{TargetField: "campaign", Type: "float"} + m.Config.SourceFields["previous"] = picsv.SourceField{TargetField: "previous", Type: "float"} + m.Config.SourceFields["duration"] = picsv.SourceField{TargetField: "duration", Type: "float"} m.Config.PilosaFields["age"] = picsv.Field{Type: "int"} + m.Config.PilosaFields["pdays"] = picsv.Field{Type: "int"} + m.Config.PilosaFields["campaign"] = picsv.Field{Type: "int"} + m.Config.PilosaFields["previous"] = picsv.Field{Type: "int"} + m.Config.PilosaFields["duration"] = picsv.Field{Type: "int"} m.Config.IDField = tst.idField m.Config.IDType = tst.idType getRawData(t, m.Files[0]) From 2f29e916805f9018d3683f5c726f87428758ab22 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 25 Sep 2019 09:10:06 -0500 Subject: [PATCH 12/40] initial mostly working v2 Kafka source --- v2/cmd/kafka/main.go | 14 + v2/interfaces.go | 101 ++++ v2/kafka/source.go | 528 +++++++++++++++++++ v2/kafka/source_test.go | 77 +++ v2/kafka/testdata/schemas/decimal.json | 8 + v2/kafka/testdata/schemas/fieldisrecord.json | 14 + v2/kafka/testdata/schemas/notarecord.json | 4 + v2/kafka/testdata/schemas/simple.json | 9 + 8 files changed, 755 insertions(+) create mode 100644 v2/cmd/kafka/main.go create mode 100644 v2/interfaces.go create mode 100644 v2/kafka/source.go create mode 100644 v2/kafka/source_test.go create mode 100644 v2/kafka/testdata/schemas/decimal.json create mode 100644 v2/kafka/testdata/schemas/fieldisrecord.json create mode 100644 v2/kafka/testdata/schemas/notarecord.json create mode 100644 v2/kafka/testdata/schemas/simple.json diff --git a/v2/cmd/kafka/main.go b/v2/cmd/kafka/main.go new file mode 100644 index 0000000..d61e30a --- /dev/null +++ b/v2/cmd/kafka/main.go @@ -0,0 +1,14 @@ +package main + +import ( + "log" + + "github.com/jaffee/commandeer" + "github.com/pilosa/pdk/v2/kafka" +) + +func main() { + if err := commandeer.Run(kafka.NewMain()); err != nil { + log.Fatal(err) + } +} diff --git a/v2/interfaces.go b/v2/interfaces.go new file mode 100644 index 0000000..4649a60 --- /dev/null +++ b/v2/interfaces.go @@ -0,0 +1,101 @@ +package pdk + +// Source is an interface implemented by sources of data which can be +// ingested into Pilosa. Each Record returned from Record is described +// by the slice of Fields returned from Source.Schema directly after +// the call to Source.Record. 
If the error returned from Source.Record +// is nil, then the call to Schema which applied to the previous +// Record also applies to this Record. Source implementations are +// fundamentally not threadsafe (due to the interplay between Record +// and Schema). +type Source interface { + + // Record returns a data record, and an optional error. If the + // error is ErrSchemaChange, then the record is valid, but one + // should call Source.Schema to understand how each of its fields + // should be interpreted. + Record() (Record, error) + + // Schema returns a slice of Fields which applies to the most + // recent Record returned from Source.Record. Every Field has a + // name and a type, and depending on the concrete type of the + // Field, may have other information which is relevant to how it + // should be indexed. + Schema() []Field +} + +type Error string + +func (e Error) Error() string { return string(e) } + +// ErrSchemaChange is returned from Source.Record when the returned +// record has a different schema from the previous record. +const ErrSchemaChange = Error("this record has a different schema from the previous record (or is the first one delivered). Please call Source.Schema() to fetch the schema in order to properly decode this record") + +type Record interface { + // Commit notifies the Source which produced this record that it + // and any record which came before it have been completely + // processed. The Source can then take any necessary action to + // record which records have been processed, and restart from the + // earliest unprocessed record in the event of a failure. + Commit() error + + Data() []interface{} +} + +type Field interface { + Name() string +} + +type IDField struct { + NameVal string + + // Mutex denotes whether we need to enforce that each record only + // has a single value for this field. Put another way, says + // whether a new value for this field be treated as adding an + // additional value, or replacing the existing value (if there is + // one). + Mutex bool +} + +func (id IDField) Name() string { return id.NameVal } + +type BoolField struct { + NameVal string +} + +func (b BoolField) Name() string { return b.NameVal } + +type StringField struct { + NameVal string + + // Mutex denotes whether we need to enforce that each record only + // has a single value for this field. Put another way, says + // whether a new value for this field be treated as adding an + // additional value, or replacing the existing value (if there is + // one). 
+ Mutex bool +} + +func (s StringField) Name() string { return s.NameVal } + +type IntField struct { + NameVal string + Min *int64 + Max *int64 +} + +func (i IntField) Name() string { return i.NameVal } + +type DecimalField struct { + NameVal string + Scale uint +} + +func (d DecimalField) Name() string { return d.NameVal } + +type StringArrayField struct { + NameVal string +} + +func (s StringArrayField) Name() string { return s.NameVal } diff --git a/v2/kafka/source.go b/v2/kafka/source.go new file mode 100644 index 0000000..0d720d8 --- /dev/null +++ b/v2/kafka/source.go @@ -0,0 +1,528 @@ +package kafka + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "log" + "net/http" + + "github.com/Shopify/sarama" + cluster "github.com/bsm/sarama-cluster" + "github.com/go-avro/avro" + "github.com/pilosa/go-pilosa" + pdk "github.com/pilosa/pdk/v2" + "github.com/pkg/errors" +) + +type Main struct { + PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` + KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` + RegistryURL string `help:"Location of Confluent Schema Registry"` + BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` + Group string `help:"Kafka group."` + Index string `help:"Name of Pilosa index."` + Topics []string `help:"Kafka topics to read from."` + LogPath string `help:"Log file to write to. Empty means stderr."e` + PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` + IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` + MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` + // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. +} + +func NewMain() *Main { + return &Main{ + PilosaHosts: []string{"localhost:10101"}, + KafkaHosts: []string{"localhost:9092"}, + RegistryURL: "localhost:8081", + BatchSize: 1, // definitely increase this to achieve any amount of performance + Group: "defaultgroup", + Index: "defaultindex", + Topics: []string{"defaulttopic"}, + } +} + +func (m *Main) Run() error { + if err := m.validate(); err != nil { + return errors.Wrap(err, "validating configuration") + } + + client, err := pilosa.NewClient(m.PilosaHosts) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + schema, err := client.Schema() + if err != nil { + return errors.Wrap(err, "getting schema") + } + keyTranslation := len(m.PrimaryKeyFields) > 0 + index := schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) + fmt.Println(index) + + source := NewSource() + source.Hosts = m.KafkaHosts + source.Topics = m.Topics + source.Group = m.Group + source.MaxMsgs = m.MaxMsgs + + // remember to flush old batch and make a new batch when schema changes + + return nil +} + +func (m *Main) validate() error { + if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { + return errors.New("cannot set both primary key fields and id-field") + } + return nil +} + +// Source implements the pdk.Source interface using kafka as a data +// source. It is not threadsafe! 
Due to the way Kafka clients work, to +// achieve concurrency, create multiple Sources. +type Source struct { + Hosts []string + Topics []string + Group string + MaxMsgs int + RegistryURL string + + numMsgs int + consumer *cluster.Consumer + messages <-chan *sarama.ConsumerMessage + + // lastSchemaID and lastSchema keep track of the most recent + // schema in use. We expect this not to change often, but when it + // does, we need to notify the caller of Source.Record() + lastSchemaID int32 + lastSchema []pdk.Field + + // cache is a schema cache so we don't have to look up the same + // schema from the registry each time. + cache map[int32]avro.Schema + // stash is a local offset stash which source maintains so it can + // control when offsets are committed to Kafka. + stash *cluster.OffsetStash + + decBytes []byte + record *Record +} + +// NewSource gets a new Source +func NewSource() *Source { + src := &Source{ + Hosts: []string{"localhost:9092"}, + Topics: []string{"test"}, + Group: "group0", + + lastSchemaID: -1, + cache: make(map[int32]avro.Schema), + stash: cluster.NewOffsetStash(), + + decBytes: make([]byte, 8), + } + src.record = &Record{src: src} + + return src +} + +// Record returns the value of the next kafka message. The same Record +// object may be used by successive calls to Record, so it should not +// be retained. +func (s *Source) Record() (pdk.Record, error) { + if s.MaxMsgs > 0 { + s.numMsgs++ + if s.numMsgs > s.MaxMsgs { + return nil, io.EOF + } + } + msg, ok := <-s.messages + s.stash.MarkOffset(msg, "") + if ok { + ret := &Record{ // TODO reuse Record between calls + src: s, + } + val, err := s.decodeAvroValueWithSchemaRegistry(msg.Value) + if err != nil && err != pdk.ErrSchemaChange { + return nil, errors.Wrap(err, "decoding with schema registry") + } + if err == pdk.ErrSchemaChange { + s.record.data = make([]interface{}, len(s.lastSchema)) + } + err = s.toPDKRecord(val.(map[string]interface{})) // val must be map[string]interface{} because we only accept schemas which are Record type at the top level. + if err != nil { + // reset lastSchema so if Record gets called again, and + // the schema just changed, we'll notify of the change. + s.lastSchema = nil + s.lastSchemaID = -1 + return nil, errors.Wrap(err, "converting to PDK Record") + } + return ret, err // err must be nil or ErrSchemaChange at this point + } + return nil, errors.New("messages channel closed") +} + +func (s *Source) toPDKRecord(vals map[string]interface{}) error { + r := s.record + for i, field := range s.lastSchema { + val := vals[field.Name()] + switch field.(type) { + case pdk.DecimalField: + vb, ok := val.([]byte) + if !ok { + return errors.Errorf("decimal must be []byte, but got %v of %[1]T", val) + } + if len(vb) == 8 { + r.data[i] = binary.BigEndian.Uint64(vb) + } else if len(vb) < 8 { + copy(s.decBytes[8-len(vb):], vb) + r.data[i] = binary.BigEndian.Uint64(s.decBytes) + } else { + return errors.Errorf("can't support decimals of greater than 8 bytes, got %d for %s", len(vb), field.Name()) + } + default: + r.data[i] = val + } + } + return nil +} + +type Record struct { + src *Source + data []interface{} +} + +func (r *Record) Commit() error { + r.src.consumer.MarkOffsets(r.src.stash) + + // TODO this can return temporary errors according to the + // documentation. Might be good to detect those and retry in here. + return r.src.consumer.CommitOffsets() +} + +func (r *Record) Data() []interface{} { + return r.data +} + +// Open initializes the kafka source. 
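Before the Open/Close plumbing below, the decimal handling in toPDKRecord above is worth a standalone illustration. A small sketch with a made-up two-byte value, showing how a shorter big-endian byte array is right-aligned into eight bytes before decoding; negative (two's-complement) values are not considered here:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Avro serializes a decimal as a big-endian byte array. The example
	// value is made up: 0x03E8 = 1000, which at scale 2 represents 10.00.
	vb := []byte{0x03, 0xE8}

	// Values shorter than 8 bytes are copied into the tail of an 8-byte
	// buffer, mirroring the copy in toPDKRecord above.
	var buf [8]byte
	copy(buf[8-len(vb):], vb)
	fmt.Println(binary.BigEndian.Uint64(buf[:])) // 1000
}
```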
+func (s *Source) Open() error { + // init (custom) config, enable errors and notifications + sarama.Logger = log.New(ioutil.Discard, "", 0) // TODO get logs? + config := cluster.NewConfig() + config.Config.Version = sarama.V0_10_0_0 + config.Consumer.Return.Errors = true + config.Consumer.Offsets.Initial = sarama.OffsetOldest + config.Group.Return.Notifications = true + + var err error + s.consumer, err = cluster.NewConsumer(s.Hosts, s.Group, s.Topics, config) + if err != nil { + return errors.Wrap(err, "getting new consumer") + } + s.messages = s.consumer.Messages() + + // consume errors + go func() { + for err := range s.consumer.Errors() { + log.Printf("Error: %s\n", err.Error()) + } + }() + + // consume notifications + go func() { + for ntf := range s.consumer.Notifications() { + log.Printf("Rebalanced: %+v\n", ntf) + } + }() + return nil +} + +// Close closes the underlying kafka consumer. +func (s *Source) Close() error { + err := s.consumer.Close() + return errors.Wrap(err, "closing kafka consumer") +} + +// TODO change name +func (s *Source) decodeAvroValueWithSchemaRegistry(val []byte) (interface{}, error) { + if len(val) <= 6 || val[0] != 0 { + return nil, errors.Errorf("unexpected magic byte or length in avro kafka value, should be 0x00, but got 0x%.8s", val) + } + id := int32(binary.BigEndian.Uint32(val[1:])) + codec, err := s.getCodec(id) + if err != nil { + return nil, errors.Wrap(err, "getting avro codec") + } + ret, err := avroDecode(codec, val[5:]) + if err != nil { + return nil, errors.Wrap(err, "decoding avro record") + } + if id != s.lastSchemaID { + s.lastSchema, err = avroToPDKSchema(codec) + if err != nil { + return nil, errors.Wrap(err, "converting to Pilosa schema") + } + s.lastSchemaID = id + return ret, pdk.ErrSchemaChange + } + + return ret, nil +} + +// avroToPDKSchema converts a full avro schema to the much more +// constrained []pdk.Field which maps pretty directly onto +// Pilosa. Many features of avro are unsupported and will cause this +// to return an error. The "codec" argument ot this function must be +// an avro.Record. 
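The framing that decodeAvroValueWithSchemaRegistry parses above can be sketched independently of Kafka before moving on to avroToPDKSchema below. The schema ID and payload here are placeholders; only the five-byte header layout is the point:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	schemaID := uint32(7)                // hypothetical schema registry ID
	payload := []byte("avro-body-bytes") // placeholder for the Avro-encoded record

	// One zero magic byte, a 4-byte big-endian schema ID, then the payload.
	msg := make([]byte, 5+len(payload))
	msg[0] = 0x00
	binary.BigEndian.PutUint32(msg[1:5], schemaID)
	copy(msg[5:], payload)

	fmt.Printf("header: % x\n", msg[:5]) // header: 00 00 00 00 07
}
```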
+func avroToPDKSchema(codec avro.Schema) ([]pdk.Field, error) { + switch codec.Type() { + case avro.Record: + recordSchema, ok := codec.(*avro.RecordSchema) + if !ok { + panic(fmt.Sprintf("Record isn't a *avro.RecordSchema, got %+v of %[1]T", codec)) + } + pdkFields := make([]pdk.Field, 0, len(recordSchema.Fields)) + for _, field := range recordSchema.Fields { + pdkField, err := avroToPDKField(field) + if err != nil { + return nil, errors.Wrap(err, "converting avro field to pdk") + } + pdkFields = append(pdkFields, pdkField) + } + return pdkFields, nil + default: + return nil, errors.Errorf("unsupported Avro Schema type: %d", codec.Type()) // TODO error msg with int type is pretty opaque + } + +} + +func avroToPDKField(aField *avro.SchemaField) (pdk.Field, error) { + switch aField.Type.Type() { + case avro.Record: + return nil, errors.Errorf("nested fields are not currently supported, so the field type cannot be record.") + case avro.Enum, avro.String: + pdkField := pdk.StringField{NameVal: aField.Name} + if mutex, ok := aField.Prop("mutex"); ok { + if mutexb, ok := mutex.(bool); ok { + pdkField.Mutex = mutexb // TODO document this behavior + } + } + return pdkField, nil + case avro.Bytes, avro.Fixed: + fmt.Println("in bytes") + if lt, _ := stringProp(aField, "logicalType"); lt == "decimal" { + fmt.Println("in decimal") + precision, err := intProp(aField, "precision") + if precision > 18 || precision < 1 { + return nil, errors.Errorf("need precision for decimal in 1-18, but got:%d, err:%v", precision, err) + } + fmt.Println("got precision", precision) + scale, err := intProp(aField, "scale") + if scale > precision || err == wrongType { + return nil, errors.Errorf("0<=scale<=precision, got:%d err:%v", scale, err) + } + return pdk.DecimalField{ + NameVal: aField.Name, + Scale: uint(scale), + }, nil + } + // if not a decimal, then treat as string + pdkField := pdk.StringField{NameVal: aField.Name} + if mutex, ok := aField.Prop("mutex"); ok { + if mutexb, ok := mutex.(bool); ok { + pdkField.Mutex = mutexb // TODO document this behavior + } + } + return pdkField, nil + case avro.Union: + return avroUnionToPDKField(aField) + case avro.Array: + itemSchema := aField.Type.(*avro.ArraySchema).Items + switch typ := itemSchema.Type(); typ { + case avro.String, avro.Bytes, avro.Fixed, avro.Enum: + if lt, _ := stringProp(itemSchema, "logicalType"); lt == "decimal" { + return nil, errors.New("arrays of decimal are not supported") + } + return pdk.StringArrayField{NameVal: aField.Name}, nil + default: + return nil, errors.Errorf("array items type of %d is unsupported", itemSchema.Type()) + } + case avro.Int, avro.Long: + if lt, _ := stringProp(aField, "logicalType"); lt == "PilosaID" { + return pdk.IDField{ + NameVal: aField.Name, + }, nil + } + return pdk.IntField{ + NameVal: aField.Name, + }, nil + case avro.Float, avro.Double: + return pdk.IntField{ + NameVal: aField.Name, + }, nil + case avro.Boolean: + return pdk.BoolField{ + NameVal: aField.Name, + }, nil + case avro.Null: + return nil, errors.Errorf("null fields are not supported except inside Union") + case avro.Map: + return nil, errors.Errorf("nested fields are not currently supported, so the field type cannot be map.") + case avro.Recursive: + return nil, errors.Errorf("recursive schema fields are not currently supported.") + default: + return nil, errors.Errorf("unknown schema type: %+v", aField.Type) + } + +} + +func stringProp(p propper, s string) (string, error) { + ival, ok := p.Prop(s) + if !ok { + return "", notFound + } + sval, ok := 
ival.(string) + if !ok { + return "", wrongType + } + return sval, nil +} + +func intProp(p propper, s string) (int, error) { + ival, ok := p.Prop(s) + if !ok { + return 0, notFound + } + floatVal, ok := ival.(float64) + if !ok { + return 0, wrongType + } + return int(floatVal), nil +} + +type propper interface { + Prop(string) (interface{}, bool) +} + +var notFound = errors.New("prop not found") +var wrongType = errors.New("val is wrong type") + +// avroUnionToPDKField takes an avro SchemaField with a Union type, +// and reduces it to a SchemaField with the type of one of the Types +// contained in the Union. It can only do this if the Union only has +// one type, or if it has two types and one is null. +func avroUnionToPDKField(field *avro.SchemaField) (pdk.Field, error) { + if field.Type.Type() != avro.Union { + panic("it should be impossible to call avroUnionToPDKField with a non-union SchemaField") + } + uSchema := field.Type.(*avro.UnionSchema) + nf := &avro.SchemaField{ + Name: field.Name, + Doc: field.Doc, + Default: field.Default, + Properties: field.Properties, + } + if len(uSchema.Types) == 1 { + nf.Type = uSchema.Types[0] + return avroToPDKField(nf) + } + if len(uSchema.Types) == 2 { + if uSchema.Types[0].Type() == avro.Null { + nf.Type = uSchema.Types[0] + return avroToPDKField(nf) + } else if uSchema.Types[1].Type() == avro.Null { + nf.Type = uSchema.Types[1] + return avroToPDKField(nf) + } + } + return nil, errors.New("unions are only supported when they are a single type plus optionally a Null") +} + +// The Schema type is an object produced by the schema registry. +type Schema struct { + Schema string `json:"schema"` // The actual AVRO schema + Subject string `json:"subject"` // Subject where the schema is registered for + Version int `json:"version"` // Version within this subject + ID int `json:"id"` // Registry's unique id +} + +func (s *Source) getCodec(id int32) (rschema avro.Schema, rerr error) { + if codec, ok := s.cache[id]; ok { + return codec, nil + } + + r, err := http.Get(fmt.Sprintf("http://%s/schemas/ids/%d", s.RegistryURL, id)) + if err != nil { + return nil, errors.Wrap(err, "getting schema from registry") + } + defer func() { + // hahahahahaha + rerr = r.Body.Close() + }() + if r.StatusCode >= 300 { + bod, err := ioutil.ReadAll(r.Body) + if err != nil { + return nil, errors.Wrapf(err, "Failed to get schema, code: %d, no body", r.StatusCode) + } + return nil, errors.Errorf("Failed to get schema, code: %d, resp: %s", r.StatusCode, bod) + } + dec := json.NewDecoder(r.Body) + schema := &Schema{} + err = dec.Decode(schema) + if err != nil { + return nil, errors.Wrap(err, "decoding schema from registry") + } + codec, err := avro.ParseSchema(schema.Schema) + if err != nil { + return nil, errors.Wrap(err, "parsing schema") + } + s.cache[id] = codec + return codec, rerr +} + +func avroDecode(codec avro.Schema, data []byte) (map[string]interface{}, error) { + reader := avro.NewGenericDatumReader() + // SetSchema must be called before calling Read + reader.SetSchema(codec) + + // Create a new Decoder with a given buffer + decoder := avro.NewBinaryDecoder(data) + + decodedRecord := avro.NewGenericRecord(codec) + // Read data into given GenericRecord with a given Decoder. 
The first parameter to Read should be something to read into + err := reader.Read(decodedRecord, decoder) + if err != nil { + return nil, errors.Wrap(err, "reading generic datum") + } + + return decodedRecord.Map(), nil +} + +func toUint64(val interface{}) (uint64, error) { + switch vt := val.(type) { + case uint: + return uint64(vt), nil + case uint8: + return uint64(vt), nil + case uint16: + return uint64(vt), nil + case uint32: + return uint64(vt), nil + case uint64: + return vt, nil + case int: + return uint64(vt), nil + case int8: + return uint64(vt), nil + case int16: + return uint64(vt), nil + case int32: + return uint64(vt), nil + case int64: + return uint64(vt), nil + default: + return 0, errors.Errorf("couldn't convert %v of %[1]T to uint64", vt) + } +} diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go new file mode 100644 index 0000000..b3568dd --- /dev/null +++ b/v2/kafka/source_test.go @@ -0,0 +1,77 @@ +package kafka + +import ( + "io/ioutil" + "reflect" + "strings" + "testing" + + "github.com/go-avro/avro" + pdk "github.com/pilosa/pdk/v2" +) + +func TestAvroToPDKSchema(t *testing.T) { + tests := []struct { + schemaFile string + exp []pdk.Field + expErr string + }{ + { + schemaFile: "simple.json", + exp: []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, + }, + { + schemaFile: "decimal.json", + exp: []pdk.Field{pdk.DecimalField{NameVal: "somenum", Scale: 2}}, + }, + { + schemaFile: "notarecord.json", + expErr: "unsupported Avro Schema type", + }, + { + schemaFile: "fieldisrecord.json", + expErr: "nested fields are not currently supported", + }, + } + + // check that we've covered all the test schemas + files, err := ioutil.ReadDir("./testdata/schemas") + if err != nil { + t.Fatalf("reading directory: %v", err) + } + if len(files) != len(tests) { + t.Fatalf("have different number of schemas and tests: %d and %d", len(files), len(tests)) + } + + for _, test := range tests { + t.Run(test.schemaFile, func(t *testing.T) { + codec := decodeTestSchema(t, test.schemaFile) + schema, err := avroToPDKSchema(codec) + if err != nil && test.expErr == "" { + t.Fatalf("unexpected error: %v", err) + } + if test.expErr != "" && err == nil { + t.Fatalf("expected error") + } + if test.expErr != "" && !strings.Contains(err.Error(), test.expErr) { + t.Fatalf("error expected/got\n%s\n%v", test.expErr, err.Error()) + } + if !reflect.DeepEqual(test.exp, schema) { + t.Fatalf("schema exp/got\n%+v\n%+v", test.exp, schema) + } + }) + } +} + +func decodeTestSchema(t *testing.T, filename string) avro.Schema { + bytes, err := ioutil.ReadFile("./testdata/schemas/" + filename) + if err != nil { + t.Fatalf("reading schema file: %v", err) + } + + codec, err := avro.ParseSchema(string(bytes)) + if err != nil { + t.Fatalf("parsing schema: %v", err) + } + return codec +} diff --git a/v2/kafka/testdata/schemas/decimal.json b/v2/kafka/testdata/schemas/decimal.json new file mode 100644 index 0000000..153f7bd --- /dev/null +++ b/v2/kafka/testdata/schemas/decimal.json @@ -0,0 +1,8 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "Decimal", + "fields": [ + { "name": "somenum", "type": "bytes", "logicalType": "decimal", "scale": 2, "precision": 5 } + ] +} diff --git a/v2/kafka/testdata/schemas/fieldisrecord.json b/v2/kafka/testdata/schemas/fieldisrecord.json new file mode 100644 index 0000000..9b304a4 --- /dev/null +++ b/v2/kafka/testdata/schemas/fieldisrecord.json @@ -0,0 +1,14 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + 
"fields": [ + { "name": "first", "type": "string" }, + { "name": "last", "type": + {"type": "record", + "name": "blah", + "fields": [{"name": "field0", "type": "long"}] + } + } + ] +} diff --git a/v2/kafka/testdata/schemas/notarecord.json b/v2/kafka/testdata/schemas/notarecord.json new file mode 100644 index 0000000..344f0d1 --- /dev/null +++ b/v2/kafka/testdata/schemas/notarecord.json @@ -0,0 +1,4 @@ +{ + "type": "map", + "values": "long" +} diff --git a/v2/kafka/testdata/schemas/simple.json b/v2/kafka/testdata/schemas/simple.json new file mode 100644 index 0000000..c779ebd --- /dev/null +++ b/v2/kafka/testdata/schemas/simple.json @@ -0,0 +1,9 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + "fields": [ + { "name": "first", "type": "string" }, + { "name": "last", "type": "string" } + ] +} From 5e795d056eb98aa838479542115a0679a11a5bef Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 27 Sep 2019 14:51:57 -0500 Subject: [PATCH 13/40] more complete testing of Source.Record --- go.mod | 7 +- go.sum | 13 +++ kafka/source.go | 2 +- v2/kafka/source.go | 72 +++++++++--- v2/kafka/source_test.go | 128 ++++++++++++++++++++- v2/kafka/testdata/schemas/othertypes.json | 14 +++ v2/kafka/testdata/schemas/stringtypes.json | 10 ++ v2/kafka/testdata/schemas/unions.json | 11 ++ 8 files changed, 231 insertions(+), 26 deletions(-) create mode 100644 v2/kafka/testdata/schemas/othertypes.json create mode 100644 v2/kafka/testdata/schemas/stringtypes.json create mode 100644 v2/kafka/testdata/schemas/unions.json diff --git a/go.mod b/go.mod index 84184cf..a364ebc 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 +replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa + +replace github.com/go-avro/avro => /Users/jaffee/go/src/github.com/go-avro/avro require ( github.com/Shopify/sarama v1.19.0 @@ -12,9 +14,11 @@ require ( github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect github.com/eapache/queue v1.1.0 // indirect github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba + github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 github.com/hashicorp/go-uuid v1.0.1 // indirect github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 + github.com/linkedin/goavro/v2 v2.9.6 github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect @@ -28,6 +32,7 @@ require ( github.com/spf13/viper v1.4.0 github.com/stretchr/testify v1.3.0 // indirect github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 + github.com/y0ssar1an/q v1.0.7 golang.org/x/sync v0.0.0-20190423024810-112230192c58 gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) diff --git a/go.sum b/go.sum index dd4a370..180258b 100644 --- a/go.sum +++ b/go.sum @@ -63,6 +63,8 @@ github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba/go.mod h1:3A7SOsr8 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 h1:yswqe8UdKNWn4kjh1YTaAbvOSPeg95xhW7h4qeICL5E= +github.com/go-avro/avro 
v0.0.0-20171219232920-444163702c11/go.mod h1:kxj6THYP0dmFPk4Z+bijIAhJoGgeBfyOKXMduhvdJPA= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= @@ -91,6 +93,8 @@ github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c h1:964Od4U6p2jUkFxvCydnIczKteheJEzHRToSGK3Bnlw= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= @@ -153,6 +157,8 @@ github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 h1:DyezXZFZgOssV5VH/a7+XH3iXNtj8SFX8FKiEZbu3iY= github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= +github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666 h1:+O5nPgJ3ByJZxr0/rjNX1tYCEffoOJ+Q4T2589EkFaE= +github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -162,12 +168,17 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 h1:T+kPxsfvkFtz7x6ysgOYjki7khHjowQW6DD1rcpOS0Q= github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088/go.mod h1:vL3ODoWTPCBSeKVFgQ+lvSq0VOzTB5TcXvUX+4pU/+Q= +github.com/linkedin/goavro v2.1.0+incompatible h1:DV2aUlj2xZiuxQyvag8Dy7zjY69ENjS66bWkSfdpddY= +github.com/linkedin/goavro/v2 v2.9.6 h1:Qh8M4/oWMSJ8V3pKCl9QRZOZnefg/vU56t47AwzaSoQ= 
+github.com/linkedin/goavro/v2 v2.9.6/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= @@ -306,6 +317,8 @@ github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljT github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/y0ssar1an/q v1.0.7 h1:s3ckTY+wjk6Y0sFce4rIS1Ezf8S6d0UFJrKwe40MyiQ= +github.com/y0ssar1an/q v1.0.7/go.mod h1:Q1Rk1StqWjSOfA/CF4zJEW1fLmkl5Cy8EsILdkB+DgE= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= diff --git a/kafka/source.go b/kafka/source.go index bd54df4..2203721 100644 --- a/kafka/source.go +++ b/kafka/source.go @@ -43,7 +43,7 @@ import ( "sync" "github.com/Shopify/sarama" - "github.com/bsm/sarama-cluster" + cluster "github.com/bsm/sarama-cluster" "github.com/elodina/go-avro" "github.com/pkg/errors" ) diff --git a/v2/kafka/source.go b/v2/kafka/source.go index 0d720d8..d610ced 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -141,9 +141,6 @@ func (s *Source) Record() (pdk.Record, error) { msg, ok := <-s.messages s.stash.MarkOffset(msg, "") if ok { - ret := &Record{ // TODO reuse Record between calls - src: s, - } val, err := s.decodeAvroValueWithSchemaRegistry(msg.Value) if err != nil && err != pdk.ErrSchemaChange { return nil, errors.Wrap(err, "decoding with schema registry") @@ -151,19 +148,23 @@ func (s *Source) Record() (pdk.Record, error) { if err == pdk.ErrSchemaChange { s.record.data = make([]interface{}, len(s.lastSchema)) } - err = s.toPDKRecord(val.(map[string]interface{})) // val must be map[string]interface{} because we only accept schemas which are Record type at the top level. - if err != nil { + recErr := s.toPDKRecord(val.(map[string]interface{})) // val must be map[string]interface{} because we only accept schemas which are Record type at the top level. + if recErr != nil { // reset lastSchema so if Record gets called again, and // the schema just changed, we'll notify of the change. 
s.lastSchema = nil s.lastSchemaID = -1 - return nil, errors.Wrap(err, "converting to PDK Record") + return nil, errors.Wrap(recErr, "converting to PDK Record") } - return ret, err // err must be nil or ErrSchemaChange at this point + return s.record, err // err must be nil or ErrSchemaChange at this point } return nil, errors.New("messages channel closed") } +func (s *Source) Schema() []pdk.Field { + return s.lastSchema +} + func (s *Source) toPDKRecord(vals map[string]interface{}) error { r := s.record for i, field := range s.lastSchema { @@ -179,6 +180,9 @@ func (s *Source) toPDKRecord(vals map[string]interface{}) error { } else if len(vb) < 8 { copy(s.decBytes[8-len(vb):], vb) r.data[i] = binary.BigEndian.Uint64(s.decBytes) + for i := 8 - len(vb); i >= 0; i-- { + s.decBytes[i] = 0 + } } else { return errors.Errorf("can't support decimals of greater than 8 bytes, got %d for %s", len(vb), field.Name()) } @@ -311,14 +315,11 @@ func avroToPDKField(aField *avro.SchemaField) (pdk.Field, error) { } return pdkField, nil case avro.Bytes, avro.Fixed: - fmt.Println("in bytes") if lt, _ := stringProp(aField, "logicalType"); lt == "decimal" { - fmt.Println("in decimal") precision, err := intProp(aField, "precision") if precision > 18 || precision < 1 { return nil, errors.Errorf("need precision for decimal in 1-18, but got:%d, err:%v", precision, err) } - fmt.Println("got precision", precision) scale, err := intProp(aField, "scale") if scale > precision || err == wrongType { return nil, errors.Errorf("0<=scale<=precision, got:%d err:%v", scale, err) @@ -395,6 +396,7 @@ func intProp(p propper, s string) (int, error) { if !ok { return 0, notFound } + // json decodes numeric values into float64 floatVal, ok := ival.(float64) if !ok { return 0, wrongType @@ -419,27 +421,59 @@ func avroUnionToPDKField(field *avro.SchemaField) (pdk.Field, error) { } uSchema := field.Type.(*avro.UnionSchema) nf := &avro.SchemaField{ - Name: field.Name, - Doc: field.Doc, - Default: field.Default, - Properties: field.Properties, + Name: field.Name, + Doc: field.Doc, + Default: field.Default, } if len(uSchema.Types) == 1 { nf.Type = uSchema.Types[0] return avroToPDKField(nf) } if len(uSchema.Types) == 2 { + var useType avro.Schema if uSchema.Types[0].Type() == avro.Null { - nf.Type = uSchema.Types[0] - return avroToPDKField(nf) + useType = uSchema.Types[1] } else if uSchema.Types[1].Type() == avro.Null { - nf.Type = uSchema.Types[1] - return avroToPDKField(nf) + useType = uSchema.Types[0] + } else { + return nil, errors.New("unions are only supported when one type is Null") } + nf.Type = useType + nf.Properties = propertiesFromSchema(useType) + return avroToPDKField(nf) } return nil, errors.New("unions are only supported when they are a single type plus optionally a Null") } +// propertiesFromSchema (document and use!) 
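+// It pulls the Properties map off of the concrete avro.Schema type;
+// go-avro keeps custom schema attributes such as "logicalType",
+// "scale", and "precision" there, so copying them onto the synthesized
+// SchemaField in avroUnionToPDKField lets avroToPDKField still see
+// them when, for example, a decimal is unwrapped from a union. The
+// primitive and union cases simply return nil.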
+func propertiesFromSchema(sch avro.Schema) map[string]interface{} { + switch schT := sch.(type) { + case *avro.StringSchema, *avro.IntSchema, *avro.LongSchema, *avro.FloatSchema, *avro.DoubleSchema, *avro.BooleanSchema, *avro.NullSchema, *avro.UnionSchema: + return nil + case *avro.BytesSchema: + return schT.Properties + case *avro.RecordSchema: + return schT.Properties + case *avro.RecursiveSchema: + if schT.Actual != nil { + return schT.Actual.Properties + } + return nil + case *avro.EnumSchema: + return schT.Properties + case *avro.ArraySchema: + return schT.Properties + case *avro.MapSchema: + return schT.Properties + case *avro.FixedSchema: + return schT.Properties + default: + // TODO handle logging properly (e.g. respect log path, use an interface logger, etc.) + log.Printf("unhandled avro.Schema concrete type %T in propertiesFromSchema, value: %+v", schT, schT) + return nil + } +} + // The Schema type is an object produced by the schema registry. type Schema struct { Schema string `json:"schema"` // The actual AVRO schema @@ -458,7 +492,7 @@ func (s *Source) getCodec(id int32) (rschema avro.Schema, rerr error) { return nil, errors.Wrap(err, "getting schema from registry") } defer func() { - // hahahahahaha + // TODO this might obscure a more important error? rerr = r.Body.Close() }() if r.StatusCode >= 300 { diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index b3568dd..9926d6c 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -1,12 +1,16 @@ package kafka import ( + "encoding/binary" "io/ioutil" + "math/big" "reflect" "strings" "testing" + "github.com/Shopify/sarama" "github.com/go-avro/avro" + liavro "github.com/linkedin/goavro/v2" pdk "github.com/pilosa/pdk/v2" ) @@ -18,11 +22,23 @@ func TestAvroToPDKSchema(t *testing.T) { }{ { schemaFile: "simple.json", - exp: []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, + exp: expectedSchemas["simple.json"], + }, + { + schemaFile: "stringtypes.json", + exp: expectedSchemas["stringtypes.json"], }, { schemaFile: "decimal.json", - exp: []pdk.Field{pdk.DecimalField{NameVal: "somenum", Scale: 2}}, + exp: expectedSchemas["decimal.json"], + }, + { + schemaFile: "othertypes.json", + exp: expectedSchemas["othertypes.json"], + }, + { + schemaFile: "unions.json", + exp: expectedSchemas["unions.json"], }, { schemaFile: "notarecord.json", @@ -40,7 +56,7 @@ func TestAvroToPDKSchema(t *testing.T) { t.Fatalf("reading directory: %v", err) } if len(files) != len(tests) { - t.Fatalf("have different number of schemas and tests: %d and %d", len(files), len(tests)) + t.Errorf("have different number of schemas and tests: %d and %d\n%+v", len(files), len(tests), files) } for _, test := range tests { @@ -64,14 +80,116 @@ func TestAvroToPDKSchema(t *testing.T) { } func decodeTestSchema(t *testing.T, filename string) avro.Schema { + codec, err := avro.ParseSchema(readTestSchema(t, filename)) + if err != nil { + t.Fatalf("parsing schema: %v", err) + } + return codec +} + +func readTestSchema(t *testing.T, filename string) string { bytes, err := ioutil.ReadFile("./testdata/schemas/" + filename) if err != nil { t.Fatalf("reading schema file: %v", err) } + return string(bytes) +} - codec, err := avro.ParseSchema(string(bytes)) +func liDecodeTestSchema(t *testing.T, filename string) *liavro.Codec { + codec, err := liavro.NewCodec(readTestSchema(t, filename)) if err != nil { - t.Fatalf("parsing schema: %v", err) + t.Fatalf("li parsing schema: %v", err) } return codec } + +func TestKafkaSource(t 
*testing.T) { + // this is not an integration test, so we'll take steps to avoid + // actually connecting to Kafka or Schema Registry. + + tests := []struct { + data []map[string]interface{} + schemaFile string + exp [][]interface{} + }{ + { + schemaFile: "simple.json", + data: []map[string]interface{}{{"first": "hello", "last": "goodbye"}, {"first": "one", "last": "two"}}, + exp: [][]interface{}{{"hello", "goodbye"}, {"one", "two"}}, + }, + { + schemaFile: "stringtypes.json", + data: []map[string]interface{}{{"first": "blah", "last": "goodbye", "middle": "123456789"}}, + exp: [][]interface{}{{"blah", []byte("goodbye"), []byte("123456789")}}, + }, + { + schemaFile: "decimal.json", + data: []map[string]interface{}{{"somenum": &big.Rat{}}, {"somenum": big.NewRat(10, 1)}, {"somenum": big.NewRat(1, 1)}, {"somenum": big.NewRat(5, 2)}, {"somenum": big.NewRat(1234567890, 1)}}, + exp: [][]interface{}{[]interface{}{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, + }, + // { + // schemaFile: "othertypes.json", + // data: []map[string]interface{}{}, + // }, + // { + // schemaFile: "unions.json", + // data: []map[string]interface{}{}, + // }, + } + + src := NewSource() + // note: we will not call Open on the source which would connect + // to Kafka. Instead, we'll set the src.messages manually so we + // can inject messages. + messages := make(chan *sarama.ConsumerMessage, 10) + src.messages = messages + + for i, test := range tests { + i := i + schema := liDecodeTestSchema(t, test.schemaFile) + + // prefill the schema cache so the registry isn't contacted. + src.cache[int32(i)] = decodeTestSchema(t, test.schemaFile) + t.Run(test.schemaFile, func(t *testing.T) { + + for j, record := range test.data { + buf := make([]byte, 5, 1000) + buf[0] = 0 + binary.BigEndian.PutUint32(buf[1:], uint32(i)) + buf, err := schema.BinaryFromNative(buf, record) + if err != nil { + t.Errorf("encoding:\n%+v\nerr: %v", record, err) + } + + messages <- &sarama.ConsumerMessage{Value: buf} + pdkRec, err := src.Record() + if j == 0 { + if err != pdk.ErrSchemaChange { + t.Errorf("expected schema changed signal, got: %v", err) + } + gotSchema := src.Schema() + if !reflect.DeepEqual(gotSchema, expectedSchemas[test.schemaFile]) { + t.Errorf("unexpected schema got/exp:\n%+v\n%+v", gotSchema, expectedSchemas[test.schemaFile]) + } + } else if err != nil { + t.Fatalf("unexpected error getting record: %v", err) + } + if pdkRec == nil { + t.Fatalf("should have a record") + } + if !reflect.DeepEqual(pdkRec.Data(), test.exp[j]) { + t.Errorf("data mismatch exp/got:\n%+v\n%+v\n%[1]T %[2]T", test.exp[j][0], pdkRec.Data()[0]) + } + } + }) + } + +} + +var expectedSchemas = map[string][]pdk.Field{ + "simple.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, + "stringtypes.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}, pdk.StringField{NameVal: "middle"}}, + "decimal.json": []pdk.Field{pdk.DecimalField{NameVal: "somenum", Scale: 2}}, + "unions.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.BoolField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.DecimalField{NameVal: "fourth", Scale: 3}}, + "othertypes.json": []pdk.Field{pdk.StringField{NameVal: "first", Mutex: true}, pdk.StringArrayField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.IntField{NameVal: "fourth"}, pdk.IntField{NameVal: "fifth"}, pdk.IntField{NameVal: "sixth"}, pdk.BoolField{NameVal: "seventh"}}, +} diff --git 
a/v2/kafka/testdata/schemas/othertypes.json b/v2/kafka/testdata/schemas/othertypes.json new file mode 100644 index 0000000..a09e5ff --- /dev/null +++ b/v2/kafka/testdata/schemas/othertypes.json @@ -0,0 +1,14 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + "fields": [ + { "name": "first", "type": "string", "mutex": true }, + { "name": "second", "type": {"type": "array", "items": "string"}}, + { "name": "third", "type": "int"}, + { "name": "fourth", "type": "long"}, + { "name": "fifth", "type": "float"}, + { "name": "sixth", "type": "double"}, + { "name": "seventh", "type": "boolean"} + ] +} diff --git a/v2/kafka/testdata/schemas/stringtypes.json b/v2/kafka/testdata/schemas/stringtypes.json new file mode 100644 index 0000000..5bf9e6a --- /dev/null +++ b/v2/kafka/testdata/schemas/stringtypes.json @@ -0,0 +1,10 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + "fields": [ + { "name": "first", "type": {"name": "firsts", "type": "enum", "symbols": ["blah", "bleh"]} }, + { "name": "last", "type": "bytes" }, + {"name": "middle", "type": {"type": "fixed", "name": "middlefixed", "size": 9}} + ] +} diff --git a/v2/kafka/testdata/schemas/unions.json b/v2/kafka/testdata/schemas/unions.json new file mode 100644 index 0000000..79ad56d --- /dev/null +++ b/v2/kafka/testdata/schemas/unions.json @@ -0,0 +1,11 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + "fields": [ + { "name": "first", "type": [null, "string"]}, + { "name": "second", "type": [null, "boolean"]}, + { "name": "third", "type": [null, "long"]}, + { "name": "fourth", "type": [null, {"type": "bytes", "logicalType": "decimal", "scale": 3, "precision": 8}]} + ] +} From 1038c73db6fe77dd61a39438973e22219c4804a1 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 27 Sep 2019 15:45:57 -0500 Subject: [PATCH 14/40] v2 kafka source unit tests looking good --- v2/kafka/source.go | 9 +++++-- v2/kafka/source_test.go | 39 +++++++++++++++++++-------- v2/kafka/testdata/schemas/unions.json | 8 +++--- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/v2/kafka/source.go b/v2/kafka/source.go index d610ced..2b2317e 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -15,6 +15,7 @@ import ( "github.com/pilosa/go-pilosa" pdk "github.com/pilosa/pdk/v2" "github.com/pkg/errors" + // "github.com/y0ssar1an/q" ) type Main struct { @@ -169,6 +170,10 @@ func (s *Source) toPDKRecord(vals map[string]interface{}) error { r := s.record for i, field := range s.lastSchema { val := vals[field.Name()] + if val == nil { + r.data[i] = nil + continue + } switch field.(type) { case pdk.DecimalField: vb, ok := val.([]byte) @@ -180,8 +185,8 @@ func (s *Source) toPDKRecord(vals map[string]interface{}) error { } else if len(vb) < 8 { copy(s.decBytes[8-len(vb):], vb) r.data[i] = binary.BigEndian.Uint64(s.decBytes) - for i := 8 - len(vb); i >= 0; i-- { - s.decBytes[i] = 0 + for j := range s.decBytes { + s.decBytes[j] = 0 } } else { return errors.Errorf("can't support decimals of greater than 8 bytes, got %d for %s", len(vb), field.Name()) diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index 9926d6c..0e5a932 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -125,16 +125,23 @@ func TestKafkaSource(t *testing.T) { { schemaFile: "decimal.json", data: []map[string]interface{}{{"somenum": &big.Rat{}}, {"somenum": big.NewRat(10, 1)}, {"somenum": big.NewRat(1, 1)}, {"somenum": big.NewRat(5, 2)}, {"somenum": big.NewRat(1234567890, 1)}}, - exp: 
[][]interface{}{[]interface{}{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, + exp: [][]interface{}{{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, + }, + { + schemaFile: "othertypes.json", + data: []map[string]interface{}{{"first": "a", "second": []string{"b", "c"}, "third": -8, "fourth": 99, "fifth": 99.9, "sixth": 101.1, "seventh": true}}, + exp: [][]interface{}{{"a", []interface{}{"b", "c"}, int32(-8), int64(99), float32(99.9), float64(101.1), true}}, + }, + { + schemaFile: "unions.json", + data: []map[string]interface{}{ + {"first": map[string]interface{}{"string": "a"}, "second": map[string]interface{}{"boolean": true}, "third": map[string]interface{}{"long": 101}, "fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)}}, + {"first": nil, "second": nil, "third": map[string]interface{}{"null": nil}, "fourth": nil}, + }, + exp: [][]interface{}{ + {"a", true, int64(101), uint64(2500)}, + {nil, nil, nil, nil}}, }, - // { - // schemaFile: "othertypes.json", - // data: []map[string]interface{}{}, - // }, - // { - // schemaFile: "unions.json", - // data: []map[string]interface{}{}, - // }, } src := NewSource() @@ -177,8 +184,18 @@ func TestKafkaSource(t *testing.T) { if pdkRec == nil { t.Fatalf("should have a record") } - if !reflect.DeepEqual(pdkRec.Data(), test.exp[j]) { - t.Errorf("data mismatch exp/got:\n%+v\n%+v\n%[1]T %[2]T", test.exp[j][0], pdkRec.Data()[0]) + data := pdkRec.Data() + if !reflect.DeepEqual(data, test.exp[j]) { + t.Errorf("data mismatch exp/got:\n%+v\n%+v", test.exp[j], data) + if len(data) != len(test.exp[j]) { + t.Fatalf("mismatched lengths exp/got %d/%d", len(test.exp[j]), len(data)) + } + for k := range test.exp[j] { + if !reflect.DeepEqual(test.exp[j][k], data[k]) { + t.Errorf("Mismatch at %d, exp/got\n%v of %[2]T\n%v of %[3]T", k, test.exp[j][k], data[k]) + } + } + } } }) diff --git a/v2/kafka/testdata/schemas/unions.json b/v2/kafka/testdata/schemas/unions.json index 79ad56d..f3297d7 100644 --- a/v2/kafka/testdata/schemas/unions.json +++ b/v2/kafka/testdata/schemas/unions.json @@ -3,9 +3,9 @@ "namespace": "com.example", "name": "FullName", "fields": [ - { "name": "first", "type": [null, "string"]}, - { "name": "second", "type": [null, "boolean"]}, - { "name": "third", "type": [null, "long"]}, - { "name": "fourth", "type": [null, {"type": "bytes", "logicalType": "decimal", "scale": 3, "precision": 8}]} + { "name": "first", "type": ["null", "string"]}, + { "name": "second", "type": ["null", "boolean"]}, + { "name": "third", "type": ["null", "long"]}, + { "name": "fourth", "type": ["null", {"type": "bytes", "logicalType": "decimal", "scale": 3, "precision": 8}]} ] } From fc3ddb2d19a764e699fd2f0ebfdd2051de030848 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Mon, 30 Sep 2019 09:16:04 -0500 Subject: [PATCH 15/40] full kafka source integration test working --- go.mod | 1 + go.sum | 2 + v2/kafka/csrc/csrc.go | 88 ++++++++++++ v2/kafka/csrc/csrc_integration_test.go | 41 ++++++ v2/kafka/source.go | 7 +- v2/kafka/source_test.go | 178 +++++++++++++++++++------ 6 files changed, 276 insertions(+), 41 deletions(-) create mode 100644 v2/kafka/csrc/csrc.go create mode 100644 v2/kafka/csrc/csrc_integration_test.go diff --git a/go.mod b/go.mod index a364ebc..6c9faf6 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 github.com/y0ssar1an/q v1.0.7 golang.org/x/sync v0.0.0-20190423024810-112230192c58 + gopkg.in/avro.v0 
v0.0.0-20171217001914-a730b5802183 // indirect gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect ) diff --git a/go.sum b/go.sum index 180258b..a958940 100644 --- a/go.sum +++ b/go.sum @@ -436,6 +436,8 @@ google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ij google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.22.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183 h1:PGIdqvwfpMUyUP+QAlAnKTSWQ671SmYjoou2/5j7HXk= +gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183/go.mod h1:FvqrFXt+jCsyQibeRv4xxEJBL5iG2DDW5aeJwzDiq4A= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/v2/kafka/csrc/csrc.go b/v2/kafka/csrc/csrc.go new file mode 100644 index 0000000..7515d60 --- /dev/null +++ b/v2/kafka/csrc/csrc.go @@ -0,0 +1,88 @@ +package csrc + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "strings" + + "github.com/pkg/errors" +) + +type Client struct { + URL string +} + +func NewClient(url string) *Client { + if !strings.HasPrefix(url, "http") { + url = "http://" + url + } + return &Client{ + URL: url, + } +} + +// GetSchema gets the schema with the ID. +// https://docs.confluent.io/current/schema-registry/develop/api.html#get--schemas-ids-int-%20id +func (c *Client) GetSchema(id int) (string, error) { + sr := SchemaResponse{} + resp, err := http.Get(fmt.Sprintf("%s/schemas/ids/%d", c.URL, id)) + err = unmarshalRespErr(resp, err, &sr) + if err != nil { + return "", errors.Wrap(err, "making http request") + } + return sr.Schema, nil +} + +type SchemaResponse struct { + Schema string `json:"schema"` // The actual AVRO schema + Subject string `json:"subject"` // Subject where the schema is registered for + Version int `json:"version"` // Version within this subject + ID int `json:"id"` // Registry's unique id +} + +type ErrorResponse struct { + StatusCode int `json:"error_code"` + Body string `json:"message"` +} + +func (e *ErrorResponse) Error() string { + return fmt.Sprintf("status %d: %s", e.StatusCode, e.Body) +} + +func (c *Client) PostSubjects(subj, schema string) (*SchemaResponse, error) { + schema = strings.Replace(schema, "\t", "", -1) + schema = strings.Replace(schema, "\n", `\n`, -1) + schema = fmt.Sprintf(`{"schema": "%s"}`, strings.Replace(schema, `"`, `\"`, -1)) // this is probably terrible + resp, err := http.Post(fmt.Sprintf("%s/subjects/%s/versions", c.URL, subj), "application/json", strings.NewReader(schema)) + sr := &SchemaResponse{} + err = unmarshalRespErr(resp, err, sr) + if err != nil { + return nil, errors.Wrap(err, "unmarshaling resp") + } + return sr, nil +} + +func unmarshalRespErr(resp *http.Response, err error, into interface{}) error { + if err != nil { + return errors.Wrap(err, "making http request") + } + if resp.StatusCode != 200 { + bod, err := ioutil.ReadAll(resp.Body) + if err != nil { + return errors.Wrap(err, "reading body") + } + errResp := &ErrorResponse{ + StatusCode: resp.StatusCode, + Body: string(bod), + } + return errResp + } + dec := json.NewDecoder(resp.Body) + err = dec.Decode(into) + if err != nil { + return errors.Wrap(err, "unmarshaling 
body") + } + return nil +} diff --git a/v2/kafka/csrc/csrc_integration_test.go b/v2/kafka/csrc/csrc_integration_test.go new file mode 100644 index 0000000..b541b2a --- /dev/null +++ b/v2/kafka/csrc/csrc_integration_test.go @@ -0,0 +1,41 @@ +package csrc_test + +import ( + "testing" + + "github.com/pilosa/pdk/v2/kafka/csrc" +) + +func TestPostGet(t *testing.T) { + if testing.Short() { + t.Skip() + } + sr := "localhost:8081" + client := csrc.NewClient(sr) + + schemaStr := `{"type":"record","name":"a","fields":[{"name":"blah","type":"string"}]}` + r, err := client.PostSubjects("aname", schemaStr) + if err != nil { + t.Fatalf("postsubjects: %v", err) + } + + // Docs indicate that schema and subject should be returned by the + // POST, but they are not. + // + // if r.Schema != schemaStr { + // t.Errorf("wrong schema: %s", r.Schema) + // } + + // if r.Subject != "aname" { + // t.Errorf("wrong name: %v", r.Subject) + // } + + sch, err := client.GetSchema(r.ID) + if err != nil { + t.Fatalf("getting schema: %v", err) + } + + if sch != schemaStr { + t.Errorf("unexpected schema\n%s\n%s", sch, schemaStr) + } +} diff --git a/v2/kafka/source.go b/v2/kafka/source.go index 2b2317e..743bd4b 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -114,9 +114,10 @@ type Source struct { // NewSource gets a new Source func NewSource() *Source { src := &Source{ - Hosts: []string{"localhost:9092"}, - Topics: []string{"test"}, - Group: "group0", + Hosts: []string{"localhost:9092"}, + Topics: []string{"test"}, + Group: "group0", + RegistryURL: "localhost:8081", lastSchemaID: -1, cache: make(map[int32]avro.Schema), diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index 0e5a932..1e2f965 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -2,16 +2,20 @@ package kafka import ( "encoding/binary" + "fmt" "io/ioutil" "math/big" + "math/rand" "reflect" "strings" "testing" + "time" "github.com/Shopify/sarama" "github.com/go-avro/avro" liavro "github.com/linkedin/goavro/v2" pdk "github.com/pilosa/pdk/v2" + "github.com/pilosa/pdk/v2/kafka/csrc" ) func TestAvroToPDKSchema(t *testing.T) { @@ -103,47 +107,47 @@ func liDecodeTestSchema(t *testing.T, filename string) *liavro.Codec { return codec } -func TestKafkaSource(t *testing.T) { +var tests = []struct { + data []map[string]interface{} + schemaFile string + exp [][]interface{} +}{ + { + schemaFile: "simple.json", + data: []map[string]interface{}{{"first": "hello", "last": "goodbye"}, {"first": "one", "last": "two"}}, + exp: [][]interface{}{{"hello", "goodbye"}, {"one", "two"}}, + }, + { + schemaFile: "stringtypes.json", + data: []map[string]interface{}{{"first": "blah", "last": "goodbye", "middle": "123456789"}}, + exp: [][]interface{}{{"blah", []byte("goodbye"), []byte("123456789")}}, + }, + { + schemaFile: "decimal.json", + data: []map[string]interface{}{{"somenum": &big.Rat{}}, {"somenum": big.NewRat(10, 1)}, {"somenum": big.NewRat(1, 1)}, {"somenum": big.NewRat(5, 2)}, {"somenum": big.NewRat(1234567890, 1)}}, + exp: [][]interface{}{{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, + }, + { + schemaFile: "othertypes.json", + data: []map[string]interface{}{{"first": "a", "second": []string{"b", "c"}, "third": -8, "fourth": 99, "fifth": 99.9, "sixth": 101.1, "seventh": true}}, + exp: [][]interface{}{{"a", []interface{}{"b", "c"}, int32(-8), int64(99), float32(99.9), float64(101.1), true}}, + }, + { + schemaFile: "unions.json", + data: []map[string]interface{}{ + {"first": map[string]interface{}{"string": 
"a"}, "second": map[string]interface{}{"boolean": true}, "third": map[string]interface{}{"long": 101}, "fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)}}, + {"first": nil, "second": nil, "third": map[string]interface{}{"null": nil}, "fourth": nil}, + }, + exp: [][]interface{}{ + {"a", true, int64(101), uint64(2500)}, + {nil, nil, nil, nil}}, + }, +} + +func TestKafkaSourceLocal(t *testing.T) { // this is not an integration test, so we'll take steps to avoid // actually connecting to Kafka or Schema Registry. - tests := []struct { - data []map[string]interface{} - schemaFile string - exp [][]interface{} - }{ - { - schemaFile: "simple.json", - data: []map[string]interface{}{{"first": "hello", "last": "goodbye"}, {"first": "one", "last": "two"}}, - exp: [][]interface{}{{"hello", "goodbye"}, {"one", "two"}}, - }, - { - schemaFile: "stringtypes.json", - data: []map[string]interface{}{{"first": "blah", "last": "goodbye", "middle": "123456789"}}, - exp: [][]interface{}{{"blah", []byte("goodbye"), []byte("123456789")}}, - }, - { - schemaFile: "decimal.json", - data: []map[string]interface{}{{"somenum": &big.Rat{}}, {"somenum": big.NewRat(10, 1)}, {"somenum": big.NewRat(1, 1)}, {"somenum": big.NewRat(5, 2)}, {"somenum": big.NewRat(1234567890, 1)}}, - exp: [][]interface{}{{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, - }, - { - schemaFile: "othertypes.json", - data: []map[string]interface{}{{"first": "a", "second": []string{"b", "c"}, "third": -8, "fourth": 99, "fifth": 99.9, "sixth": 101.1, "seventh": true}}, - exp: [][]interface{}{{"a", []interface{}{"b", "c"}, int32(-8), int64(99), float32(99.9), float64(101.1), true}}, - }, - { - schemaFile: "unions.json", - data: []map[string]interface{}{ - {"first": map[string]interface{}{"string": "a"}, "second": map[string]interface{}{"boolean": true}, "third": map[string]interface{}{"long": 101}, "fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)}}, - {"first": nil, "second": nil, "third": map[string]interface{}{"null": nil}, "fourth": nil}, - }, - exp: [][]interface{}{ - {"a", true, int64(101), uint64(2500)}, - {nil, nil, nil, nil}}, - }, - } - src := NewSource() // note: we will not call Open on the source which would connect // to Kafka. Instead, we'll set the src.messages manually so we @@ -203,6 +207,104 @@ func TestKafkaSource(t *testing.T) { } +// TestKafkaSource uses a real Kafka and Schema Registry. I downloaded +// the tar archive of the Confluent Platform (self managed software) +// from confluent.io/download (I got version 5.3.1). I ran `tar xzf` +// on the file, changed into the directory, ran `curl -L +// https://cnfl.io/cli | sh -s -- -b /Users/jaffee/bin` (that +// directory is on my PATH), then ran `confluent local start +// schema-registry`. +// +// I find that this test runs much faster after a `confluent local +// destroy` followed by `confluent local start schema-registry`. The +// difference is stark—10s of seconds—and I don't know why this should +// be, but I think it has something to do with kafka rebalancing +// itself when a new client joins. 
+func TestKafkaSourceIntegration(t *testing.T) { + if testing.Short() { + t.Skip() + } + src := NewSource() + src.Topics = []string{"test"} + src.Group = "group0" + err := src.Open() + if err != nil { + t.Fatalf("opening source: %v", err) + } + + schemaClient := csrc.NewClient("localhost:8081") + + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 + conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + t.Fatalf("getting new producer: %v", err) + } + defer producer.Close() + + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + + key := fmt.Sprintf("%d", rnd.Int()) + for i, test := range tests { + schemaStr := readTestSchema(t, test.schemaFile) + resp, err := schemaClient.PostSubjects(fmt.Sprintf("schema%d", i), schemaStr) + if err != nil { + t.Fatalf("posting schema: %v", err) + } + schemaID := resp.ID + schema := liDecodeTestSchema(t, test.schemaFile) + t.Run(test.schemaFile, func(t *testing.T) { + + for j, record := range test.data { + buf := make([]byte, 5, 1000) + buf[0] = 0 + binary.BigEndian.PutUint32(buf[1:], uint32(schemaID)) + buf, err := schema.BinaryFromNative(buf, record) + if err != nil { + t.Errorf("encoding:\n%+v\nerr: %v", record, err) + } + + // post buf to kafka + _, _, err = producer.SendMessage(&sarama.ProducerMessage{Topic: "test", Key: sarama.StringEncoder(key), Value: sarama.ByteEncoder(buf)}) + if err != nil { + t.Fatalf("sending message to kafka: %v", err) + } + + pdkRec, err := src.Record() + if j == 0 { + if err != pdk.ErrSchemaChange { + t.Errorf("expected schema changed signal, got: %v", err) + } + gotSchema := src.Schema() + if !reflect.DeepEqual(gotSchema, expectedSchemas[test.schemaFile]) { + t.Errorf("unexpected schema got/exp:\n%+v\n%+v", gotSchema, expectedSchemas[test.schemaFile]) + } + } else if err != nil { + t.Fatalf("unexpected error getting record: %v", err) + } + if pdkRec == nil { + t.Fatalf("should have a record") + } + data := pdkRec.Data() + if !reflect.DeepEqual(data, test.exp[j]) { + t.Errorf("data mismatch exp/got:\n%+v\n%+v", test.exp[j], data) + if len(data) != len(test.exp[j]) { + t.Fatalf("mismatched lengths exp/got %d/%d", len(test.exp[j]), len(data)) + } + for k := range test.exp[j] { + if !reflect.DeepEqual(test.exp[j][k], data[k]) { + t.Errorf("Mismatch at %d, exp/got\n%v of %[2]T\n%v of %[3]T", k, test.exp[j][k], data[k]) + } + } + + } + } + }) + } + +} + var expectedSchemas = map[string][]pdk.Field{ "simple.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, "stringtypes.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}, pdk.StringField{NameVal: "middle"}}, From 7102a1d519b43dcca5db678738072414f5135901 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 4 Oct 2019 12:54:43 -0500 Subject: [PATCH 16/40] split out cmd and source, implement cmd, tests --- go.mod | 4 +- go.sum | 4 + v2/kafka/cmd.go | 388 +++++++++++++++++++++++++++++++++++++++++++ v2/kafka/cmd_test.go | 266 +++++++++++++++++++++++++++++ v2/kafka/source.go | 63 ------- 5 files changed, 660 insertions(+), 65 deletions(-) create mode 100644 v2/kafka/cmd.go create mode 100644 v2/kafka/cmd_test.go diff --git a/go.mod b/go.mod index 6c9faf6..2431e36 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa +replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa 
v0.4.1-0.20191004142116-bfe8680b131f -replace github.com/go-avro/avro => /Users/jaffee/go/src/github.com/go-avro/avro +replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 require ( github.com/Shopify/sarama v1.19.0 diff --git a/go.sum b/go.sum index a958940..454223a 100644 --- a/go.sum +++ b/go.sum @@ -151,6 +151,8 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 h1:CQuJJwOitTYqHgp52XQJ/9DVcNJM+MMfFMmR/jhMIMg= +github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22/go.mod h1:6ilXMAGKrNFwlSrER0Y6hkZeJOH0ogH6I+90pCh6d1U= github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMSWU= github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= @@ -159,6 +161,8 @@ github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 h1:DyezXZFZgOss github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666 h1:+O5nPgJ3ByJZxr0/rjNX1tYCEffoOJ+Q4T2589EkFaE= github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= +github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f h1:5yjVcr7CZ/KxMLL9/NhZwa0PAe6i/rTi2ArQtF5XI04= +github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go new file mode 100644 index 0000000..6a3c6b2 --- /dev/null +++ b/v2/kafka/cmd.go @@ -0,0 +1,388 @@ +package kafka + +import ( + "bytes" + "encoding/binary" + + "github.com/pilosa/go-pilosa" + "github.com/pilosa/go-pilosa/gpexp" + pdk "github.com/pilosa/pdk/v2" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" + // "github.com/y0ssar1an/q" +) + +type Main struct { + PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` + KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` + RegistryURL string `help:"Location of Confluent Schema Registry"` + BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` + Group string `help:"Kafka group."` + Index string `help:"Name of Pilosa index."` + Topics []string `help:"Kafka topics to read from."` + LogPath string `help:"Log file to write to. Empty means stderr."e` + PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. 
If empty, record key translation will not be used."` + IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` + MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` + Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` + PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` + // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. + + client *pilosa.Client + schema *pilosa.Schema + index *pilosa.Index +} + +func NewMain() *Main { + return &Main{ + PilosaHosts: []string{"localhost:10101"}, + KafkaHosts: []string{"localhost:9092"}, + RegistryURL: "localhost:8081", + BatchSize: 1, // definitely increase this to achieve any amount of performance + Group: "defaultgroup", + Index: "defaultindex", + Topics: []string{"defaulttopic"}, + Concurrency: 1, + PackBools: "bools", + } +} + +func (m *Main) Run() (err error) { + err = m.setup() + if err != nil { + return errors.Wrap(err, "setting up") + } + eg := errgroup.Group{} + for c := 0; c < m.Concurrency; c++ { + c := c + eg.Go(func() error { + return m.runIngester(c) + }) + } + + return eg.Wait() +} + +func (m *Main) setup() (err error) { + if err := m.validate(); err != nil { + return errors.Wrap(err, "validating configuration") + } + + m.client, err = pilosa.NewClient(m.PilosaHosts) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + m.schema, err = m.client.Schema() + if err != nil { + return errors.Wrap(err, "getting schema") + } + keyTranslation := len(m.PrimaryKeyFields) > 0 + m.index = m.schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) + if m.PackBools != "" { + m.index.Field(m.PackBools, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) + m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) + } + err = m.client.SyncSchema(m.schema) + if err != nil { + return errors.Wrap(err, "syncing schema") + } + + return nil +} + +func (m *Main) runIngester(c int) error { + source := NewSource() + source.Hosts = m.KafkaHosts + source.Topics = m.Topics + source.Group = m.Group + source.MaxMsgs = m.MaxMsgs + + err := source.Open() + if err != nil { + return errors.Wrap(err, "opening source") + } + + var batch gpexp.RecordBatch + var recordizers []Recordizer + var prevRec pdk.Record + row := &gpexp.Row{} + for rec, err := source.Record(); err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { + if err == pdk.ErrSchemaChange { + // finish previous batch if this is not the first + if batch != nil { + err = batch.Import() + if err != nil { + return errors.Wrap(err, "importing") + } + err = prevRec.Commit() + if err != nil { + return errors.Wrap(err, "committing") + } + } + schema := source.Schema() + recordizers, batch, err = m.batchFromSchema(schema) + if err != nil { + return errors.Wrap(err, "batchFromSchema") + } + } + for i := range row.Values { + row.Values[i] = nil + } + data := rec.Data() + for _, rdz := range recordizers { + err = rdz(data, row) + } + err = batch.Add(*row) + } + return nil +} + +type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error + +func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, 
gpexp.RecordBatch, error) { + // from the schema, and the configuration stored on Main, we need + // to create a []pilosa.Field and a []Recordizer processing + // functions which take a []interface{} which conforms to the + // schema, and converts it to a record which conforms to the + // []pilosa.Field. + // + // The relevant config options on Main are: + // 1. PrimaryKeyFields and IDField + // 2. PackBools + // 3. BatchSize (gets passed directly to the batch) + // + // For PrimaryKeyFields and IDField there is some complexity. There are 3 top level options. 1, the other, or neither (auto-generated IDs). + // + // 1. PrimarKeyFields - the main question here is whether in + // addition to combining these and translating them to column ID, + // do we index them separately? I think the answer by default + // should be yes. + // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. + // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. + + recordizers := make([]Recordizer, 0) + + var rz Recordizer + skips := make(map[int]struct{}) + var err error + + // primary key stuff + if len(m.PrimaryKeyFields) != 0 { + rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) + if err != nil { + return nil, nil, errors.Wrap(err, "getting primary key recordizer") + } + } else if m.IDField != "" { + for fieldIndex, field := range schema { + if field.Name() == m.IDField { + if _, ok := field.(pdk.IDField); !ok { + return nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) + } + fieldIndex := fieldIndex + rz = func(rawRec []interface{}, rec *gpexp.Row) error { + rec.ID = rawRec[fieldIndex] + return nil + } + skips[fieldIndex] = struct{}{} + break + } + } + if rz == nil { + return nil, nil, errors.Errorf("ID field %s not found", m.IDField) + } + } else { + return nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") + } + recordizers = append(recordizers, rz) + + // set up bool fields + var boolField, boolFieldExists *pilosa.Field + if m.PackBools != "" { + boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) + boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) + } + + fields := make([]*pilosa.Field, 0, len(schema)) + for i, pdkField := range schema { + // need to redefine these inside the loop since we're + // capturing them in closures + i := i + pdkField := pdkField + // see if we previously decided to skip this field of the raw + // record. + if _, ok := skips[i]; ok { + continue + } + + // handle records where pilosa already has the field + _, isBool := pdkField.(pdk.BoolField) + if (m.PackBools == "" || !isBool) && m.index.HasField(pdkField.Name()) { + // TODO validate that Pilosa's existing field matches the + // type and options of the PDK field. 
+ fields = append(fields, m.index.Field(pdkField.Name())) + valIdx := len(fields) - 1 + // TODO may need to have more sophisticated recordizer by type at some point + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + rec.Values[valIdx] = rawRec[i] + return nil + }) + continue + } + + // now handle this field if it was not already found in pilosa + switch fld := pdkField.(type) { + case pdk.StringField, pdk.IDField, pdk.StringArrayField: + opts := []pilosa.FieldOption{} + if hasMutex(fld) { + opts = append(opts, pilosa.OptFieldTypeMutex(pilosa.CacheTypeRanked, 50000)) + } else { + opts = append(opts, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000)) + } + _, ok1 := fld.(pdk.StringArrayField) + if _, ok2 := fld.(pdk.StringField); ok1 || ok2 { + opts = append(opts, pilosa.OptFieldKeys(true)) + } + fields = append(fields, m.index.Field(fld.Name(), opts...)) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + rec.Values[valIdx] = rawRec[i] + return nil + }) + case pdk.BoolField: + if m.PackBools == "" { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + rec.Values[valIdx] = rawRec[i] + return nil + }) + } else { + fields = append(fields, boolField, boolFieldExists) + fieldIdx := len(fields) - 2 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + b, ok := rawRec[i].(bool) + if b { + rec.Values[fieldIdx] = pdkField.Name() + } + if ok { + rec.Values[fieldIdx+1] = pdkField.Name() + } + return nil + }) + continue + } + case pdk.IntField: + if fld.Min != nil { + min := *fld.Min + if fld.Max != nil { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min, *fld.Max))) + } else { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) + } + } + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + rec.Values[valIdx] = rawRec[i] + return nil + }) + case pdk.DecimalField: + // TODO handle scale + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + rec.Values[valIdx] = rawRec[i] + return nil + }) + } + } + err = m.client.SyncSchema(m.schema) + if err != nil { + return nil, nil, errors.Wrap(err, "syncing schema") + } + batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) + if err != nil { + return nil, nil, errors.Wrap(err, "creating batch") + } + return recordizers, batch, nil +} + +func hasMutex(fld pdk.Field) bool { + if sfld, ok := fld.(pdk.StringField); ok { + return sfld.Mutex + } + if sfld, ok := fld.(pdk.IDField); ok { + return sfld.Mutex + } + return false +} + +// getPrimaryKeyRecordizer returns a Recordizer function which +// extracts the primary key fields from a record, combines them, and +// sets the ID on the record. If pkFields is a single field, and that +// field is of type string, we'll return it in skipFields, because we +// won't want to index it separately. 
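+//
+// For example, with primary key fields "c", "d", "b" and a record
+// whose values for those fields are uint32(2), uint32(4), and
+// uint32(1), the generated ID is the concatenation of their big-endian
+// encodings: []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1}. The
+// "primaries as ints" case in TestGetPrimaryKeyRecordizer (cmd_test.go)
+// exercises exactly this.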
+func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer Recordizer, skipFields map[int]struct{}, err error) { + if len(schema) == 0 { + return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty schema") + } + if len(pkFields) == 0 { + return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty pkFields") + } + fieldIndices := make([]int, 0, len(pkFields)) + for pkIndex, pk := range pkFields { + for fieldIndex, field := range schema { + if pk == field.Name() { + switch field.(type) { + case pdk.StringArrayField: + return nil, nil, errors.Errorf("field %s cannot be a primary key field because it is a StringArray field.", pk) + } + fieldIndices = append(fieldIndices, fieldIndex) + break + } + } + if len(fieldIndices) != pkIndex+1 { + return nil, nil, errors.Errorf("no field with primary key field name %s found", pk) + } + } + if len(pkFields) == 1 { + if _, ok := schema[fieldIndices[0]].(pdk.StringField); ok { + skipFields = make(map[int]struct{}, 1) + skipFields[fieldIndices[0]] = struct{}{} + } + } + recordizer = func(rawRec []interface{}, rec *gpexp.Row) error { + idbytes, ok := rec.ID.([]byte) + if ok { + idbytes = idbytes[:0] + } else { + idbytes = make([]byte, 0) + } + buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? + + // TODO... will want to change this encoding logic to length-prefix the different fields or something. + for _, fieldIdx := range fieldIndices { + val := rawRec[fieldIdx] + switch vt := val.(type) { + case string: + buf.WriteString(vt) // err is always nil + case []byte: + buf.Write(vt) // err is always nil + default: + err = binary.Write(buf, binary.BigEndian, val) + if err != nil { + return errors.Wrapf(err, "writing %+v of type %[1]T", val) + } + } + } + rec.ID = buf.Bytes() + return nil + } + return recordizer, skipFields, nil +} + +func (m *Main) validate() error { + if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { + return errors.New("cannot set both primary key fields and id-field") + } + return nil +} diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go new file mode 100644 index 0000000..d6f8a24 --- /dev/null +++ b/v2/kafka/cmd_test.go @@ -0,0 +1,266 @@ +package kafka + +import ( + "reflect" + "strings" + "testing" + + "github.com/pilosa/go-pilosa/gpexp" + pdk "github.com/pilosa/pdk/v2" +) + +func TestCmdMain(t *testing.T) { + if testing.Short() { + t.Skip() + } + + // load big schema + // make a bunch of data and insert it + + type testcase struct { + } +} + +func TestGetPrimaryKeyRecordizer(t *testing.T) { + tests := []struct { + name string + schema []pdk.Field + pkFields []string + expErr string + expSkip map[int]struct{} + rawRec []interface{} + expID interface{} + }{ + { + name: "no schema", + expErr: "can't call getPrimaryKeyRecordizer with empty schema", + }, + { + name: "no pkfields", + schema: []pdk.Field{pdk.StringField{}}, + expErr: "can't call getPrimaryKeyRecordizer with empty pkFields", + }, + { + name: "primary is StringArray", + schema: []pdk.Field{pdk.StringArrayField{NameVal: "blah"}}, + pkFields: []string{"blah"}, + expErr: "field blah cannot be a primary key field because it is a StringArray field.", + }, + { + name: "primary is StringArray complex", + schema: []pdk.Field{pdk.StringField{NameVal: "zaa"}, pdk.IntField{NameVal: "hey"}, pdk.StringArrayField{NameVal: "blah"}}, + pkFields: []string{"blah", "zaa"}, + expErr: "field blah cannot be a primary key field because it is a StringArray field.", + }, + { + name: "unknown pkfield", + schema: 
[]pdk.Field{pdk.StringField{NameVal: "zaa"}}, + pkFields: []string{"zaa", "zz"}, + expErr: "no field with primary key field name zz found", + }, + { + name: "unknown pkfield complex", + schema: []pdk.Field{pdk.StringField{NameVal: "zaa"}, pdk.IntField{NameVal: "hey"}, pdk.StringField{NameVal: "blah"}}, + pkFields: []string{"blah", "zz", "zaa"}, + expErr: "no field with primary key field name zz found", + }, + { + name: "skip primary", + schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.IntField{NameVal: "b"}}, + pkFields: []string{"a"}, + expSkip: map[int]struct{}{0: struct{}{}}, + rawRec: []interface{}{"a", 9}, + expID: []byte("a"), + }, + { + name: "primaries as ints", + schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.IntField{NameVal: "b"}, pdk.IntField{NameVal: "c"}, pdk.IntField{NameVal: "d"}}, + pkFields: []string{"c", "d", "b"}, + rawRec: []interface{}{"a", uint32(1), uint32(2), uint32(4)}, + expID: []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rdz, skips, err := getPrimaryKeyRecordizer(test.schema, test.pkFields) + if test.expErr != "" { + if err == nil { + t.Fatalf("nil err, expected %s", test.expErr) + } + if !strings.Contains(err.Error(), test.expErr) { + t.Fatalf("unmatched errs exp/got\n%s\n%v", test.expErr, err) + } + return + } else if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !reflect.DeepEqual(skips, test.expSkip) { + t.Errorf("unmatched skips exp/got\n%+v\n%+v", test.expSkip, skips) + } + + row := &gpexp.Row{} + err = rdz(test.rawRec, row) + if err != nil { + t.Fatalf("unexpected error from recordizer: %v", err) + } + if !reflect.DeepEqual(test.expID, row.ID) { + t.Fatalf("mismatched row IDs exp: %+v, got: %+v", test.expID, row.ID) + } + + }) + } +} + +func TestBatchFromSchema(t *testing.T) { + if testing.Short() { + t.Skip() + } + type testcase struct { + name string + schema []pdk.Field + IDField string + pkFields []string + packBools string + rawRec []interface{} + rowID interface{} + rowVals []interface{} + err string + batchErr string + } + runTest := func(t *testing.T, test testcase, removeIndex bool) { + m := NewMain() + m.Index = "cmd_test_index23lkjdkfj" + m.PrimaryKeyFields = test.pkFields + m.IDField = test.IDField + m.PackBools = test.packBools + m.BatchSize = 2 + + err := m.setup() + if err != nil { + t.Fatalf("%v", err) + } + if removeIndex { + defer func() { + err := m.client.DeleteIndex(m.index) + if err != nil { + t.Logf("deleting test index: %v", err) + } + }() + } + + rdzs, batch, err := m.batchFromSchema(test.schema) + if testErr(t, test.err, err) { + return + } + + row := &gpexp.Row{} + row.Values = make([]interface{}, len(test.rowVals)) + for _, rdz := range rdzs { + err = rdz(test.rawRec, row) + } + + if !reflect.DeepEqual(row.ID, test.rowID) { + t.Fatalf("row IDs exp: %+v got %+v", test.rowID, row.ID) + } + if !reflect.DeepEqual(row.Values, test.rowVals) { + t.Fatalf("row values exp/got:\n%+v\n%+v", test.rowVals, row.Values) + } + + err = batch.Add(*row) + if testErr(t, test.batchErr, err) { + return + } + } + + tests := []testcase{ + { + name: "empty", + err: "autogen IDs is currently unimplemented", + }, + { + name: "no id field", + schema: []pdk.Field{pdk.StringField{}}, + IDField: "nope", + err: "ID field nope not found", + }, + { + name: "pk error", + pkFields: []string{"zoop"}, + err: "getting primary key recordizer", + }, + { + name: "pack bools", + schema: []pdk.Field{pdk.BoolField{NameVal: "a"}, pdk.IDField{NameVal: "b"}, 
pdk.BoolField{NameVal: "c"}}, + IDField: "b", + packBools: "bff", + rawRec: []interface{}{true, uint64(7), false}, + rowID: uint64(7), + rowVals: []interface{}{"a", "a", nil, "c"}, + }, + { + name: "don't pack bools", + schema: []pdk.Field{pdk.BoolField{NameVal: "a"}, pdk.IDField{NameVal: "b"}, pdk.BoolField{NameVal: "c"}}, + IDField: "b", + rawRec: []interface{}{true, uint64(7), false}, + rowID: uint64(7), + rowVals: []interface{}{true, false}, + err: "field type bool is not currently supported through Batch", + }, + { + name: "mutex field", + schema: []pdk.Field{pdk.StringField{NameVal: "a", Mutex: true}, pdk.IDField{NameVal: "b"}}, + IDField: "b", + rawRec: []interface{}{"aval", uint64(7)}, + rowID: uint64(7), + rowVals: []interface{}{"aval"}, + err: "field type mutex is not currently supported through Batch", + }, + { + name: "string array field", + schema: []pdk.Field{pdk.StringArrayField{NameVal: "a"}, pdk.StringField{NameVal: "b"}}, + pkFields: []string{"b"}, + rawRec: []interface{}{[]string{"aval", "aval2"}, uint64(7)}, + rowID: []byte{0, 0, 0, 0, 0, 0, 0, 7}, + rowVals: []interface{}{[]string{"aval", "aval2"}}, + batchErr: "[]string is not currently supported.", // TODO support this in gpexp.Batch + }, + { + name: "decimal field", + schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.DecimalField{NameVal: "b", Scale: 2}}, + pkFields: []string{"a"}, + rawRec: []interface{}{"blah", uint64(321)}, + rowID: []byte("blah"), + rowVals: []interface{}{uint64(321)}, + }, + } + + for _, test := range tests { + // test on fresh Pilosa + t.Run(test.name+"-1", func(t *testing.T) { + runTest(t, test, false) + }) + // test again with index/fields in place + t.Run(test.name+"-2", func(t *testing.T) { + runTest(t, test, true) + }) + } +} + +func testErr(t *testing.T, exp string, actual error) (done bool) { + t.Helper() + if exp == "" && actual == nil { + return false + } + if exp == "" && actual != nil { + t.Fatalf("unexpected error: %v", actual) + } + if exp != "" && actual == nil { + t.Fatalf("expected error like '%s'", exp) + } + if !strings.Contains(actual.Error(), exp) { + t.Fatalf("unmatched errs exp/got\n%s\n%v", exp, actual) + } + return true +} diff --git a/v2/kafka/source.go b/v2/kafka/source.go index 743bd4b..d29542e 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -12,74 +12,11 @@ import ( "github.com/Shopify/sarama" cluster "github.com/bsm/sarama-cluster" "github.com/go-avro/avro" - "github.com/pilosa/go-pilosa" pdk "github.com/pilosa/pdk/v2" "github.com/pkg/errors" // "github.com/y0ssar1an/q" ) -type Main struct { - PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` - KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` - RegistryURL string `help:"Location of Confluent Schema Registry"` - BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` - Group string `help:"Kafka group."` - Index string `help:"Name of Pilosa index."` - Topics []string `help:"Kafka topics to read from."` - LogPath string `help:"Log file to write to. Empty means stderr."e` - PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` - IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. 
If both are empty, auto-generated IDs will be used."` - MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` - // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. -} - -func NewMain() *Main { - return &Main{ - PilosaHosts: []string{"localhost:10101"}, - KafkaHosts: []string{"localhost:9092"}, - RegistryURL: "localhost:8081", - BatchSize: 1, // definitely increase this to achieve any amount of performance - Group: "defaultgroup", - Index: "defaultindex", - Topics: []string{"defaulttopic"}, - } -} - -func (m *Main) Run() error { - if err := m.validate(); err != nil { - return errors.Wrap(err, "validating configuration") - } - - client, err := pilosa.NewClient(m.PilosaHosts) - if err != nil { - return errors.Wrap(err, "getting pilosa client") - } - schema, err := client.Schema() - if err != nil { - return errors.Wrap(err, "getting schema") - } - keyTranslation := len(m.PrimaryKeyFields) > 0 - index := schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) - fmt.Println(index) - - source := NewSource() - source.Hosts = m.KafkaHosts - source.Topics = m.Topics - source.Group = m.Group - source.MaxMsgs = m.MaxMsgs - - // remember to flush old batch and make a new batch when schema changes - - return nil -} - -func (m *Main) validate() error { - if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { - return errors.New("cannot set both primary key fields and id-field") - } - return nil -} - // Source implements the pdk.Source interface using kafka as a data // source. It is not threadsafe! Due to the way Kafka clients work, to // achieve concurrency, create multiple Sources. From 425107a458bbf3111090670ab40f17351a3741be Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Mon, 7 Oct 2019 08:25:45 -0500 Subject: [PATCH 17/40] v2 kafka consumer mostly working except for string array --- csv/batch.go | 4 +- csv/batch_test.go | 4 +- go.mod | 5 +- go.sum | 4 + v2/interfaces.go | 147 +++++++++++++++++++++++ v2/kafka/cmd.go | 81 +++++++------ v2/kafka/cmd_test.go | 79 +++++++++++- v2/kafka/source.go | 44 ++----- v2/kafka/source_test.go | 59 +++++---- v2/kafka/testdata/schemas/bigschema.json | 139 +++++++++++++++++++++ 10 files changed, 462 insertions(+), 104 deletions(-) create mode 100644 v2/kafka/testdata/schemas/bigschema.json diff --git a/csv/batch.go b/csv/batch.go index a8afd03..deba2ba 100644 --- a/csv/batch.go +++ b/csv/batch.go @@ -176,9 +176,7 @@ func (j jobReport) String() string { } else { s += "}" } - return s - return fmt.Sprintf("{n:%d duration:%s}", j.n, j.duration) } type fileJob struct { @@ -201,7 +199,7 @@ func fileProcessor(jobs <-chan fileJob, stats chan<- jobReport) { } } -func processFile(reader *csv.Reader, batch *gpexp.Batch, pc *parseConfig) (n uint64, err error) { +func processFile(reader *csv.Reader, batch gpexp.RecordBatch, pc *parseConfig) (n uint64, err error) { defer pc.nexter.Return() record := gpexp.Row{ Values: make([]interface{}, len(pc.fieldConfig)), diff --git a/csv/batch_test.go b/csv/batch_test.go index 3917385..722e6a4 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -449,12 +449,12 @@ FEFF,,,,,6 { query: result.GT(0), resType: "rowKeys", - exp: []string{"ABDJ", "EJSK"}, + exp: []string{"ABDJ", "EJSK", "HFZP"}, }, { query: result.GT(100000), resType: "rowKeys", - exp: []string{"ABDJ", "EJSK"}, + exp: []string{"ABDJ", "EJSK", "HFZP"}, }, { query: day.Row(1), diff --git a/go.mod b/go.mod index 2431e36..1cfa123 100644 --- a/go.mod +++ b/go.mod @@ 
-1,6 +1,8 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f +replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c + +//replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 @@ -32,7 +34,6 @@ require ( github.com/spf13/viper v1.4.0 github.com/stretchr/testify v1.3.0 // indirect github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 - github.com/y0ssar1an/q v1.0.7 golang.org/x/sync v0.0.0-20190423024810-112230192c58 gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183 // indirect gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect diff --git a/go.sum b/go.sum index 454223a..d77be5f 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,7 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWso github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIOA6tDi6QXUemppXK3P9BI7mr2hd6gx8= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -71,6 +72,7 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-ole/go-ole v1.2.1 h1:2lOsA72HgjxAuMlKpFiCbHTvu44PIVkZ5hqm3RSdI/E= github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= +github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI= github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo= @@ -163,6 +165,8 @@ github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666 h1:+O5nPgJ3ByJZ github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f h1:5yjVcr7CZ/KxMLL9/NhZwa0PAe6i/rTi2ArQtF5XI04= github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= +github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c h1:3sArB8fcf4AyWT2B/mDTJy0VzRMBnX2NEF/zxXRCNe4= +github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= diff --git a/v2/interfaces.go b/v2/interfaces.go 
index 4649a60..850b33c 100644 --- a/v2/interfaces.go +++ b/v2/interfaces.go @@ -1,5 +1,11 @@ package pdk +import ( + "math" + + "github.com/pkg/errors" +) + // Source is an interface implemented by sources of data which can be // ingested into Pilosa. Each Record returned from Record is described // by the slice of Fields returned from Source.Schema directly after @@ -45,6 +51,7 @@ type Record interface { type Field interface { Name() string + PilosafyVal(val interface{}) (interface{}, error) // TODO rename this } type IDField struct { @@ -59,12 +66,24 @@ type IDField struct { } func (id IDField) Name() string { return id.NameVal } +func (id IDField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + return toUint64(val) +} type BoolField struct { NameVal string } func (b BoolField) Name() string { return b.NameVal } +func (b BoolField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + return toBool(val) +} type StringField struct { NameVal string @@ -78,6 +97,12 @@ type StringField struct { } func (s StringField) Name() string { return s.NameVal } +func (s StringField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + return toString(val) +} type IntField struct { NameVal string @@ -86,6 +111,12 @@ type IntField struct { } func (i IntField) Name() string { return i.NameVal } +func (i IntField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + return toInt64(val) +} type DecimalField struct { NameVal string @@ -93,9 +124,125 @@ type DecimalField struct { } func (d DecimalField) Name() string { return d.NameVal } +func (i DecimalField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + switch vt := val.(type) { + case float32: + v64 := float64(vt) * math.Pow(10, float64(i.Scale)) + return int64(v64), nil + case float64: + vt = vt * math.Pow(10, float64(i.Scale)) + return int64(vt), nil + default: + return toInt64(val) + } +} type StringArrayField struct { NameVal string } func (s StringArrayField) Name() string { return s.NameVal } +func (i StringArrayField) PilosafyVal(val interface{}) (interface{}, error) { + if val == nil { + return nil, nil + } + return toStringArray(val) +} + +func toUint64(val interface{}) (uint64, error) { + switch vt := val.(type) { + case uint: + return uint64(vt), nil + case uint8: + return uint64(vt), nil + case uint16: + return uint64(vt), nil + case uint32: + return uint64(vt), nil + case uint64: + return vt, nil + case int: + return uint64(vt), nil + case int8: + return uint64(vt), nil + case int16: + return uint64(vt), nil + case int32: + return uint64(vt), nil + case int64: + return uint64(vt), nil + default: + return 0, errors.Errorf("couldn't convert %v of %[1]T to uint64", vt) + } +} + +func toBool(val interface{}) (bool, error) { + switch vt := val.(type) { + case bool: + return vt, nil + case byte: + return vt != 0, nil + default: + return false, errors.Errorf("couldn't convert %v of %[1]T to bool", vt) + } +} + +func toString(val interface{}) (string, error) { + switch vt := val.(type) { + case string: + return vt, nil + case []byte: + return string(vt), nil + default: + return "", errors.Errorf("couldn't convert %v of %[1]T to string", vt) + } +} + +func toInt64(val interface{}) (int64, error) { + switch vt := val.(type) { + case uint: + return int64(vt), nil + case uint8: + return int64(vt), nil + case uint16: + return int64(vt), nil + 
case uint32: + return int64(vt), nil + case uint64: + return int64(vt), nil + case int: + return int64(vt), nil + case int8: + return int64(vt), nil + case int16: + return int64(vt), nil + case int32: + return int64(vt), nil + case int64: + return vt, nil + default: + return 0, errors.Errorf("couldn't convert %v of %[1]T to int64", vt) + } +} + +func toStringArray(val interface{}) ([]string, error) { + switch vt := val.(type) { + case []string: + return vt, nil + case []interface{}: + ret := make([]string, len(vt)) + for i, v := range vt { + vs, ok := v.(string) + if !ok { + return nil, errors.Errorf("couldn't convert []interface{} to []string, value %v of type %[1]T at %d", v, i) + } + ret[i] = vs + } + return ret, nil + default: + return nil, errors.Errorf("couldn't convert %v of %[1]T to []string", vt) + } +} diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index 6a3c6b2..aa618f9 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -12,6 +12,11 @@ import ( // "github.com/y0ssar1an/q" ) +// TODO Jaeger +// TODO profiling endpoint +// TODO Prometheus + +// Main holds all config for Kafka indexing w/ schema registry. type Main struct { PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` @@ -20,7 +25,7 @@ type Main struct { Group string `help:"Kafka group."` Index string `help:"Name of Pilosa index."` Topics []string `help:"Kafka topics to read from."` - LogPath string `help:"Log file to write to. Empty means stderr."e` + LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. 
Useful for testing when you don't want to run indefinitely."` @@ -101,12 +106,12 @@ func (m *Main) runIngester(c int) error { if err != nil { return errors.Wrap(err, "opening source") } - var batch gpexp.RecordBatch var recordizers []Recordizer var prevRec pdk.Record - row := &gpexp.Row{} - for rec, err := source.Record(); err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { + var row *gpexp.Row + rec, err := source.Record() + for ; err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { if err == pdk.ErrSchemaChange { // finish previous batch if this is not the first if batch != nil { @@ -120,7 +125,7 @@ func (m *Main) runIngester(c int) error { } } schema := source.Schema() - recordizers, batch, err = m.batchFromSchema(schema) + recordizers, batch, row, err = m.batchFromSchema(schema) if err != nil { return errors.Wrap(err, "batchFromSchema") } @@ -131,15 +136,21 @@ func (m *Main) runIngester(c int) error { data := rec.Data() for _, rdz := range recordizers { err = rdz(data, row) + if err != nil { + return errors.Wrap(err, "recordizing") + } } err = batch.Add(*row) + if err != nil { + return errors.Wrap(err, "adding to batch") + } } - return nil + return errors.Wrap(err, "getting record") } type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error -func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBatch, error) { +func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBatch, *gpexp.Row, error) { // from the schema, and the configuration stored on Main, we need // to create a []pilosa.Field and a []Recordizer processing // functions which take a []interface{} which conforms to the @@ -159,7 +170,6 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa // should be yes. // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. 
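
The three ID strategies in the comment above, shown as a standalone sketch. This is not part of the patch: the row type and setID helper are hypothetical stand-ins for gpexp.Row and the recordizer wiring below, and the real key encoding in getPrimaryKeyRecordizer concatenates raw bytes (with a TODO about length-prefixing) rather than using a separator as this toy version does.

// Illustrative, minimal sketch of the ID precedence described above;
// not the PDK implementation.
package main

import (
	"bytes"
	"fmt"
)

// row mirrors the shape of gpexp.Row: an ID plus per-field values.
type row struct {
	ID     interface{}
	Values []interface{}
}

// setID picks the record ID with the same precedence as the comment:
// 1. concatenate primary-key fields into a []byte key (key translation),
// 2. else use the value of a designated integer ID field directly,
// 3. else leave ID nil so the batch layer can autogenerate one.
func setID(rec *row, raw []interface{}, pkIdx []int, idIdx int) {
	switch {
	case len(pkIdx) > 0:
		buf := &bytes.Buffer{}
		for _, i := range pkIdx {
			fmt.Fprintf(buf, "%v|", raw[i]) // naive concatenation with a separator
		}
		rec.ID = buf.Bytes()
	case idIdx >= 0:
		rec.ID = raw[idIdx] // assumed to already be a uint64 column ID
	default:
		rec.ID = nil // autogenerate downstream
	}
}

func main() {
	raw := []interface{}{"acct-7", uint64(42), true}
	r := &row{Values: raw}
	setID(r, raw, []int{0, 1}, -1) // primary key built from fields 0 and 1
	fmt.Printf("%s\n", r.ID)       // acct-7|42|
}
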
- recordizers := make([]Recordizer, 0) var rz Recordizer @@ -170,28 +180,28 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa if len(m.PrimaryKeyFields) != 0 { rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) if err != nil { - return nil, nil, errors.Wrap(err, "getting primary key recordizer") + return nil, nil, nil, errors.Wrap(err, "getting primary key recordizer") } } else if m.IDField != "" { for fieldIndex, field := range schema { if field.Name() == m.IDField { if _, ok := field.(pdk.IDField); !ok { - return nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) + return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) } fieldIndex := fieldIndex - rz = func(rawRec []interface{}, rec *gpexp.Row) error { - rec.ID = rawRec[fieldIndex] - return nil + rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) + return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) } skips[fieldIndex] = struct{}{} break } } if rz == nil { - return nil, nil, errors.Errorf("ID field %s not found", m.IDField) + return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) } } else { - return nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") + return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") } recordizers = append(recordizers, rz) @@ -222,9 +232,9 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa fields = append(fields, m.index.Field(pdkField.Name())) valIdx := len(fields) - 1 // TODO may need to have more sophisticated recordizer by type at some point - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { - rec.Values[valIdx] = rawRec[i] - return nil + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) continue } @@ -244,22 +254,22 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa } fields = append(fields, m.index.Field(fld.Name(), opts...)) valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { - rec.Values[valIdx] = rawRec[i] - return nil + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) case pdk.BoolField: if m.PackBools == "" { fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { rec.Values[valIdx] = rawRec[i] return nil }) } else { fields = append(fields, boolField, boolFieldExists) fieldIdx := len(fields) - 2 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { b, ok := rawRec[i].(bool) if b { rec.Values[fieldIdx] = pdkField.Name() @@ -281,29 +291,32 @@ func (m *Main) 
batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa } } valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { - rec.Values[valIdx] = rawRec[i] - return nil + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) case pdk.DecimalField: // TODO handle scale fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) error { - rec.Values[valIdx] = rawRec[i] - return nil + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) } } err = m.client.SyncSchema(m.schema) if err != nil { - return nil, nil, errors.Wrap(err, "syncing schema") + return nil, nil, nil, errors.Wrap(err, "syncing schema") } batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) if err != nil { - return nil, nil, errors.Wrap(err, "creating batch") + return nil, nil, nil, errors.Wrap(err, "creating batch") + } + row := &gpexp.Row{ + Values: make([]interface{}, len(fields)), } - return recordizers, batch, nil + return recordizers, batch, row, nil } func hasMutex(fld pdk.Field) bool { @@ -341,7 +354,7 @@ func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer } } if len(fieldIndices) != pkIndex+1 { - return nil, nil, errors.Errorf("no field with primary key field name %s found", pk) + return nil, nil, errors.Errorf("no field with primary key field name %s found. 
fields: %+v", pk, schema) } } if len(pkFields) == 1 { @@ -350,7 +363,7 @@ func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer skipFields[fieldIndices[0]] = struct{}{} } } - recordizer = func(rawRec []interface{}, rec *gpexp.Row) error { + recordizer = func(rawRec []interface{}, rec *gpexp.Row) (err error) { idbytes, ok := rec.ID.([]byte) if ok { idbytes = idbytes[:0] diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index d6f8a24..6aed471 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -5,6 +5,7 @@ import ( "strings" "testing" + "github.com/Shopify/sarama" "github.com/pilosa/go-pilosa/gpexp" pdk "github.com/pilosa/pdk/v2" ) @@ -15,10 +16,67 @@ func TestCmdMain(t *testing.T) { } // load big schema + licodec := liDecodeTestSchema(t, "bigschema.json") + schemaID := postSchema(t, "bigschema.json", "bigschema2") + + fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} + // make a bunch of data and insert it + records := [][]interface{}{ + {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + } - type testcase struct { + // put records in kafka + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? 
+ conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + t.Fatalf("getting new producer: %v", err) + } + topic := "testcmdmain" + for _, vals := range records { + rec := makeRecord(t, fields, vals) + putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) + } + + // create Main and run with MaxMsgs + m := NewMain() + m.Index = "cmd_test_index23lkjdkfj" + m.PrimaryKeyFields = []string{"abc", "db", "user_id"} + m.PackBools = "bools" + m.BatchSize = 1 + m.Topics = []string{topic} + m.MaxMsgs = len(records) + + err = m.Run() + if err != nil { + t.Fatalf("running main: %v", err) + } + + // check data in Pilosa + if !m.index.HasField("abc") { + t.Fatalf("don't have abc") } + abc := m.index.Field("abc") + qr, err := m.client.Query(m.index.Count(abc.Row("2"))) + if err != nil { + t.Fatalf("querying: %v", err) + } + if qr.Result().Count() != 1 { + t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) + } +} + +func makeRecord(t *testing.T, fields []string, vals []interface{}) map[string]interface{} { + if len(fields) != len(vals) { + t.Fatalf("have %d fields and %d vals", len(fields), len(vals)) + } + ret := make(map[string]interface{}) + for i, field := range fields { + ret[field] = vals[i] + } + return ret } func TestGetPrimaryKeyRecordizer(t *testing.T) { @@ -150,22 +208,31 @@ func TestBatchFromSchema(t *testing.T) { }() } - rdzs, batch, err := m.batchFromSchema(test.schema) + rdzs, batch, row, err := m.batchFromSchema(test.schema) if testErr(t, test.err, err) { return } - row := &gpexp.Row{} - row.Values = make([]interface{}, len(test.rowVals)) for _, rdz := range rdzs { err = rdz(test.rawRec, row) + if err != nil { + t.Fatalf("recordizing: %v", err) + } } if !reflect.DeepEqual(row.ID, test.rowID) { t.Fatalf("row IDs exp: %+v got %+v", test.rowID, row.ID) } if !reflect.DeepEqual(row.Values, test.rowVals) { - t.Fatalf("row values exp/got:\n%+v\n%+v", test.rowVals, row.Values) + t.Errorf("row values exp/got:\n%+v %[1]T\n%+v %[2]T", test.rowVals, row.Values) + if len(row.Values) == len(test.rowVals) { + for i, v := range row.Values { + if !reflect.DeepEqual(v, test.rowVals[i]) { + t.Errorf("%v %[1]T != %v %[2]T", test.rowVals[i], v) + } + } + } + t.Fail() } err = batch.Add(*row) @@ -232,7 +299,7 @@ func TestBatchFromSchema(t *testing.T) { pkFields: []string{"a"}, rawRec: []interface{}{"blah", uint64(321)}, rowID: []byte("blah"), - rowVals: []interface{}{uint64(321)}, + rowVals: []interface{}{int64(321)}, }, } diff --git a/v2/kafka/source.go b/v2/kafka/source.go index d29542e..94466a5 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -14,7 +14,6 @@ import ( "github.com/go-avro/avro" pdk "github.com/pilosa/pdk/v2" "github.com/pkg/errors" - // "github.com/y0ssar1an/q" ) // Source implements the pdk.Source interface using kafka as a data @@ -116,7 +115,8 @@ func (s *Source) toPDKRecord(vals map[string]interface{}) error { case pdk.DecimalField: vb, ok := val.([]byte) if !ok { - return errors.Errorf("decimal must be []byte, but got %v of %[1]T", val) + r.data[i] = val + continue } if len(vb) == 8 { r.data[i] = binary.BigEndian.Uint64(vb) @@ -303,9 +303,18 @@ func avroToPDKField(aField *avro.SchemaField) (pdk.Field, error) { NameVal: aField.Name, }, nil case avro.Float, avro.Double: - return pdk.IntField{ + // TODO should probably require a logicalType if we're going + // to treat a float as a decimal. 
+ field := pdk.DecimalField{ NameVal: aField.Name, - }, nil + } + scale, err := intProp(aField, "scale") + if err == wrongType { + return nil, errors.Wrap(err, "getting scale") + } else if err == nil { + field.Scale = uint(scale) + } + return field, nil case avro.Boolean: return pdk.BoolField{ NameVal: aField.Name, @@ -476,30 +485,3 @@ func avroDecode(codec avro.Schema, data []byte) (map[string]interface{}, error) return decodedRecord.Map(), nil } - -func toUint64(val interface{}) (uint64, error) { - switch vt := val.(type) { - case uint: - return uint64(vt), nil - case uint8: - return uint64(vt), nil - case uint16: - return uint64(vt), nil - case uint32: - return uint64(vt), nil - case uint64: - return vt, nil - case int: - return uint64(vt), nil - case int8: - return uint64(vt), nil - case int16: - return uint64(vt), nil - case int32: - return uint64(vt), nil - case int64: - return uint64(vt), nil - default: - return 0, errors.Errorf("couldn't convert %v of %[1]T to uint64", vt) - } -} diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index 1e2f965..204bfd1 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -59,7 +59,7 @@ func TestAvroToPDKSchema(t *testing.T) { if err != nil { t.Fatalf("reading directory: %v", err) } - if len(files) != len(tests) { + if len(files) != len(tests)+1 { // +1 because we aren't testing bigschema.json here t.Errorf("have different number of schemas and tests: %d and %d\n%+v", len(files), len(tests), files) } @@ -225,17 +225,15 @@ func TestKafkaSourceIntegration(t *testing.T) { t.Skip() } src := NewSource() - src.Topics = []string{"test"} + src.Topics = []string{"testKafkaSourceIntegration"} src.Group = "group0" err := src.Open() if err != nil { t.Fatalf("opening source: %v", err) } - schemaClient := csrc.NewClient("localhost:8081") - conf := sarama.NewConfig() - conf.Version = sarama.V0_10_0_0 + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? 
conf.Producer.Return.Successes = true producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) if err != nil { @@ -247,30 +245,12 @@ func TestKafkaSourceIntegration(t *testing.T) { key := fmt.Sprintf("%d", rnd.Int()) for i, test := range tests { - schemaStr := readTestSchema(t, test.schemaFile) - resp, err := schemaClient.PostSubjects(fmt.Sprintf("schema%d", i), schemaStr) - if err != nil { - t.Fatalf("posting schema: %v", err) - } - schemaID := resp.ID + schemaID := postSchema(t, test.schemaFile, fmt.Sprintf("schema%d", i)) schema := liDecodeTestSchema(t, test.schemaFile) t.Run(test.schemaFile, func(t *testing.T) { for j, record := range test.data { - buf := make([]byte, 5, 1000) - buf[0] = 0 - binary.BigEndian.PutUint32(buf[1:], uint32(schemaID)) - buf, err := schema.BinaryFromNative(buf, record) - if err != nil { - t.Errorf("encoding:\n%+v\nerr: %v", record, err) - } - - // post buf to kafka - _, _, err = producer.SendMessage(&sarama.ProducerMessage{Topic: "test", Key: sarama.StringEncoder(key), Value: sarama.ByteEncoder(buf)}) - if err != nil { - t.Fatalf("sending message to kafka: %v", err) - } - + putRecordKafka(t, producer, schemaID, schema, key, src.Topics[0], record) pdkRec, err := src.Record() if j == 0 { if err != pdk.ErrSchemaChange { @@ -305,10 +285,37 @@ func TestKafkaSourceIntegration(t *testing.T) { } +func postSchema(t *testing.T, schemaFile, subj string) (schemaID int) { + schemaClient := csrc.NewClient("localhost:8081") + schemaStr := readTestSchema(t, schemaFile) + resp, err := schemaClient.PostSubjects(subj, schemaStr) + if err != nil { + t.Fatalf("posting schema: %v", err) + } + return resp.ID +} + +func putRecordKafka(t *testing.T, producer sarama.SyncProducer, schemaID int, schema *liavro.Codec, key, topic string, record map[string]interface{}) { + t.Helper() + buf := make([]byte, 5, 1000) + buf[0] = 0 + binary.BigEndian.PutUint32(buf[1:], uint32(schemaID)) + buf, err := schema.BinaryFromNative(buf, record) + if err != nil { + t.Errorf("encoding:\n%+v\nerr: %v", record, err) + } + + // post buf to kafka + _, _, err = producer.SendMessage(&sarama.ProducerMessage{Topic: topic, Key: sarama.StringEncoder(key), Value: sarama.ByteEncoder(buf)}) + if err != nil { + t.Fatalf("sending message to kafka: %v", err) + } +} + var expectedSchemas = map[string][]pdk.Field{ "simple.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, "stringtypes.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}, pdk.StringField{NameVal: "middle"}}, "decimal.json": []pdk.Field{pdk.DecimalField{NameVal: "somenum", Scale: 2}}, "unions.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.BoolField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.DecimalField{NameVal: "fourth", Scale: 3}}, - "othertypes.json": []pdk.Field{pdk.StringField{NameVal: "first", Mutex: true}, pdk.StringArrayField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.IntField{NameVal: "fourth"}, pdk.IntField{NameVal: "fifth"}, pdk.IntField{NameVal: "sixth"}, pdk.BoolField{NameVal: "seventh"}}, + "othertypes.json": []pdk.Field{pdk.StringField{NameVal: "first", Mutex: true}, pdk.StringArrayField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.IntField{NameVal: "fourth"}, pdk.DecimalField{NameVal: "fifth"}, pdk.DecimalField{NameVal: "sixth"}, pdk.BoolField{NameVal: "seventh"}}, } diff --git a/v2/kafka/testdata/schemas/bigschema.json b/v2/kafka/testdata/schemas/bigschema.json new file mode 100644 index 0000000..9185fd7 
--- /dev/null +++ b/v2/kafka/testdata/schemas/bigschema.json @@ -0,0 +1,139 @@ +{ + "namespace": "org.test", + "type": "record", + "name": "user_traits", + "doc": "User Specific Traits", + "fields": [ + { + "name": "abc", + "doc": "The ABC", + "type": "string" + }, + { + "name": "db", + "doc": "TE DB Number", + "type": "string" + }, + { + "name": "user_id", + "doc": "User ID", + "type": "int" + }, + { + "name": "all_users", + "doc": "All Users Trait", + "type": [ + "null", + "boolean" + ] + }, + { + "name": "has_deleted_date", + "doc": "has_deleted_date", + "type": [ + "null", + "boolean" + ] + }, + { + "name": "central_group", + "doc": "central_group", + "type": [ + "null", + "string" + ] + }, + { + "name": "custom_audiences", + "doc": "custom_audiences NOTE: uuid maps to string internally?", + "type": [ + "null", + { + "type": "array", + "items": "string" + } + ] + }, + { + "name": "desktop_boolean", + "doc": "desktop_boolean", + "type": [ + "null", + "boolean" + ] + }, + { + "name": "desktop_frequency", + "doc": "desktop_frequency", + "type": [ + "null", + "int" + ] + }, + { + "name": "desktop_recency", + "doc": "desktop_recency NOTE: Should be DateTime type?", + "type": [ + "null", + "int" + ] + }, + { + "name": "product_boolean_historical_forestry_cravings_or_bugles", + "doc": "product_boolean_historical_forestry_cravings_or_bugles", + "type": [ + "null", + "boolean" + ] + }, + { + "name": "ddd_category_total_current_rhinocerous_checking", + "doc": "ddd_category_total_current_rhinocerous_checking NOTE: float? use decimal?", + "type": [ + "null", + "float" + ] + }, + { + "name": "ddd_category_total_current_rhinocerous_thedog_cheetah", + "doc": "ddd_category_total_current_rhinocerous_thedog_cheetah NOTE: float? use decimal?", + "type": [ + "null", + "float" + ] + }, + { + "name": "survey1234", + "doc": "Answers for Survey Question 1234", + "type": [ + "null", + { + "type": "enum", + "name": "survey1234", + "symbols": [ + "yes", + "no", + "seen", + "skipped" + ] + } + ] + }, + { + "name": "days_since_last_logon", + "doc": "Number of days since the user last logged on NOTE: float? 
decimal?", + "type": [ + "null", + "float" + ] + }, + { + "name": "elephant_added_for_account", + "doc": "Has a elephant been added", + "type": [ + "null", + "boolean" + ] + } + ] +} From 018238e43fb0e5081fa8a3528ac13b7abc2a4308 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Tue, 8 Oct 2019 14:26:18 -0500 Subject: [PATCH 18/40] fix some bugs, basic integration test passing --- v2/kafka/cmd.go | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index aa618f9..ea4541e 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -3,6 +3,7 @@ package kafka import ( "bytes" "encoding/binary" + "io" "github.com/pilosa/go-pilosa" "github.com/pilosa/go-pilosa/gpexp" @@ -141,9 +142,22 @@ func (m *Main) runIngester(c int) error { } } err = batch.Add(*row) - if err != nil { + if err == gpexp.ErrBatchNowFull { + err = batch.Import() + if err != nil { + return errors.Wrap(err, "importing batch") + } + err = rec.Commit() + if err != nil { + return errors.Wrap(err, "commiting record") + } + } else if err != nil { return errors.Wrap(err, "adding to batch") } + prevRec = rec + } + if err == io.EOF { + err = nil } return errors.Wrap(err, "getting record") } @@ -211,7 +225,6 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) } - fields := make([]*pilosa.Field, 0, len(schema)) for i, pdkField := range schema { // need to redefine these inside the loop since we're @@ -289,6 +302,8 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa } else { fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) } + } else { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) } valIdx := len(fields) - 1 recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { @@ -303,6 +318,8 @@ func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBa rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) + default: + return nil, nil, nil, errors.Errorf("unknown schema field type %T %[1]v", pdkField) } } err = m.client.SyncSchema(m.schema) From 6da03e25391fdb69dfc360070a0b23a1a5a2f9d4 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Tue, 8 Oct 2019 15:05:35 -0500 Subject: [PATCH 19/40] fixup tests --- csv/batch_test.go | 2 +- go.mod | 4 +-- go.sum | 66 ++++++-------------------------------------- v2/kafka/cmd_test.go | 1 - 4 files changed, 12 insertions(+), 61 deletions(-) diff --git a/csv/batch_test.go b/csv/batch_test.go index 722e6a4..37bd4ee 100644 --- a/csv/batch_test.go +++ b/csv/batch_test.go @@ -119,7 +119,7 @@ func TestImportMarketingCSV(t *testing.T) { defer func() { err = client.DeleteIndexByName(m.Index) if err != nil { - t.Fatalf("deleting index: %v", err) + t.Logf("deleting index: %v", err) } }() err = m.Run() diff --git a/go.mod b/go.mod index 1cfa123..775b5e2 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,9 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c - //replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa +replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 + replace 
github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 require ( diff --git a/go.sum b/go.sum index d77be5f..3c90630 100644 --- a/go.sum +++ b/go.sum @@ -5,8 +5,6 @@ cloud.google.com/go v0.43.0/go.mod h1:BOSR3VbTLkk6FDC/TcffxP4NF/FFBGA5ku+jvKOP7p github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/CAFxX/gcnotifier v0.0.0-20170518020117-39b0596a2da3 h1:bZrDXM2lN6jLwij+LZ7OUZvhP3VjPZp9iCDC/FG+SC0= -github.com/CAFxX/gcnotifier v0.0.0-20170518020117-39b0596a2da3/go.mod h1:Rn2zM2MnHze07LwkneP48TWt6UiZhzQTwCvw6djVGfE= github.com/CAFxX/gcnotifier v0.0.0-20190112062741-224a280d589d h1:n0G4ckjMEj7bWuGYUX0i8YlBeBBJuZ+HEHvHfyBDZtI= github.com/CAFxX/gcnotifier v0.0.0-20190112062741-224a280d589d/go.mod h1:Rn2zM2MnHze07LwkneP48TWt6UiZhzQTwCvw6djVGfE= github.com/DataDog/datadog-go v0.0.0-20180822151419-281ae9f2d895 h1:dmc/C8bpE5VkQn65PNbbyACDC8xw8Hpp/NEurdPmQDQ= @@ -17,8 +15,6 @@ github.com/Shopify/sarama v1.19.0 h1:9oksLxC6uxVPHPVYUmq6xhr1BOF/hHobWH2UzO67z1s github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 h1:fLjPD/aNc3UIOA6tDi6QXUemppXK3P9BI7mr2hd6gx8= -github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -64,14 +60,10 @@ github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba/go.mod h1:3A7SOsr8 github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 h1:yswqe8UdKNWn4kjh1YTaAbvOSPeg95xhW7h4qeICL5E= -github.com/go-avro/avro v0.0.0-20171219232920-444163702c11/go.mod h1:kxj6THYP0dmFPk4Z+bijIAhJoGgeBfyOKXMduhvdJPA= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-ole/go-ole v1.2.1 h1:2lOsA72HgjxAuMlKpFiCbHTvu44PIVkZ5hqm3RSdI/E= -github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI= github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -103,6 +95,7 @@ github.com/google/btree v1.0.0 
h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -116,8 +109,6 @@ github.com/gorilla/handlers v1.3.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/ github.com/gorilla/handlers v1.4.1 h1:BHvcRGJe/TrL+OqFxoKQGddTgeibiOjaBssV5a/N9sw= github.com/gorilla/handlers v1.4.1/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= github.com/gorilla/mux v1.4.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.0 h1:tOSd0UKHQd6urX6ApfOn4XdBMY6Sh1MfxV3kmaazO+U= github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= @@ -159,14 +150,10 @@ github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMS github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= -github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7 h1:DyezXZFZgOssV5VH/a7+XH3iXNtj8SFX8FKiEZbu3iY= -github.com/jaffee/go-pilosa v0.4.1-0.20190831210635-c2b019c94ab7/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= -github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666 h1:+O5nPgJ3ByJZxr0/rjNX1tYCEffoOJ+Q4T2589EkFaE= -github.com/jaffee/go-pilosa v0.4.1-0.20190909235343-e40d84aa7666/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= -github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f h1:5yjVcr7CZ/KxMLL9/NhZwa0PAe6i/rTi2ArQtF5XI04= -github.com/jaffee/go-pilosa v0.4.1-0.20191004142116-bfe8680b131f/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= -github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c h1:3sArB8fcf4AyWT2B/mDTJy0VzRMBnX2NEF/zxXRCNe4= -github.com/jaffee/go-pilosa v0.4.1-0.20191004202728-391eb01cd51c/go.mod h1:B4omOlxvZcuKLrlHICT6bMEeHPmT8tpSyxgHuEF1PsU= +github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032 h1:uATKnbEhR3+K3L1YFYa4DfPIHpfvnLvNcG9v4iWYjrA= +github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 h1:IEGhQ3aUdbLHPkv+twI74W6ggBcAwl0cNJzquQ1IXdE= +github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= 
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -184,19 +171,15 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 h1:T+kPxsfvkFtz7x6ysgOYjki7khHjowQW6DD1rcpOS0Q= github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088/go.mod h1:vL3ODoWTPCBSeKVFgQ+lvSq0VOzTB5TcXvUX+4pU/+Q= -github.com/linkedin/goavro v2.1.0+incompatible h1:DV2aUlj2xZiuxQyvag8Dy7zjY69ENjS66bWkSfdpddY= github.com/linkedin/goavro/v2 v2.9.6 h1:Qh8M4/oWMSJ8V3pKCl9QRZOZnefg/vU56t47AwzaSoQ= github.com/linkedin/goavro/v2 v2.9.6/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.1.1 h1:DVkblRdiScEnEr0LR9nTnEQqHYycjkXW9bOjd+2EL2o= -github.com/miekg/dns v1.1.1/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.15 h1:CSSIDtllwGLMoA6zjdKnaE6Tx6eVUxQ29LUgGetiDCI= github.com/miekg/dns v1.1.15/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.0.0/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 h1:QlTAK00UrY80KK9Da+foE04AjxhXFrgp87aZB6yfU5c= @@ -220,29 +203,7 @@ github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/9 github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 h1:wKfigKMTgvSzBLIVvB5QaBBQI0odU6n45/UKSphjLus= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pilosa/demo-taxi v0.0.0-20190604185441-6b6ef983bff7/go.mod h1:DM8Umjg0r/UscmOs49RJeE0WUb8Nj4PLUj4J02vigLk= -github.com/pilosa/go-pilosa v1.2.0 h1:EgokWNJt/yYRX1P09+uDy7QI3jUKa42iu6pe8hB6umE= -github.com/pilosa/go-pilosa v1.2.0/go.mod h1:uli4HiTymHocSAXJ9XpDbkH6kS63P8Yc0xyWDzooouc= -github.com/pilosa/go-pilosa v1.2.1-0.20190321212254-72b91a013211 h1:2NZOJBJoB2TjeSP1LkMYQfttqWyTHXRdAez+Mn4qDa4= -github.com/pilosa/go-pilosa v1.2.1-0.20190321212254-72b91a013211/go.mod h1:0kiAAME+mLvrmVX2YVo4kmIwtKsV16Re442mheTVqoE= -github.com/pilosa/go-pilosa v1.2.1-0.20190411142335-f4e04bc9733d h1:NsWUuT4LTqyWH5pPZHXALZu6CX8ij74d1nmcCIq5aHQ= -github.com/pilosa/go-pilosa v1.2.1-0.20190411142335-f4e04bc9733d/go.mod h1:rMziran1UW7p6kkKQTdYB+DCP9ZHbsQpGNmc4fF4PTY= -github.com/pilosa/go-pilosa v1.2.1-0.20190416174400-de264d55e76f h1:faokUYJ6BwIKltmqdFRRVf4ZXOoL+WwaIcEkpp82fC4= -github.com/pilosa/go-pilosa v1.2.1-0.20190416174400-de264d55e76f/go.mod h1:x18hYx4nI0bHzQSFyanUEB7pmja1HMAZAb3PPhUpOZc= -github.com/pilosa/go-pilosa v1.3.1-0.20190503193736-ad53edf56c18 h1:uUA588w4MeX0dFxhr0rj1XazSxaSwqL3qnG2MmTZRb8= -github.com/pilosa/go-pilosa v1.3.1-0.20190503193736-ad53edf56c18/go.mod h1:9ECbvb0EQJvjxBups5CUCzeLh8KrLgQVY9/1zSoQHQE= -github.com/pilosa/go-pilosa 
v1.3.1-0.20190612142550-e616c1393660 h1:0UUfONtKBe4n1yIRLshVPNCJbtXdg/WKNLDqmCxu/uw= -github.com/pilosa/go-pilosa v1.3.1-0.20190612142550-e616c1393660/go.mod h1:9ECbvb0EQJvjxBups5CUCzeLh8KrLgQVY9/1zSoQHQE= github.com/pilosa/pilosa v0.0.0-20181115192138-84148d4ee6c0/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= -github.com/pilosa/pilosa v0.0.0-20181130171212-dfb748ec5b01 h1:gtkt282G8/+8XN0D9+934/3zpUiEtsFINjqCs+vZs04= -github.com/pilosa/pilosa v0.0.0-20181130171212-dfb748ec5b01/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= -github.com/pilosa/pilosa v0.0.0-20190104143002-8c4b1548bc4b h1:2H/+JUxL4dv0uJ4G4i+C83S1yq/+pUrHHjsF8TEY85I= -github.com/pilosa/pilosa v0.0.0-20190104143002-8c4b1548bc4b/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= -github.com/pilosa/pilosa v1.2.1-0.20190326161037-4955dff22f76 h1:nwrmAMzbSIq99Tt1t/UYyJXI5rxN0wGLT8u47gxCZ9A= -github.com/pilosa/pilosa v1.2.1-0.20190326161037-4955dff22f76/go.mod h1:yTXtQGchazlKds24RYax8+N9lIlpisspMj4ZD4loMU0= -github.com/pilosa/pilosa v1.2.1-0.20190401200108-927e8b89425e h1:leJjlNm0+3Vbbp7qA+NdmDaWxQKivN3T2rVaVCNHpTk= -github.com/pilosa/pilosa v1.2.1-0.20190401200108-927e8b89425e/go.mod h1:rRLglQ1zRxKarDMyHhsLR+0XhacXWUNrNXaAs69b1LQ= -github.com/pilosa/pilosa v1.2.1-0.20190410162749-b973f8c96356 h1:jDxhpV4l+CpKqVVgld73e9/EyogdCcO1ftbCvifrhSc= -github.com/pilosa/pilosa v1.2.1-0.20190410162749-b973f8c96356/go.mod h1:QN7EwQwoQHNPVsd7CHXFDasPznLDA6DPswmnLr4eJ6o= github.com/pilosa/pilosa v1.3.1 h1:rLDVqJBuRzhPtue730D+EX0YEVS4R0oDzsE4bJBwLcE= github.com/pilosa/pilosa v1.3.1/go.mod h1:97yLL9mpUqOj9naKu5XA/b/U6JLe3JGGUlc2HOTDw+A= github.com/pilosa/tools v0.0.0-20190810124639-ee77232ff3aa/go.mod h1:n/Od1ErfFlaIEueOaQjlbo06EzKuRhSPxUGR3xmfEqE= @@ -266,6 +227,7 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T github.com/rakyll/statik v0.0.0-20170410192944-89fe3459b5c8/go.mod h1:OEi9wJV/fMUAGx1eNjq75DKDsJVuEv1U0oYdX6GX8Zs= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001 h1:YDeskXpkNDhPdWN3REluVa46HQOVuVkjkd2sWnrABNQ= github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -274,8 +236,6 @@ github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shirou/gopsutil v2.18.11+incompatible h1:PMFTKnFTr/YTRW5rbLK4vWALV3a+IGXse5nvhSjztmg= -github.com/shirou/gopsutil v2.18.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/gopsutil v2.18.12+incompatible h1:1eaJvGomDnH74/5cF4CTmTbLHAriGFsTZppLXDX93OM= github.com/shirou/gopsutil v2.18.12+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4 h1:udFKJ0aHUL60LboW/A+DfgoHVedieIzIXE8uylPue0U= 
@@ -286,7 +246,6 @@ github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.2.0/go.mod h1:r2rcYCSwa1IExKTDiTfzaxqT2FNHs8hODu4LnUfgKEg= github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= @@ -296,11 +255,8 @@ github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tL github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v0.0.0-20170427125145-f1d95a35e132/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/viper v1.2.1 h1:bIcUwXqLseLF3BDAZduuNfekWG87ibtFxi59Bq+oI9M= -github.com/spf13/viper v1.2.1/go.mod h1:P4AexN0a+C9tGAnUFNwDMYYZv3pjFuvmeiMyKRaNVlI= github.com/spf13/viper v1.3.1 h1:5+8j8FTpnFV4nEImW/ofkzEt8VoOiLXxdYIDsB73T38= github.com/spf13/viper v1.3.1/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= @@ -325,8 +281,6 @@ github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljT github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= -github.com/y0ssar1an/q v1.0.7 h1:s3ckTY+wjk6Y0sFce4rIS1Ezf8S6d0UFJrKwe40MyiQ= -github.com/y0ssar1an/q v1.0.7/go.mod h1:Q1Rk1StqWjSOfA/CF4zJEW1fLmkl5Cy8EsILdkB+DgE= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -362,8 +316,6 @@ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc h1:a3CU5tJYVj92DY2LaA1kUkrsqD5/3mLDhx2NcNqyW+0= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -388,13 +340,10 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEha golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180906133057-8cf3aee42992/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35 h1:YAFjXN64LMvktoUZH9zgY4lGc/msGN7HQfoSuKCgaDU= -golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a h1:1n5lsVfiQW3yfsRGu98756EH1YthsFqr/5mxHduZW2A= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -448,6 +397,7 @@ gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183 h1:PGIdqvwfpMUyUP+QAlAnKTSWQ gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183/go.mod h1:FvqrFXt+jCsyQibeRv4xxEJBL5iG2DDW5aeJwzDiq4A= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -464,6 +414,8 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +modernc.org/mathutil v1.0.0 h1:93vKjrJopTPrtTNpZ8XIovER7iCIH1QU7wNbOQXC60I= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= +modernc.org/strutil v1.0.0 h1:XVFtQwFVwc02Wk+0L/Z/zDDXO81r5Lhe6iMKmGX3KhE= modernc.org/strutil v1.0.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index 6aed471..4b1d3f0 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -291,7 +291,6 @@ func TestBatchFromSchema(t *testing.T) { rawRec: []interface{}{[]string{"aval", "aval2"}, uint64(7)}, rowID: []byte{0, 0, 0, 0, 0, 0, 0, 7}, rowVals: []interface{}{[]string{"aval", "aval2"}}, - batchErr: "[]string is not currently supported.", // 
TODO support this in gpexp.Batch }, { name: "decimal field", From 083e17d90cb7a762a31ff1de44487091089e9b58 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 9 Oct 2019 10:36:03 -0500 Subject: [PATCH 20/40] refactor v2/kafka stuff --- go.mod | 4 +- go.sum | 2 + v2/ingest.go | 411 ++++++++++++++++++++++ v2/ingest_test.go | 262 ++++++++++++++ v2/kafka/cmd.go | 792 ++++++++++++++++++++++--------------------- v2/kafka/cmd_test.go | 266 +-------------- 6 files changed, 1092 insertions(+), 645 deletions(-) create mode 100644 v2/ingest.go create mode 100644 v2/ingest_test.go diff --git a/go.mod b/go.mod index 775b5e2..9046de4 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,5 @@ module github.com/pilosa/pdk -//replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa - replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 @@ -18,7 +16,7 @@ require ( github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 github.com/hashicorp/go-uuid v1.0.1 // indirect - github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e + github.com/jaffee/commandeer v0.3.0 github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 github.com/linkedin/goavro/v2 v2.9.6 github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 diff --git a/go.sum b/go.sum index 3c90630..b6deb2b 100644 --- a/go.sum +++ b/go.sum @@ -150,6 +150,8 @@ github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMS github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= +github.com/jaffee/commandeer v0.3.0 h1:9KEz8f9T6PwuzjdxfV8C5FevdJp6xih5yqPLmNzQarc= +github.com/jaffee/commandeer v0.3.0/go.mod h1:kCwfuSvZ2T0NVEr3LDSo6fDUgi0xSBnAVDdkOKTtpLQ= github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032 h1:uATKnbEhR3+K3L1YFYa4DfPIHpfvnLvNcG9v4iWYjrA= github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 h1:IEGhQ3aUdbLHPkv+twI74W6ggBcAwl0cNJzquQ1IXdE= diff --git a/v2/ingest.go b/v2/ingest.go new file mode 100644 index 0000000..6cab59d --- /dev/null +++ b/v2/ingest.go @@ -0,0 +1,411 @@ +package pdk + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/pilosa/go-pilosa" + "github.com/pilosa/go-pilosa/gpexp" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" +) + +// TODO Jaeger +// TODO profiling endpoint +// TODO Prometheus + +// Main holds all config for general ingest +type Main struct { + PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` + BatchSize int `flag:"batch-size",help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` + Index string `help:"Name of Pilosa index."` + LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement + PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. 
If empty, record key translation will not be used."` + IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` + MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` + Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` + PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` + // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. + + NewSource func() (Source, error) `flag:"-"` + + client *pilosa.Client + schema *pilosa.Schema + index *pilosa.Index +} + +func (m *Main) PilosaClient() *pilosa.Client { + return m.client +} + +func NewMain() *Main { + return &Main{ + PilosaHosts: []string{"localhost:10101"}, + BatchSize: 1, // definitely increase this to achieve any amount of performance + Index: "defaultindex", + Concurrency: 1, + PackBools: "bools", + } +} + +func (m *Main) Run() (err error) { + err = m.setup() + if err != nil { + return errors.Wrap(err, "setting up") + } + eg := errgroup.Group{} + for c := 0; c < m.Concurrency; c++ { + c := c + eg.Go(func() error { + return m.runIngester(c) + }) + } + + return eg.Wait() +} + +func (m *Main) setup() (err error) { + if err := m.validate(); err != nil { + return errors.Wrap(err, "validating configuration") + } + + m.client, err = pilosa.NewClient(m.PilosaHosts) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + m.schema, err = m.client.Schema() + if err != nil { + return errors.Wrap(err, "getting schema") + } + keyTranslation := len(m.PrimaryKeyFields) > 0 + m.index = m.schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) + if m.PackBools != "" { + m.index.Field(m.PackBools, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) + m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) + } + err = m.client.SyncSchema(m.schema) + if err != nil { + return errors.Wrap(err, "syncing schema") + } + + return nil +} + +func (m *Main) runIngester(c int) error { + source, err := m.NewSource() + if err != nil { + return errors.Wrap(err, "getting source") + } + var batch gpexp.RecordBatch + var recordizers []Recordizer + var prevRec Record + var row *gpexp.Row + rec, err := source.Record() + for ; err == ErrSchemaChange || err == nil; rec, err = source.Record() { + if err == ErrSchemaChange { + // finish previous batch if this is not the first + if batch != nil { + err = batch.Import() + if err != nil { + return errors.Wrap(err, "importing") + } + err = prevRec.Commit() + if err != nil { + return errors.Wrap(err, "committing") + } + } + schema := source.Schema() + recordizers, batch, row, err = m.batchFromSchema(schema) + if err != nil { + return errors.Wrap(err, "batchFromSchema") + } + } + for i := range row.Values { + row.Values[i] = nil + } + data := rec.Data() + for _, rdz := range recordizers { + err = rdz(data, row) + if err != nil { + return errors.Wrap(err, "recordizing") + } + } + err = batch.Add(*row) + if err == gpexp.ErrBatchNowFull { + err = batch.Import() + if err != nil { + return errors.Wrap(err, "importing batch") + } + err = rec.Commit() + if err != nil { + return errors.Wrap(err, "commiting record") + } + } else if err != nil { + return 
errors.Wrap(err, "adding to batch") + } + prevRec = rec + } + if err == io.EOF { + err = nil + } + return errors.Wrap(err, "getting record") +} + +type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error + +func (m *Main) batchFromSchema(schema []Field) ([]Recordizer, gpexp.RecordBatch, *gpexp.Row, error) { + // from the schema, and the configuration stored on Main, we need + // to create a []pilosa.Field and a []Recordizer processing + // functions which take a []interface{} which conforms to the + // schema, and converts it to a record which conforms to the + // []pilosa.Field. + // + // The relevant config options on Main are: + // 1. PrimaryKeyFields and IDField + // 2. PackBools + // 3. BatchSize (gets passed directly to the batch) + // + // For PrimaryKeyFields and IDField there is some complexity. There are 3 top level options. 1, the other, or neither (auto-generated IDs). + // + // 1. PrimarKeyFields - the main question here is whether in + // addition to combining these and translating them to column ID, + // do we index them separately? I think the answer by default + // should be yes. + // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. + // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. + recordizers := make([]Recordizer, 0) + + var rz Recordizer + skips := make(map[int]struct{}) + var err error + + // primary key stuff + if len(m.PrimaryKeyFields) != 0 { + rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "getting primary key recordizer") + } + } else if m.IDField != "" { + for fieldIndex, field := range schema { + if field.Name() == m.IDField { + if _, ok := field.(IDField); !ok { + return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) + } + fieldIndex := fieldIndex + rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) + return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) + } + skips[fieldIndex] = struct{}{} + break + } + } + if rz == nil { + return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) + } + } else { + return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") + } + recordizers = append(recordizers, rz) + + // set up bool fields + var boolField, boolFieldExists *pilosa.Field + if m.PackBools != "" { + boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) + boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) + } + fields := make([]*pilosa.Field, 0, len(schema)) + for i, pdkField := range schema { + // need to redefine these inside the loop since we're + // capturing them in closures + i := i + pdkField := pdkField + // see if we previously decided to skip this field of the raw + // record. + if _, ok := skips[i]; ok { + continue + } + + // handle records where pilosa already has the field + _, isBool := pdkField.(BoolField) + if (m.PackBools == "" || !isBool) && m.index.HasField(pdkField.Name()) { + // TODO validate that Pilosa's existing field matches the + // type and options of the PDK field. 
+ fields = append(fields, m.index.Field(pdkField.Name())) + valIdx := len(fields) - 1 + // TODO may need to have more sophisticated recordizer by type at some point + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) + }) + continue + } + + // now handle this field if it was not already found in pilosa + switch fld := pdkField.(type) { + case StringField, IDField, StringArrayField: + opts := []pilosa.FieldOption{} + if hasMutex(fld) { + opts = append(opts, pilosa.OptFieldTypeMutex(pilosa.CacheTypeRanked, 50000)) + } else { + opts = append(opts, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000)) + } + _, ok1 := fld.(StringArrayField) + if _, ok2 := fld.(StringField); ok1 || ok2 { + opts = append(opts, pilosa.OptFieldKeys(true)) + } + fields = append(fields, m.index.Field(fld.Name(), opts...)) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) + }) + case BoolField: + if m.PackBools == "" { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx] = rawRec[i] + return nil + }) + } else { + fields = append(fields, boolField, boolFieldExists) + fieldIdx := len(fields) - 2 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + b, ok := rawRec[i].(bool) + if b { + rec.Values[fieldIdx] = pdkField.Name() + } + if ok { + rec.Values[fieldIdx+1] = pdkField.Name() + } + return nil + }) + continue + } + case IntField: + if fld.Min != nil { + min := *fld.Min + if fld.Max != nil { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min, *fld.Max))) + } else { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) + } + } else { + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) + } + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) + }) + case DecimalField: + // TODO handle scale + fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) + valIdx := len(fields) - 1 + recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { + rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) + return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) + }) + default: + return nil, nil, nil, errors.Errorf("unknown schema field type %T %[1]v", pdkField) + } + } + err = m.client.SyncSchema(m.schema) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "syncing schema") + } + batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "creating batch") + } + row := &gpexp.Row{ + Values: make([]interface{}, len(fields)), + } + return recordizers, batch, row, nil +} + +func hasMutex(fld Field) bool { + if sfld, ok := fld.(StringField); ok { + return sfld.Mutex + } + if sfld, 
ok := fld.(IDField); ok { + return sfld.Mutex + } + return false +} + +// getPrimaryKeyRecordizer returns a Recordizer function which +// extracts the primary key fields from a record, combines them, and +// sets the ID on the record. If pkFields is a single field, and that +// field is of type string, we'll return it in skipFields, because we +// won't want to index it separately. +func getPrimaryKeyRecordizer(schema []Field, pkFields []string) (recordizer Recordizer, skipFields map[int]struct{}, err error) { + if len(schema) == 0 { + return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty schema") + } + if len(pkFields) == 0 { + return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty pkFields") + } + fieldIndices := make([]int, 0, len(pkFields)) + for pkIndex, pk := range pkFields { + for fieldIndex, field := range schema { + if pk == field.Name() { + switch field.(type) { + case StringArrayField: + return nil, nil, errors.Errorf("field %s cannot be a primary key field because it is a StringArray field.", pk) + } + fieldIndices = append(fieldIndices, fieldIndex) + break + } + } + if len(fieldIndices) != pkIndex+1 { + return nil, nil, errors.Errorf("no field with primary key field name %s found. fields: %+v", pk, schema) + } + } + if len(pkFields) == 1 { + if _, ok := schema[fieldIndices[0]].(StringField); ok { + skipFields = make(map[int]struct{}, 1) + skipFields[fieldIndices[0]] = struct{}{} + } + } + recordizer = func(rawRec []interface{}, rec *gpexp.Row) (err error) { + idbytes, ok := rec.ID.([]byte) + if ok { + idbytes = idbytes[:0] + } else { + idbytes = make([]byte, 0) + } + buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? + + // TODO... will want to change this encoding logic to length-prefix the different fields or something. 
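+		// Plain concatenation is ambiguous: the pairs ("ab", "c") and
+		// ("a", "bc") both encode to "abc", so distinct records could be
+		// translated to the same key. Length-prefixing each value (e.g.
+		// binary.Write(buf, binary.BigEndian, uint32(len(vt))) before the
+		// value itself) is one way to make the encoding unambiguous.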
+ for _, fieldIdx := range fieldIndices { + val := rawRec[fieldIdx] + switch vt := val.(type) { + case string: + buf.WriteString(vt) // err is always nil + case []byte: + buf.Write(vt) // err is always nil + default: + err = binary.Write(buf, binary.BigEndian, val) + if err != nil { + return errors.Wrapf(err, "writing %+v of type %[1]T", val) + } + } + } + rec.ID = buf.Bytes() + return nil + } + return recordizer, skipFields, nil +} + +func (m *Main) validate() error { + if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { + return errors.New("cannot set both primary key fields and id-field") + } + if m.NewSource == nil { + return errors.New("must set a NewSource function on PDK ingester") + } + return nil +} diff --git a/v2/ingest_test.go b/v2/ingest_test.go new file mode 100644 index 0000000..ca1440d --- /dev/null +++ b/v2/ingest_test.go @@ -0,0 +1,262 @@ +package pdk + +import ( + "reflect" + "strings" + "testing" + + "github.com/pilosa/go-pilosa/gpexp" +) + +func TestGetPrimaryKeyRecordizer(t *testing.T) { + tests := []struct { + name string + schema []Field + pkFields []string + expErr string + expSkip map[int]struct{} + rawRec []interface{} + expID interface{} + }{ + { + name: "no schema", + expErr: "can't call getPrimaryKeyRecordizer with empty schema", + }, + { + name: "no pkfields", + schema: []Field{StringField{}}, + expErr: "can't call getPrimaryKeyRecordizer with empty pkFields", + }, + { + name: "primary is StringArray", + schema: []Field{StringArrayField{NameVal: "blah"}}, + pkFields: []string{"blah"}, + expErr: "field blah cannot be a primary key field because it is a StringArray field.", + }, + { + name: "primary is StringArray complex", + schema: []Field{StringField{NameVal: "zaa"}, IntField{NameVal: "hey"}, StringArrayField{NameVal: "blah"}}, + pkFields: []string{"blah", "zaa"}, + expErr: "field blah cannot be a primary key field because it is a StringArray field.", + }, + { + name: "unknown pkfield", + schema: []Field{StringField{NameVal: "zaa"}}, + pkFields: []string{"zaa", "zz"}, + expErr: "no field with primary key field name zz found", + }, + { + name: "unknown pkfield complex", + schema: []Field{StringField{NameVal: "zaa"}, IntField{NameVal: "hey"}, StringField{NameVal: "blah"}}, + pkFields: []string{"blah", "zz", "zaa"}, + expErr: "no field with primary key field name zz found", + }, + { + name: "skip primary", + schema: []Field{StringField{NameVal: "a"}, IntField{NameVal: "b"}}, + pkFields: []string{"a"}, + expSkip: map[int]struct{}{0: struct{}{}}, + rawRec: []interface{}{"a", 9}, + expID: []byte("a"), + }, + { + name: "primaries as ints", + schema: []Field{StringField{NameVal: "a"}, IntField{NameVal: "b"}, IntField{NameVal: "c"}, IntField{NameVal: "d"}}, + pkFields: []string{"c", "d", "b"}, + rawRec: []interface{}{"a", uint32(1), uint32(2), uint32(4)}, + expID: []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rdz, skips, err := getPrimaryKeyRecordizer(test.schema, test.pkFields) + if test.expErr != "" { + if err == nil { + t.Fatalf("nil err, expected %s", test.expErr) + } + if !strings.Contains(err.Error(), test.expErr) { + t.Fatalf("unmatched errs exp/got\n%s\n%v", test.expErr, err) + } + return + } else if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !reflect.DeepEqual(skips, test.expSkip) { + t.Errorf("unmatched skips exp/got\n%+v\n%+v", test.expSkip, skips) + } + + row := &gpexp.Row{} + err = rdz(test.rawRec, row) + if err != nil { + t.Fatalf("unexpected error 
from recordizer: %v", err) + } + if !reflect.DeepEqual(test.expID, row.ID) { + t.Fatalf("mismatched row IDs exp: %+v, got: %+v", test.expID, row.ID) + } + + }) + } +} + +func TestBatchFromSchema(t *testing.T) { + if testing.Short() { + t.Skip() + } + type testcase struct { + name string + schema []Field + IDField string + pkFields []string + packBools string + rawRec []interface{} + rowID interface{} + rowVals []interface{} + err string + batchErr string + } + runTest := func(t *testing.T, test testcase, removeIndex bool) { + m := NewMain() + m.Index = "cmd_test_index23lkjdkfj" + m.PrimaryKeyFields = test.pkFields + m.IDField = test.IDField + m.PackBools = test.packBools + m.BatchSize = 2 + m.NewSource = func() (Source, error) { return nil, nil } + + err := m.setup() + if err != nil { + t.Fatalf("%v", err) + } + if removeIndex { + defer func() { + err := m.client.DeleteIndex(m.index) + if err != nil { + t.Logf("deleting test index: %v", err) + } + }() + } + + rdzs, batch, row, err := m.batchFromSchema(test.schema) + if testErr(t, test.err, err) { + return + } + + for _, rdz := range rdzs { + err = rdz(test.rawRec, row) + if err != nil { + t.Fatalf("recordizing: %v", err) + } + } + + if !reflect.DeepEqual(row.ID, test.rowID) { + t.Fatalf("row IDs exp: %+v got %+v", test.rowID, row.ID) + } + if !reflect.DeepEqual(row.Values, test.rowVals) { + t.Errorf("row values exp/got:\n%+v %[1]T\n%+v %[2]T", test.rowVals, row.Values) + if len(row.Values) == len(test.rowVals) { + for i, v := range row.Values { + if !reflect.DeepEqual(v, test.rowVals[i]) { + t.Errorf("%v %[1]T != %v %[2]T", test.rowVals[i], v) + } + } + } + t.Fail() + } + + err = batch.Add(*row) + if testErr(t, test.batchErr, err) { + return + } + } + + tests := []testcase{ + { + name: "empty", + err: "autogen IDs is currently unimplemented", + }, + { + name: "no id field", + schema: []Field{StringField{}}, + IDField: "nope", + err: "ID field nope not found", + }, + { + name: "pk error", + pkFields: []string{"zoop"}, + err: "getting primary key recordizer", + }, + { + name: "pack bools", + schema: []Field{BoolField{NameVal: "a"}, IDField{NameVal: "b"}, BoolField{NameVal: "c"}}, + IDField: "b", + packBools: "bff", + rawRec: []interface{}{true, uint64(7), false}, + rowID: uint64(7), + rowVals: []interface{}{"a", "a", nil, "c"}, + }, + { + name: "don't pack bools", + schema: []Field{BoolField{NameVal: "a"}, IDField{NameVal: "b"}, BoolField{NameVal: "c"}}, + IDField: "b", + rawRec: []interface{}{true, uint64(7), false}, + rowID: uint64(7), + rowVals: []interface{}{true, false}, + err: "field type bool is not currently supported through Batch", + }, + { + name: "mutex field", + schema: []Field{StringField{NameVal: "a", Mutex: true}, IDField{NameVal: "b"}}, + IDField: "b", + rawRec: []interface{}{"aval", uint64(7)}, + rowID: uint64(7), + rowVals: []interface{}{"aval"}, + err: "field type mutex is not currently supported through Batch", + }, + { + name: "string array field", + schema: []Field{StringArrayField{NameVal: "a"}, StringField{NameVal: "b"}}, + pkFields: []string{"b"}, + rawRec: []interface{}{[]string{"aval", "aval2"}, uint64(7)}, + rowID: []byte{0, 0, 0, 0, 0, 0, 0, 7}, + rowVals: []interface{}{[]string{"aval", "aval2"}}, + }, + { + name: "decimal field", + schema: []Field{StringField{NameVal: "a"}, DecimalField{NameVal: "b", Scale: 2}}, + pkFields: []string{"a"}, + rawRec: []interface{}{"blah", uint64(321)}, + rowID: []byte("blah"), + rowVals: []interface{}{int64(321)}, + }, + } + + for _, test := range tests { + // test on fresh 
Pilosa + t.Run(test.name+"-1", func(t *testing.T) { + runTest(t, test, false) + }) + // test again with index/fields in place + t.Run(test.name+"-2", func(t *testing.T) { + runTest(t, test, true) + }) + } +} + +func testErr(t *testing.T, exp string, actual error) (done bool) { + t.Helper() + if exp == "" && actual == nil { + return false + } + if exp == "" && actual != nil { + t.Fatalf("unexpected error: %v", actual) + } + if exp != "" && actual == nil { + t.Fatalf("expected error like '%s'", exp) + } + if !strings.Contains(actual.Error(), exp) { + t.Fatalf("unmatched errs exp/got\n%s\n%v", exp, actual) + } + return true +} diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index ea4541e..cab254c 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -1,418 +1,444 @@ package kafka import ( - "bytes" - "encoding/binary" - "io" - - "github.com/pilosa/go-pilosa" - "github.com/pilosa/go-pilosa/gpexp" - pdk "github.com/pilosa/pdk/v2" + "github.com/pilosa/pdk/v2" "github.com/pkg/errors" - "golang.org/x/sync/errgroup" - // "github.com/y0ssar1an/q" ) -// TODO Jaeger -// TODO profiling endpoint -// TODO Prometheus +//////////////////new -// Main holds all config for Kafka indexing w/ schema registry. type Main struct { - PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` - KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` - RegistryURL string `help:"Location of Confluent Schema Registry"` - BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` - Group string `help:"Kafka group."` - Index string `help:"Name of Pilosa index."` - Topics []string `help:"Kafka topics to read from."` - LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement - PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` - IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` - MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` - Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` - PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` - // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. 
- - client *pilosa.Client - schema *pilosa.Schema - index *pilosa.Index + pdk.Main `flag:"!embed"` + KafkaHosts []string + RegistryURL string + Group string + Topics []string } func NewMain() *Main { - return &Main{ - PilosaHosts: []string{"localhost:10101"}, + m := &Main{ + Main: *pdk.NewMain(), KafkaHosts: []string{"localhost:9092"}, RegistryURL: "localhost:8081", - BatchSize: 1, // definitely increase this to achieve any amount of performance Group: "defaultgroup", - Index: "defaultindex", Topics: []string{"defaulttopic"}, - Concurrency: 1, - PackBools: "bools", } -} + m.NewSource = func() (pdk.Source, error) { + source := NewSource() + source.Hosts = m.KafkaHosts + source.Topics = m.Topics + source.Group = m.Group + source.MaxMsgs = m.MaxMsgs -func (m *Main) Run() (err error) { - err = m.setup() - if err != nil { - return errors.Wrap(err, "setting up") - } - eg := errgroup.Group{} - for c := 0; c < m.Concurrency; c++ { - c := c - eg.Go(func() error { - return m.runIngester(c) - }) + err := source.Open() + if err != nil { + return nil, errors.Wrap(err, "opening source") + } + return source, nil } - - return eg.Wait() + return m } -func (m *Main) setup() (err error) { - if err := m.validate(); err != nil { - return errors.Wrap(err, "validating configuration") - } +// /////////////////old - m.client, err = pilosa.NewClient(m.PilosaHosts) - if err != nil { - return errors.Wrap(err, "getting pilosa client") - } - m.schema, err = m.client.Schema() - if err != nil { - return errors.Wrap(err, "getting schema") - } - keyTranslation := len(m.PrimaryKeyFields) > 0 - m.index = m.schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) - if m.PackBools != "" { - m.index.Field(m.PackBools, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) - m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) - } - err = m.client.SyncSchema(m.schema) - if err != nil { - return errors.Wrap(err, "syncing schema") - } +// // TODO split off the Kafka-specific stuff into something else and take all the general purpose stuff out of here. - return nil -} +// // TODO Jaeger +// // TODO profiling endpoint +// // TODO Prometheus -func (m *Main) runIngester(c int) error { - source := NewSource() - source.Hosts = m.KafkaHosts - source.Topics = m.Topics - source.Group = m.Group - source.MaxMsgs = m.MaxMsgs +// // Main holds all config for Kafka indexing w/ schema registry. +// type Main struct { +// PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` +// KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` +// RegistryURL string `help:"Location of Confluent Schema Registry"` +// BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` +// Group string `help:"Kafka group."` +// Index string `help:"Name of Pilosa index."` +// Topics []string `help:"Kafka topics to read from."` +// LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement +// PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` +// IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. 
If both are empty, auto-generated IDs will be used."` +// MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` +// Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` +// PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` +// // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. - err := source.Open() - if err != nil { - return errors.Wrap(err, "opening source") - } - var batch gpexp.RecordBatch - var recordizers []Recordizer - var prevRec pdk.Record - var row *gpexp.Row - rec, err := source.Record() - for ; err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { - if err == pdk.ErrSchemaChange { - // finish previous batch if this is not the first - if batch != nil { - err = batch.Import() - if err != nil { - return errors.Wrap(err, "importing") - } - err = prevRec.Commit() - if err != nil { - return errors.Wrap(err, "committing") - } - } - schema := source.Schema() - recordizers, batch, row, err = m.batchFromSchema(schema) - if err != nil { - return errors.Wrap(err, "batchFromSchema") - } - } - for i := range row.Values { - row.Values[i] = nil - } - data := rec.Data() - for _, rdz := range recordizers { - err = rdz(data, row) - if err != nil { - return errors.Wrap(err, "recordizing") - } - } - err = batch.Add(*row) - if err == gpexp.ErrBatchNowFull { - err = batch.Import() - if err != nil { - return errors.Wrap(err, "importing batch") - } - err = rec.Commit() - if err != nil { - return errors.Wrap(err, "commiting record") - } - } else if err != nil { - return errors.Wrap(err, "adding to batch") - } - prevRec = rec - } - if err == io.EOF { - err = nil - } - return errors.Wrap(err, "getting record") -} +// client *pilosa.Client +// schema *pilosa.Schema +// index *pilosa.Index +// } -type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error +// func NewMain() *Main { +// return &Main{ +// PilosaHosts: []string{"localhost:10101"}, +// BatchSize: 1, // definitely increase this to achieve any amount of performance +// Index: "defaultindex", +// Concurrency: 1, +// PackBools: "bools", +// } +// } -func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBatch, *gpexp.Row, error) { - // from the schema, and the configuration stored on Main, we need - // to create a []pilosa.Field and a []Recordizer processing - // functions which take a []interface{} which conforms to the - // schema, and converts it to a record which conforms to the - // []pilosa.Field. - // - // The relevant config options on Main are: - // 1. PrimaryKeyFields and IDField - // 2. PackBools - // 3. BatchSize (gets passed directly to the batch) - // - // For PrimaryKeyFields and IDField there is some complexity. There are 3 top level options. 1, the other, or neither (auto-generated IDs). - // - // 1. PrimarKeyFields - the main question here is whether in - // addition to combining these and translating them to column ID, - // do we index them separately? I think the answer by default - // should be yes. - // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. - // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. 
- recordizers := make([]Recordizer, 0) +// func (m *Main) Run() (err error) { +// err = m.setup() +// if err != nil { +// return errors.Wrap(err, "setting up") +// } +// eg := errgroup.Group{} +// for c := 0; c < m.Concurrency; c++ { +// c := c +// eg.Go(func() error { +// return m.runIngester(c) +// }) +// } - var rz Recordizer - skips := make(map[int]struct{}) - var err error +// return eg.Wait() +// } - // primary key stuff - if len(m.PrimaryKeyFields) != 0 { - rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "getting primary key recordizer") - } - } else if m.IDField != "" { - for fieldIndex, field := range schema { - if field.Name() == m.IDField { - if _, ok := field.(pdk.IDField); !ok { - return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) - } - fieldIndex := fieldIndex - rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) - return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) - } - skips[fieldIndex] = struct{}{} - break - } - } - if rz == nil { - return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) - } - } else { - return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") - } - recordizers = append(recordizers, rz) +// func (m *Main) setup() (err error) { +// if err := m.validate(); err != nil { +// return errors.Wrap(err, "validating configuration") +// } - // set up bool fields - var boolField, boolFieldExists *pilosa.Field - if m.PackBools != "" { - boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) - boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) - } - fields := make([]*pilosa.Field, 0, len(schema)) - for i, pdkField := range schema { - // need to redefine these inside the loop since we're - // capturing them in closures - i := i - pdkField := pdkField - // see if we previously decided to skip this field of the raw - // record. - if _, ok := skips[i]; ok { - continue - } +// m.client, err = pilosa.NewClient(m.PilosaHosts) +// if err != nil { +// return errors.Wrap(err, "getting pilosa client") +// } +// m.schema, err = m.client.Schema() +// if err != nil { +// return errors.Wrap(err, "getting schema") +// } +// keyTranslation := len(m.PrimaryKeyFields) > 0 +// m.index = m.schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) +// if m.PackBools != "" { +// m.index.Field(m.PackBools, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) +// m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) +// } +// err = m.client.SyncSchema(m.schema) +// if err != nil { +// return errors.Wrap(err, "syncing schema") +// } - // handle records where pilosa already has the field - _, isBool := pdkField.(pdk.BoolField) - if (m.PackBools == "" || !isBool) && m.index.HasField(pdkField.Name()) { - // TODO validate that Pilosa's existing field matches the - // type and options of the PDK field. 
- fields = append(fields, m.index.Field(pdkField.Name())) - valIdx := len(fields) - 1 - // TODO may need to have more sophisticated recordizer by type at some point - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) - return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) - }) - continue - } +// return nil +// } - // now handle this field if it was not already found in pilosa - switch fld := pdkField.(type) { - case pdk.StringField, pdk.IDField, pdk.StringArrayField: - opts := []pilosa.FieldOption{} - if hasMutex(fld) { - opts = append(opts, pilosa.OptFieldTypeMutex(pilosa.CacheTypeRanked, 50000)) - } else { - opts = append(opts, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000)) - } - _, ok1 := fld.(pdk.StringArrayField) - if _, ok2 := fld.(pdk.StringField); ok1 || ok2 { - opts = append(opts, pilosa.OptFieldKeys(true)) - } - fields = append(fields, m.index.Field(fld.Name(), opts...)) - valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) - return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) - }) - case pdk.BoolField: - if m.PackBools == "" { - fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) - valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.Values[valIdx] = rawRec[i] - return nil - }) - } else { - fields = append(fields, boolField, boolFieldExists) - fieldIdx := len(fields) - 2 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - b, ok := rawRec[i].(bool) - if b { - rec.Values[fieldIdx] = pdkField.Name() - } - if ok { - rec.Values[fieldIdx+1] = pdkField.Name() - } - return nil - }) - continue - } - case pdk.IntField: - if fld.Min != nil { - min := *fld.Min - if fld.Max != nil { - fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min, *fld.Max))) - } else { - fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) - } - } else { - fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) - } - valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) - return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) - }) - case pdk.DecimalField: - // TODO handle scale - fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) - valIdx := len(fields) - 1 - recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) - return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) - }) - default: - return nil, nil, nil, errors.Errorf("unknown schema field type %T %[1]v", pdkField) - } - } - err = m.client.SyncSchema(m.schema) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "syncing schema") - } - batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "creating batch") - } - row := &gpexp.Row{ - Values: make([]interface{}, len(fields)), - } - return recordizers, batch, row, nil -} +// func (m *Main) runIngester(c int) error { +// source := 
NewSource() +// source.Hosts = m.KafkaHosts +// source.Topics = m.Topics +// source.Group = m.Group +// source.MaxMsgs = m.MaxMsgs -func hasMutex(fld pdk.Field) bool { - if sfld, ok := fld.(pdk.StringField); ok { - return sfld.Mutex - } - if sfld, ok := fld.(pdk.IDField); ok { - return sfld.Mutex - } - return false -} +// err := source.Open() +// if err != nil { +// return errors.Wrap(err, "opening source") +// } +// var batch gpexp.RecordBatch +// var recordizers []Recordizer +// var prevRec pdk.Record +// var row *gpexp.Row +// rec, err := source.Record() +// for ; err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { +// if err == pdk.ErrSchemaChange { +// // finish previous batch if this is not the first +// if batch != nil { +// err = batch.Import() +// if err != nil { +// return errors.Wrap(err, "importing") +// } +// err = prevRec.Commit() +// if err != nil { +// return errors.Wrap(err, "committing") +// } +// } +// schema := source.Schema() +// recordizers, batch, row, err = m.batchFromSchema(schema) +// if err != nil { +// return errors.Wrap(err, "batchFromSchema") +// } +// } +// for i := range row.Values { +// row.Values[i] = nil +// } +// data := rec.Data() +// for _, rdz := range recordizers { +// err = rdz(data, row) +// if err != nil { +// return errors.Wrap(err, "recordizing") +// } +// } +// err = batch.Add(*row) +// if err == gpexp.ErrBatchNowFull { +// err = batch.Import() +// if err != nil { +// return errors.Wrap(err, "importing batch") +// } +// err = rec.Commit() +// if err != nil { +// return errors.Wrap(err, "commiting record") +// } +// } else if err != nil { +// return errors.Wrap(err, "adding to batch") +// } +// prevRec = rec +// } +// if err == io.EOF { +// err = nil +// } +// return errors.Wrap(err, "getting record") +// } -// getPrimaryKeyRecordizer returns a Recordizer function which -// extracts the primary key fields from a record, combines them, and -// sets the ID on the record. If pkFields is a single field, and that -// field is of type string, we'll return it in skipFields, because we -// won't want to index it separately. -func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer Recordizer, skipFields map[int]struct{}, err error) { - if len(schema) == 0 { - return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty schema") - } - if len(pkFields) == 0 { - return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty pkFields") - } - fieldIndices := make([]int, 0, len(pkFields)) - for pkIndex, pk := range pkFields { - for fieldIndex, field := range schema { - if pk == field.Name() { - switch field.(type) { - case pdk.StringArrayField: - return nil, nil, errors.Errorf("field %s cannot be a primary key field because it is a StringArray field.", pk) - } - fieldIndices = append(fieldIndices, fieldIndex) - break - } - } - if len(fieldIndices) != pkIndex+1 { - return nil, nil, errors.Errorf("no field with primary key field name %s found. fields: %+v", pk, schema) - } - } - if len(pkFields) == 1 { - if _, ok := schema[fieldIndices[0]].(pdk.StringField); ok { - skipFields = make(map[int]struct{}, 1) - skipFields[fieldIndices[0]] = struct{}{} - } - } - recordizer = func(rawRec []interface{}, rec *gpexp.Row) (err error) { - idbytes, ok := rec.ID.([]byte) - if ok { - idbytes = idbytes[:0] - } else { - idbytes = make([]byte, 0) - } - buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? +// type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error - // TODO... 
will want to change this encoding logic to length-prefix the different fields or something. - for _, fieldIdx := range fieldIndices { - val := rawRec[fieldIdx] - switch vt := val.(type) { - case string: - buf.WriteString(vt) // err is always nil - case []byte: - buf.Write(vt) // err is always nil - default: - err = binary.Write(buf, binary.BigEndian, val) - if err != nil { - return errors.Wrapf(err, "writing %+v of type %[1]T", val) - } - } - } - rec.ID = buf.Bytes() - return nil - } - return recordizer, skipFields, nil -} +// func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBatch, *gpexp.Row, error) { +// // from the schema, and the configuration stored on Main, we need +// // to create a []pilosa.Field and a []Recordizer processing +// // functions which take a []interface{} which conforms to the +// // schema, and converts it to a record which conforms to the +// // []pilosa.Field. +// // +// // The relevant config options on Main are: +// // 1. PrimaryKeyFields and IDField +// // 2. PackBools +// // 3. BatchSize (gets passed directly to the batch) +// // +// // For PrimaryKeyFields and IDField there is some complexity. There are 3 top level options. 1, the other, or neither (auto-generated IDs). +// // +// // 1. PrimarKeyFields - the main question here is whether in +// // addition to combining these and translating them to column ID, +// // do we index them separately? I think the answer by default +// // should be yes. +// // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. +// // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. 
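+// NOTE: this commented-out implementation is superseded by the version
+// that now lives in v2/ingest.go.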
+// recordizers := make([]Recordizer, 0) -func (m *Main) validate() error { - if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { - return errors.New("cannot set both primary key fields and id-field") - } - return nil -} +// var rz Recordizer +// skips := make(map[int]struct{}) +// var err error + +// // primary key stuff +// if len(m.PrimaryKeyFields) != 0 { +// rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) +// if err != nil { +// return nil, nil, nil, errors.Wrap(err, "getting primary key recordizer") +// } +// } else if m.IDField != "" { +// for fieldIndex, field := range schema { +// if field.Name() == m.IDField { +// if _, ok := field.(pdk.IDField); !ok { +// return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) +// } +// fieldIndex := fieldIndex +// rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) +// return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) +// } +// skips[fieldIndex] = struct{}{} +// break +// } +// } +// if rz == nil { +// return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) +// } +// } else { +// return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") +// } +// recordizers = append(recordizers, rz) + +// // set up bool fields +// var boolField, boolFieldExists *pilosa.Field +// if m.PackBools != "" { +// boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) +// boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) +// } +// fields := make([]*pilosa.Field, 0, len(schema)) +// for i, pdkField := range schema { +// // need to redefine these inside the loop since we're +// // capturing them in closures +// i := i +// pdkField := pdkField +// // see if we previously decided to skip this field of the raw +// // record. +// if _, ok := skips[i]; ok { +// continue +// } + +// // handle records where pilosa already has the field +// _, isBool := pdkField.(pdk.BoolField) +// if (m.PackBools == "" || !isBool) && m.index.HasField(pdkField.Name()) { +// // TODO validate that Pilosa's existing field matches the +// // type and options of the PDK field. 
+// fields = append(fields, m.index.Field(pdkField.Name())) +// valIdx := len(fields) - 1 +// // TODO may need to have more sophisticated recordizer by type at some point +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) +// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) +// }) +// continue +// } + +// // now handle this field if it was not already found in pilosa +// switch fld := pdkField.(type) { +// case pdk.StringField, pdk.IDField, pdk.StringArrayField: +// opts := []pilosa.FieldOption{} +// if hasMutex(fld) { +// opts = append(opts, pilosa.OptFieldTypeMutex(pilosa.CacheTypeRanked, 50000)) +// } else { +// opts = append(opts, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000)) +// } +// _, ok1 := fld.(pdk.StringArrayField) +// if _, ok2 := fld.(pdk.StringField); ok1 || ok2 { +// opts = append(opts, pilosa.OptFieldKeys(true)) +// } +// fields = append(fields, m.index.Field(fld.Name(), opts...)) +// valIdx := len(fields) - 1 +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) +// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) +// }) +// case pdk.BoolField: +// if m.PackBools == "" { +// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) +// valIdx := len(fields) - 1 +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.Values[valIdx] = rawRec[i] +// return nil +// }) +// } else { +// fields = append(fields, boolField, boolFieldExists) +// fieldIdx := len(fields) - 2 +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// b, ok := rawRec[i].(bool) +// if b { +// rec.Values[fieldIdx] = pdkField.Name() +// } +// if ok { +// rec.Values[fieldIdx+1] = pdkField.Name() +// } +// return nil +// }) +// continue +// } +// case pdk.IntField: +// if fld.Min != nil { +// min := *fld.Min +// if fld.Max != nil { +// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min, *fld.Max))) +// } else { +// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) +// } +// } else { +// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) +// } +// valIdx := len(fields) - 1 +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) +// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) +// }) +// case pdk.DecimalField: +// // TODO handle scale +// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) +// valIdx := len(fields) - 1 +// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) +// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) +// }) +// default: +// return nil, nil, nil, errors.Errorf("unknown schema field type %T %[1]v", pdkField) +// } +// } +// err = m.client.SyncSchema(m.schema) +// if err != nil { +// return nil, nil, nil, errors.Wrap(err, "syncing schema") +// } +// batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) +// if err != nil { +// return nil, nil, nil, errors.Wrap(err, "creating batch") +// } +// row := 
&gpexp.Row{ +// Values: make([]interface{}, len(fields)), +// } +// return recordizers, batch, row, nil +// } + +// func hasMutex(fld pdk.Field) bool { +// if sfld, ok := fld.(pdk.StringField); ok { +// return sfld.Mutex +// } +// if sfld, ok := fld.(pdk.IDField); ok { +// return sfld.Mutex +// } +// return false +// } + +// // getPrimaryKeyRecordizer returns a Recordizer function which +// // extracts the primary key fields from a record, combines them, and +// // sets the ID on the record. If pkFields is a single field, and that +// // field is of type string, we'll return it in skipFields, because we +// // won't want to index it separately. +// func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer Recordizer, skipFields map[int]struct{}, err error) { +// if len(schema) == 0 { +// return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty schema") +// } +// if len(pkFields) == 0 { +// return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty pkFields") +// } +// fieldIndices := make([]int, 0, len(pkFields)) +// for pkIndex, pk := range pkFields { +// for fieldIndex, field := range schema { +// if pk == field.Name() { +// switch field.(type) { +// case pdk.StringArrayField: +// return nil, nil, errors.Errorf("field %s cannot be a primary key field because it is a StringArray field.", pk) +// } +// fieldIndices = append(fieldIndices, fieldIndex) +// break +// } +// } +// if len(fieldIndices) != pkIndex+1 { +// return nil, nil, errors.Errorf("no field with primary key field name %s found. fields: %+v", pk, schema) +// } +// } +// if len(pkFields) == 1 { +// if _, ok := schema[fieldIndices[0]].(pdk.StringField); ok { +// skipFields = make(map[int]struct{}, 1) +// skipFields[fieldIndices[0]] = struct{}{} +// } +// } +// recordizer = func(rawRec []interface{}, rec *gpexp.Row) (err error) { +// idbytes, ok := rec.ID.([]byte) +// if ok { +// idbytes = idbytes[:0] +// } else { +// idbytes = make([]byte, 0) +// } +// buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? + +// // TODO... will want to change this encoding logic to length-prefix the different fields or something. 
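A note on the encoding performed by the commented-out loop that follows: strings and []byte values are appended as-is, and everything else is written big-endian, which is exactly what produces the key bytes asserted in the tests further down (e.g. []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1} for three uint32 primary-key fields, and "2" + "1" + uint32(159) giving []byte{50, 49, 0, 0, 0, 159}). The following is a minimal standalone sketch of that concatenation, not part of this diff; encodeKey is a hypothetical helper name, and it also shows the collision the length-prefix TODO above is worried about.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeKey mirrors the recordizer's concatenation: strings and []byte are
// appended as-is, everything else is written big-endian via binary.Write.
func encodeKey(vals ...interface{}) ([]byte, error) {
	buf := &bytes.Buffer{}
	for _, val := range vals {
		switch vt := val.(type) {
		case string:
			buf.WriteString(vt)
		case []byte:
			buf.Write(vt)
		default:
			if err := binary.Write(buf, binary.BigEndian, val); err != nil {
				return nil, err
			}
		}
	}
	return buf.Bytes(), nil
}

func main() {
	// uint32 values 2, 4, 1 -> [0 0 0 2 0 0 0 4 0 0 0 1], as the recordizer test expects.
	k1, _ := encodeKey(uint32(2), uint32(4), uint32(1))
	// "2" + "1" + uint32(159) -> [50 49 0 0 0 159], the key asserted in the kafka tests.
	k2, _ := encodeKey("2", "1", uint32(159))
	fmt.Println(k1, k2)

	// Without length prefixes, ("ab", "c") and ("a", "bc") produce the same key,
	// which is what the length-prefix TODO above is about.
	a, _ := encodeKey("ab", "c")
	b, _ := encodeKey("a", "bc")
	fmt.Println(bytes.Equal(a, b)) // true
}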
+// for _, fieldIdx := range fieldIndices { +// val := rawRec[fieldIdx] +// switch vt := val.(type) { +// case string: +// buf.WriteString(vt) // err is always nil +// case []byte: +// buf.Write(vt) // err is always nil +// default: +// err = binary.Write(buf, binary.BigEndian, val) +// if err != nil { +// return errors.Wrapf(err, "writing %+v of type %[1]T", val) +// } +// } +// } +// rec.ID = buf.Bytes() +// return nil +// } +// return recordizer, skipFields, nil +// } + +// func (m *Main) validate() error { +// if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { +// return errors.New("cannot set both primary key fields and id-field") +// } +// return nil +// } diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index 4b1d3f0..da67647 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -1,13 +1,9 @@ package kafka import ( - "reflect" - "strings" "testing" "github.com/Shopify/sarama" - "github.com/pilosa/go-pilosa/gpexp" - pdk "github.com/pilosa/pdk/v2" ) func TestCmdMain(t *testing.T) { @@ -54,12 +50,16 @@ func TestCmdMain(t *testing.T) { t.Fatalf("running main: %v", err) } + client := m.PilosaClient() + schema, err := client.Schema() + index := schema.Index(m.Index) + // check data in Pilosa - if !m.index.HasField("abc") { + if !index.HasField("abc") { t.Fatalf("don't have abc") } - abc := m.index.Field("abc") - qr, err := m.client.Query(m.index.Count(abc.Row("2"))) + abc := index.Field("abc") + qr, err := client.Query(index.Count(abc.Row("2"))) if err != nil { t.Fatalf("querying: %v", err) } @@ -78,255 +78,3 @@ func makeRecord(t *testing.T, fields []string, vals []interface{}) map[string]in } return ret } - -func TestGetPrimaryKeyRecordizer(t *testing.T) { - tests := []struct { - name string - schema []pdk.Field - pkFields []string - expErr string - expSkip map[int]struct{} - rawRec []interface{} - expID interface{} - }{ - { - name: "no schema", - expErr: "can't call getPrimaryKeyRecordizer with empty schema", - }, - { - name: "no pkfields", - schema: []pdk.Field{pdk.StringField{}}, - expErr: "can't call getPrimaryKeyRecordizer with empty pkFields", - }, - { - name: "primary is StringArray", - schema: []pdk.Field{pdk.StringArrayField{NameVal: "blah"}}, - pkFields: []string{"blah"}, - expErr: "field blah cannot be a primary key field because it is a StringArray field.", - }, - { - name: "primary is StringArray complex", - schema: []pdk.Field{pdk.StringField{NameVal: "zaa"}, pdk.IntField{NameVal: "hey"}, pdk.StringArrayField{NameVal: "blah"}}, - pkFields: []string{"blah", "zaa"}, - expErr: "field blah cannot be a primary key field because it is a StringArray field.", - }, - { - name: "unknown pkfield", - schema: []pdk.Field{pdk.StringField{NameVal: "zaa"}}, - pkFields: []string{"zaa", "zz"}, - expErr: "no field with primary key field name zz found", - }, - { - name: "unknown pkfield complex", - schema: []pdk.Field{pdk.StringField{NameVal: "zaa"}, pdk.IntField{NameVal: "hey"}, pdk.StringField{NameVal: "blah"}}, - pkFields: []string{"blah", "zz", "zaa"}, - expErr: "no field with primary key field name zz found", - }, - { - name: "skip primary", - schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.IntField{NameVal: "b"}}, - pkFields: []string{"a"}, - expSkip: map[int]struct{}{0: struct{}{}}, - rawRec: []interface{}{"a", 9}, - expID: []byte("a"), - }, - { - name: "primaries as ints", - schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.IntField{NameVal: "b"}, pdk.IntField{NameVal: "c"}, pdk.IntField{NameVal: "d"}}, - pkFields: []string{"c", "d", "b"}, - rawRec: 
[]interface{}{"a", uint32(1), uint32(2), uint32(4)}, - expID: []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - rdz, skips, err := getPrimaryKeyRecordizer(test.schema, test.pkFields) - if test.expErr != "" { - if err == nil { - t.Fatalf("nil err, expected %s", test.expErr) - } - if !strings.Contains(err.Error(), test.expErr) { - t.Fatalf("unmatched errs exp/got\n%s\n%v", test.expErr, err) - } - return - } else if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if !reflect.DeepEqual(skips, test.expSkip) { - t.Errorf("unmatched skips exp/got\n%+v\n%+v", test.expSkip, skips) - } - - row := &gpexp.Row{} - err = rdz(test.rawRec, row) - if err != nil { - t.Fatalf("unexpected error from recordizer: %v", err) - } - if !reflect.DeepEqual(test.expID, row.ID) { - t.Fatalf("mismatched row IDs exp: %+v, got: %+v", test.expID, row.ID) - } - - }) - } -} - -func TestBatchFromSchema(t *testing.T) { - if testing.Short() { - t.Skip() - } - type testcase struct { - name string - schema []pdk.Field - IDField string - pkFields []string - packBools string - rawRec []interface{} - rowID interface{} - rowVals []interface{} - err string - batchErr string - } - runTest := func(t *testing.T, test testcase, removeIndex bool) { - m := NewMain() - m.Index = "cmd_test_index23lkjdkfj" - m.PrimaryKeyFields = test.pkFields - m.IDField = test.IDField - m.PackBools = test.packBools - m.BatchSize = 2 - - err := m.setup() - if err != nil { - t.Fatalf("%v", err) - } - if removeIndex { - defer func() { - err := m.client.DeleteIndex(m.index) - if err != nil { - t.Logf("deleting test index: %v", err) - } - }() - } - - rdzs, batch, row, err := m.batchFromSchema(test.schema) - if testErr(t, test.err, err) { - return - } - - for _, rdz := range rdzs { - err = rdz(test.rawRec, row) - if err != nil { - t.Fatalf("recordizing: %v", err) - } - } - - if !reflect.DeepEqual(row.ID, test.rowID) { - t.Fatalf("row IDs exp: %+v got %+v", test.rowID, row.ID) - } - if !reflect.DeepEqual(row.Values, test.rowVals) { - t.Errorf("row values exp/got:\n%+v %[1]T\n%+v %[2]T", test.rowVals, row.Values) - if len(row.Values) == len(test.rowVals) { - for i, v := range row.Values { - if !reflect.DeepEqual(v, test.rowVals[i]) { - t.Errorf("%v %[1]T != %v %[2]T", test.rowVals[i], v) - } - } - } - t.Fail() - } - - err = batch.Add(*row) - if testErr(t, test.batchErr, err) { - return - } - } - - tests := []testcase{ - { - name: "empty", - err: "autogen IDs is currently unimplemented", - }, - { - name: "no id field", - schema: []pdk.Field{pdk.StringField{}}, - IDField: "nope", - err: "ID field nope not found", - }, - { - name: "pk error", - pkFields: []string{"zoop"}, - err: "getting primary key recordizer", - }, - { - name: "pack bools", - schema: []pdk.Field{pdk.BoolField{NameVal: "a"}, pdk.IDField{NameVal: "b"}, pdk.BoolField{NameVal: "c"}}, - IDField: "b", - packBools: "bff", - rawRec: []interface{}{true, uint64(7), false}, - rowID: uint64(7), - rowVals: []interface{}{"a", "a", nil, "c"}, - }, - { - name: "don't pack bools", - schema: []pdk.Field{pdk.BoolField{NameVal: "a"}, pdk.IDField{NameVal: "b"}, pdk.BoolField{NameVal: "c"}}, - IDField: "b", - rawRec: []interface{}{true, uint64(7), false}, - rowID: uint64(7), - rowVals: []interface{}{true, false}, - err: "field type bool is not currently supported through Batch", - }, - { - name: "mutex field", - schema: []pdk.Field{pdk.StringField{NameVal: "a", Mutex: true}, pdk.IDField{NameVal: "b"}}, - IDField: "b", - rawRec: 
[]interface{}{"aval", uint64(7)}, - rowID: uint64(7), - rowVals: []interface{}{"aval"}, - err: "field type mutex is not currently supported through Batch", - }, - { - name: "string array field", - schema: []pdk.Field{pdk.StringArrayField{NameVal: "a"}, pdk.StringField{NameVal: "b"}}, - pkFields: []string{"b"}, - rawRec: []interface{}{[]string{"aval", "aval2"}, uint64(7)}, - rowID: []byte{0, 0, 0, 0, 0, 0, 0, 7}, - rowVals: []interface{}{[]string{"aval", "aval2"}}, - }, - { - name: "decimal field", - schema: []pdk.Field{pdk.StringField{NameVal: "a"}, pdk.DecimalField{NameVal: "b", Scale: 2}}, - pkFields: []string{"a"}, - rawRec: []interface{}{"blah", uint64(321)}, - rowID: []byte("blah"), - rowVals: []interface{}{int64(321)}, - }, - } - - for _, test := range tests { - // test on fresh Pilosa - t.Run(test.name+"-1", func(t *testing.T) { - runTest(t, test, false) - }) - // test again with index/fields in place - t.Run(test.name+"-2", func(t *testing.T) { - runTest(t, test, true) - }) - } -} - -func testErr(t *testing.T, exp string, actual error) (done bool) { - t.Helper() - if exp == "" && actual == nil { - return false - } - if exp == "" && actual != nil { - t.Fatalf("unexpected error: %v", actual) - } - if exp != "" && actual == nil { - t.Fatalf("expected error like '%s'", exp) - } - if !strings.Contains(actual.Error(), exp) { - t.Fatalf("unmatched errs exp/got\n%s\n%v", exp, actual) - } - return true -} From 0c25e3967ae3703e32f3cc2458724aaf9ba3bf9e Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 10 Oct 2019 08:10:55 -0500 Subject: [PATCH 21/40] mock v2 kafka consumer --- go.mod | 3 +- kafka/integration_test.go | 3 + v2/cmd/kafkagen/main.go | 14 + v2/ingest.go | 42 ++- v2/kafka/cmd.go | 406 ----------------------- v2/kafka/cmd_test.go | 165 ++++++++- v2/kafka/csrc/csrc.go | 2 +- v2/kafka/testdata/schemas/bigschema.json | 2 +- v2/kafkagen/cmd.go | 129 +++++++ 9 files changed, 352 insertions(+), 414 deletions(-) create mode 100644 v2/cmd/kafkagen/main.go create mode 100644 v2/kafkagen/cmd.go diff --git a/go.mod b/go.mod index 9046de4..75bb56c 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,7 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 +//replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 +replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 diff --git a/kafka/integration_test.go b/kafka/integration_test.go index c90c29e..719aa2c 100644 --- a/kafka/integration_test.go +++ b/kafka/integration_test.go @@ -54,6 +54,7 @@ var kafkaTopic = "testtopic" var restProxyURL = "localhost:8082" func TestSource(t *testing.T) { + t.Skip("not running REST Proxy") if testing.Short() { t.Skip("integration test") } @@ -188,6 +189,7 @@ func TestCompareStringLists(t *testing.T) { // here: https://www.confluent.io/download Decompress, enter directory, then run // "./bin/confluent start kafka-rest" func TestMain(t *testing.T) { + t.Skip("not running REST Proxy") runMain(t, []string{}) // without this sleep, the next test will hang sometimes. I'm guessing // something in Confluent or the OS needs to settle between tests. 
@@ -195,6 +197,7 @@ func TestMain(t *testing.T) { } func TestAllowedFields(t *testing.T) { + t.Skip("not running REST Proxy") runMain(t, []string{"geoip-country_code", "aba"}) time.Sleep(time.Second) } diff --git a/v2/cmd/kafkagen/main.go b/v2/cmd/kafkagen/main.go new file mode 100644 index 0000000..6d18505 --- /dev/null +++ b/v2/cmd/kafkagen/main.go @@ -0,0 +1,14 @@ +package main + +import ( + "log" + + "github.com/jaffee/commandeer" + "github.com/pilosa/pdk/v2/kafkagen" +) + +func main() { + if err := commandeer.Run(kafkagen.NewMain()); err != nil { + log.Fatal(err) + } +} diff --git a/v2/ingest.go b/v2/ingest.go index 6cab59d..59da8b4 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -4,9 +4,11 @@ import ( "bytes" "encoding/binary" "io" + "os" "github.com/pilosa/go-pilosa" "github.com/pilosa/go-pilosa/gpexp" + "github.com/pilosa/pilosa/logger" "github.com/pkg/errors" "golang.org/x/sync/errgroup" ) @@ -20,12 +22,13 @@ type Main struct { PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` BatchSize int `flag:"batch-size",help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` Index string `help:"Name of Pilosa index."` - LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement + LogPath string `help:"Log file to write to. Empty means stderr. TODO implement."` PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` + Verbose bool `help:"Enable verbose logging."` // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. 
NewSource func() (Source, error) `flag:"-"` @@ -33,6 +36,8 @@ type Main struct { client *pilosa.Client schema *pilosa.Schema index *pilosa.Index + + log logger.Logger } func (m *Main) PilosaClient() *pilosa.Client { @@ -70,6 +75,21 @@ func (m *Main) setup() (err error) { return errors.Wrap(err, "validating configuration") } + logOut := os.Stdout + if m.LogPath != "" { + f, err := os.OpenFile(m.LogPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) + if err != nil { + return errors.Wrap(err, "opening log file") + } + logOut = f + } + + if m.Verbose { + m.log = logger.NewVerboseLogger(logOut) + } else { + m.log = logger.NewStandardLogger(logOut) + } + m.client, err = pilosa.NewClient(m.PilosaHosts) if err != nil { return errors.Wrap(err, "getting pilosa client") @@ -116,6 +136,7 @@ func (m *Main) runIngester(c int) error { } } schema := source.Schema() + m.log.Printf("new schema: %+v", schema) recordizers, batch, row, err = m.batchFromSchema(schema) if err != nil { return errors.Wrap(err, "batchFromSchema") @@ -125,6 +146,7 @@ func (m *Main) runIngester(c int) error { row.Values[i] = nil } data := rec.Data() + m.log.Debugf("record: %+v", data) for _, rdz := range recordizers { err = rdz(data, row) if err != nil { @@ -190,12 +212,24 @@ func (m *Main) batchFromSchema(schema []Field) ([]Recordizer, gpexp.RecordBatch, for fieldIndex, field := range schema { if field.Name() == m.IDField { if _, ok := field.(IDField); !ok { - return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) + if _, ok := field.(IntField); !ok { + return nil, nil, nil, errors.Errorf("specified column id field %s is not an IDField or an IntField %T", m.IDField, field) + } } fieldIndex := fieldIndex rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { - rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) - return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) + id, err := field.PilosafyVal(rawRec[fieldIndex]) + if err != nil { + return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) + } + if uid, ok := id.(uint64); ok { + rec.ID = uid + } else if iid, ok := id.(int64); ok { + rec.ID = uint64(iid) + } else { + return errors.Errorf("can't convert %v of %[1]T to uint64 for use as ID", id) + } + return nil } skips[fieldIndex] = struct{}{} break diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index cab254c..1d010b7 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -5,8 +5,6 @@ import ( "github.com/pkg/errors" ) -//////////////////new - type Main struct { pdk.Main `flag:"!embed"` KafkaHosts []string @@ -38,407 +36,3 @@ func NewMain() *Main { } return m } - -// /////////////////old - -// // TODO split off the Kafka-specific stuff into something else and take all the general purpose stuff out of here. - -// // TODO Jaeger -// // TODO profiling endpoint -// // TODO Prometheus - -// // Main holds all config for Kafka indexing w/ schema registry. -// type Main struct { -// PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` -// KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` -// RegistryURL string `help:"Location of Confluent Schema Registry"` -// BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 
1,048,576 might be a good number."` -// Group string `help:"Kafka group."` -// Index string `help:"Name of Pilosa index."` -// Topics []string `help:"Kafka topics to read from."` -// LogPath string `help:"Log file to write to. Empty means stderr."` // TODO implement -// PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` -// IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` -// MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` -// Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` -// PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` -// // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. - -// client *pilosa.Client -// schema *pilosa.Schema -// index *pilosa.Index -// } - -// func NewMain() *Main { -// return &Main{ -// PilosaHosts: []string{"localhost:10101"}, -// BatchSize: 1, // definitely increase this to achieve any amount of performance -// Index: "defaultindex", -// Concurrency: 1, -// PackBools: "bools", -// } -// } - -// func (m *Main) Run() (err error) { -// err = m.setup() -// if err != nil { -// return errors.Wrap(err, "setting up") -// } -// eg := errgroup.Group{} -// for c := 0; c < m.Concurrency; c++ { -// c := c -// eg.Go(func() error { -// return m.runIngester(c) -// }) -// } - -// return eg.Wait() -// } - -// func (m *Main) setup() (err error) { -// if err := m.validate(); err != nil { -// return errors.Wrap(err, "validating configuration") -// } - -// m.client, err = pilosa.NewClient(m.PilosaHosts) -// if err != nil { -// return errors.Wrap(err, "getting pilosa client") -// } -// m.schema, err = m.client.Schema() -// if err != nil { -// return errors.Wrap(err, "getting schema") -// } -// keyTranslation := len(m.PrimaryKeyFields) > 0 -// m.index = m.schema.Index(m.Index, pilosa.OptIndexKeys(keyTranslation)) -// if m.PackBools != "" { -// m.index.Field(m.PackBools, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) -// m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000), pilosa.OptFieldKeys(true)) -// } -// err = m.client.SyncSchema(m.schema) -// if err != nil { -// return errors.Wrap(err, "syncing schema") -// } - -// return nil -// } - -// func (m *Main) runIngester(c int) error { -// source := NewSource() -// source.Hosts = m.KafkaHosts -// source.Topics = m.Topics -// source.Group = m.Group -// source.MaxMsgs = m.MaxMsgs - -// err := source.Open() -// if err != nil { -// return errors.Wrap(err, "opening source") -// } -// var batch gpexp.RecordBatch -// var recordizers []Recordizer -// var prevRec pdk.Record -// var row *gpexp.Row -// rec, err := source.Record() -// for ; err == pdk.ErrSchemaChange || err == nil; rec, err = source.Record() { -// if err == pdk.ErrSchemaChange { -// // finish previous batch if this is not the first -// if batch != nil { -// err = batch.Import() -// if err != nil { -// return errors.Wrap(err, "importing") -// } -// err = prevRec.Commit() -// if err != nil { -// return errors.Wrap(err, "committing") -// } -// } -// schema := 
source.Schema() -// recordizers, batch, row, err = m.batchFromSchema(schema) -// if err != nil { -// return errors.Wrap(err, "batchFromSchema") -// } -// } -// for i := range row.Values { -// row.Values[i] = nil -// } -// data := rec.Data() -// for _, rdz := range recordizers { -// err = rdz(data, row) -// if err != nil { -// return errors.Wrap(err, "recordizing") -// } -// } -// err = batch.Add(*row) -// if err == gpexp.ErrBatchNowFull { -// err = batch.Import() -// if err != nil { -// return errors.Wrap(err, "importing batch") -// } -// err = rec.Commit() -// if err != nil { -// return errors.Wrap(err, "commiting record") -// } -// } else if err != nil { -// return errors.Wrap(err, "adding to batch") -// } -// prevRec = rec -// } -// if err == io.EOF { -// err = nil -// } -// return errors.Wrap(err, "getting record") -// } - -// type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error - -// func (m *Main) batchFromSchema(schema []pdk.Field) ([]Recordizer, gpexp.RecordBatch, *gpexp.Row, error) { -// // from the schema, and the configuration stored on Main, we need -// // to create a []pilosa.Field and a []Recordizer processing -// // functions which take a []interface{} which conforms to the -// // schema, and converts it to a record which conforms to the -// // []pilosa.Field. -// // -// // The relevant config options on Main are: -// // 1. PrimaryKeyFields and IDField -// // 2. PackBools -// // 3. BatchSize (gets passed directly to the batch) -// // -// // For PrimaryKeyFields and IDField there is some complexity. There are 3 top level options. 1, the other, or neither (auto-generated IDs). -// // -// // 1. PrimarKeyFields - the main question here is whether in -// // addition to combining these and translating them to column ID, -// // do we index them separately? I think the answer by default -// // should be yes. -// // 2. IDField — this is pretty easy. Use the integer value as the column ID. Do not index it separately by default. -// // 3. Autogenerate IDs. Ideally using a RangeAllocator per concurrent goroutine. OK, let's assume that if we set row.ID to nil, the auto generation can happen inside the Batch. 
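The comment above describes the three ID options: primary-key fields that are concatenated and translated to a column key, a single integer id-field used directly as the column ID, or auto-generated IDs (not yet implemented in this series, and rejected by the validation shown elsewhere in this diff when combined). A standalone sketch of how those options surface in the consumer's configuration follows; the index name is illustrative, while the field and topic names come from the tests in this series, and this is only one possible wiring, not a prescribed one.

package main

import (
	"log"

	"github.com/pilosa/pdk/v2/kafka"
)

func main() {
	m := kafka.NewMain()
	m.Index = "users" // illustrative index name
	m.Topics = []string{"testtopic"}

	// Option 1: build record keys from several source fields (key translation on).
	m.PrimaryKeyFields = []string{"abc", "db", "user_id"}

	// Option 2, mutually exclusive with option 1: take the column ID straight
	// from an integer field.
	// m.IDField = "user_id"

	// Option 3, auto-generated IDs, is unimplemented here; leaving both unset
	// makes Run return an error.

	if err := m.Run(); err != nil {
		log.Fatal(err)
	}
}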
-// recordizers := make([]Recordizer, 0) - -// var rz Recordizer -// skips := make(map[int]struct{}) -// var err error - -// // primary key stuff -// if len(m.PrimaryKeyFields) != 0 { -// rz, skips, err = getPrimaryKeyRecordizer(schema, m.PrimaryKeyFields) -// if err != nil { -// return nil, nil, nil, errors.Wrap(err, "getting primary key recordizer") -// } -// } else if m.IDField != "" { -// for fieldIndex, field := range schema { -// if field.Name() == m.IDField { -// if _, ok := field.(pdk.IDField); !ok { -// return nil, nil, nil, errors.Errorf("specified IDField %s is not an IDField but is %T", m.IDField, field) -// } -// fieldIndex := fieldIndex -// rz = func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.ID, err = field.PilosafyVal(rawRec[fieldIndex]) -// return errors.Wrapf(err, "converting %+v to ID", rawRec[fieldIndex]) -// } -// skips[fieldIndex] = struct{}{} -// break -// } -// } -// if rz == nil { -// return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) -// } -// } else { -// return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") -// } -// recordizers = append(recordizers, rz) - -// // set up bool fields -// var boolField, boolFieldExists *pilosa.Field -// if m.PackBools != "" { -// boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) -// boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) -// } -// fields := make([]*pilosa.Field, 0, len(schema)) -// for i, pdkField := range schema { -// // need to redefine these inside the loop since we're -// // capturing them in closures -// i := i -// pdkField := pdkField -// // see if we previously decided to skip this field of the raw -// // record. -// if _, ok := skips[i]; ok { -// continue -// } - -// // handle records where pilosa already has the field -// _, isBool := pdkField.(pdk.BoolField) -// if (m.PackBools == "" || !isBool) && m.index.HasField(pdkField.Name()) { -// // TODO validate that Pilosa's existing field matches the -// // type and options of the PDK field. 
-// fields = append(fields, m.index.Field(pdkField.Name())) -// valIdx := len(fields) - 1 -// // TODO may need to have more sophisticated recordizer by type at some point -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) -// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) -// }) -// continue -// } - -// // now handle this field if it was not already found in pilosa -// switch fld := pdkField.(type) { -// case pdk.StringField, pdk.IDField, pdk.StringArrayField: -// opts := []pilosa.FieldOption{} -// if hasMutex(fld) { -// opts = append(opts, pilosa.OptFieldTypeMutex(pilosa.CacheTypeRanked, 50000)) -// } else { -// opts = append(opts, pilosa.OptFieldTypeSet(pilosa.CacheTypeRanked, 50000)) -// } -// _, ok1 := fld.(pdk.StringArrayField) -// if _, ok2 := fld.(pdk.StringField); ok1 || ok2 { -// opts = append(opts, pilosa.OptFieldKeys(true)) -// } -// fields = append(fields, m.index.Field(fld.Name(), opts...)) -// valIdx := len(fields) - 1 -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) -// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) -// }) -// case pdk.BoolField: -// if m.PackBools == "" { -// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeBool())) -// valIdx := len(fields) - 1 -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.Values[valIdx] = rawRec[i] -// return nil -// }) -// } else { -// fields = append(fields, boolField, boolFieldExists) -// fieldIdx := len(fields) - 2 -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// b, ok := rawRec[i].(bool) -// if b { -// rec.Values[fieldIdx] = pdkField.Name() -// } -// if ok { -// rec.Values[fieldIdx+1] = pdkField.Name() -// } -// return nil -// }) -// continue -// } -// case pdk.IntField: -// if fld.Min != nil { -// min := *fld.Min -// if fld.Max != nil { -// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min, *fld.Max))) -// } else { -// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt(min))) -// } -// } else { -// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) -// } -// valIdx := len(fields) - 1 -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) -// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) -// }) -// case pdk.DecimalField: -// // TODO handle scale -// fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) -// valIdx := len(fields) - 1 -// recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// rec.Values[valIdx], err = pdkField.PilosafyVal(rawRec[i]) -// return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) -// }) -// default: -// return nil, nil, nil, errors.Errorf("unknown schema field type %T %[1]v", pdkField) -// } -// } -// err = m.client.SyncSchema(m.schema) -// if err != nil { -// return nil, nil, nil, errors.Wrap(err, "syncing schema") -// } -// batch, err := gpexp.NewBatch(m.client, m.BatchSize, m.index, fields) -// if err != nil { -// return nil, nil, nil, errors.Wrap(err, "creating batch") -// } -// row := 
&gpexp.Row{ -// Values: make([]interface{}, len(fields)), -// } -// return recordizers, batch, row, nil -// } - -// func hasMutex(fld pdk.Field) bool { -// if sfld, ok := fld.(pdk.StringField); ok { -// return sfld.Mutex -// } -// if sfld, ok := fld.(pdk.IDField); ok { -// return sfld.Mutex -// } -// return false -// } - -// // getPrimaryKeyRecordizer returns a Recordizer function which -// // extracts the primary key fields from a record, combines them, and -// // sets the ID on the record. If pkFields is a single field, and that -// // field is of type string, we'll return it in skipFields, because we -// // won't want to index it separately. -// func getPrimaryKeyRecordizer(schema []pdk.Field, pkFields []string) (recordizer Recordizer, skipFields map[int]struct{}, err error) { -// if len(schema) == 0 { -// return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty schema") -// } -// if len(pkFields) == 0 { -// return nil, nil, errors.New("can't call getPrimaryKeyRecordizer with empty pkFields") -// } -// fieldIndices := make([]int, 0, len(pkFields)) -// for pkIndex, pk := range pkFields { -// for fieldIndex, field := range schema { -// if pk == field.Name() { -// switch field.(type) { -// case pdk.StringArrayField: -// return nil, nil, errors.Errorf("field %s cannot be a primary key field because it is a StringArray field.", pk) -// } -// fieldIndices = append(fieldIndices, fieldIndex) -// break -// } -// } -// if len(fieldIndices) != pkIndex+1 { -// return nil, nil, errors.Errorf("no field with primary key field name %s found. fields: %+v", pk, schema) -// } -// } -// if len(pkFields) == 1 { -// if _, ok := schema[fieldIndices[0]].(pdk.StringField); ok { -// skipFields = make(map[int]struct{}, 1) -// skipFields[fieldIndices[0]] = struct{}{} -// } -// } -// recordizer = func(rawRec []interface{}, rec *gpexp.Row) (err error) { -// idbytes, ok := rec.ID.([]byte) -// if ok { -// idbytes = idbytes[:0] -// } else { -// idbytes = make([]byte, 0) -// } -// buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? - -// // TODO... will want to change this encoding logic to length-prefix the different fields or something. 
-// for _, fieldIdx := range fieldIndices { -// val := rawRec[fieldIdx] -// switch vt := val.(type) { -// case string: -// buf.WriteString(vt) // err is always nil -// case []byte: -// buf.Write(vt) // err is always nil -// default: -// err = binary.Write(buf, binary.BigEndian, val) -// if err != nil { -// return errors.Wrapf(err, "writing %+v of type %[1]T", val) -// } -// } -// } -// rec.ID = buf.Bytes() -// return nil -// } -// return recordizer, skipFields, nil -// } - -// func (m *Main) validate() error { -// if len(m.PrimaryKeyFields) != 0 && m.IDField != "" { -// return errors.New("cannot set both primary key fields and id-field") -// } -// return nil -// } diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index da67647..aa81bb1 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -1,12 +1,16 @@ package kafka import ( + "fmt" + "reflect" + "sort" "testing" "github.com/Shopify/sarama" + "github.com/pilosa/go-pilosa" ) -func TestCmdMain(t *testing.T) { +func TestCmdMainOne(t *testing.T) { if testing.Short() { t.Skip() } @@ -53,6 +57,12 @@ func TestCmdMain(t *testing.T) { client := m.PilosaClient() schema, err := client.Schema() index := schema.Index(m.Index) + defer func() { + err := client.DeleteIndex(index) + if err != nil { + t.Logf("deleting index: %v", err) + } + }() // check data in Pilosa if !index.HasField("abc") { @@ -66,6 +76,159 @@ func TestCmdMain(t *testing.T) { if qr.Result().Count() != 1 { t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) } + + bools := index.Field("bools") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci := sortableCRI(qr.Result().CountItems()) + exp := sortableCRI{{Count: 1, Key: "all_users"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } + + bools = index.Field("bools-exists") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci = sortableCRI(qr.Result().CountItems()) + exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } + + rhino := index.Field("ddd_category_total_current_rhinocerous_checking") + qr, err = client.Query(rhino.GT(0)) + if err != nil { + t.Fatalf("querying: %v", err) + } + expCols := []string{string([]byte{32, 31, 0, 0, 0, 159})} + if cols := qr.Result().Row().Keys; !reflect.DeepEqual(cols, expCols) { + t.Errorf("wrong cols: %v, exp: %v", cols, expCols) + } + t.Log(qr.Result().Value(), qr.Result().Count()) +} + +func TestCmdMainIDField(t *testing.T) { + if testing.Short() { + t.Skip() + } + + // load big schema + licodec := liDecodeTestSchema(t, "bigschema.json") + schemaID := postSchema(t, "bigschema.json", "bigschema2") + + fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} + + // make a bunch of data and insert it + records := [][]interface{}{ + {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, 
map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + } + + // put records in kafka + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? + conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + t.Fatalf("getting new producer: %v", err) + } + topic := "testcmdmain" + for _, vals := range records { + rec := makeRecord(t, fields, vals) + putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) + } + + // create Main and run with MaxMsgs + m := NewMain() + m.Index = "cmd_test_index23lkjdkfj" + m.IDField = "user_id" + m.PackBools = "bools" + m.BatchSize = 1 + m.Topics = []string{topic} + m.MaxMsgs = len(records) + + fmt.Println("r2") + err = m.Run() + if err != nil { + t.Fatalf("running main: %v", err) + } + + client := m.PilosaClient() + schema, err := client.Schema() + index := schema.Index(m.Index) + defer func() { + fmt.Println("d2") + err := client.DeleteIndex(index) + fmt.Println("d3") + if err != nil { + t.Logf("deleting index: %v", err) + } + }() + + // check data in Pilosa + if !index.HasField("abc") { + t.Fatalf("don't have abc") + } + abc := index.Field("abc") + qr, err := client.Query(index.Count(abc.Row("2"))) + if err != nil { + t.Fatalf("querying: %v", err) + } + if qr.Result().Count() != 1 { + t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) + } + + bools := index.Field("bools") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci := sortableCRI(qr.Result().CountItems()) + exp := sortableCRI{{Count: 1, Key: "all_users"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } + + bools = index.Field("bools-exists") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci = sortableCRI(qr.Result().CountItems()) + exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } +} + +type sortableCRI []pilosa.CountResultItem + +func (s sortableCRI) Len() int { return len(s) } +func (s sortableCRI) Less(i, j int) bool { + if s[i].Count != s[j].Count { + return s[i].Count > s[j].Count + } + if s[i].ID != s[j].ID { + return s[i].ID < s[j].ID + } + if s[i].Key != s[j].Key { + return s[i].Key < s[j].Key + } + return true +} +func (s sortableCRI) Swap(i, j int) { + s[i], s[j] = s[j], s[i] } func makeRecord(t *testing.T, fields []string, vals []interface{}) map[string]interface{} { diff --git a/v2/kafka/csrc/csrc.go b/v2/kafka/csrc/csrc.go index 7515d60..88e64b0 100644 --- a/v2/kafka/csrc/csrc.go +++ b/v2/kafka/csrc/csrc.go @@ -59,7 +59,7 @@ func (c *Client) PostSubjects(subj, schema string) (*SchemaResponse, error) { sr := &SchemaResponse{} err = unmarshalRespErr(resp, err, sr) if err != nil { - return nil, errors.Wrap(err, "unmarshaling resp") + return nil, errors.Wrapf(err, "unmarshaling resp to %s", fmt.Sprintf("%s/subjects/%s/versions", c.URL, subj)) } return sr, nil } diff --git a/v2/kafka/testdata/schemas/bigschema.json b/v2/kafka/testdata/schemas/bigschema.json index 9185fd7..298b2b3 100644 --- 
a/v2/kafka/testdata/schemas/bigschema.json +++ b/v2/kafka/testdata/schemas/bigschema.json @@ -91,7 +91,7 @@ "doc": "ddd_category_total_current_rhinocerous_checking NOTE: float? use decimal?", "type": [ "null", - "float" + {"type": "float", "scale": 2} ] }, { diff --git a/v2/kafkagen/cmd.go b/v2/kafkagen/cmd.go new file mode 100644 index 0000000..a569730 --- /dev/null +++ b/v2/kafkagen/cmd.go @@ -0,0 +1,129 @@ +package kafkagen + +import ( + "encoding/binary" + "io/ioutil" + + "github.com/Shopify/sarama" + liavro "github.com/linkedin/goavro/v2" + "github.com/pilosa/pdk/v2/kafka/csrc" + "github.com/pkg/errors" +) + +type Main struct { + SchemaFile string + Subject string + RegistryURL string + KafkaHosts []string + Topic string +} + +func NewMain() *Main { + return &Main{ + SchemaFile: "bigschema.json", + Subject: "bigschema", + RegistryURL: "localhost:8081", + KafkaHosts: []string{"localhost:9092"}, + Topic: "defaulttopic", + } +} + +func (m *Main) Run() error { + + licodec, err := decodeSchema(m.SchemaFile) + if err != nil { + return errors.Wrap(err, "decoding schema") + } + schemaID, err := m.postSchema(m.SchemaFile, m.Subject) + if err != nil { + return errors.Wrap(err, "psting schema") + } + + fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} + + // make a bunch of data and insert it + records := [][]interface{}{ + {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + } + + // put records in kafka + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? 
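The putRecordKafka helper added below frames each message in the Confluent Schema Registry wire format: a zero magic byte, the schema ID as a 4-byte big-endian integer, then the Avro-encoded payload (the 1000-byte capacity in the helper is just a preallocation hint). A minimal sketch of only the framing step, with an assumed helper name, separate from this diff:

package main

import (
	"encoding/binary"
	"fmt"
)

// frame prepends the Confluent wire-format header (magic byte 0 plus a
// big-endian uint32 schema ID) to an already Avro-encoded payload.
func frame(schemaID int, avroPayload []byte) []byte {
	buf := make([]byte, 5, 5+len(avroPayload))
	buf[0] = 0 // magic byte
	binary.BigEndian.PutUint32(buf[1:], uint32(schemaID))
	return append(buf, avroPayload...)
}

func main() {
	fmt.Println(frame(7, []byte{0xde, 0xad})) // [0 0 0 0 7 222 173]
}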
+ conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + return err + } + for _, vals := range records { + rec, err := makeRecord(fields, vals) + if err != nil { + return errors.Wrap(err, "making record") + } + err = putRecordKafka(producer, schemaID, licodec, "akey", m.Topic, rec) + if err != nil { + return errors.Wrap(err, "putting record") + } + } + return nil +} + +func putRecordKafka(producer sarama.SyncProducer, schemaID int, schema *liavro.Codec, key, topic string, record map[string]interface{}) error { + buf := make([]byte, 5, 1000) + buf[0] = 0 + binary.BigEndian.PutUint32(buf[1:], uint32(schemaID)) + buf, err := schema.BinaryFromNative(buf, record) + if err != nil { + return err + } + + // post buf to kafka + _, _, err = producer.SendMessage(&sarama.ProducerMessage{Topic: topic, Key: sarama.StringEncoder(key), Value: sarama.ByteEncoder(buf)}) + if err != nil { + return err + } + return nil +} + +func readSchema(filename string) (string, error) { + bytes, err := ioutil.ReadFile(filename) + if err != nil { + return "", err + } + return string(bytes), nil +} + +func decodeSchema(filename string) (*liavro.Codec, error) { + s, err := readSchema(filename) + if err != nil { + return nil, errors.Wrap(err, "reading schema") + } + codec, err := liavro.NewCodec(s) + if err != nil { + return nil, err + } + return codec, nil +} + +func (m *Main) postSchema(schemaFile, subj string) (schemaID int, err error) { + schemaClient := csrc.NewClient("http://" + m.RegistryURL) + schemaStr, err := readSchema(schemaFile) + if err != nil { + return 0, errors.Wrap(err, "reading schema file") + } + resp, err := schemaClient.PostSubjects(subj, schemaStr) + if err != nil { + return 0, errors.Wrap(err, "posting schema") + } + return resp.ID, nil +} + +func makeRecord(fields []string, vals []interface{}) (map[string]interface{}, error) { + if len(fields) != len(vals) { + return nil, errors.Errorf("have %d fields and %d vals", len(fields), len(vals)) + } + ret := make(map[string]interface{}) + for i, field := range fields { + ret[field] = vals[i] + } + return ret, nil +} From 38648f0d83a58c391bde20db6f17ebe68021b969 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 10 Oct 2019 09:04:03 -0500 Subject: [PATCH 22/40] convert kafka consumer to pflag and env var support --- v2/cmd/kafka/main.go | 8 ++++++-- v2/ingest.go | 2 +- v2/kafka/cmd.go | 8 ++++---- v2/kafka/cmd_test.go | 6 +----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/v2/cmd/kafka/main.go b/v2/cmd/kafka/main.go index d61e30a..6d675b2 100644 --- a/v2/cmd/kafka/main.go +++ b/v2/cmd/kafka/main.go @@ -3,12 +3,16 @@ package main import ( "log" - "github.com/jaffee/commandeer" + "github.com/jaffee/commandeer/pflag" "github.com/pilosa/pdk/v2/kafka" ) func main() { - if err := commandeer.Run(kafka.NewMain()); err != nil { + m := kafka.NewMain() + if err := pflag.LoadEnv(m, "CONSUMER_", nil); err != nil { + log.Fatal(err) + } + if err := m.Run(); err != nil { log.Fatal(err) } } diff --git a/v2/ingest.go b/v2/ingest.go index 59da8b4..1903088 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -20,7 +20,7 @@ import ( // Main holds all config for general ingest type Main struct { PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` - BatchSize int `flag:"batch-size",help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 
1,048,576 might be a good number."` + BatchSize int `help:"Number of records to read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` Index string `help:"Name of Pilosa index."` LogPath string `help:"Log file to write to. Empty means stderr. TODO implement."` PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index 1d010b7..d9272db 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -7,10 +7,10 @@ import ( type Main struct { pdk.Main `flag:"!embed"` - KafkaHosts []string - RegistryURL string - Group string - Topics []string + KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` + RegistryURL string `help:"Location of Confluent Schema Registry"` + Group string `help:"Kafka group."` + Topics []string `help:"Kafka topics to read from."` } func NewMain() *Main { diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index aa81bb1..eace58c 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -1,7 +1,6 @@ package kafka import ( - "fmt" "reflect" "sort" "testing" @@ -139,7 +138,7 @@ func TestCmdMainIDField(t *testing.T) { if err != nil { t.Fatalf("getting new producer: %v", err) } - topic := "testcmdmain" + topic := "testcmdmain2" for _, vals := range records { rec := makeRecord(t, fields, vals) putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) @@ -154,7 +153,6 @@ func TestCmdMainIDField(t *testing.T) { m.Topics = []string{topic} m.MaxMsgs = len(records) - fmt.Println("r2") err = m.Run() if err != nil { t.Fatalf("running main: %v", err) @@ -164,9 +162,7 @@ func TestCmdMainIDField(t *testing.T) { schema, err := client.Schema() index := schema.Index(m.Index) defer func() { - fmt.Println("d2") err := client.DeleteIndex(index) - fmt.Println("d3") if err != nil { t.Logf("deleting index: %v", err) } From 0ce63157912c83b600fa4183239a4ac94de16a41 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 10 Oct 2019 13:08:52 -0500 Subject: [PATCH 23/40] dedup kafka test and improve kafka timeout between tests --- v2/kafka/cmd_test.go | 322 ++++++++++++++++++------------------------- v2/kafka/source.go | 3 + 2 files changed, 134 insertions(+), 191 deletions(-) diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index eace58c..f2ae713 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -1,8 +1,11 @@ package kafka import ( + "fmt" + "math/rand" "reflect" "sort" + "strconv" "testing" "github.com/Shopify/sarama" @@ -14,197 +17,134 @@ func TestCmdMainOne(t *testing.T) { t.Skip() } - // load big schema - licodec := liDecodeTestSchema(t, "bigschema.json") - schemaID := postSchema(t, "bigschema.json", "bigschema2") - - fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} - - // make a bunch of data and insert it - records := [][]interface{}{ - {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": 
[]string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, - } - - // put records in kafka - conf := sarama.NewConfig() - conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? - conf.Producer.Return.Successes = true - producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) - if err != nil { - t.Fatalf("getting new producer: %v", err) - } - topic := "testcmdmain" - for _, vals := range records { - rec := makeRecord(t, fields, vals) - putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) - } - - // create Main and run with MaxMsgs - m := NewMain() - m.Index = "cmd_test_index23lkjdkfj" - m.PrimaryKeyFields = []string{"abc", "db", "user_id"} - m.PackBools = "bools" - m.BatchSize = 1 - m.Topics = []string{topic} - m.MaxMsgs = len(records) - - err = m.Run() - if err != nil { - t.Fatalf("running main: %v", err) - } - - client := m.PilosaClient() - schema, err := client.Schema() - index := schema.Index(m.Index) - defer func() { - err := client.DeleteIndex(index) - if err != nil { - t.Logf("deleting index: %v", err) - } - }() - - // check data in Pilosa - if !index.HasField("abc") { - t.Fatalf("don't have abc") - } - abc := index.Field("abc") - qr, err := client.Query(index.Count(abc.Row("2"))) - if err != nil { - t.Fatalf("querying: %v", err) - } - if qr.Result().Count() != 1 { - t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) - } - - bools := index.Field("bools") - qr, err = client.Query(bools.TopN(10)) - if err != nil { - t.Fatalf("querying: %v", err) - } - ci := sortableCRI(qr.Result().CountItems()) - exp := sortableCRI{{Count: 1, Key: "all_users"}} - sort.Sort(ci) - sort.Sort(exp) - if !reflect.DeepEqual(ci, exp) { - t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) - } - - bools = index.Field("bools-exists") - qr, err = client.Query(bools.TopN(10)) - if err != nil { - t.Fatalf("querying: %v", err) - } - ci = sortableCRI(qr.Result().CountItems()) - exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} - sort.Sort(ci) - sort.Sort(exp) - if !reflect.DeepEqual(ci, exp) { - t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) - } - - rhino := index.Field("ddd_category_total_current_rhinocerous_checking") - qr, err = client.Query(rhino.GT(0)) - if err != nil { - t.Fatalf("querying: %v", err) - } - expCols := []string{string([]byte{32, 31, 0, 0, 0, 159})} - if cols := qr.Result().Row().Keys; !reflect.DeepEqual(cols, expCols) { - t.Errorf("wrong cols: %v, exp: %v", cols, expCols) - } - t.Log(qr.Result().Value(), qr.Result().Count()) -} - -func TestCmdMainIDField(t *testing.T) { - if testing.Short() { - t.Skip() - } - - // load big schema - licodec := liDecodeTestSchema(t, "bigschema.json") - schemaID := postSchema(t, "bigschema.json", "bigschema2") - - fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} - - // make a bunch of data and insert it - records := [][]interface{}{ - {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, 
map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, - } - - // put records in kafka - conf := sarama.NewConfig() - conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? - conf.Producer.Return.Successes = true - producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) - if err != nil { - t.Fatalf("getting new producer: %v", err) - } - topic := "testcmdmain2" - for _, vals := range records { - rec := makeRecord(t, fields, vals) - putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) - } - - // create Main and run with MaxMsgs - m := NewMain() - m.Index = "cmd_test_index23lkjdkfj" - m.IDField = "user_id" - m.PackBools = "bools" - m.BatchSize = 1 - m.Topics = []string{topic} - m.MaxMsgs = len(records) - - err = m.Run() - if err != nil { - t.Fatalf("running main: %v", err) - } - - client := m.PilosaClient() - schema, err := client.Schema() - index := schema.Index(m.Index) - defer func() { - err := client.DeleteIndex(index) - if err != nil { - t.Logf("deleting index: %v", err) - } - }() - - // check data in Pilosa - if !index.HasField("abc") { - t.Fatalf("don't have abc") - } - abc := index.Field("abc") - qr, err := client.Query(index.Count(abc.Row("2"))) - if err != nil { - t.Fatalf("querying: %v", err) - } - if qr.Result().Count() != 1 { - t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) - } - - bools := index.Field("bools") - qr, err = client.Query(bools.TopN(10)) - if err != nil { - t.Fatalf("querying: %v", err) - } - ci := sortableCRI(qr.Result().CountItems()) - exp := sortableCRI{{Count: 1, Key: "all_users"}} - sort.Sort(ci) - sort.Sort(exp) - if !reflect.DeepEqual(ci, exp) { - t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) - } - - bools = index.Field("bools-exists") - qr, err = client.Query(bools.TopN(10)) - if err != nil { - t.Fatalf("querying: %v", err) - } - ci = sortableCRI(qr.Result().CountItems()) - exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} - sort.Sort(ci) - sort.Sort(exp) - if !reflect.DeepEqual(ci, exp) { - t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + tests := []struct { + name string + PrimaryKeyFields []string + IDField string + expRhinoKeys []string + expRhinoCols []uint64 + }{ + { + name: "3 primary keys str/str/int", + PrimaryKeyFields: []string{"abc", "db", "user_id"}, + expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159})}, // "2" + "1" + uint32(159) + + }, + { + name: "IDField int", + IDField: "user_id", + expRhinoCols: []uint64{159}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // load big schema + licodec := liDecodeTestSchema(t, "bigschema.json") + schemaID := postSchema(t, "bigschema.json", "bigschema2") + + fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} + + // make a bunch of data and insert it + records := [][]interface{}{ + {"2", "1", 159, map[string]interface{}{"boolean": true}, 
map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + } + + // put records in kafka + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? + conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + t.Fatalf("getting new producer: %v", err) + } + a := rand.Int() + topic := strconv.Itoa(a) + for _, vals := range records { + rec := makeRecord(t, fields, vals) + putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) + } + + // create Main and run with MaxMsgs + m := NewMain() + m.Index = fmt.Sprintf("cmd_test_index239ij%s", topic) + m.PrimaryKeyFields = test.PrimaryKeyFields + m.IDField = test.IDField + m.PackBools = "bools" + m.BatchSize = 1 + m.Topics = []string{topic} + m.MaxMsgs = len(records) + + err = m.Run() + if err != nil { + t.Fatalf("running main: %v", err) + } + + client := m.PilosaClient() + schema, err := client.Schema() + index := schema.Index(m.Index) + defer func() { + err := client.DeleteIndex(index) + if err != nil { + t.Logf("deleting index: %v", err) + } + }() + + // check data in Pilosa + if !index.HasField("abc") { + t.Fatalf("don't have abc") + } + abc := index.Field("abc") + qr, err := client.Query(index.Count(abc.Row("2"))) + if err != nil { + t.Fatalf("querying: %v", err) + } + if qr.Result().Count() != 1 { + t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) + } + + bools := index.Field("bools") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci := sortableCRI(qr.Result().CountItems()) + exp := sortableCRI{{Count: 1, Key: "all_users"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } + + bools = index.Field("bools-exists") + qr, err = client.Query(bools.TopN(10)) + if err != nil { + t.Fatalf("querying: %v", err) + } + ci = sortableCRI(qr.Result().CountItems()) + exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} + sort.Sort(ci) + sort.Sort(exp) + if !reflect.DeepEqual(ci, exp) { + t.Errorf("unexpected result exp/got\n%+v\n%+v", exp, ci) + } + + rhino := index.Field("ddd_category_total_current_rhinocerous_checking") + qr, err = client.Query(rhino.GT(0)) + if err != nil { + t.Fatalf("querying: %v", err) + } + if test.expRhinoKeys != nil { + if keys := qr.Result().Row().Keys; !reflect.DeepEqual(keys, test.expRhinoKeys) { + t.Errorf("wrong cols: %v, exp: %v", keys, test.expRhinoKeys) + } + } + if test.expRhinoCols != nil { + if cols := qr.Result().Row().Columns; !reflect.DeepEqual(cols, test.expRhinoCols) { + t.Errorf("wrong cols: %v, exp: %v", cols, test.expRhinoCols) + } + } + }) } } diff --git a/v2/kafka/source.go b/v2/kafka/source.go index 94466a5..ae64f67 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "log" "net/http" + "time" "github.com/Shopify/sarama" cluster "github.com/bsm/sarama-cluster" @@ -162,6 +163,8 @@ func (s *Source) Open() error { config.Consumer.Return.Errors = true config.Consumer.Offsets.Initial = sarama.OffsetOldest config.Group.Return.Notifications = true + config.Consumer.Group.Heartbeat.Interval = 
time.Millisecond * 500 + config.Consumer.Group.Session.Timeout = time.Second var err error s.consumer, err = cluster.NewConsumer(s.Hosts, s.Group, s.Topics, config) From 02fbfec07f13bd69170cefe747031eb355591d25 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 11 Oct 2019 09:42:29 -0500 Subject: [PATCH 24/40] update go-pilosa dep to master (batch-ingest merged) --- go.mod | 7 +--- go.sum | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 75bb56c..eeec480 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,5 @@ module github.com/pilosa/pdk -//replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 -replace github.com/pilosa/go-pilosa => /Users/jaffee/go/src/github.com/pilosa/go-pilosa - replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 require ( @@ -16,7 +13,6 @@ require ( github.com/eapache/queue v1.1.0 // indirect github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 - github.com/hashicorp/go-uuid v1.0.1 // indirect github.com/jaffee/commandeer v0.3.0 github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 github.com/linkedin/goavro/v2 v2.9.6 @@ -24,14 +20,13 @@ require ( github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 // indirect - github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6 + github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5 github.com/pilosa/pilosa v1.3.1 github.com/pkg/errors v0.8.1 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect github.com/spf13/cobra v0.0.5 github.com/spf13/pflag v1.0.3 github.com/spf13/viper v1.4.0 - github.com/stretchr/testify v1.3.0 // indirect github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 golang.org/x/sync v0.0.0-20190423024810-112230192c58 gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183 // indirect diff --git a/go.sum b/go.sum index b6deb2b..21de95a 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,7 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.40.0/go.mod h1:Tk58MuI9rbLMKlAjeO/bDnteAx7tX2gJIXw4T5Jwlro= cloud.google.com/go v0.43.0/go.mod h1:BOSR3VbTLkk6FDC/TcffxP4NF/FFBGA5ku+jvKOP7pg= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -9,8 +10,11 @@ github.com/CAFxX/gcnotifier v0.0.0-20190112062741-224a280d589d h1:n0G4ckjMEj7bWu github.com/CAFxX/gcnotifier v0.0.0-20190112062741-224a280d589d/go.mod h1:Rn2zM2MnHze07LwkneP48TWt6UiZhzQTwCvw6djVGfE= github.com/DataDog/datadog-go v0.0.0-20180822151419-281ae9f2d895 h1:dmc/C8bpE5VkQn65PNbbyACDC8xw8Hpp/NEurdPmQDQ= github.com/DataDog/datadog-go v0.0.0-20180822151419-281ae9f2d895/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/DataDog/datadog-go v2.2.0+incompatible h1:V5BKkxACZLjzHjSgBbr2gvLA2Ae49yhc6CSY7MLy5k4= +github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash 
v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/Shopify/sarama v1.19.0 h1:9oksLxC6uxVPHPVYUmq6xhr1BOF/hHobWH2UzO67z1s= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= @@ -22,24 +26,35 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM= +github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/aws/aws-sdk-go v1.15.88 h1:Om0MayFrixOds/PrbBey2Cg/lkNEIyOrAF2RFXLwmnE= github.com/aws/aws-sdk-go v1.15.88/go.mod h1:es1KtYUFs7le0xQ3rOihkuoVD90z7D0fR2Qm4S00/gU= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bsm/sarama-cluster v2.1.15+incompatible h1:RkV6WiNRnqEEbp81druK8zYhmnIgdOjqSVi0+9Cnl2A= github.com/bsm/sarama-cluster v2.1.15+incompatible/go.mod h1:r7ao+4tTNXvWm+VRpRJchr2kQhqxgmAp2iEX5W96gMM= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd 
v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190618135430-ff7011eec365/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= @@ -57,6 +72,7 @@ github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba h1:QkK2L3uvEaZJ40iFZbiMKz/yQF/MI2uaNO2iyV/ve6w= github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba/go.mod h1:3A7SOsr8WBIpkWUsqzMpR3tIQbanKqxZcis2GSl12Nk= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -100,34 +116,48 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/handlers v1.3.0 h1:tsg9qP3mjt1h4Roxp+M1paRjrVBfPSOpBuVclh6YluI= github.com/gorilla/handlers v1.3.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/handlers v1.4.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= github.com/gorilla/handlers v1.4.1 h1:BHvcRGJe/TrL+OqFxoKQGddTgeibiOjaBssV5a/N9sw= github.com/gorilla/handlers v1.4.1/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= github.com/gorilla/mux v1.4.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.0 h1:tOSd0UKHQd6urX6ApfOn4XdBMY6Sh1MfxV3kmaazO+U= github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.7.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway 
v1.9.2/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.4/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-immutable-radix v1.1.0 h1:vN9wG1D6KG6YHRTWr8512cxGOVgTMEfgEdSj/hr8MPc= +github.com/hashicorp/go-immutable-radix v1.1.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3 h1:zKjpN5BK/P5lMYrLmBHdBULWbJ0XpYR+7NGzqkZzoD4= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= +github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-retryablehttp v0.5.4/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-sockaddr v1.0.0 h1:GeH6tui99pF4NJgfnhp+L6+FfobzVW3Ah46sLo0ICXs= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-sockaddr v1.0.2 h1:ztczhD1jLxIRjVejw8gFomI1BQZOe2WoVOu0SyteCQc= +github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE= @@ -140,6 +170,8 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/memberlist v0.1.3 h1:EmmoJme1matNzb+hMpDuR/0sbJSUisxyqBGG676r31M= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= +github.com/hashicorp/memberlist v0.1.4 h1:gkyML/r71w3FL8gUi74Vk76avkj/9lYAY9lvg0OcoGs= +github.com/hashicorp/memberlist v0.1.4/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= @@ -156,18 +188,24 @@ github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032 h1:uATKnbEhR3+K github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 h1:IEGhQ3aUdbLHPkv+twI74W6ggBcAwl0cNJzquQ1IXdE= github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4/go.mod 
h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c h1:N8DBKtbHi9PG28iYvipDUyYs/vzX5g6bpc0tbxdKlCA= +github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= @@ -177,15 +215,26 @@ github.com/linkedin/goavro/v2 v2.9.6 h1:Qh8M4/oWMSJ8V3pKCl9QRZOZnefg/vU56t47Awza github.com/linkedin/goavro/v2 v2.9.6/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.15 h1:CSSIDtllwGLMoA6zjdKnaE6Tx6eVUxQ29LUgGetiDCI= github.com/miekg/dns v1.1.15/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= 
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 h1:QlTAK00UrY80KK9Da+foE04AjxhXFrgp87aZB6yfU5c= github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294/go.mod h1:oNZxQo5yWJh0eMQEP/8hwQuVx9Z9tjwFUqcTB1SmG0c= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= @@ -200,11 +249,18 @@ github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsq github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c h1:Lgl0gzECD8GnQ5QCWA8o6BtfL6mDH5rQgM4/fX3avOs= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.4.0 h1:u3Z1r+oOXJIkxqw34zVhyPgjBsm6X2wn21NWs/HfSeg= +github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 h1:wKfigKMTgvSzBLIVvB5QaBBQI0odU6n45/UKSphjLus= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pilosa/demo-taxi v0.0.0-20190604185441-6b6ef983bff7/go.mod h1:DM8Umjg0r/UscmOs49RJeE0WUb8Nj4PLUj4J02vigLk= +github.com/pilosa/go-pilosa v0.0.0-20181106203903-796d4f7d7f3b/go.mod h1:uli4HiTymHocSAXJ9XpDbkH6kS63P8Yc0xyWDzooouc= +github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6/go.mod h1:aFI9h49dhkkRoBLyeZFdHj+OHYtobmA7X7pn3AKVDMw= +github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5 h1:HO6bKfnNIaI7iA1CZ6SwuIgetzq6MH3ExuWW9eu9DK4= +github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/pilosa/pilosa v0.0.0-20181115192138-84148d4ee6c0/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= github.com/pilosa/pilosa v1.3.1 h1:rLDVqJBuRzhPtue730D+EX0YEVS4R0oDzsE4bJBwLcE= github.com/pilosa/pilosa v1.3.1/go.mod h1:97yLL9mpUqOj9naKu5XA/b/U6JLe3JGGUlc2HOTDw+A= @@ -216,14 +272,23 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod 
h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/posener/complete v1.2.1/go.mod h1:6gapUrK/U1TAN7ciCoNRIdVC5sbdBTUh1DKN0g6uH7E= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rakyll/statik v0.0.0-20170410192944-89fe3459b5c8/go.mod h1:OEi9wJV/fMUAGx1eNjq75DKDsJVuEv1U0oYdX6GX8Zs= @@ -231,9 +296,14 @@ github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhD github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001 h1:YDeskXpkNDhPdWN3REluVa46HQOVuVkjkd2sWnrABNQ= github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20190512091148-babf20351dd7/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.1.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday v2.0.0+incompatible/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed 
v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= @@ -243,11 +313,15 @@ github.com/shirou/gopsutil v2.18.12+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMT github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4 h1:udFKJ0aHUL60LboW/A+DfgoHVedieIzIXE8uylPue0U= github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= @@ -256,6 +330,8 @@ github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v0.0.0-20170427125145-f1d95a35e132/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -266,6 +342,7 @@ github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= @@ -273,17 +350,25 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 h1:GnOzE5fEFN3b2zDhJJABEofdb51uMRNb8eqIVtdducs= github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= 
+github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/uber/jaeger-client-go v2.15.0+incompatible h1:NP3qsSqNxh8VYr956ur1N/1C1PjvOJnJykCzcD5QHbk= github.com/uber/jaeger-client-go v2.15.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= +github.com/uber/jaeger-client-go v2.16.0+incompatible h1:Q2Pp6v3QYiocMxomCaJuwQGFt7E53bPYqEgug/AoBtY= +github.com/uber/jaeger-client-go v2.16.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v1.5.0 h1:OHbgr8l656Ub3Fw5k9SWnBfIEwvoHQ+W2y+Aa9D1Uyo= github.com/uber/jaeger-lib v1.5.0/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= +github.com/uber/jaeger-lib v2.0.0+incompatible h1:iMSCV0rmXEogjNWPh2D0xk9YVKvrtGoHJNe9ebLu/pw= +github.com/uber/jaeger-lib v2.0.0+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.5-pre/go.mod h1:FwP/aQVg39TXzItUBMwnWp9T9gPQnXw4Poh4/oBQZ/0= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.5-pre/go.mod h1:tULtS6Gy1AE1yCENaw4Vb//HLH5njI2tfCQDUqRd8fI= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -296,6 +381,7 @@ golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190617133340-57b3e21c3d56/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -303,6 +389,7 @@ golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190718202018-cfdd5522f6f6/go.mod h1:JhuoJpWY28nO4Vef9tZUw9qufEGTyX1+7lmHxV5q5G4= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190618124811-92942e4437e2/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image 
v0.0.0-20190703141733-d6a02ce849c9/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= @@ -310,7 +397,9 @@ golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTk golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190607214518-6fa95d984e88/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -318,6 +407,7 @@ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -327,6 +417,7 @@ golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -340,6 +431,7 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -349,11 +441,14 @@ golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a h1:1n5lsVfiQW3yfsRGu98756EH1YthsFqr/5mxHduZW2A= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190618155005-516e3c20635f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI= golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -366,6 +461,7 @@ golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -374,10 +470,13 @@ golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190530171427-2b03ca6e44eb/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190618163018-fdf1049a943a/go.mod 
h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190719005602-e377ae9d6386/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.6.0/go.mod h1:btoxGiFvQNVUZQ8W08zLtrVS08CNpINPEfxXxgJL1Q4= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -388,6 +487,8 @@ google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= +google.golang.org/genproto v0.0.0-20190611190212-a7e196e89fd3/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190716160619-c506a9f90610/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -401,6 +502,7 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4= @@ -416,6 +518,7 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190614002413-cb51c254f01b/go.mod h1:JlmFZigtG9vBVR3QGIQ9g/Usz4BzH+Xm6Z8iHQWRYUw= modernc.org/mathutil v1.0.0 h1:93vKjrJopTPrtTNpZ8XIovER7iCIH1QU7wNbOQXC60I= modernc.org/mathutil v1.0.0/go.mod h1:wU0vUrJsVWBZ4P6e7xtFJEhFSNsfRLJ8H458uRjg03k= modernc.org/strutil v1.0.0 h1:XVFtQwFVwc02Wk+0L/Z/zDDXO81r5Lhe6iMKmGX3KhE= From d64466c8eb4388f6b6002bd7a5b6104eff2fef89 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 11 Oct 2019 10:21:40 -0500 Subject: [PATCH 25/40] update go-pilosa dep --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index eeec480..fd2f216 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( 
github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 // indirect - github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5 + github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff github.com/pilosa/pilosa v1.3.1 github.com/pkg/errors v0.8.1 github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect diff --git a/go.sum b/go.sum index 21de95a..ab167b6 100644 --- a/go.sum +++ b/go.sum @@ -261,6 +261,8 @@ github.com/pilosa/go-pilosa v0.0.0-20181106203903-796d4f7d7f3b/go.mod h1:uli4HiT github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6/go.mod h1:aFI9h49dhkkRoBLyeZFdHj+OHYtobmA7X7pn3AKVDMw= github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5 h1:HO6bKfnNIaI7iA1CZ6SwuIgetzq6MH3ExuWW9eu9DK4= github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff h1:6i31l2T0OsKRVnMRR7SmRaE17hsWGIoXwrgHtWyHONU= +github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/pilosa/pilosa v0.0.0-20181115192138-84148d4ee6c0/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= github.com/pilosa/pilosa v1.3.1 h1:rLDVqJBuRzhPtue730D+EX0YEVS4R0oDzsE4bJBwLcE= github.com/pilosa/pilosa v1.3.1/go.mod h1:97yLL9mpUqOj9naKu5XA/b/U6JLe3JGGUlc2HOTDw+A= From f0c735ea0513a463d333f69f04a8c6ffddda67be Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Sun, 13 Oct 2019 13:00:20 -0500 Subject: [PATCH 26/40] scaled floats working --- go.mod | 4 ++- go.sum | 6 +++++ v2/ingest.go | 1 - v2/interfaces.go | 11 ++++++++ v2/kafka/cmd_test.go | 2 +- v2/kafka/source.go | 33 +++++------------------ v2/kafka/source_test.go | 25 +++++++++++------ v2/kafka/testdata/schemas/floatscale.json | 8 ++++++ v2/kafka/testdata/schemas/unions.json | 3 ++- 9 files changed, 54 insertions(+), 39 deletions(-) create mode 100644 v2/kafka/testdata/schemas/floatscale.json diff --git a/go.mod b/go.mod index fd2f216..1ded377 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/pilosa/pdk -replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 +replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261 + +replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20191013175548-8d07fd23d4fa require ( github.com/Shopify/sarama v1.19.0 diff --git a/go.sum b/go.sum index ab167b6..0b972b4 100644 --- a/go.sum +++ b/go.sum @@ -178,6 +178,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22 h1:CQuJJwOitTYqHgp52XQJ/9DVcNJM+MMfFMmR/jhMIMg= github.com/jaffee/avro v0.0.0-20190926030934-2b116da4fa22/go.mod h1:6ilXMAGKrNFwlSrER0Y6hkZeJOH0ogH6I+90pCh6d1U= +github.com/jaffee/avro v0.0.0-20191013175548-8d07fd23d4fa h1:kyi/d6Y/V25924veUrOiyQ67g9tJ8dojlhPjYhkLEZ0= +github.com/jaffee/avro v0.0.0-20191013175548-8d07fd23d4fa/go.mod h1:6ilXMAGKrNFwlSrER0Y6hkZeJOH0ogH6I+90pCh6d1U= github.com/jaffee/commandeer v0.1.0 h1:UxHHnhKmtz8gAgqu67lYK5tlX5D9A86mGc9AWcEMSWU= github.com/jaffee/commandeer v0.1.0/go.mod h1:x1WpthEI14PRNcPtVna43ontBxJ1o7plCOsZ8kksl8M= github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e 
h1:CC1usSIzu9p6zmz7jPj0QiP3FdpGW+PCGc9d1yhSls0= @@ -190,6 +192,8 @@ github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 h1:IEGhQ3aUdbLH github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c h1:N8DBKtbHi9PG28iYvipDUyYs/vzX5g6bpc0tbxdKlCA= github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261 h1:q+k1WXUVYhAL4hQMrG2T6emfPaX1VFjRq/4F9LkSz/k= +github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -369,6 +373,8 @@ github.com/ugorji/go/codec v1.1.5-pre/go.mod h1:tULtS6Gy1AE1yCENaw4Vb//HLH5njI2t github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/y0ssar1an/q v1.0.7 h1:s3ckTY+wjk6Y0sFce4rIS1Ezf8S6d0UFJrKwe40MyiQ= +github.com/y0ssar1an/q v1.0.7/go.mod h1:Q1Rk1StqWjSOfA/CF4zJEW1fLmkl5Cy8EsILdkB+DgE= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= diff --git a/v2/ingest.go b/v2/ingest.go index 1903088..104b54b 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -335,7 +335,6 @@ func (m *Main) batchFromSchema(schema []Field) ([]Recordizer, gpexp.RecordBatch, return errors.Wrapf(err, "pilosafying field %d:%+v, val:%+v", i, pdkField, rawRec[i]) }) case DecimalField: - // TODO handle scale fields = append(fields, m.index.Field(fld.Name(), pilosa.OptFieldTypeInt())) valIdx := len(fields) - 1 recordizers = append(recordizers, func(rawRec []interface{}, rec *gpexp.Row) (err error) { diff --git a/v2/interfaces.go b/v2/interfaces.go index 850b33c..cca8e1f 100644 --- a/v2/interfaces.go +++ b/v2/interfaces.go @@ -1,6 +1,7 @@ package pdk import ( + "encoding/binary" "math" "github.com/pkg/errors" @@ -125,6 +126,7 @@ type DecimalField struct { func (d DecimalField) Name() string { return d.NameVal } func (i DecimalField) PilosafyVal(val interface{}) (interface{}, error) { + var tmp [8]byte if val == nil { return nil, nil } @@ -135,6 +137,15 @@ func (i DecimalField) PilosafyVal(val interface{}) (interface{}, error) { case float64: vt = vt * math.Pow(10, float64(i.Scale)) return int64(vt), nil + case []byte: + if len(vt) == 8 { + return int64(binary.BigEndian.Uint64(vt)), nil + } else if len(vt) < 8 { + copy(tmp[8-len(vt):], vt) + return binary.BigEndian.Uint64(tmp[:]), nil + } else { + return nil, errors.Errorf("can't support decimals of greater than 8 bytes, got %d for %s", len(vt), i.Name()) + } default: return toInt64(val) } diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index f2ae713..da32f9c 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -130,7 +130,7 @@ 
func TestCmdMainOne(t *testing.T) { } rhino := index.Field("ddd_category_total_current_rhinocerous_checking") - qr, err = client.Query(rhino.GT(0)) + qr, err = client.Query(rhino.Equals(540)) if err != nil { t.Fatalf("querying: %v", err) } diff --git a/v2/kafka/source.go b/v2/kafka/source.go index ae64f67..c6447bb 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -107,32 +107,7 @@ func (s *Source) Schema() []pdk.Field { func (s *Source) toPDKRecord(vals map[string]interface{}) error { r := s.record for i, field := range s.lastSchema { - val := vals[field.Name()] - if val == nil { - r.data[i] = nil - continue - } - switch field.(type) { - case pdk.DecimalField: - vb, ok := val.([]byte) - if !ok { - r.data[i] = val - continue - } - if len(vb) == 8 { - r.data[i] = binary.BigEndian.Uint64(vb) - } else if len(vb) < 8 { - copy(s.decBytes[8-len(vb):], vb) - r.data[i] = binary.BigEndian.Uint64(s.decBytes) - for j := range s.decBytes { - s.decBytes[j] = 0 - } - } else { - return errors.Errorf("can't support decimals of greater than 8 bytes, got %d for %s", len(vb), field.Name()) - } - default: - r.data[i] = val - } + r.data[i] = vals[field.Name()] } return nil } @@ -403,10 +378,14 @@ func avroUnionToPDKField(field *avro.SchemaField) (pdk.Field, error) { // propertiesFromSchema (document and use!) func propertiesFromSchema(sch avro.Schema) map[string]interface{} { switch schT := sch.(type) { - case *avro.StringSchema, *avro.IntSchema, *avro.LongSchema, *avro.FloatSchema, *avro.DoubleSchema, *avro.BooleanSchema, *avro.NullSchema, *avro.UnionSchema: + case *avro.StringSchema, *avro.IntSchema, *avro.LongSchema, *avro.BooleanSchema, *avro.NullSchema, *avro.UnionSchema: return nil case *avro.BytesSchema: return schT.Properties + case *avro.DoubleSchema: + return schT.Properties + case *avro.FloatSchema: + return schT.Properties case *avro.RecordSchema: return schT.Properties case *avro.RecursiveSchema: diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index 204bfd1..c3d633b 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -44,6 +44,10 @@ func TestAvroToPDKSchema(t *testing.T) { schemaFile: "unions.json", exp: expectedSchemas["unions.json"], }, + { + schemaFile: "floatscale.json", + exp: expectedSchemas["floatscale.json"], + }, { schemaFile: "notarecord.json", expErr: "unsupported Avro Schema type", @@ -125,7 +129,7 @@ var tests = []struct { { schemaFile: "decimal.json", data: []map[string]interface{}{{"somenum": &big.Rat{}}, {"somenum": big.NewRat(10, 1)}, {"somenum": big.NewRat(1, 1)}, {"somenum": big.NewRat(5, 2)}, {"somenum": big.NewRat(1234567890, 1)}}, - exp: [][]interface{}{{uint64(0)}, {uint64(1000)}, {uint64(100)}, {uint64(250)}, {uint64(123456789000)}}, + exp: [][]interface{}{{[]byte{0}}, {[]byte{0x3, 0xE8}}, {[]byte{100}}, {[]byte{0, 250}}, {[]byte{0x1C, 0xBE, 0x99, 0x1A, 0x08}}}, }, { schemaFile: "othertypes.json", @@ -135,12 +139,17 @@ var tests = []struct { { schemaFile: "unions.json", data: []map[string]interface{}{ - {"first": map[string]interface{}{"string": "a"}, "second": map[string]interface{}{"boolean": true}, "third": map[string]interface{}{"long": 101}, "fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)}}, - {"first": nil, "second": nil, "third": map[string]interface{}{"null": nil}, "fourth": nil}, + {"first": map[string]interface{}{"string": "a"}, "second": map[string]interface{}{"boolean": true}, "third": map[string]interface{}{"long": 101}, "fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)}, "fifth": 
map[string]interface{}{"double": float64(9.4921)}}, + {"first": nil, "second": nil, "third": map[string]interface{}{"null": nil}, "fourth": nil, "fifth": nil}, }, exp: [][]interface{}{ - {"a", true, int64(101), uint64(2500)}, - {nil, nil, nil, nil}}, + {"a", true, int64(101), []byte{9, 196}, float64(9.4921)}, + {nil, nil, nil, nil, nil}}, + }, + { + schemaFile: "floatscale.json", + data: []map[string]interface{}{{"first": 23.12345}}, + exp: [][]interface{}{{float32(23.12345)}}, }, } @@ -180,7 +189,7 @@ func TestKafkaSourceLocal(t *testing.T) { } gotSchema := src.Schema() if !reflect.DeepEqual(gotSchema, expectedSchemas[test.schemaFile]) { - t.Errorf("unexpected schema got/exp:\n%+v\n%+v", gotSchema, expectedSchemas[test.schemaFile]) + t.Errorf("unexpected schema exp/got:\n%+v\n%+v", expectedSchemas[test.schemaFile], gotSchema) } } else if err != nil { t.Fatalf("unexpected error getting record: %v", err) @@ -199,7 +208,6 @@ func TestKafkaSourceLocal(t *testing.T) { t.Errorf("Mismatch at %d, exp/got\n%v of %[2]T\n%v of %[3]T", k, test.exp[j][k], data[k]) } } - } } }) @@ -316,6 +324,7 @@ var expectedSchemas = map[string][]pdk.Field{ "simple.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}}, "stringtypes.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.StringField{NameVal: "last"}, pdk.StringField{NameVal: "middle"}}, "decimal.json": []pdk.Field{pdk.DecimalField{NameVal: "somenum", Scale: 2}}, - "unions.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.BoolField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.DecimalField{NameVal: "fourth", Scale: 3}}, + "unions.json": []pdk.Field{pdk.StringField{NameVal: "first"}, pdk.BoolField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.DecimalField{NameVal: "fourth", Scale: 3}, pdk.DecimalField{NameVal: "fifth", Scale: 2}}, "othertypes.json": []pdk.Field{pdk.StringField{NameVal: "first", Mutex: true}, pdk.StringArrayField{NameVal: "second"}, pdk.IntField{NameVal: "third"}, pdk.IntField{NameVal: "fourth"}, pdk.DecimalField{NameVal: "fifth"}, pdk.DecimalField{NameVal: "sixth"}, pdk.BoolField{NameVal: "seventh"}}, + "floatscale.json": []pdk.Field{pdk.DecimalField{NameVal: "first", Scale: 4}}, } diff --git a/v2/kafka/testdata/schemas/floatscale.json b/v2/kafka/testdata/schemas/floatscale.json new file mode 100644 index 0000000..7b9cd8c --- /dev/null +++ b/v2/kafka/testdata/schemas/floatscale.json @@ -0,0 +1,8 @@ +{ + "type": "record", + "namespace": "com.example", + "name": "FullName", + "fields": [ + { "name": "first", "type": "float", "scale": 4 } + ] +} diff --git a/v2/kafka/testdata/schemas/unions.json b/v2/kafka/testdata/schemas/unions.json index f3297d7..c9c32a6 100644 --- a/v2/kafka/testdata/schemas/unions.json +++ b/v2/kafka/testdata/schemas/unions.json @@ -6,6 +6,7 @@ { "name": "first", "type": ["null", "string"]}, { "name": "second", "type": ["null", "boolean"]}, { "name": "third", "type": ["null", "long"]}, - { "name": "fourth", "type": ["null", {"type": "bytes", "logicalType": "decimal", "scale": 3, "precision": 8}]} + { "name": "fourth", "type": ["null", {"type": "bytes", "logicalType": "decimal", "scale": 3, "precision": 8}]}, + { "name": "fifth", "type": ["null", {"type": "double", "scale": 2}]} ] } From 25979e8c916d148d0eb06690cf854e2a437cc2ec Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 16 Oct 2019 07:42:59 -0500 Subject: [PATCH 27/40] TLS support to v2 ingest/kafka --- go.mod | 9 +--- go.sum | 23 ++++++++ v2/ingest.go | 28 +++++++--- v2/kafka/cmd.go | 1 + 
v2/kafka/cmd_test.go | 29 +++++++++++ v2/kafka/source.go | 13 +++++ v2/tls.go | 121 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 209 insertions(+), 15 deletions(-) create mode 100644 v2/tls.go diff --git a/go.mod b/go.mod index 1ded377..5398273 100644 --- a/go.mod +++ b/go.mod @@ -5,14 +5,10 @@ replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.2019 replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20191013175548-8d07fd23d4fa require ( - github.com/Shopify/sarama v1.19.0 - github.com/Shopify/toxiproxy v2.1.4+incompatible // indirect + github.com/Shopify/sarama v1.24.0 github.com/aws/aws-sdk-go v1.15.88 github.com/boltdb/bolt v1.3.1 github.com/bsm/sarama-cluster v2.1.15+incompatible - github.com/eapache/go-resiliency v1.1.0 // indirect - github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect - github.com/eapache/queue v1.1.0 // indirect github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 github.com/jaffee/commandeer v0.3.0 @@ -21,11 +17,10 @@ require ( github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect - github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 // indirect github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff github.com/pilosa/pilosa v1.3.1 github.com/pkg/errors v0.8.1 - github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect + github.com/pkg/profile v1.2.1 // indirect github.com/spf13/cobra v0.0.5 github.com/spf13/pflag v1.0.3 github.com/spf13/viper v1.4.0 diff --git a/go.sum b/go.sum index 0b972b4..9dfc57f 100644 --- a/go.sum +++ b/go.sum @@ -17,6 +17,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/Shopify/sarama v1.19.0 h1:9oksLxC6uxVPHPVYUmq6xhr1BOF/hHobWH2UzO67z1s= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= +github.com/Shopify/sarama v1.24.0 h1:99vo5VAgQybHwZwiOy/RX/S3i0somjGxur3pLeheqzI= +github.com/Shopify/sarama v1.24.0/go.mod h1:fGP8eQ6PugKEI0iUETYYtnP6d1pH/bdDMTel1X5ajsU= github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= @@ -73,6 +75,8 @@ github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFP github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba h1:QkK2L3uvEaZJ40iFZbiMKz/yQF/MI2uaNO2iyV/ve6w= github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba/go.mod h1:3A7SOsr8WBIpkWUsqzMpR3tIQbanKqxZcis2GSl12Nk= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/frankban/quicktest v1.4.1/go.mod h1:36zfPVQyHxymz4cH7wlDmVwDrJuljRB60qkgn7rorfQ= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -194,6 +198,8 @@ github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c 
h1:N8DBKtbHi9PG github.com/jaffee/go-pilosa v0.4.1-0.20191009223837-58c281632e8c/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261 h1:q+k1WXUVYhAL4hQMrG2T6emfPaX1VFjRq/4F9LkSz/k= github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03 h1:FUwcHNlEqkqLjLBdCp5PRlCFijNjvcYANOZXzCfXwCM= +github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8 h1:12VvqtR6Aowv3l/EQUlocDHW2Cp4G9WJVH7uyH8QFJE= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -203,6 +209,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.8.2 h1:Bx0qjetmNjdFXASH02NSAREKpiaDwkO1DRZ3dV2KCcs= +github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -260,6 +268,8 @@ github.com/pelletier/go-toml v1.4.0 h1:u3Z1r+oOXJIkxqw34zVhyPgjBsm6X2wn21NWs/HfS github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 h1:wKfigKMTgvSzBLIVvB5QaBBQI0odU6n45/UKSphjLus= github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= +github.com/pierrec/lz4 v2.2.6+incompatible h1:6aCX4/YZ9v8q69hTyiR7dNLnTA3fgtKHVVW5BCd5Znw= +github.com/pierrec/lz4 v2.2.6+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pilosa/demo-taxi v0.0.0-20190604185441-6b6ef983bff7/go.mod h1:DM8Umjg0r/UscmOs49RJeE0WUb8Nj4PLUj4J02vigLk= github.com/pilosa/go-pilosa v0.0.0-20181106203903-796d4f7d7f3b/go.mod h1:uli4HiTymHocSAXJ9XpDbkH6kS63P8Yc0xyWDzooouc= github.com/pilosa/go-pilosa v1.3.1-0.20190715210601-8606626b90d6/go.mod h1:aFI9h49dhkkRoBLyeZFdHj+OHYtobmA7X7pn3AKVDMw= @@ -371,6 +381,8 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ugorji/go/codec v1.1.5-pre/go.mod h1:tULtS6Gy1AE1yCENaw4Vb//HLH5njI2tfCQDUqRd8fI= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= +github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt 
v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/y0ssar1an/q v1.0.7 h1:s3ckTY+wjk6Y0sFce4rIS1Ezf8S6d0UFJrKwe40MyiQ= @@ -387,6 +399,7 @@ golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190617133340-57b3e21c3d56/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -451,6 +464,7 @@ golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -513,6 +527,15 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/jcmturner/aescts.v1 v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw= +gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= +gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM= +gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= +gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= +gopkg.in/jcmturner/gokrb5.v7 v7.2.3 h1:hHMV/yKPwMnJhPuPx7pH2Uw/3Qyf+thJYlisUc44010= +gopkg.in/jcmturner/gokrb5.v7 v7.2.3/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= +gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU= +gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= gopkg.in/linkedin/goavro.v1 v1.0.5 h1:BJa69CDh0awSsLUmZ9+BowBdokpduDZSM9Zk8oKHfN4= gopkg.in/linkedin/goavro.v1 v1.0.5/go.mod h1:Aw5GdAbizjOEl0kAMHV9iHmA8reZzW/OKuJAl4Hb9F0= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= diff --git a/v2/ingest.go b/v2/ingest.go index 104b54b..9ea30af 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -22,7 +22,7 @@ type Main struct { PilosaHosts []string `help:"Comma separated list of host:port pairs for Pilosa."` BatchSize int `help:"Number of records to 
read before indexing all of them at once. Generally, larger means better throughput and more memory usage. 1,048,576 might be a good number."` Index string `help:"Name of Pilosa index."` - LogPath string `help:"Log file to write to. Empty means stderr. TODO implement."` + LogPath string `help:"Log file to write to. Empty means stderr."` PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` IDField string `help:"Field which contains the integer column ID. May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` @@ -30,6 +30,7 @@ type Main struct { PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` Verbose bool `help:"Enable verbose logging."` // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. + TLS TLSConfig NewSource func() (Source, error) `flag:"-"` @@ -40,9 +41,8 @@ type Main struct { log logger.Logger } -func (m *Main) PilosaClient() *pilosa.Client { - return m.client -} +func (m *Main) PilosaClient() *pilosa.Client { return m.client } +func (m *Main) Log() logger.Logger { return m.log } func NewMain() *Main { return &Main{ @@ -75,7 +75,7 @@ func (m *Main) setup() (err error) { return errors.Wrap(err, "validating configuration") } - logOut := os.Stdout + logOut := os.Stderr if m.LogPath != "" { f, err := os.OpenFile(m.LogPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { @@ -90,10 +90,22 @@ func (m *Main) setup() (err error) { m.log = logger.NewStandardLogger(logOut) } - m.client, err = pilosa.NewClient(m.PilosaHosts) - if err != nil { - return errors.Wrap(err, "getting pilosa client") + if m.TLS.CertificatePath != "" { + tlsConfig, err := GetTLSConfig(&m.TLS, m.Log()) + if err != nil { + return errors.Wrap(err, "getting TLS config") + } + m.client, err = pilosa.NewClient(m.PilosaHosts, pilosa.OptClientTLSConfig(tlsConfig)) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } + } else { + m.client, err = pilosa.NewClient(m.PilosaHosts) + if err != nil { + return errors.Wrap(err, "getting pilosa client") + } } + m.schema, err = m.client.Schema() if err != nil { return errors.Wrap(err, "getting schema") diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index d9272db..770c743 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -27,6 +27,7 @@ func NewMain() *Main { source.Topics = m.Topics source.Group = m.Group source.MaxMsgs = m.MaxMsgs + source.Log = m.Main.Log() err := source.Open() if err != nil { diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index da32f9c..d3ffbc2 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -3,6 +3,7 @@ package kafka import ( "fmt" "math/rand" + "os" "reflect" "sort" "strconv" @@ -10,6 +11,7 @@ import ( "github.com/Shopify/sarama" "github.com/pilosa/go-pilosa" + "github.com/pilosa/pdk/v2" ) func TestCmdMainOne(t *testing.T) { @@ -17,10 +19,18 @@ func TestCmdMainOne(t *testing.T) { t.Skip() } + // TODO automate the setup for this test (creating certs with certstrap, etc) + home, err := os.UserHomeDir() + if err != nil { + t.Fatalf("getting home dir: %v", err) + } + tests := []struct { name string PrimaryKeyFields []string IDField string + PilosaHosts []string + TLS 
*pdk.TLSConfig expRhinoKeys []string expRhinoCols []uint64 }{ @@ -29,6 +39,19 @@ func TestCmdMainOne(t *testing.T) { PrimaryKeyFields: []string{"abc", "db", "user_id"}, expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159})}, // "2" + "1" + uint32(159) + }, + { + name: "3 primary keys str/str/int", + PrimaryKeyFields: []string{"abc", "db", "user_id"}, + PilosaHosts: []string{"https://localhost:10111"}, + TLS: &pdk.TLSConfig{ + CertificatePath: home + "/pilosa-sec/out/theclient.crt", + CertificateKeyPath: home + "/pilosa-sec/out/theclient.key", + CACertPath: home + "/pilosa-sec/out/ca.crt", + EnableClientVerification: true, + }, + expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159})}, // "2" + "1" + uint32(159) + }, { name: "IDField int", @@ -74,6 +97,12 @@ func TestCmdMainOne(t *testing.T) { m.BatchSize = 1 m.Topics = []string{topic} m.MaxMsgs = len(records) + if test.PilosaHosts != nil { + m.PilosaHosts = test.PilosaHosts + } + if test.TLS != nil { + m.TLS = *test.TLS + } err = m.Run() if err != nil { diff --git a/v2/kafka/source.go b/v2/kafka/source.go index c6447bb..f0aec9b 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -14,6 +14,7 @@ import ( cluster "github.com/bsm/sarama-cluster" "github.com/go-avro/avro" pdk "github.com/pilosa/pdk/v2" + "github.com/pilosa/pilosa/logger" "github.com/pkg/errors" ) @@ -26,6 +27,8 @@ type Source struct { Group string MaxMsgs int RegistryURL string + TLS pdk.TLSConfig + Log logger.Logger numMsgs int consumer *cluster.Consumer @@ -55,6 +58,7 @@ func NewSource() *Source { Topics: []string{"test"}, Group: "group0", RegistryURL: "localhost:8081", + Log: logger.NopLogger, lastSchemaID: -1, cache: make(map[int32]avro.Schema), @@ -141,6 +145,15 @@ func (s *Source) Open() error { config.Consumer.Group.Heartbeat.Interval = time.Millisecond * 500 config.Consumer.Group.Session.Timeout = time.Second + if s.TLS.CertificatePath != "" { + tlsConfig, err := pdk.GetTLSConfig(&s.TLS, s.Log) + if err != nil { + return errors.Wrap(err, "getting TLS config") + } + config.Config.Net.TLS.Config = tlsConfig + config.Config.Net.TLS.Enable = true + } + var err error s.consumer, err = cluster.NewConsumer(s.Hosts, s.Group, s.Topics, config) if err != nil { diff --git a/v2/tls.go b/v2/tls.go new file mode 100644 index 0000000..b88df9a --- /dev/null +++ b/v2/tls.go @@ -0,0 +1,121 @@ +package pdk + +import ( + "crypto/tls" + "crypto/x509" + "fmt" + "io/ioutil" + "os" + "os/signal" + "sync" + "syscall" + + "github.com/pilosa/pilosa/logger" + "github.com/pkg/errors" +) + +// TLSConfig contains TLS configuration +type TLSConfig struct { + // CertificatePath contains the path to the certificate (.crt or .pem file) + CertificatePath string `json:"certificate"` + // CertificateKeyPath contains the path to the certificate key (.key file) + CertificateKeyPath string `json:"key"` + // CACertPath is the path to a CA certificate (.crt or .pem file) + CACertPath string `json:"ca-certificate"` + // SkipVerify disables verification of server certificates when connecting to another Pilosa node + SkipVerify bool `json:"skip-verify"` + // EnableClientVerification enables verification of client TLS certificates (Mutual TLS) + EnableClientVerification bool `json:"enable-client-verification"` +} + +type keypairReloader struct { + certMu sync.RWMutex + cert *tls.Certificate + certPath string + keyPath string +} + +func NewKeypairReloader(certPath, keyPath string, log logger.Logger) (*keypairReloader, error) { + result := &keypairReloader{ + certPath: certPath, + keyPath: keyPath, 
+ } + cert, err := tls.LoadX509KeyPair(certPath, keyPath) + if err != nil { + return nil, err + } + result.cert = &cert + go func() { + c := make(chan os.Signal, 1) + signal.Notify(c, syscall.SIGHUP) + for range c { + log.Printf("Received SIGHUP, reloading TLS certificate and key from %q and %q", certPath, keyPath) + if err := result.maybeReload(); err != nil { + log.Printf("Keeping old TLS certificate because the new one could not be loaded: %v", err) + } + } + }() + return result, nil +} + +func (kpr *keypairReloader) maybeReload() error { + newCert, err := tls.LoadX509KeyPair(kpr.certPath, kpr.keyPath) + if err != nil { + return err + } + kpr.certMu.Lock() + defer kpr.certMu.Unlock() + kpr.cert = &newCert + return nil +} + +func (kpr *keypairReloader) GetCertificateFunc() func(*tls.ClientHelloInfo) (*tls.Certificate, error) { + fmt.Println("getting certificate func") + return func(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) { + kpr.certMu.RLock() + defer kpr.certMu.RUnlock() + return kpr.cert, nil + } +} + +func (kpr *keypairReloader) GetClientCertificateFunc() func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + return func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + kpr.certMu.RLock() + defer kpr.certMu.RUnlock() + return kpr.cert, nil + } +} + +func GetTLSConfig(tlsConfig *TLSConfig, log logger.Logger) (TLSConfig *tls.Config, err error) { + if tlsConfig.CertificatePath != "" && tlsConfig.CertificateKeyPath != "" { + kpr, err := NewKeypairReloader(tlsConfig.CertificatePath, tlsConfig.CertificateKeyPath, log) + if err != nil { + return nil, errors.Wrap(err, "loading keypair") + } + TLSConfig = &tls.Config{ + InsecureSkipVerify: tlsConfig.SkipVerify, + PreferServerCipherSuites: true, + MinVersion: tls.VersionTLS12, + GetCertificate: kpr.GetCertificateFunc(), + GetClientCertificate: kpr.GetClientCertificateFunc(), + } + if tlsConfig.CACertPath != "" { + b, err := ioutil.ReadFile(tlsConfig.CACertPath) + if err != nil { + return nil, errors.Wrap(err, "loading tls ca key") + } + certPool := x509.NewCertPool() + + ok := certPool.AppendCertsFromPEM(b) + if !ok { + return nil, errors.New("error parsing CA certificate") + } + TLSConfig.ClientCAs = certPool + TLSConfig.RootCAs = certPool + } + if tlsConfig.EnableClientVerification { + TLSConfig.ClientAuth = tls.RequireAndVerifyClientCert + } + } + return TLSConfig, nil +} From ba411f5a0eafdee15abc6c6ee0104c9101fcf1e7 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 16 Oct 2019 15:26:15 -0500 Subject: [PATCH 28/40] TLS working for Pilosa, implemented for Kafka and schema registry --- v2/kafka/cmd.go | 2 +- v2/kafka/cmd_test.go | 46 ++++++++++++++++---------- v2/kafka/csrc/csrc.go | 30 +++++++++++++++-- v2/kafka/csrc/csrc_integration_test.go | 2 +- v2/kafka/source.go | 37 +++++++++++++++++++-- v2/kafka/source_test.go | 7 ++-- v2/kafkagen/cmd.go | 2 +- v2/tls.go | 5 +-- 8 files changed, 100 insertions(+), 31 deletions(-) diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index 770c743..7facac7 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -8,7 +8,7 @@ import ( type Main struct { pdk.Main `flag:"!embed"` KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` - RegistryURL string `help:"Location of Confluent Schema Registry"` + RegistryURL string `help:"Location of Confluent Schema Registry. 
Must start with 'https://' if you want to use TLS."` Group string `help:"Kafka group."` Topics []string `help:"Kafka topics to read from."` } diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index d3ffbc2..25c3a6e 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -12,6 +12,7 @@ import ( "github.com/Shopify/sarama" "github.com/pilosa/go-pilosa" "github.com/pilosa/pdk/v2" + "github.com/pilosa/pilosa/logger" ) func TestCmdMainOne(t *testing.T) { @@ -30,6 +31,7 @@ func TestCmdMainOne(t *testing.T) { PrimaryKeyFields []string IDField string PilosaHosts []string + RegistryURL string TLS *pdk.TLSConfig expRhinoKeys []string expRhinoCols []uint64 @@ -62,31 +64,16 @@ func TestCmdMainOne(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - // load big schema - licodec := liDecodeTestSchema(t, "bigschema.json") - schemaID := postSchema(t, "bigschema.json", "bigschema2") - fields := []string{"abc", "db", "user_id", "all_users", "has_deleted_date", "central_group", "custom_audiences", "desktop_boolean", "desktop_frequency", "desktop_recency", "product_boolean_historical_forestry_cravings_or_bugles", "ddd_category_total_current_rhinocerous_checking", "ddd_category_total_current_rhinocerous_thedog_cheetah", "survey1234", "days_since_last_logon", "elephant_added_for_account"} - // make a bunch of data and insert it records := [][]interface{}{ {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, } - // put records in kafka - conf := sarama.NewConfig() - conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? - conf.Producer.Return.Successes = true - producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) - if err != nil { - t.Fatalf("getting new producer: %v", err) - } a := rand.Int() topic := strconv.Itoa(a) - for _, vals := range records { - rec := makeRecord(t, fields, vals) - putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) - } + + // make a bunch of data and insert it // create Main and run with MaxMsgs m := NewMain() @@ -103,6 +90,31 @@ func TestCmdMainOne(t *testing.T) { if test.TLS != nil { m.TLS = *test.TLS } + if test.RegistryURL != "" { + m.RegistryURL = test.RegistryURL + } + + // load big schema + licodec := liDecodeTestSchema(t, "bigschema.json") + tlsConf, err := pdk.GetTLSConfig(test.TLS, logger.NopLogger) + if err != nil { + t.Fatalf("getting tls config: %v", err) + } + schemaID := postSchema(t, "bigschema.json", "bigschema2", m.RegistryURL, tlsConf) + + // put records in kafka + conf := sarama.NewConfig() + conf.Version = sarama.V0_10_0_0 // TODO - do we need this? should we move it up? 
+ conf.Producer.Return.Successes = true + producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, conf) + if err != nil { + t.Fatalf("getting new producer: %v", err) + } + + for _, vals := range records { + rec := makeRecord(t, fields, vals) + putRecordKafka(t, producer, schemaID, licodec, "akey", topic, rec) + } err = m.Run() if err != nil { diff --git a/v2/kafka/csrc/csrc.go b/v2/kafka/csrc/csrc.go index 88e64b0..af9fcd4 100644 --- a/v2/kafka/csrc/csrc.go +++ b/v2/kafka/csrc/csrc.go @@ -1,25 +1,37 @@ package csrc import ( + "crypto/tls" "encoding/json" "fmt" "io/ioutil" + "net" "net/http" "strings" + "time" "github.com/pkg/errors" ) type Client struct { URL string + + httpClient *http.Client } -func NewClient(url string) *Client { +func NewClient(url string, tlsConfig *tls.Config) *Client { if !strings.HasPrefix(url, "http") { url = "http://" + url } + c := http.DefaultClient + if strings.HasPrefix(url, "https://") { + fmt.Println("getting http client with tls config", tlsConfig) + c = getHTTPClient(tlsConfig) + } return &Client{ URL: url, + + httpClient: c, } } @@ -27,7 +39,7 @@ func NewClient(url string) *Client { // https://docs.confluent.io/current/schema-registry/develop/api.html#get--schemas-ids-int-%20id func (c *Client) GetSchema(id int) (string, error) { sr := SchemaResponse{} - resp, err := http.Get(fmt.Sprintf("%s/schemas/ids/%d", c.URL, id)) + resp, err := c.httpClient.Get(fmt.Sprintf("%s/schemas/ids/%d", c.URL, id)) err = unmarshalRespErr(resp, err, &sr) if err != nil { return "", errors.Wrap(err, "making http request") @@ -55,7 +67,7 @@ func (c *Client) PostSubjects(subj, schema string) (*SchemaResponse, error) { schema = strings.Replace(schema, "\t", "", -1) schema = strings.Replace(schema, "\n", `\n`, -1) schema = fmt.Sprintf(`{"schema": "%s"}`, strings.Replace(schema, `"`, `\"`, -1)) // this is probably terrible - resp, err := http.Post(fmt.Sprintf("%s/subjects/%s/versions", c.URL, subj), "application/json", strings.NewReader(schema)) + resp, err := c.httpClient.Post(fmt.Sprintf("%s/subjects/%s/versions", c.URL, subj), "application/json", strings.NewReader(schema)) sr := &SchemaResponse{} err = unmarshalRespErr(resp, err, sr) if err != nil { @@ -86,3 +98,15 @@ func unmarshalRespErr(resp *http.Response, err error, into interface{}) error { } return nil } + +func getHTTPClient(t *tls.Config) *http.Client { + transport := &http.Transport{ + Dial: (&net.Dialer{ + Timeout: time.Second * 20, + }).Dial, + } + if t != nil { + transport.TLSClientConfig = t + } + return &http.Client{Transport: transport} +} diff --git a/v2/kafka/csrc/csrc_integration_test.go b/v2/kafka/csrc/csrc_integration_test.go index b541b2a..a989eb6 100644 --- a/v2/kafka/csrc/csrc_integration_test.go +++ b/v2/kafka/csrc/csrc_integration_test.go @@ -11,7 +11,7 @@ func TestPostGet(t *testing.T) { t.Skip() } sr := "localhost:8081" - client := csrc.NewClient(sr) + client := csrc.NewClient(sr, nil) schemaStr := `{"type":"record","name":"a","fields":[{"name":"blah","type":"string"}]}` r, err := client.PostSubjects("aname", schemaStr) diff --git a/v2/kafka/source.go b/v2/kafka/source.go index f0aec9b..7db1b97 100644 --- a/v2/kafka/source.go +++ b/v2/kafka/source.go @@ -1,13 +1,16 @@ package kafka import ( + "crypto/tls" "encoding/binary" "encoding/json" "fmt" "io" "io/ioutil" "log" + "net" "net/http" + "strings" "time" "github.com/Shopify/sarama" @@ -45,7 +48,8 @@ type Source struct { cache map[int32]avro.Schema // stash is a local offset stash which source maintains so it can // control when offsets are 
committed to Kafka. - stash *cluster.OffsetStash + stash *cluster.OffsetStash + httpClient *http.Client decBytes []byte record *Record @@ -57,7 +61,7 @@ func NewSource() *Source { Hosts: []string{"localhost:9092"}, Topics: []string{"test"}, Group: "group0", - RegistryURL: "localhost:8081", + RegistryURL: "http://localhost:8081", Log: logger.NopLogger, lastSchemaID: -1, @@ -145,6 +149,11 @@ func (s *Source) Open() error { config.Consumer.Group.Heartbeat.Interval = time.Millisecond * 500 config.Consumer.Group.Session.Timeout = time.Second + if !strings.HasPrefix(s.RegistryURL, "http") { + s.RegistryURL = "http://" + s.RegistryURL + } + s.httpClient = http.DefaultClient + if s.TLS.CertificatePath != "" { tlsConfig, err := pdk.GetTLSConfig(&s.TLS, s.Log) if err != nil { @@ -152,6 +161,10 @@ func (s *Source) Open() error { } config.Config.Net.TLS.Config = tlsConfig config.Config.Net.TLS.Enable = true + + if strings.HasPrefix(s.RegistryURL, "https://") { + s.httpClient = getHTTPClient(tlsConfig) + } } var err error @@ -434,7 +447,7 @@ func (s *Source) getCodec(id int32) (rschema avro.Schema, rerr error) { return codec, nil } - r, err := http.Get(fmt.Sprintf("http://%s/schemas/ids/%d", s.RegistryURL, id)) + r, err := s.httpClient.Get(fmt.Sprintf("%s/schemas/ids/%d", s.RegistryURL, id)) if err != nil { return nil, errors.Wrap(err, "getting schema from registry") } @@ -480,3 +493,21 @@ func avroDecode(codec avro.Schema, data []byte) (map[string]interface{}, error) return decodedRecord.Map(), nil } + +func getHTTPClient(t *tls.Config) *http.Client { + transport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 20 * time.Second, + DualStack: true, + }).DialContext, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + if t != nil { + transport.TLSClientConfig = t + } + return &http.Client{Transport: transport} +} diff --git a/v2/kafka/source_test.go b/v2/kafka/source_test.go index c3d633b..9b72ecc 100644 --- a/v2/kafka/source_test.go +++ b/v2/kafka/source_test.go @@ -1,6 +1,7 @@ package kafka import ( + "crypto/tls" "encoding/binary" "fmt" "io/ioutil" @@ -253,7 +254,7 @@ func TestKafkaSourceIntegration(t *testing.T) { key := fmt.Sprintf("%d", rnd.Int()) for i, test := range tests { - schemaID := postSchema(t, test.schemaFile, fmt.Sprintf("schema%d", i)) + schemaID := postSchema(t, test.schemaFile, fmt.Sprintf("schema%d", i), "localhost:8081", nil) schema := liDecodeTestSchema(t, test.schemaFile) t.Run(test.schemaFile, func(t *testing.T) { @@ -293,8 +294,8 @@ func TestKafkaSourceIntegration(t *testing.T) { } -func postSchema(t *testing.T, schemaFile, subj string) (schemaID int) { - schemaClient := csrc.NewClient("localhost:8081") +func postSchema(t *testing.T, schemaFile, subj, regURL string, tlsConfig *tls.Config) (schemaID int) { + schemaClient := csrc.NewClient(regURL, tlsConfig) schemaStr := readTestSchema(t, schemaFile) resp, err := schemaClient.PostSubjects(subj, schemaStr) if err != nil { diff --git a/v2/kafkagen/cmd.go b/v2/kafkagen/cmd.go index a569730..fbb5cff 100644 --- a/v2/kafkagen/cmd.go +++ b/v2/kafkagen/cmd.go @@ -105,7 +105,7 @@ func decodeSchema(filename string) (*liavro.Codec, error) { } func (m *Main) postSchema(schemaFile, subj string) (schemaID int, err error) { - schemaClient := csrc.NewClient("http://" + m.RegistryURL) + schemaClient := csrc.NewClient("http://"+m.RegistryURL, nil) schemaStr, err := readSchema(schemaFile) if err != 
nil { return 0, errors.Wrap(err, "reading schema file") diff --git a/v2/tls.go b/v2/tls.go index b88df9a..7ec5812 100644 --- a/v2/tls.go +++ b/v2/tls.go @@ -3,7 +3,6 @@ package pdk import ( "crypto/tls" "crypto/x509" - "fmt" "io/ioutil" "os" "os/signal" @@ -70,7 +69,6 @@ func (kpr *keypairReloader) maybeReload() error { } func (kpr *keypairReloader) GetCertificateFunc() func(*tls.ClientHelloInfo) (*tls.Certificate, error) { - fmt.Println("getting certificate func") return func(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) { kpr.certMu.RLock() defer kpr.certMu.RUnlock() @@ -87,6 +85,9 @@ func (kpr *keypairReloader) GetClientCertificateFunc() func(*tls.CertificateRequ } func GetTLSConfig(tlsConfig *TLSConfig, log logger.Logger) (TLSConfig *tls.Config, err error) { + if tlsConfig == nil { + return nil, nil + } if tlsConfig.CertificatePath != "" && tlsConfig.CertificateKeyPath != "" { kpr, err := NewKeypairReloader(tlsConfig.CertificatePath, tlsConfig.CertificateKeyPath, log) if err != nil { From aa1efcb3dfcc7f3e3cea3bf36dc876e9d8b06f41 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 16 Oct 2019 17:37:32 -0500 Subject: [PATCH 29/40] add help strings for TLS config --- v2/tls.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/v2/tls.go b/v2/tls.go index 7ec5812..20e9d66 100644 --- a/v2/tls.go +++ b/v2/tls.go @@ -16,15 +16,15 @@ import ( // TLSConfig contains TLS configuration type TLSConfig struct { // CertificatePath contains the path to the certificate (.crt or .pem file) - CertificatePath string `json:"certificate"` + CertificatePath string `json:"certificate" help:"Path to certificate file."` // CertificateKeyPath contains the path to the certificate key (.key file) - CertificateKeyPath string `json:"key"` + CertificateKeyPath string `json:"key" help:"Path to certificate key file."` // CACertPath is the path to a CA certificate (.crt or .pem file) - CACertPath string `json:"ca-certificate"` - // SkipVerify disables verification of server certificates when connecting to another Pilosa node - SkipVerify bool `json:"skip-verify"` + CACertPath string `json:"ca-certificate" help:"Path to CA certificate file."` + // SkipVerify disables verification of server certificates. + SkipVerify bool `json:"skip-verify" help:"Disables verification of server certificates."` // EnableClientVerification enables verification of client TLS certificates (Mutual TLS) - EnableClientVerification bool `json:"enable-client-verification"` + EnableClientVerification bool `json:"enable-client-verification" help:"Enable verification of client certificates."` } type keypairReloader struct { From 5c54509c04531ebe5edd0b08a211c5e1d4a27e37 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Tue, 22 Oct 2019 09:11:04 -0500 Subject: [PATCH 30/40] add profiling and some client options --- v2/ingest.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index 9ea30af..c953547 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -4,6 +4,8 @@ import ( "bytes" "encoding/binary" "io" + "net/http" + _ "net/http/pprof" "os" "github.com/pilosa/go-pilosa" @@ -25,8 +27,7 @@ type Main struct { LogPath string `help:"Log file to write to. Empty means stderr."` PrimaryKeyFields []string `help:"Data field(s) which make up the primary key for a record. These will be concatenated and translated to a Pilosa ID. If empty, record key translation will not be used."` IDField string `help:"Field which contains the integer column ID. 
May not be used in conjunction with primary-key-fields. If both are empty, auto-generated IDs will be used."` - MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. Useful for testing when you don't want to run indefinitely."` - Concurrency int `help:"Number of concurrent kafka readers and indexing routines to launch. MaxMsgs will be read *from each*."` + Concurrency int `help:"Number of concurrent sources and indexing routines to launch."` PackBools string `help:"If non-empty, boolean fields will be packed into two set fields—one with this name, and one with -exists."` Verbose bool `help:"Enable verbose logging."` // TODO implement the auto-generated IDs... hopefully using Pilosa to manage it. @@ -75,6 +76,7 @@ func (m *Main) setup() (err error) { return errors.Wrap(err, "validating configuration") } + // setup logging logOut := os.Stderr if m.LogPath != "" { f, err := os.OpenFile(m.LogPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) @@ -90,6 +92,12 @@ func (m *Main) setup() (err error) { m.log = logger.NewStandardLogger(logOut) } + // start profiling endpoint + go func() { + m.log.Printf("%v", http.ListenAndServe("localhost:6060", nil)) + }() + + // set up Pilosa client if m.TLS.CertificatePath != "" { tlsConfig, err := GetTLSConfig(&m.TLS, m.Log()) if err != nil { @@ -100,12 +108,11 @@ func (m *Main) setup() (err error) { return errors.Wrap(err, "getting pilosa client") } } else { - m.client, err = pilosa.NewClient(m.PilosaHosts) + m.client, err = pilosa.NewClient(m.PilosaHosts, pilosa.OptClientRetries(10), pilosa.OptClientTotalPoolSize(1000), pilosa.OptClientPoolSizePerRoute(400)) if err != nil { return errors.Wrap(err, "getting pilosa client") } } - m.schema, err = m.client.Schema() if err != nil { return errors.Wrap(err, "getting schema") @@ -125,6 +132,8 @@ func (m *Main) setup() (err error) { } func (m *Main) runIngester(c int) error { + m.log.Printf("start ingester %d", c) + source, err := m.NewSource() if err != nil { return errors.Wrap(err, "getting source") From 95cf3f5f01f02faedbcb47b0201cca0f75340650 Mon Sep 17 00:00:00 2001 From: Travis Date: Tue, 22 Oct 2019 16:46:57 -0500 Subject: [PATCH 31/40] remove unnecessary OptFieldTypeBool()s --- v2/ingest.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index c953547..83b6ba4 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -267,8 +267,8 @@ func (m *Main) batchFromSchema(schema []Field) ([]Recordizer, gpexp.RecordBatch, // set up bool fields var boolField, boolFieldExists *pilosa.Field if m.PackBools != "" { - boolField = m.index.Field(m.PackBools, pilosa.OptFieldTypeBool()) - boolFieldExists = m.index.Field(m.PackBools+"-exists", pilosa.OptFieldTypeBool()) + boolField = m.index.Field(m.PackBools) + boolFieldExists = m.index.Field(m.PackBools + "-exists") } fields := make([]*pilosa.Field, 0, len(schema)) for i, pdkField := range schema { From 127c236498193f48524927757bacfe5113116e52 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 23 Oct 2019 07:47:05 -0500 Subject: [PATCH 32/40] fewer retries --- v2/ingest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/ingest.go b/v2/ingest.go index 83b6ba4..64c336e 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -108,7 +108,7 @@ func (m *Main) setup() (err error) { return errors.Wrap(err, "getting pilosa client") } } else { - m.client, err = pilosa.NewClient(m.PilosaHosts, pilosa.OptClientRetries(10), pilosa.OptClientTotalPoolSize(1000), pilosa.OptClientPoolSizePerRoute(400)) + m.client, err = 
pilosa.NewClient(m.PilosaHosts, pilosa.OptClientRetries(2), pilosa.OptClientTotalPoolSize(1000), pilosa.OptClientPoolSizePerRoute(400)) if err != nil { return errors.Wrap(err, "getting pilosa client") } From b1c2a3eb5befbe1519b40aefed66c157e49b1d04 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Wed, 23 Oct 2019 10:40:46 -0500 Subject: [PATCH 33/40] add locally autogenned IDs --- v2/ingest.go | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index 64c336e..95b95d1 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -10,6 +10,7 @@ import ( "github.com/pilosa/go-pilosa" "github.com/pilosa/go-pilosa/gpexp" + "github.com/pilosa/pdk" "github.com/pilosa/pilosa/logger" "github.com/pkg/errors" "golang.org/x/sync/errgroup" @@ -39,6 +40,8 @@ type Main struct { schema *pilosa.Schema index *pilosa.Index + ra pdk.RangeAllocator + log logger.Logger } @@ -127,12 +130,28 @@ func (m *Main) setup() (err error) { if err != nil { return errors.Wrap(err, "syncing schema") } + if len(m.PrimaryKeyFields) == 0 && m.IDField == "" { + shardWidth := m.index.ShardWidth() + if shardWidth == 0 { + shardWidth = pilosa.DefaultShardWidth + } + m.ra = pdk.NewLocalRangeAllocator(shardWidth) + } return nil } func (m *Main) runIngester(c int) error { m.log.Printf("start ingester %d", c) + var nexter pdk.RangeNexter + if m.IDField == "" && len(m.PrimaryKeyFields) == 0 { + var err error + nexter, err = pdk.NewRangeNexter(m.ra) + if err != nil { + return errors.Wrap(err, "getting range nexter") + } + defer nexter.Return() // TODO log possible err? + } source, err := m.NewSource() if err != nil { @@ -174,6 +193,12 @@ func (m *Main) runIngester(c int) error { return errors.Wrap(err, "recordizing") } } + if nexter != nil { // add ID if no id field specified + row.ID, err = nexter.Next() + if err != nil { + return errors.Wrap(err, "getting next ID") + } + } err = batch.Add(*row) if err == gpexp.ErrBatchNowFull { err = batch.Import() @@ -260,9 +285,11 @@ func (m *Main) batchFromSchema(schema []Field) ([]Recordizer, gpexp.RecordBatch, return nil, nil, nil, errors.Errorf("ID field %s not found", m.IDField) } } else { - return nil, nil, nil, errors.New("autogen IDs is currently unimplemented; specify an IDField or primary key fields") + m.log.Debugf("getting no recordizer because we're autogening IDs") + } + if rz != nil { + recordizers = append(recordizers, rz) } - recordizers = append(recordizers, rz) // set up bool fields var boolField, boolFieldExists *pilosa.Field From 12f86f67f08845bfa56fecc745204856b3fc899d Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Mon, 28 Oct 2019 11:38:48 -0500 Subject: [PATCH 34/40] pass registry url through main to source, add MaxMsgs to command main instead of having on PDK main --- v2/kafka/cmd.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/v2/kafka/cmd.go b/v2/kafka/cmd.go index 7facac7..01fe5ba 100644 --- a/v2/kafka/cmd.go +++ b/v2/kafka/cmd.go @@ -9,6 +9,7 @@ type Main struct { pdk.Main `flag:"!embed"` KafkaHosts []string `help:"Comma separated list of host:port pairs for Kafka."` RegistryURL string `help:"Location of Confluent Schema Registry. Must start with 'https://' if you want to use TLS."` + MaxMsgs int `help:"Number of messages to consume from Kafka before stopping. 
Useful for testing when you don't want to run indefinitely."` Group string `help:"Kafka group."` Topics []string `help:"Kafka topics to read from."` } @@ -24,8 +25,9 @@ func NewMain() *Main { m.NewSource = func() (pdk.Source, error) { source := NewSource() source.Hosts = m.KafkaHosts - source.Topics = m.Topics + source.RegistryURL = m.RegistryURL source.Group = m.Group + source.Topics = m.Topics source.MaxMsgs = m.MaxMsgs source.Log = m.Main.Log() From 0d64d4d9cfd8b83c89a856fa286d7b27b2582268 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Mon, 28 Oct 2019 11:59:13 -0500 Subject: [PATCH 35/40] fix ineffassign in test --- v2/kafka/cmd_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index 25c3a6e..d081a01 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -123,6 +123,9 @@ func TestCmdMainOne(t *testing.T) { client := m.PilosaClient() schema, err := client.Schema() + if err != nil { + t.Fatalf("getting client: %v", err) + } index := schema.Index(m.Index) defer func() { err := client.DeleteIndex(index) From 9fbf304bd752b7f2931ce2535eb961c9665d38a1 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Thu, 31 Oct 2019 16:25:57 -0500 Subject: [PATCH 36/40] fix consumer bug where a final partial batch would not be imported --- v2/ingest.go | 24 ++++++++++++++---------- v2/kafka/cmd_test.go | 24 ++++++++++++++---------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index 95b95d1..dc054be 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -162,8 +162,8 @@ func (m *Main) runIngester(c int) error { var prevRec Record var row *gpexp.Row rec, err := source.Record() - for ; err == ErrSchemaChange || err == nil; rec, err = source.Record() { - if err == ErrSchemaChange { + for ; ; rec, err = source.Record() { + if err != nil { // finish previous batch if this is not the first if batch != nil { err = batch.Import() @@ -175,11 +175,15 @@ func (m *Main) runIngester(c int) error { return errors.Wrap(err, "committing") } } - schema := source.Schema() - m.log.Printf("new schema: %+v", schema) - recordizers, batch, row, err = m.batchFromSchema(schema) - if err != nil { - return errors.Wrap(err, "batchFromSchema") + if err == ErrSchemaChange { + schema := source.Schema() + m.log.Printf("new schema: %+v", schema) + recordizers, batch, row, err = m.batchFromSchema(schema) + if err != nil { + return errors.Wrap(err, "batchFromSchema") + } + } else { + break } } for i := range row.Values { @@ -214,10 +218,10 @@ func (m *Main) runIngester(c int) error { } prevRec = rec } - if err == io.EOF { - err = nil + if err != io.EOF { + return errors.Wrap(err, "getting record") } - return errors.Wrap(err, "getting record") + return nil } type Recordizer func(rawRec []interface{}, rec *gpexp.Row) error diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index d081a01..9399cc3 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -39,11 +39,11 @@ func TestCmdMainOne(t *testing.T) { { name: "3 primary keys str/str/int", PrimaryKeyFields: []string{"abc", "db", "user_id"}, - expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159})}, // "2" + "1" + uint32(159) + expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159}), string([]byte{52, 51, 0, 0, 0, 44})}, // "2" + "1" + uint32(159) }, { - name: "3 primary keys str/str/int", + name: "3 primary keys str/str/int TLS", PrimaryKeyFields: []string{"abc", "db", "user_id"}, PilosaHosts: []string{"https://localhost:10111"}, TLS: &pdk.TLSConfig{ @@ -52,13 +52,13 @@ 
func TestCmdMainOne(t *testing.T) { CACertPath: home + "/pilosa-sec/out/ca.crt", EnableClientVerification: true, }, - expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159})}, // "2" + "1" + uint32(159) + expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159}), string([]byte{52, 51, 0, 0, 0, 44})}, // "2" + "1" + uint32(159) }, { name: "IDField int", IDField: "user_id", - expRhinoCols: []uint64{159}, + expRhinoCols: []uint64{44, 159}, }, } @@ -68,6 +68,7 @@ func TestCmdMainOne(t *testing.T) { records := [][]interface{}{ {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + {"4", "3", 44, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, } a := rand.Int() @@ -81,7 +82,7 @@ func TestCmdMainOne(t *testing.T) { m.PrimaryKeyFields = test.PrimaryKeyFields m.IDField = test.IDField m.PackBools = "bools" - m.BatchSize = 1 + m.BatchSize = 3 // need to test at a batch size less than the # of records, greater than, and equal to m.Topics = []string{topic} m.MaxMsgs = len(records) if test.PilosaHosts != nil { @@ -141,10 +142,10 @@ func TestCmdMainOne(t *testing.T) { abc := index.Field("abc") qr, err := client.Query(index.Count(abc.Row("2"))) if err != nil { - t.Fatalf("querying: %v", err) + t.Errorf("querying: %v", err) } if qr.Result().Count() != 1 { - t.Fatalf("wrong count for abc, %d is not 1", qr.Result().Count()) + t.Errorf("wrong count for abc, %d is not 1", qr.Result().Count()) } bools := index.Field("bools") @@ -153,7 +154,7 @@ func TestCmdMainOne(t *testing.T) { t.Fatalf("querying: %v", err) } ci := sortableCRI(qr.Result().CountItems()) - exp := sortableCRI{{Count: 1, Key: "all_users"}} + exp := sortableCRI{{Count: 2, Key: "all_users"}} sort.Sort(ci) sort.Sort(exp) if !reflect.DeepEqual(ci, exp) { @@ -166,7 +167,7 @@ func TestCmdMainOne(t *testing.T) { t.Fatalf("querying: %v", err) } ci = sortableCRI(qr.Result().CountItems()) - exp = sortableCRI{{Count: 1, Key: "all_users"}, {Count: 1, Key: "has_deleted_date"}} + exp = sortableCRI{{Count: 2, Key: "all_users"}, {Count: 2, Key: "has_deleted_date"}} sort.Sort(ci) sort.Sort(exp) if !reflect.DeepEqual(ci, exp) { @@ -178,8 +179,11 @@ func TestCmdMainOne(t *testing.T) { if err != nil { t.Fatalf("querying: %v", err) } + keys := qr.Result().Row().Keys + sort.Strings(keys) + sort.Strings(test.expRhinoKeys) if test.expRhinoKeys != nil { - if keys := qr.Result().Row().Keys; !reflect.DeepEqual(keys, test.expRhinoKeys) { + if !reflect.DeepEqual(keys, test.expRhinoKeys) { t.Errorf("wrong cols: %v, exp: %v", keys, test.expRhinoKeys) } } From 7af2c6738ebfbf80bbb468fe06f03718a30b26b2 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 1 Nov 2019 13:03:06 -0500 Subject: [PATCH 37/40] change primary key encoding to be pipe separated strings --- v2/ingest.go | 20 +++++++++----------- v2/kafka/cmd_test.go | 12 +++++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index 
dc054be..b4a8be5 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -2,7 +2,7 @@ package pdk import ( "bytes" - "encoding/binary" + "fmt" "io" "net/http" _ "net/http/pprof" @@ -464,20 +464,18 @@ func getPrimaryKeyRecordizer(schema []Field, pkFields []string) (recordizer Reco } buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? - // TODO... will want to change this encoding logic to length-prefix the different fields or something. for _, fieldIdx := range fieldIndices { - val := rawRec[fieldIdx] - switch vt := val.(type) { - case string: - buf.WriteString(vt) // err is always nil - case []byte: - buf.Write(vt) // err is always nil - default: - err = binary.Write(buf, binary.BigEndian, val) + if fieldIdx != 0 { + err := buf.WriteByte('|') if err != nil { - return errors.Wrapf(err, "writing %+v of type %[1]T", val) + return errors.Wrap(err, "writing separator") } } + val := rawRec[fieldIdx] + _, err := fmt.Fprintf(buf, "%v", val) + if err != nil { + return errors.Wrapf(err, "encoding primary key val:'%v' type: %[1]T", val) + } } rec.ID = buf.Bytes() return nil diff --git a/v2/kafka/cmd_test.go b/v2/kafka/cmd_test.go index 9399cc3..e12163e 100644 --- a/v2/kafka/cmd_test.go +++ b/v2/kafka/cmd_test.go @@ -39,7 +39,7 @@ func TestCmdMainOne(t *testing.T) { { name: "3 primary keys str/str/int", PrimaryKeyFields: []string{"abc", "db", "user_id"}, - expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159}), string([]byte{52, 51, 0, 0, 0, 44})}, // "2" + "1" + uint32(159) + expRhinoKeys: []string{"2|1|159", "4|3|44", "123456789|q2db_1234|432"}, // "2" + "1" + uint32(159) }, { @@ -52,7 +52,7 @@ func TestCmdMainOne(t *testing.T) { CACertPath: home + "/pilosa-sec/out/ca.crt", EnableClientVerification: true, }, - expRhinoKeys: []string{string([]byte{50, 49, 0, 0, 0, 159}), string([]byte{52, 51, 0, 0, 0, 44})}, // "2" + "1" + uint32(159) + expRhinoKeys: []string{"2|1|159", "4|3|44", "123456789|q2db_1234|432"}, // "2" + "1" + uint32(159) }, { @@ -69,13 +69,15 @@ func TestCmdMainOne(t *testing.T) { records := [][]interface{}{ {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, {"4", "3", 44, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + {"123456789", "q2db_1234", 432, map[string]interface{}{"boolean": false}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.9}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + {"123456789", "q2db_1234", 432, map[string]interface{}{"boolean": false}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, 
map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + {"2", "1", 159, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, + {"4", "3", 44, map[string]interface{}{"boolean": true}, map[string]interface{}{"boolean": false}, map[string]interface{}{"string": "cgr"}, map[string]interface{}{"array": []string{"a", "b"}}, nil, map[string]interface{}{"int": 7}, nil, nil, map[string]interface{}{"float": 5.4}, nil, map[string]interface{}{"org.test.survey1234": "yes"}, map[string]interface{}{"float": 8.0}, nil}, } a := rand.Int() topic := strconv.Itoa(a) - // make a bunch of data and insert it - // create Main and run with MaxMsgs m := NewMain() m.Index = fmt.Sprintf("cmd_test_index239ij%s", topic) @@ -167,7 +169,7 @@ func TestCmdMainOne(t *testing.T) { t.Fatalf("querying: %v", err) } ci = sortableCRI(qr.Result().CountItems()) - exp = sortableCRI{{Count: 2, Key: "all_users"}, {Count: 2, Key: "has_deleted_date"}} + exp = sortableCRI{{Count: 3, Key: "all_users"}, {Count: 3, Key: "has_deleted_date"}} sort.Sort(ci) sort.Sort(exp) if !reflect.DeepEqual(ci, exp) { From 82b6637fe2f517a8eeb308ac2e45cc9a8ef8207e Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 1 Nov 2019 13:14:03 -0500 Subject: [PATCH 38/40] fix bug where a separator could be inserted initially in a PK --- v2/ingest.go | 4 ++-- v2/ingest_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/v2/ingest.go b/v2/ingest.go index b4a8be5..2925823 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -464,8 +464,8 @@ func getPrimaryKeyRecordizer(schema []Field, pkFields []string) (recordizer Reco } buf := bytes.NewBuffer(idbytes) // TODO does the buffer escape to heap? 
- for _, fieldIdx := range fieldIndices { - if fieldIdx != 0 { + for i, fieldIdx := range fieldIndices { + if i != 0 { err := buf.WriteByte('|') if err != nil { return errors.Wrap(err, "writing separator") diff --git a/v2/ingest_test.go b/v2/ingest_test.go index ca1440d..bd6ef73 100644 --- a/v2/ingest_test.go +++ b/v2/ingest_test.go @@ -64,7 +64,7 @@ func TestGetPrimaryKeyRecordizer(t *testing.T) { schema: []Field{StringField{NameVal: "a"}, IntField{NameVal: "b"}, IntField{NameVal: "c"}, IntField{NameVal: "d"}}, pkFields: []string{"c", "d", "b"}, rawRec: []interface{}{"a", uint32(1), uint32(2), uint32(4)}, - expID: []byte{0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 1}, + expID: []byte("2|4|1"), }, } From 012049a41a5198a7cde8046da77a0009b451ab4b Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 1 Nov 2019 15:52:24 -0500 Subject: [PATCH 39/40] upgrade commandeer to fix duplicated primary keys --- .gitignore | 2 ++ Makefile | 5 +++++ go.mod | 2 +- go.sum | 2 ++ v2/ingest.go | 2 ++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 414864f..64e1a54 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,5 @@ vendor .terraform terraform.tfstate* + +build \ No newline at end of file diff --git a/Makefile b/Makefile index 43b4780..6c9060e 100644 --- a/Makefile +++ b/Makefile @@ -55,3 +55,8 @@ install-gometalinter: GO111MODULE=off go get -u github.com/alecthomas/gometalinter GO111MODULE=off gometalinter --install GO111MODULE=off go get github.com/remyoudompheng/go-misc/deadcode + +build-consumers: + mkdir -p build + go build -o build/consumer-mac-`git log | head -1 | cut -d' ' -f2 | head -c 7` ./v2/cmd/kafka + GOOS=linux go build -o build/consumer-linux-`git log | head -1 | cut -d' ' -f2 | head -c 7` ./v2/cmd/kafka diff --git a/go.mod b/go.mod index 5398273..48830d5 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/bsm/sarama-cluster v2.1.15+incompatible github.com/elodina/go-avro v0.0.0-20160406082632-0c8185d9a3ba github.com/go-avro/avro v0.0.0-20171219232920-444163702c11 - github.com/jaffee/commandeer v0.3.0 + github.com/jaffee/commandeer v0.3.1-0.20191101204523-07c6265b86ee github.com/linkedin/goavro v0.0.0-20181018120728-1beee2a74088 github.com/linkedin/goavro/v2 v2.9.6 github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 diff --git a/go.sum b/go.sum index 9dfc57f..0c94aa4 100644 --- a/go.sum +++ b/go.sum @@ -190,6 +190,8 @@ github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e h1:CC1usSIzu9p github.com/jaffee/commandeer v0.1.1-0.20190726022955-4d43b78ebc4e/go.mod h1:N5yIzoHN6EwVFi0QCKvpFPJeECoZyEcFBQSR8r+7Mz0= github.com/jaffee/commandeer v0.3.0 h1:9KEz8f9T6PwuzjdxfV8C5FevdJp6xih5yqPLmNzQarc= github.com/jaffee/commandeer v0.3.0/go.mod h1:kCwfuSvZ2T0NVEr3LDSo6fDUgi0xSBnAVDdkOKTtpLQ= +github.com/jaffee/commandeer v0.3.1-0.20191101204523-07c6265b86ee h1:tt4zMiPQ/qi1RoJNh9vTrQ/JEphBNMxQDV42DjvWZl4= +github.com/jaffee/commandeer v0.3.1-0.20191101204523-07c6265b86ee/go.mod h1:kCwfuSvZ2T0NVEr3LDSo6fDUgi0xSBnAVDdkOKTtpLQ= github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032 h1:uATKnbEhR3+K3L1YFYa4DfPIHpfvnLvNcG9v4iWYjrA= github.com/jaffee/go-pilosa v0.4.1-0.20191008192729-4129aee12032/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/jaffee/go-pilosa v0.4.1-0.20191008194651-6791c1437ec4 h1:IEGhQ3aUdbLHPkv+twI74W6ggBcAwl0cNJzquQ1IXdE= diff --git a/v2/ingest.go b/v2/ingest.go index 2925823..4ee74d8 100644 --- a/v2/ingest.go +++ b/v2/ingest.go @@ -64,6 +64,8 @@ func (m *Main) Run() (err error) { 
return errors.Wrap(err, "setting up") } eg := errgroup.Group{} + + m.log.Debugf("Ingest Config: %+v", m) for c := 0; c < m.Concurrency; c++ { c := c eg.Go(func() error { From c24dcb3ba009bfc085e234d738e549716c1a6032 Mon Sep 17 00:00:00 2001 From: Matt Jaffee Date: Fri, 8 Nov 2019 12:38:14 -0600 Subject: [PATCH 40/40] update go-pilosa dep to latest --- go.mod | 4 +--- go.sum | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 48830d5..7d0791d 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,5 @@ module github.com/pilosa/pdk -replace github.com/pilosa/go-pilosa => github.com/jaffee/go-pilosa v0.4.1-0.20191011215038-51699dbd7261 - replace github.com/go-avro/avro => github.com/jaffee/avro v0.0.0-20191013175548-8d07fd23d4fa require ( @@ -17,7 +15,7 @@ require ( github.com/mmcloughlin/geohash v0.0.0-20181009053802-f7f2bcae3294 github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect - github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff + github.com/pilosa/go-pilosa v1.3.1-0.20191028170026-39c2f0e6af86 github.com/pilosa/pilosa v1.3.1 github.com/pkg/errors v0.8.1 github.com/pkg/profile v1.2.1 // indirect diff --git a/go.sum b/go.sum index 0c94aa4..aad71e5 100644 --- a/go.sum +++ b/go.sum @@ -279,6 +279,8 @@ github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5 h1:HO6bKfnNIaI7 github.com/pilosa/go-pilosa v1.3.1-0.20191011140449-29aaccbd50c5/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff h1:6i31l2T0OsKRVnMRR7SmRaE17hsWGIoXwrgHtWyHONU= github.com/pilosa/go-pilosa v1.3.1-0.20191011151453-0c53860b34ff/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= +github.com/pilosa/go-pilosa v1.3.1-0.20191028170026-39c2f0e6af86 h1:Vz4i83fazqlIo0yNjSPesbl8hDDeg/KasYs5UtIicn4= +github.com/pilosa/go-pilosa v1.3.1-0.20191028170026-39c2f0e6af86/go.mod h1:rauptBRJWI8sLM+7BAd+6+GfqSFDnuQdgreUjOIO6BE= github.com/pilosa/pilosa v0.0.0-20181115192138-84148d4ee6c0/go.mod h1:NgpkJkefqUKUHV7O3TqBOu89tsao3ksth2wzTNe8CPQ= github.com/pilosa/pilosa v1.3.1 h1:rLDVqJBuRzhPtue730D+EX0YEVS4R0oDzsE4bJBwLcE= github.com/pilosa/pilosa v1.3.1/go.mod h1:97yLL9mpUqOj9naKu5XA/b/U6JLe3JGGUlc2HOTDw+A=
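
For reference, a minimal sketch of wiring the new TLSConfig into a go-pilosa client, assuming the pdk.GetTLSConfig helper and the pilosa.OptClientTLSConfig option shown in the diffs above; the certificate paths and the https://localhost:10111 host are placeholders mirroring the test setup, not required values:

package main

import (
	"log"
	"os"

	pilosa "github.com/pilosa/go-pilosa"
	pdk "github.com/pilosa/pdk/v2"
	"github.com/pilosa/pilosa/logger"
)

func main() {
	// Placeholder paths; the tests point these at certstrap output under ~/pilosa-sec/out.
	conf := pdk.TLSConfig{
		CertificatePath:    "theclient.crt",
		CertificateKeyPath: "theclient.key",
		CACertPath:         "ca.crt",
	}

	// Build a *tls.Config from the PDK TLS settings, then hand it to the Pilosa client.
	tlsConfig, err := pdk.GetTLSConfig(&conf, logger.NewStandardLogger(os.Stderr))
	if err != nil {
		log.Fatalf("getting TLS config: %v", err)
	}
	client, err := pilosa.NewClient([]string{"https://localhost:10111"}, pilosa.OptClientTLSConfig(tlsConfig))
	if err != nil {
		log.Fatalf("getting pilosa client: %v", err)
	}
	_ = client // use the client for schema/queries as in v2/ingest.go
}

And a standard-library-only sketch of the pipe-separated primary key encoding that getPrimaryKeyRecordizer now produces; encodePrimaryKey is a hypothetical helper written for illustration, and the sample values mirror the "2|1|159" fixture in cmd_test.go:

package main

import (
	"bytes"
	"fmt"
)

// encodePrimaryKey joins the selected field values with '|' using their default
// %v formatting, which is how record IDs are now built from primary key fields.
func encodePrimaryKey(vals []interface{}) []byte {
	buf := &bytes.Buffer{}
	for i, val := range vals {
		if i != 0 {
			buf.WriteByte('|') // separator only between values, never leading
		}
		fmt.Fprintf(buf, "%v", val)
	}
	return buf.Bytes()
}

func main() {
	// Values for the primary key fields "abc", "db", "user_id" from the test records.
	id := encodePrimaryKey([]interface{}{"2", "1", 159})
	fmt.Println(string(id)) // prints: 2|1|159
}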