Skip to content
This repository was archived by the owner on Sep 28, 2022. It is now read-only.

Commit 29aaccb

Browse files
authored
Merge pull request #253 from jaffee/batch-ingest
Batch ingest
2 parents b745504 + d00044f commit 29aaccb

12 files changed

+2076
-78
lines changed

Diff for: .circleci/config.yml

+7-7
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 2
22
defaults: &defaults
33
working_directory: /go/src/github.com/pilosa/go-pilosa
44
docker:
5-
- image: circleci/golang:1.11
5+
- image: circleci/golang:1.12
66
environment:
77
GO111MODULE: "on"
88
fast-checkout: &fast-checkout
@@ -30,18 +30,18 @@ jobs:
3030
- *fast-checkout
3131
- run: make install-gometalinter
3232
- run: make gometalinter
33-
test-golang-1.12-rc: &base-test
33+
test-golang-1.13: &base-test
3434
<<: *defaults
3535
steps:
3636
- *fast-checkout
3737
- run: make test-all
3838
docker:
39-
- image: circleci/golang:1.12-rc
39+
- image: circleci/golang:1.13
4040
- image: pilosa/pilosa:master
41-
test-golang-1.11:
41+
test-golang-1.12:
4242
<<: *base-test
4343
docker:
44-
- image: circleci/golang:1.11
44+
- image: circleci/golang:1.12
4545
- image: pilosa/pilosa:master
4646
workflows:
4747
version: 2
@@ -51,9 +51,9 @@ workflows:
5151
- linter:
5252
requires:
5353
- build
54-
- test-golang-1.12-rc:
54+
- test-golang-1.13:
5555
requires:
5656
- build
57-
- test-golang-1.11:
57+
- test-golang-1.12:
5858
requires:
5959
- build

Diff for: client.go

+166-10
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,81 @@ type Client struct {
8989

9090
importLogEncoder encoder
9191
logLock sync.Mutex
92+
93+
shardNodes shardNodes
94+
tick *time.Ticker
95+
done chan struct{}
96+
}
97+
98+
func (c *Client) getURIsForShard(index string, shard uint64) ([]*URI, error) {
99+
uris, ok := c.shardNodes.Get(index, shard)
100+
if ok {
101+
return uris, nil
102+
}
103+
fragmentNodes, err := c.fetchFragmentNodes(index, shard)
104+
if err != nil {
105+
return nil, errors.Wrap(err, "trying to look up nodes for shard")
106+
}
107+
uris = make([]*URI, 0, len(fragmentNodes))
108+
for _, fn := range fragmentNodes {
109+
uris = append(uris, fn.URI())
110+
}
111+
c.shardNodes.Put(index, shard, uris)
112+
return uris, nil
113+
}
114+
115+
func (c *Client) runChangeDetection() {
116+
for {
117+
select {
118+
case <-c.tick.C:
119+
c.detectClusterChanges()
120+
case <-c.done:
121+
return
122+
}
123+
}
124+
}
125+
126+
func (c *Client) Close() error {
127+
c.tick.Stop()
128+
close(c.done)
129+
return nil
130+
}
131+
132+
// detectClusterChanges chooses a random index and shard from the
133+
// shardNodes cache and deletes it. It then looks it up from Pilosa to
134+
// see if it still matches, and if not it drops the whole cache.
135+
func (c *Client) detectClusterChanges() {
136+
c.shardNodes.mu.Lock()
137+
// we rely on Go's random map iteration order to get a random
138+
// element. If it doesn't end up being random, it shouldn't
139+
// actually matter.
140+
for index, shardMap := range c.shardNodes.data {
141+
for shard, uris := range shardMap {
142+
delete(shardMap, shard)
143+
c.shardNodes.data[index] = shardMap
144+
c.shardNodes.mu.Unlock()
145+
newURIs, err := c.getURIsForShard(index, shard) // refetch URIs from server.
146+
if err != nil {
147+
c.logger.Printf("problem invalidating shard node cache: %v", err)
148+
return
149+
}
150+
if len(uris) != len(newURIs) {
151+
c.logger.Printf("invalidating shard node cache old: %s, new: %s", URIs(uris), URIs(newURIs))
152+
c.shardNodes.Invalidate()
153+
return
154+
}
155+
for i := range uris {
156+
u1, u2 := uris[i], newURIs[i]
157+
if *u1 != *u2 {
158+
c.logger.Printf("invalidating shard node cache, uri mismatch at %d old: %s, new: %s", i, URIs(uris), URIs(newURIs))
159+
c.shardNodes.Invalidate()
160+
return
161+
}
162+
}
163+
break
164+
}
165+
break
166+
}
92167
}
93168

94169
// DefaultClient creates a client with the default address and options.
@@ -138,6 +213,10 @@ func newClientWithOptions(options *ClientOptions) *Client {
138213
client: newHTTPClient(options.withDefaults()),
139214
logger: log.New(os.Stderr, "go-pilosa ", log.Flags()),
140215
coordinatorLock: &sync.RWMutex{},
216+
217+
shardNodes: newShardNodes(),
218+
tick: time.NewTicker(time.Minute),
219+
done: make(chan struct{}, 0),
141220
}
142221
if options.importLogWriter != nil {
143222
c.importLogEncoder = newImportLogEncoder(options.importLogWriter)
@@ -148,9 +227,10 @@ func newClientWithOptions(options *ClientOptions) *Client {
148227
c.tracer = options.tracer
149228
}
150229
c.retries = *options.retries
151-
c.minRetrySleepTime = 1 * time.Second
230+
c.minRetrySleepTime = 100 * time.Millisecond
152231
c.maxRetrySleepTime = 2 * time.Minute
153232
c.importManager = newRecordImportManager(c)
233+
go c.runChangeDetection()
154234
return c
155235

156236
}
@@ -282,7 +362,7 @@ func (c *Client) EnsureIndex(index *Index) error {
282362
if err == ErrIndexExists {
283363
return nil
284364
}
285-
return err
365+
return errors.Wrap(err, "creating index")
286366
}
287367

288368
// EnsureField creates a field on the server if it doesn't exists.
@@ -296,13 +376,17 @@ func (c *Client) EnsureField(field *Field) error {
296376

297377
// DeleteIndex deletes an index on the server.
298378
func (c *Client) DeleteIndex(index *Index) error {
379+
return c.DeleteIndexByName(index.Name())
380+
}
381+
382+
// DeleteIndexByName deletes the named index on the server.
383+
func (c *Client) DeleteIndexByName(index string) error {
299384
span := c.tracer.StartSpan("Client.DeleteIndex")
300385
defer span.Finish()
301386

302-
path := fmt.Sprintf("/index/%s", index.name)
387+
path := fmt.Sprintf("/index/%s", index)
303388
_, _, err := c.httpRequest("DELETE", path, nil, nil, false)
304389
return err
305-
306390
}
307391

308392
// DeleteField deletes a field on the server.
@@ -340,7 +424,7 @@ func (c *Client) syncSchema(schema *Schema, serverSchema *Schema) error {
340424
if _, ok := serverSchema.indexes[indexName]; !ok {
341425
err = c.EnsureIndex(index)
342426
if err != nil {
343-
return err
427+
return errors.Wrap(err, "ensuring index")
344428
}
345429
}
346430
for _, field := range index.fields {
@@ -535,7 +619,7 @@ func (c *Client) translateRecordsRowKeys(rowKeyIDMap *lru.LRU, field *Field, col
535619
}
536620
if len(keys) > 0 {
537621
// translate missing keys
538-
ids, err := c.translateRowKeys(field, keys)
622+
ids, err := c.TranslateRowKeys(field, keys)
539623
if err != nil {
540624
return err
541625
}
@@ -572,7 +656,7 @@ func (c *Client) translateRecordsColumnKeys(columnKeyIDMap *lru.LRU, index *Inde
572656
}
573657
if len(keys) > 0 {
574658
// translate missing keys
575-
ids, err := c.translateColumnKeys(index, keys)
659+
ids, err := c.TranslateColumnKeys(index, keys)
576660
if err != nil {
577661
return err
578662
}
@@ -641,6 +725,63 @@ func (c *Client) importValues(field *Field,
641725
return errors.Wrap(err, "importing values to nodes")
642726
}
643727

728+
// ImportValues takes the given integer values and column ids (which
729+
// must all be in the given shard) and imports them into the given
730+
// index,field,shard on all nodes which should hold that shard. It
731+
// assumes that the ids have been translated from keys if necessary
732+
// and so tells Pilosa to ignore checking if the index uses column
733+
// keys. ImportValues wraps EncodeImportValues and DoImportValues —
734+
// these are broken out and exported so that performance conscious
735+
// users can re-use the same vals and ids byte buffers for local
736+
// encoding, while performing the imports concurrently.
737+
func (c *Client) ImportValues(index, field string, shard uint64, vals []int64, ids []uint64, clear bool) error {
738+
path, data, err := c.EncodeImportValues(index, field, shard, vals, ids, clear)
739+
if err != nil {
740+
return errors.Wrap(err, "encoding import-values request")
741+
}
742+
err = c.DoImportValues(index, shard, path, data)
743+
return errors.Wrap(err, "doing import values")
744+
}
745+
746+
// EncodeImportValues computes the HTTP path and payload for an
747+
// import-values request. It is typically followed by a call to
748+
// DoImportValues.
749+
func (c *Client) EncodeImportValues(index, field string, shard uint64, vals []int64, ids []uint64, clear bool) (path string, data []byte, err error) {
750+
msg := &pbuf.ImportValueRequest{
751+
Index: index,
752+
Field: field,
753+
Shard: shard,
754+
ColumnIDs: ids,
755+
Values: vals,
756+
}
757+
data, err = proto.Marshal(msg)
758+
if err != nil {
759+
return "", nil, errors.Wrap(err, "marshaling to protobuf")
760+
}
761+
path = fmt.Sprintf("/index/%s/field/%s/import?clear=%s&ignoreKeyCheck=true", index, field, strconv.FormatBool(clear))
762+
return path, data, nil
763+
}
764+
765+
// DoImportValues takes a path and data payload (normally from
766+
// EncodeImportValues), logs the import, finds all nodes which own
767+
// this shard, and concurrently imports to those nodes.
768+
func (c *Client) DoImportValues(index string, shard uint64, path string, data []byte) error {
769+
c.logImport(index, path, shard, false, data)
770+
771+
uris, err := c.getURIsForShard(index, shard)
772+
if err != nil {
773+
return errors.Wrap(err, "getting uris")
774+
}
775+
776+
eg := errgroup.Group{}
777+
for _, uri := range uris {
778+
eg.Go(func() error {
779+
return c.importData(uri, path, data)
780+
})
781+
}
782+
return errors.Wrap(eg.Wait(), "importing values to nodes")
783+
}
784+
644785
func importPathData(field *Field, shard uint64, msg proto.Message, options *ImportOptions) (path string, data []byte, err error) {
645786
data, err = proto.Marshal(msg)
646787
if err != nil {
@@ -704,6 +845,18 @@ func (c *Client) importData(uri *URI, path string, data []byte) error {
704845
return nil
705846
}
706847

848+
// ImportRoaringBitmap can import pre-made bitmaps for a number of
849+
// different views into the given field/shard. If the view name in the
850+
// map is an empty string, the standard view will be used.
851+
func (c *Client) ImportRoaringBitmap(field *Field, shard uint64, views map[string]*roaring.Bitmap, clear bool) error {
852+
uris, err := c.getURIsForShard(field.index.Name(), shard)
853+
if err != nil {
854+
return errors.Wrap(err, "getting URIs for import")
855+
}
856+
err = c.importRoaringBitmap(uris[0], field, shard, views, &ImportOptions{clear: clear})
857+
return errors.Wrap(err, "importing bitmap")
858+
}
859+
707860
func (c *Client) importRoaringBitmap(uri *URI, field *Field, shard uint64, views viewImports, options *ImportOptions) error {
708861
protoViews := []*pbuf.ImportRoaringRequestView{}
709862
for name, bmp := range views {
@@ -971,7 +1124,10 @@ func (c *Client) doRequest(host *URI, method, path string, headers map[string]st
9711124
}
9721125
err = errors.New(strings.TrimSpace(string(content)))
9731126
}
974-
c.logger.Printf("request failed with: %s, retrying (%d)", err.Error(), tries)
1127+
if tries == 0 {
1128+
break
1129+
}
1130+
c.logger.Printf("request failed with: %s status: %d, retrying after %d more time(s) after %v ", err.Error(), resp.StatusCode, tries, sleepTime)
9751131
time.Sleep(sleepTime)
9761132
sleepTime *= 2
9771133
if sleepTime > c.maxRetrySleepTime {
@@ -1023,7 +1179,7 @@ func (c *Client) augmentHeaders(headers map[string]string) map[string]string {
10231179
return headers
10241180
}
10251181

1026-
func (c *Client) translateRowKeys(field *Field, keys []string) ([]uint64, error) {
1182+
func (c *Client) TranslateRowKeys(field *Field, keys []string) ([]uint64, error) {
10271183
req := &pbuf.TranslateKeysRequest{
10281184
Index: field.index.name,
10291185
Field: field.name,
@@ -1032,7 +1188,7 @@ func (c *Client) translateRowKeys(field *Field, keys []string) ([]uint64, error)
10321188
return c.translateKeys(req, keys)
10331189
}
10341190

1035-
func (c *Client) translateColumnKeys(index *Index, keys []string) ([]uint64, error) {
1191+
func (c *Client) TranslateColumnKeys(index *Index, keys []string) ([]uint64, error) {
10361192
req := &pbuf.TranslateKeysRequest{
10371193
Index: index.name,
10381194
Keys: keys,

Diff for: client_internal_it_test.go

+9
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,12 @@ func TestImportWithReplayErrors(t *testing.T) {
171171
t.Fatal("import replay hanging when no schema created")
172172
}
173173
}
174+
175+
func TestDetectClusterChanges(t *testing.T) {
176+
c := getClient()
177+
defer c.Close()
178+
c.shardNodes.data["blah"] = make(map[uint64][]*URI)
179+
c.shardNodes.data["blah"][1] = []*URI{{scheme: "zzz"}}
180+
181+
c.detectClusterChanges()
182+
}

0 commit comments

Comments
 (0)