@@ -7,18 +7,20 @@ import (
7
7
"fmt"
8
8
"io/ioutil"
9
9
"log"
10
+ "net"
10
11
"net/http"
11
12
"sort"
13
+ "strconv"
12
14
"sync"
13
15
"time"
14
16
15
17
"github.com/hashicorp/go-discover"
16
- "github.com/rancher/dynamiclistener/server"
17
18
"github.com/rancher/rancherd/pkg/config"
18
19
"github.com/rancher/wrangler/pkg/data/convert"
19
20
"github.com/rancher/wrangler/pkg/randomtoken"
20
21
"github.com/rancher/wrangler/pkg/slice"
21
22
"github.com/sirupsen/logrus"
23
+ "k8s.io/client-go/util/cert"
22
24
23
25
// Include kubernetes provider
24
26
_ "github.com/hashicorp/go-discover/provider/k8s"
@@ -38,10 +40,17 @@ var (
38
40
)
39
41
40
42
func DiscoverServerAndRole (ctx context.Context , cfg * config.Config ) error {
41
- if len (cfg .Discovery ) == 0 {
43
+ if cfg .Discovery == nil {
44
+ if cfg .Server == "" && cfg .Role == "server" && cfg .Token == "" {
45
+ cfg .Role = "cluster-init"
46
+ }
42
47
return nil
43
48
}
44
49
50
+ if cfg .Token == "" {
51
+ return fmt .Errorf ("token is required to be set" )
52
+ }
53
+
45
54
server , clusterInit , err := discoverServerAndRole (ctx , cfg )
46
55
if err != nil {
47
56
return err
@@ -51,6 +60,7 @@ func DiscoverServerAndRole(ctx context.Context, cfg *config.Config) error {
51
60
} else if server != "" {
52
61
cfg .Server = server
53
62
}
63
+ logrus .Infof ("Using role=%s and server=%s" , cfg .Role , cfg .Server )
54
64
return nil
55
65
56
66
}
@@ -68,13 +78,18 @@ func discoverServerAndRole(ctx context.Context, cfg *config.Config) (string, boo
68
78
ctx , cancel := context .WithCancel (ctx )
69
79
defer cancel ()
70
80
71
- server , err := newJoinServer (ctx , port )
81
+ server , err := newJoinServer (ctx , cfg . Discovery . ServerCacheDuration , port )
72
82
if err != nil {
73
83
return "" , false , err
74
84
}
75
85
86
+ count := cfg .Discovery .ExpectedServers
87
+ if count == 0 {
88
+ count = 3
89
+ }
90
+
76
91
for {
77
- server , clusterInit := server .loop (ctx , cfg .Discovery , port , discovery )
92
+ server , clusterInit := server .loop (ctx , count , cfg .Discovery . Params , port , discovery )
78
93
if clusterInit {
79
94
return "" , true , nil
80
95
}
@@ -90,15 +105,33 @@ func discoverServerAndRole(ctx context.Context, cfg *config.Config) (string, boo
90
105
}
91
106
}
92
107
93
- func (j * joinServer ) loop ( ctx context. Context , params map [string ]string , port int64 , discovery * discover.Discover ) (string , bool ) {
108
+ func (j * joinServer ) addresses ( params map [string ]string , discovery * discover.Discover ) ([] string , error ) {
94
109
addrs , err := discovery .Addrs (discover .Config (params ).String (), log .Default ())
110
+ if err != nil {
111
+ return nil , err
112
+ }
113
+
114
+ var ips []string
115
+ for _ , addr := range addrs {
116
+ host , _ , err := net .SplitHostPort (addr )
117
+ if err == nil {
118
+ ips = append (ips , host )
119
+ } else {
120
+ ips = append (ips , addr )
121
+ }
122
+ }
123
+
124
+ return ips , nil
125
+ }
126
+
127
+ func (j * joinServer ) loop (ctx context.Context , count int , params map [string ]string , port int64 , discovery * discover.Discover ) (string , bool ) {
128
+ addrs , err := j .addresses (params , discovery )
95
129
if err != nil {
96
130
logrus .Errorf ("failed to discover peers to: %v" , err )
97
131
return "" , false
98
132
}
99
133
100
- sort .Strings (addrs )
101
- j .setPeers (addrs )
134
+ addrs = j .setPeers (addrs )
102
135
103
136
var (
104
137
allAgree = true
@@ -128,7 +161,7 @@ func (j *joinServer) loop(ctx context.Context, params map[string]string, port in
128
161
129
162
rancherID := resp .Header .Get ("X-Cattle-Rancherd-Id" )
130
163
if rancherID == "" {
131
- return fmt .Sprintf ("https://%s:%d " , addr , port ), false
164
+ return fmt .Sprintf ("https://%s" , net . JoinHostPort ( addr , strconv . FormatInt ( port , 10 )) ), false
132
165
}
133
166
if i == 0 {
134
167
firstID = rancherID
@@ -148,41 +181,126 @@ func (j *joinServer) loop(ctx context.Context, params map[string]string, port in
148
181
}
149
182
}
150
183
151
- if allAgree && len (addrs ) > 2 && firstID == j .id {
152
- return "" , true
184
+ if firstID != j .id {
185
+ logrus .Infof ("Waiting for peer %s from %v to initialize" , addrs [0 ], addrs )
186
+ return "" , false
153
187
}
154
188
155
- return "" , false
189
+ if len (addrs ) != count {
190
+ logrus .Infof ("Expecting %d servers currently have %v" , count , addrs )
191
+ return "" , false
192
+ }
193
+
194
+ if ! allAgree {
195
+ logrus .Infof ("All peers %v do not agree on the peer list" , addrs )
196
+ return "" , false
197
+ }
198
+
199
+ logrus .Infof ("Currently the elected leader %s from peers %v" , firstID , addrs )
200
+ return "" , true
156
201
}
157
202
158
203
// joinServer holds the state of the bootstrap peer-discovery server: this
// node's identity and the set of peers it has recently discovered.
type joinServer struct {
	// lock is held by setPeers while it updates peers and peerSeen.
	lock sync.Mutex
	// id is the random token generated for this instance in newJoinServer.
	id string
	// peers is the sorted list of the keys currently in peerSeen.
	peers []string
	// peerSeen records, per peer, the last time setPeers was given that peer.
	peerSeen map[string]time.Time
	// cacheDuration is how long a peer stays in peerSeen after it was last
	// reported before setPeers forgets it.
	cacheDuration time.Duration
}
163
210
164
211
// pingResponse is a JSON payload carrying a list of peers.
type pingResponse struct {
	// Peers is serialized under the "peers" key and omitted when empty.
	Peers []string `json:"peers,omitempty"`
}
167
214
168
- func newJoinServer (ctx context.Context , port int64 ) (* joinServer , error ) {
215
+ func newJoinServer (ctx context.Context , cacheDuration string , port int64 ) (* joinServer , error ) {
169
216
id , err := randomtoken .Generate ()
170
217
if err != nil {
171
218
return nil , err
172
219
}
173
220
221
+ if cacheDuration == "" {
222
+ cacheDuration = "5m"
223
+ }
224
+
225
+ duration , err := time .ParseDuration (cacheDuration )
226
+ if err != nil {
227
+ return nil , err
228
+ }
229
+
174
230
j := & joinServer {
175
- id : id ,
231
+ id : id ,
232
+ cacheDuration : duration ,
233
+ peerSeen : map [string ]time.Time {},
234
+ }
235
+
236
+ cert , key , err := cert .GenerateSelfSignedCertKey ("rancherd-bootstrap" , nil , nil )
237
+ if err != nil {
238
+ return nil , err
239
+ }
240
+ certs , err := tls .X509KeyPair (cert , key )
241
+ if err != nil {
242
+ return nil , err
243
+ }
244
+ l , err := tls .Listen ("tcp" , fmt .Sprintf (":%d" , port ), & tls.Config {
245
+ Certificates : []tls.Certificate {
246
+ certs ,
247
+ },
248
+ })
249
+ if err != nil {
250
+ return nil , err
176
251
}
252
+ server := & http.Server {
253
+ BaseContext : func (_ net.Listener ) context.Context {
254
+ return ctx
255
+ },
256
+ Handler : j ,
257
+ }
258
+ go func () {
259
+ err := server .Serve (l )
260
+ if err != nil {
261
+ logrus .Errorf ("failed to server bootstrap http server: %v" , err )
262
+ }
263
+ }()
264
+ go func () {
265
+ <- ctx .Done ()
266
+ server .Shutdown (context .Background ())
267
+ l .Close ()
268
+ }()
177
269
178
- return j , server . ListenAndServe ( ctx , int ( port ), 0 , j , nil )
270
+ return j , nil
179
271
}
180
272
181
- func (j * joinServer ) setPeers (peers []string ) {
273
+ func (j * joinServer ) setPeers (peers []string ) [] string {
182
274
j .lock .Lock ()
183
275
defer j .lock .Unlock ()
184
- logrus .Infof ("current set of peers: %v" , peers )
185
- j .peers = peers
276
+
277
+ // purge
278
+ now := time .Now ()
279
+ for k , v := range j .peerSeen {
280
+ if v .Add (j .cacheDuration ).Before (now ) {
281
+ logrus .Info ("Forgetting peer %s" , k )
282
+ delete (j .peerSeen , k )
283
+ }
284
+ }
285
+
286
+ // add
287
+ for _ , peer := range peers {
288
+ if _ , ok := j .peerSeen [peer ]; ! ok {
289
+ logrus .Info ("New peer discovered %s" , peer )
290
+ }
291
+ j .peerSeen [peer ] = now
292
+ }
293
+
294
+ // sort
295
+ newPeers := make ([]string , 0 , len (j .peerSeen ))
296
+ for k := range j .peerSeen {
297
+ newPeers = append (newPeers , k )
298
+ }
299
+ sort .Strings (newPeers )
300
+
301
+ j .peers = newPeers
302
+ logrus .Infof ("current set of peers: %v" , j .peers )
303
+ return j .peers
186
304
}
187
305
188
306
func (j * joinServer ) ServeHTTP (rw http.ResponseWriter , req * http.Request ) {
0 commit comments