Skip to content

Commit 3328858

Browse files
Add server discovery
1 parent 21423a0 commit 3328858

File tree

6 files changed

+687
-11
lines changed

6 files changed

+687
-11
lines changed

go.mod

+2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ replace (
3131
)
3232

3333
require (
34+
github.com/hashicorp/go-discover v0.0.0-20201029210230-738cb3105cd0 // indirect
3435
github.com/pkg/errors v0.9.1
36+
github.com/rancher/dynamiclistener v0.3.1-0.20210616080009-9865ae859c7f // indirect
3537
github.com/rancher/rancher/pkg/apis v0.0.0-20210616082234-54d4afc27c36
3638
github.com/rancher/system-agent v0.0.1-alpha30
3739
github.com/rancher/wharfie v0.3.2

go.sum

+480
Large diffs are not rendered by default.

pkg/config/types.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ var (
3838

3939
type Config struct {
4040
RuntimeConfig
41-
KubernetesVersion string `json:"kubernetesVersion,omitempty"`
42-
RancherVersion string `json:"rancherVersion,omitempty"`
43-
Server string `json:"server,omitempty"`
44-
Role string `json:"role,omitempty"`
41+
KubernetesVersion string `json:"kubernetesVersion,omitempty"`
42+
RancherVersion string `json:"rancherVersion,omitempty"`
43+
Server string `json:"server,omitempty"`
44+
Discovery map[string]string `json:"discovery,omitempty"`
45+
Role string `json:"role,omitempty"`
4546

4647
RancherValues map[string]interface{} `json:"rancherValues,omitempty"`
4748
PreInstructions []plan.Instruction `json:"preInstructions,omitempty"`

pkg/discovery/discovery.go

+189-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,197 @@ package discovery
22

33
import (
44
"context"
5+
"crypto/tls"
6+
"encoding/json"
7+
"fmt"
8+
"io/ioutil"
9+
"log"
10+
"net/http"
11+
"sort"
12+
"sync"
13+
"time"
514

15+
"github.com/hashicorp/go-discover"
16+
"github.com/rancher/dynamiclistener/server"
617
"github.com/rancher/rancherd/pkg/config"
18+
"github.com/rancher/wrangler/pkg/data/convert"
19+
"github.com/rancher/wrangler/pkg/randomtoken"
20+
"github.com/rancher/wrangler/pkg/slice"
21+
"github.com/sirupsen/logrus"
22+
23+
// Include kubernetes provider
24+
_ "github.com/hashicorp/go-discover/provider/k8s"
25+
)
26+
27+
// insecureHTTPClient is the shared HTTP client used to probe discovered peers.
// Certificate verification is disabled (InsecureSkipVerify), so anything read
// through this client is unauthenticated.
// NOTE(review): presumably peers only present self-signed certificates at this
// stage — confirm before reusing this client for anything else.
var insecureHTTPClient = http.Client{
	Timeout: 10 * time.Second,
	Transport: &http.Transport{
		Proxy:               http.ProxyFromEnvironment,
		TLSHandshakeTimeout: 5 * time.Second,
		TLSClientConfig:     &tls.Config{InsecureSkipVerify: true},
	},
}
839

9-
func FindServer(ctx context.Context, cfg *config.Config) (string, error) {
10-
return cfg.Server, nil
40+
func DiscoverServerAndRole(ctx context.Context, cfg *config.Config) error {
41+
if len(cfg.Discovery) == 0 {
42+
return nil
43+
}
44+
45+
server, clusterInit, err := discoverServerAndRole(ctx, cfg)
46+
if err != nil {
47+
return err
48+
}
49+
if clusterInit {
50+
cfg.Role = "cluster-init"
51+
} else if server != "" {
52+
cfg.Server = server
53+
}
54+
return nil
55+
56+
}
57+
func discoverServerAndRole(ctx context.Context, cfg *config.Config) (string, bool, error) {
58+
discovery, err := discover.New()
59+
if err != nil {
60+
return "", false, err
61+
}
62+
63+
port, err := convert.ToNumber(cfg.RancherValues["hostPort"])
64+
if err != nil || port == 0 {
65+
port = 8443
66+
}
67+
68+
ctx, cancel := context.WithCancel(ctx)
69+
defer cancel()
70+
71+
server, err := NewJoinServer(ctx, port)
72+
if err != nil {
73+
return "", false, err
74+
}
75+
76+
for {
77+
server, clusterInit := server.loop(ctx, cfg.Discovery, port, discovery)
78+
if clusterInit {
79+
return "", true, nil
80+
}
81+
if server != "" {
82+
return server, false, nil
83+
}
84+
logrus.Info("Waiting to discover server")
85+
select {
86+
case <-ctx.Done():
87+
return "", false, fmt.Errorf("interrupted waiting to discover server: %w", ctx.Err())
88+
case <-time.After(5 * time.Second):
89+
}
90+
}
91+
}
92+
93+
func (j *joinServer) loop(ctx context.Context, params map[string]string, port int64, discovery *discover.Discover) (string, bool) {
94+
addrs, err := discovery.Addrs(discover.Config(params).String(), log.Default())
95+
if err != nil {
96+
logrus.Errorf("failed to discover peers to: %v", err)
97+
return "", false
98+
}
99+
100+
sort.Strings(addrs)
101+
j.setPeers(addrs)
102+
103+
var (
104+
allAgree = true
105+
firstID = ""
106+
)
107+
for i, addr := range addrs {
108+
url := fmt.Sprintf("https://%s:%d/cacerts", addr, port)
109+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
110+
if err != nil {
111+
logrus.Errorf("failed to construct request for %s: %v", url, err)
112+
allAgree = false
113+
return "", false
114+
}
115+
resp, err := insecureHTTPClient.Do(req)
116+
if err != nil {
117+
logrus.Errorf("failed to connect to %s: %v", url, err)
118+
allAgree = false
119+
continue
120+
}
121+
122+
data, err := ioutil.ReadAll(resp.Body)
123+
resp.Body.Close()
124+
if err != nil || resp.StatusCode != http.StatusOK {
125+
logrus.Errorf("failed to read response from %s: code %d: %v", url, resp.StatusCode, err)
126+
allAgree = false
127+
continue
128+
}
129+
130+
rancherID := resp.Header.Get("X-Cattle-Rancherd-Id")
131+
if rancherID == "" {
132+
return fmt.Sprintf("https://%s:%d", addr, port), false
133+
}
134+
if i == 0 {
135+
firstID = rancherID
136+
}
137+
138+
var pingResponse pingResponse
139+
if err := json.Unmarshal(data, &pingResponse); err != nil {
140+
logrus.Errorf("failed to unmarshal response (%s) from %s: %v", data, url, err)
141+
allAgree = false
142+
continue
143+
}
144+
145+
if !slice.StringsEqual(addrs, pingResponse.Peers) {
146+
logrus.Infof("Peer %s does not agree on peer list, %v != %v", addr, addrs, pingResponse.Peers)
147+
allAgree = false
148+
continue
149+
}
150+
}
151+
152+
if allAgree && len(addrs) > 2 && firstID == j.id {
153+
return "", true
154+
}
155+
156+
return "", false
157+
}
158+
159+
// joinServer is the HTTP handler a node serves while discovery is in progress.
// It answers every request with its ID and the peer list it currently sees.
type joinServer struct {
	// id is the random token generated in NewJoinServer; it is sent to peers
	// in the X-Cattle-Rancherd-Id response header.
	id string

	// lock guards peers, which setPeers replaces and ServeHTTP reads.
	lock  sync.Mutex
	peers []string
}
164+
165+
// pingResponse is the JSON body a join server writes in ServeHTTP and that
// loop decodes when probing a peer.
type pingResponse struct {
	// Peers is the peer address list the responding node currently holds.
	// It is omitted from the JSON entirely when empty.
	Peers []string `json:"peers,omitempty"`
}
168+
169+
func NewJoinServer(ctx context.Context, port int64) (*joinServer, error) {
170+
id, err := randomtoken.Generate()
171+
if err != nil {
172+
return nil, err
173+
}
174+
175+
j := &joinServer{
176+
id: id,
177+
}
178+
179+
return j, server.ListenAndServe(ctx, int(port), 0, j, nil)
180+
}
181+
182+
func (j *joinServer) setPeers(peers []string) {
183+
j.lock.Lock()
184+
defer j.lock.Unlock()
185+
logrus.Infof("current set of peers: %v", peers)
186+
j.peers = peers
187+
}
188+
189+
func (j *joinServer) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
190+
j.lock.Lock()
191+
defer j.lock.Unlock()
192+
193+
rw.Header().Set("X-Cattle-Rancherd-Id", j.id)
194+
rw.Header().Set("Content-Type", "application/json")
195+
_ = json.NewEncoder(rw).Encode(pingResponse{
196+
Peers: j.peers,
197+
})
11198
}

pkg/plan/bootstrap.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package plan
22

33
import (
4+
"context"
45
"fmt"
56

67
"github.com/rancher/rancherd/pkg/config"
8+
"github.com/rancher/rancherd/pkg/discovery"
79
"github.com/rancher/rancherd/pkg/join"
810
"github.com/rancher/rancherd/pkg/probe"
911
"github.com/rancher/rancherd/pkg/rancher"
@@ -62,11 +64,15 @@ func toJoinPlan(cfg *config.Config, dataDir string) (*applyinator.Plan, error) {
6264
return (*applyinator.Plan)(&plan), nil
6365
}
6466

65-
func ToPlan(config *config.Config, dataDir string) (*applyinator.Plan, error) {
66-
if config.Role == "cluster-init" {
67-
return toInitPlan(config, dataDir)
67+
func ToPlan(ctx context.Context, config *config.Config, dataDir string) (*applyinator.Plan, error) {
68+
newCfg := *config
69+
if err := discovery.DiscoverServerAndRole(ctx, &newCfg); err != nil {
70+
return nil, err
71+
}
72+
if newCfg.Role == "cluster-init" {
73+
return toInitPlan(&newCfg, dataDir)
6874
}
69-
return toJoinPlan(config, dataDir)
75+
return toJoinPlan(&newCfg, dataDir)
7076
}
7177

7278
func (p *plan) addInstructions(cfg *config.Config, dataDir string) error {

pkg/rancherd/rancher.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (r *Rancherd) execute(ctx context.Context) error {
5757

5858
logrus.Infof("Bootstrapping Rancher (%s/%s)", rancherVersion, k8sVersion)
5959

60-
nodePlan, err := plan.ToPlan(&cfg, r.cfg.DataDir)
60+
nodePlan, err := plan.ToPlan(ctx, &cfg, r.cfg.DataDir)
6161
if err != nil {
6262
return fmt.Errorf("generating plan: %w", err)
6363
}

0 commit comments

Comments
 (0)