Skip to content

Commit 5e8673d

Browse files
giorio94adamjensenbot
authored andcommitted
Virtual kubelet: reflection-based pod offloading
1 parent fb516db commit 5e8673d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2207
-2723
lines changed

cmd/virtual-kubelet/root/flag.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func InstallFlags(flags *pflag.FlagSet, c *Opts) {
2626
flags.StringVar(&c.NodeName, "nodename", c.NodeName, "kubernetes node name")
2727
flags.StringVar(&c.MetricsAddr, "metrics-addr", c.MetricsAddr, "address to listen for metrics/stats requests")
2828

29-
flags.UintVar(&c.PodSyncWorkers, "pod-sync-workers", c.PodSyncWorkers, "the number of pod synchronization workers")
29+
flags.UintVar(&c.PodWorkers, "pod-reflection-workers", c.PodWorkers, "the number of pod reflection workers")
3030
flags.UintVar(&c.ServiceWorkers, "service-reflection-workers", c.ServiceWorkers, "the number of service reflection workers")
3131
flags.UintVar(&c.EndpointSliceWorkers, "endpointslice-reflection-workers", c.EndpointSliceWorkers,
3232
"the number of endpointslice reflection workers")

cmd/virtual-kubelet/root/http.go

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import (
2828
"github.com/virtual-kubelet/virtual-kubelet/node/api"
2929
"k8s.io/klog/v2"
3030

31-
podprovider "github.com/liqotech/liqo/pkg/virtualKubelet/provider"
31+
"github.com/liqotech/liqo/pkg/virtualKubelet/reflection/workload"
3232
)
3333

3434
// AcceptedCiphers is the list of accepted TLS ciphers, with known weak ciphers elided
@@ -59,8 +59,7 @@ func loadTLSConfig(certPath, keyPath string) (*tls.Config, error) {
5959
}, nil
6060
}
6161

62-
func setupHTTPServer(ctx context.Context, p *podprovider.LiqoProvider, cfg *apiServerConfig,
63-
getPodsFromKubernetes api.PodListerFunc) (_ func(), retErr error) {
62+
func setupHTTPServer(ctx context.Context, handler workload.PodHandler, cfg *apiServerConfig) (_ func(), retErr error) {
6463
var closers []io.Closer
6564
cancel := func() {
6665
for _, c := range closers {
@@ -76,7 +75,7 @@ func setupHTTPServer(ctx context.Context, p *podprovider.LiqoProvider, cfg *apiS
7675
if cfg.CertPath == "" || cfg.KeyPath == "" {
7776
klog.Error("TLS certificates not provided, not setting up pod http server")
7877
} else {
79-
s, err := startPodHandlerServer(ctx, p, cfg, getPodsFromKubernetes)
78+
s, err := startPodHandlerServer(ctx, handler, cfg)
8079
if err != nil {
8180
return nil, err
8281
}
@@ -96,7 +95,7 @@ func setupHTTPServer(ctx context.Context, p *podprovider.LiqoProvider, cfg *apiS
9695
mux := http.NewServeMux()
9796

9897
podMetricsRoutes := api.PodMetricsConfig{
99-
GetStatsSummary: p.GetStatsSummary,
98+
GetStatsSummary: handler.Stats,
10099
}
101100
api.AttachPodMetricsRoutes(podMetricsRoutes, mux)
102101
s := &http.Server{
@@ -109,8 +108,7 @@ func setupHTTPServer(ctx context.Context, p *podprovider.LiqoProvider, cfg *apiS
109108
return cancel, nil
110109
}
111110

112-
func startPodHandlerServer(ctx context.Context, p *podprovider.LiqoProvider,
113-
cfg *apiServerConfig, getPodsFromKubernetes api.PodListerFunc) (*http.Server, error) {
111+
func startPodHandlerServer(ctx context.Context, handler workload.PodHandler, cfg *apiServerConfig) (*http.Server, error) {
114112
tlsCfg, err := loadTLSConfig(cfg.CertPath, cfg.KeyPath)
115113
if err != nil {
116114
klog.Error(err)
@@ -126,11 +124,11 @@ func startPodHandlerServer(ctx context.Context, p *podprovider.LiqoProvider,
126124
mux := http.NewServeMux()
127125

128126
podRoutes := api.PodHandlerConfig{
129-
RunInContainer: p.RunInContainer,
130-
GetContainerLogs: p.GetContainerLogs,
131-
GetPodsFromKubernetes: getPodsFromKubernetes,
132-
GetStatsSummary: p.GetStatsSummary,
133-
GetPods: p.GetPods,
127+
RunInContainer: handler.Exec,
128+
GetContainerLogs: handler.Logs,
129+
GetStatsSummary: handler.Stats,
130+
GetPodsFromKubernetes: handler.List,
131+
GetPods: handler.List,
134132
}
135133

136134
api.AttachPodRoutes(podRoutes, mux, true)

cmd/virtual-kubelet/root/opts.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package root
1616

1717
import (
18+
"fmt"
1819
"os"
1920
"strconv"
2021
"time"
@@ -33,7 +34,7 @@ const (
3334
DefaultMetricsAddr = ":10255"
3435
DefaultListenPort = 10250
3536

36-
DefaultPodSyncWorkers = 10
37+
DefaultPodWorkers = 10
3738
DefaultServiceWorkers = 3
3839
DefaultEndpointSliceWorkers = 10
3940
DefaultConfigMapWorkers = 3
@@ -59,8 +60,8 @@ type Opts struct {
5960

6061
MetricsAddr string
6162

62-
// Number of workers to use to handle pod notifications and resource reflection
63-
PodSyncWorkers uint
63+
// Number of workers to use to handle pod and resource reflection
64+
PodWorkers uint
6465
ServiceWorkers uint
6566
EndpointSliceWorkers uint
6667
ConfigMapWorkers uint
@@ -99,8 +100,8 @@ func SetDefaultOpts(c *Opts) error {
99100
c.MetricsAddr = DefaultMetricsAddr
100101
}
101102

102-
if c.PodSyncWorkers == 0 {
103-
c.PodSyncWorkers = DefaultPodSyncWorkers
103+
if c.PodWorkers == 0 {
104+
c.PodWorkers = DefaultPodWorkers
104105
}
105106

106107
if c.ServiceWorkers == 0 {
@@ -138,7 +139,7 @@ func SetDefaultOpts(c *Opts) error {
138139
c.HomeKubeconfig = os.Getenv("KUBECONFIG")
139140
}
140141
if c.LiqoIpamServer == "" {
141-
c.LiqoIpamServer = DefaultLiqoIpamServer
142+
c.LiqoIpamServer = fmt.Sprintf("%v:%v", consts.NetworkManagerServiceName, consts.NetworkManagerIpamPort)
142143
}
143144

144145
return nil

cmd/virtual-kubelet/root/root.go

Lines changed: 5 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,23 @@ package root
1818
import (
1919
"context"
2020
"os"
21-
"path"
22-
"time"
2321

2422
"github.com/pkg/errors"
2523
"github.com/spf13/cobra"
2624
"github.com/virtual-kubelet/virtual-kubelet/node"
2725
corev1 "k8s.io/api/core/v1"
2826
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2927
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30-
"k8s.io/apimachinery/pkg/fields"
3128
"k8s.io/client-go/discovery"
32-
kubeinformers "k8s.io/client-go/informers"
3329
"k8s.io/client-go/kubernetes"
34-
"k8s.io/client-go/kubernetes/scheme"
3530
"k8s.io/client-go/rest"
36-
"k8s.io/client-go/tools/record"
37-
"k8s.io/client-go/util/workqueue"
3831
"k8s.io/klog/v2"
3932

4033
"github.com/liqotech/liqo/internal/utils/errdefs"
4134
"github.com/liqotech/liqo/pkg/utils"
4235
"github.com/liqotech/liqo/pkg/utils/restcfg"
4336
"github.com/liqotech/liqo/pkg/virtualKubelet"
4437
nodeprovider "github.com/liqotech/liqo/pkg/virtualKubelet/liqoNodeProvider"
45-
"github.com/liqotech/liqo/pkg/virtualKubelet/manager"
4638
podprovider "github.com/liqotech/liqo/pkg/virtualKubelet/provider"
4739
)
4840

@@ -70,8 +62,8 @@ func runRootCommand(ctx context.Context, c *Opts) error {
7062
return errors.New("cluster id is mandatory")
7163
}
7264

73-
if c.PodSyncWorkers == 0 {
74-
return errdefs.InvalidInput("pod sync workers must be greater than 0")
65+
if c.PodWorkers == 0 || c.ServiceWorkers == 0 || c.EndpointSliceWorkers == 0 || c.ConfigMapWorkers == 0 || c.SecretWorkers == 0 {
66+
return errdefs.InvalidInput("reflection workers must be greater than 0")
7567
}
7668

7769
config, err := utils.GetRestConfig(c.HomeKubeconfig)
@@ -85,27 +77,6 @@ func runRootCommand(ctx context.Context, c *Opts) error {
8577
return err
8678
}
8779

88-
// Create a shared informer factory for Kubernetes pods in the current namespace (if specified) and scheduled to the current node.
89-
podInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(
90-
client,
91-
c.InformerResyncPeriod,
92-
kubeinformers.WithTweakListOptions(func(options *metav1.ListOptions) {
93-
options.FieldSelector = fields.OneTermEqualSelector("spec.nodeName", c.NodeName).String()
94-
}))
95-
podInformer := podInformerFactory.Core().V1().Pods()
96-
97-
// Create another shared informer factory for Kubernetes secrets and configmaps (not subject to any selectors).
98-
scmInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(client, c.InformerResyncPeriod)
99-
// Create a secret informer and a config map informer so we can pass their listers to the resource manager.
100-
secretInformer := scmInformerFactory.Core().V1().Secrets()
101-
configMapInformer := scmInformerFactory.Core().V1().ConfigMaps()
102-
serviceInformer := scmInformerFactory.Core().V1().Services()
103-
104-
rm, err := manager.NewResourceManager(podInformer.Lister(), secretInformer.Lister(), configMapInformer.Lister(), serviceInformer.Lister())
105-
if err != nil {
106-
return errors.Wrap(err, "could not create resource manager")
107-
}
108-
10980
// Initialize the pod provider
11081
podcfg := podprovider.InitConfig{
11182
HomeConfig: config,
@@ -114,10 +85,12 @@ func runRootCommand(ctx context.Context, c *Opts) error {
11485

11586
Namespace: c.KubeletNamespace,
11687
NodeName: c.NodeName,
88+
NodeIP: os.Getenv("VKUBELET_POD_IP"),
11789

11890
LiqoIpamServer: c.LiqoIpamServer,
11991
InformerResyncPeriod: c.InformerResyncPeriod,
12092

93+
PodWorkers: c.PodWorkers,
12194
ServiceWorkers: c.ServiceWorkers,
12295
EndpointSliceWorkers: c.EndpointSliceWorkers,
12396
ConfigMapWorkers: c.ConfigMapWorkers,
@@ -195,55 +168,12 @@ func runRootCommand(ctx context.Context, c *Opts) error {
195168
return err
196169
}
197170

198-
eb := record.NewBroadcaster()
199-
pc, err := node.NewPodController(node.PodControllerConfig{
200-
PodClient: client.CoreV1(),
201-
PodInformer: podInformer,
202-
EventRecorder: eb.NewRecorder(scheme.Scheme, corev1.EventSource{Component: path.Join(c.NodeName, "pod-controller")}),
203-
Provider: podProvider,
204-
SecretInformer: secretInformer,
205-
ConfigMapInformer: configMapInformer,
206-
ServiceInformer: serviceInformer,
207-
SyncPodsFromKubernetesRateLimiter: newPodControllerWorkqueueRateLimiter(),
208-
SyncPodStatusFromProviderRateLimiter: newPodControllerWorkqueueRateLimiter(),
209-
DeletePodsFromKubernetesRateLimiter: newPodControllerWorkqueueRateLimiter(),
210-
})
211-
if err != nil {
212-
return errors.Wrap(err, "error setting up pod controller")
213-
}
214-
215-
go podInformerFactory.Start(ctx.Done())
216-
go scmInformerFactory.Start(ctx.Done())
217-
218-
cancelHTTP, err := setupHTTPServer(ctx, podProvider, getAPIConfig(c), func(context.Context) ([]*corev1.Pod, error) {
219-
return rm.GetPods(), nil
220-
})
171+
cancelHTTP, err := setupHTTPServer(ctx, podProvider.PodHandler(), getAPIConfig(c))
221172
if err != nil {
222173
return errors.Wrap(err, "error while setting up HTTP server")
223174
}
224175
defer cancelHTTP()
225176

226-
go func() {
227-
if err := pc.Run(ctx, int(c.PodSyncWorkers)); err != nil && errors.Is(err, context.Canceled) {
228-
klog.Fatal(errors.Wrap(err, "error in pod controller running"))
229-
}
230-
}()
231-
232-
if c.StartupTimeout > 0 {
233-
ctx, cancel := context.WithTimeout(ctx, c.StartupTimeout)
234-
klog.Info("Waiting for pod controller / VK to be ready")
235-
select {
236-
case <-ctx.Done():
237-
cancel()
238-
return ctx.Err()
239-
case <-pc.Ready():
240-
}
241-
cancel()
242-
if err := pc.Err(); err != nil {
243-
return err
244-
}
245-
}
246-
247177
go func() {
248178
if err := nodeRunner.Run(ctx); err != nil {
249179
klog.Error(err, "error in pod controller running")
@@ -259,14 +189,6 @@ func runRootCommand(ctx context.Context, c *Opts) error {
259189
return nil
260190
}
261191

262-
// newPodControllerWorkqueueRateLimiter returns a new custom rate limiter to be assigned to the pod controller workqueues.
263-
// Differently from the standard workqueue.DefaultControllerRateLimiter(), composed of an overall bucket rate limiter
264-
// and a per-item exponential rate limiter to address failures, this includes only the latter component. Hance avoiding
265-
// performance limitations when processing a high number of pods in parallel.
266-
func newPodControllerWorkqueueRateLimiter() workqueue.RateLimiter {
267-
return workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second)
268-
}
269-
270192
func getVersion(config *rest.Config) string {
271193
client, err := discovery.NewDiscoveryClientForConfig(config)
272194
if err != nil {

deployments/liqo/files/liqo-virtual-kubelet-remote-ClusterRole.yaml

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ rules:
33
- ""
44
resources:
55
- configmaps
6-
- pods
76
- secrets
87
- services
98
verbs:
@@ -17,44 +16,40 @@ rules:
1716
- apiGroups:
1817
- ""
1918
resources:
20-
- pods/status
21-
- services/status
19+
- pods
2220
verbs:
23-
- create
24-
- delete
2521
- get
2622
- list
27-
- patch
28-
- update
2923
- watch
3024
- apiGroups:
31-
- apps
25+
- discovery.k8s.io
3226
resources:
33-
- replicasets
27+
- endpointslices
3428
verbs:
3529
- create
3630
- delete
3731
- get
3832
- list
33+
- patch
3934
- update
4035
- watch
4136
- apiGroups:
42-
- discovery.k8s.io
37+
- metrics.k8s.io
4338
resources:
44-
- endpointslices
39+
- pods
4540
verbs:
46-
- create
47-
- delete
4841
- get
4942
- list
50-
- patch
51-
- update
5243
- watch
5344
- apiGroups:
54-
- metrics.k8s.io
45+
- virtualkubelet.liqo.io
5546
resources:
56-
- pods
47+
- shadowpods
5748
verbs:
49+
- create
50+
- delete
5851
- get
5952
- list
53+
- patch
54+
- update
6055
- watch

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ require (
1818
github.com/metal-stack/go-ipam v1.8.4-0.20210322080203-5a9da5064b27
1919
github.com/miekg/dns v1.1.35
2020
github.com/mittwald/go-helm-client v0.8.3-0.20211026133933-a26889186afc
21-
github.com/modern-go/reflect2 v1.0.1
2221
github.com/onsi/ginkgo v1.16.4
2322
github.com/onsi/gomega v1.16.0
2423
github.com/openshift/api v0.0.0-20210521075222-e273a339932a
@@ -155,6 +154,7 @@ require (
155154
github.com/moby/sys/mountinfo v0.4.1 // indirect
156155
github.com/moby/term v0.0.0-20210610120745-9d4ed1856297 // indirect
157156
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
157+
github.com/modern-go/reflect2 v1.0.1 // indirect
158158
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
159159
github.com/morikuni/aec v1.0.0 // indirect
160160
github.com/nxadm/tail v1.4.8 // indirect

pkg/liqonet/ipam/ipam.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1096,7 +1096,7 @@ func (liqoIPAM *IPAM) GetHomePodIP(ctx context.Context, request *GetHomePodIPReq
10961096
homeIP, err := liqoIPAM.getHomePodIPInternal(request.GetClusterID(), request.GetIp())
10971097
if err != nil {
10981098
return &GetHomePodIPResponse{}, fmt.Errorf("cannot get home Pod IP starting from IP %s: %w",
1099-
request.GetClusterID(), err)
1099+
request.GetIp(), err)
11001100
}
11011101
return &GetHomePodIPResponse{HomeIP: homeIP}, nil
11021102
}

0 commit comments

Comments
 (0)