-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.py
148 lines (121 loc) · 6.92 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import sys
import time

from kubernetes import client, config
from kubernetes.client.rest import ApiException

from utils import *
from recommender import *
# Recommender name used to pick the VPA objects this process is responsible for.
RECOMMENDER_NAME = "clever"
# Pause, in seconds, between two passes of the main loop.
SLEEP_WINDOW = 60

# VPA custom resources: API group plus singular/plural resource names.
DOMAIN = "autoscaling.k8s.io"
VPA_NAME = "verticalpodautoscaler"
VPA_PLURAL = VPA_NAME + "s"
VPA_CHECKPOINT_NAME = VPA_NAME + "checkpoint"
VPA_CHECKPOINT_PLURAL = VPA_CHECKPOINT_NAME + "s"

# Prometheus metric names queried for per-node CPU frequency information.
MAX_CPU_FREQUENCY_QUERY = "node_cpu_frequency_max_hertz"
MIN_CPU_FREQUENCY_QUERY = "node_cpu_frequency_min_hertz"
LATEST_CPU_FREQUENCY_QUERY = "node_cpu_scaling_frequency_hertz"

# In-process caches: node CPU frequencies (maximum and most recently seen)
# and the default CPU requests recorded per VPA. They start empty and are
# filled in by the main loop.
MAX_NODE_CPU_FREQUENCY = dict()
LATEST_NODE_CPU_FREQUENCY = dict()
ACTIVE_VPA_DEFAULT_CPU_REQUESTS = dict()
# Script entry point: poll node CPU frequencies from Prometheus and, whenever a
# node's frequency changes, patch fresh recommendations into the status of
# every VPA (selected by RECOMMENDER_NAME) that manages pods on that node.
if __name__ == '__main__':
    # Use the in-cluster configuration when KUBERNETES_PORT is present in the
    # environment; otherwise fall back to the local kubeconfig.
    if 'KUBERNETES_PORT' in os.environ:
        config.load_incluster_config()
    else:
        config.load_kube_config()

    # Get the api instances to interact with the cluster.
    api_client = client.api_client.ApiClient()
    v1 = client.ApiextensionsV1Api(api_client)
    corev1 = client.CoreV1Api(api_client)
    crds = client.CustomObjectsApi(api_client)

    # Initialize the prometheus client.
    prom_client = PromClient()

    # Initialize the node CPU frequency cache. A None result is treated as a
    # failed Prometheus query and aborts start-up.
    MAX_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, MAX_CPU_FREQUENCY_QUERY)
    if MAX_NODE_CPU_FREQUENCY is None:
        print("Prometheus Query {} at Endpoint {} failed.".format(MAX_CPU_FREQUENCY_QUERY, prom_client.prom_address))
        sys.exit(-1)

    LATEST_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, LATEST_CPU_FREQUENCY_QUERY)
    if LATEST_NODE_CPU_FREQUENCY is None:
        print("Prometheus Query {} at Endpoint {} failed.".format(LATEST_CPU_FREQUENCY_QUERY, prom_client.prom_address))
        sys.exit(-1)
    print("Initialized the node CPU frequency cache {}".format(LATEST_NODE_CPU_FREQUENCY))

    # Refuse to run when the VerticalPodAutoscaler CRD is not installed.
    current_crds = [x['spec']['names']['kind'].lower() for x in v1.list_custom_resource_definition().to_dict()['items']]
    if VPA_NAME not in current_crds:
        print("VerticalPodAutoscaler CRD is not created!")
        sys.exit(-1)

    while True:
        print("Checking the frequency and the target IPS")

        # Updating the default VPA CPU cache.
        vpas = crds.list_cluster_custom_object(group=DOMAIN, version="v1", plural=VPA_PLURAL)
        selectedVpas = selects_recommender(vpas, RECOMMENDER_NAME)

        # Keep the mapping between nodes and vpas, which manage pods on those nodes.
        # NOTE(review): caches below are keyed by VPA name only, so two VPAs
        # with the same name in different namespaces would share an entry.
        node_vpas = {}
        for vpa in selectedVpas:
            vpa_name = vpa["metadata"]["name"]
            default_requests, vpa_nodes = get_vpa_detailed_info(corev1, vpa)

            # Only the first observation of a VPA is recorded as its default request.
            if vpa_name not in ACTIVE_VPA_DEFAULT_CPU_REQUESTS:
                ACTIVE_VPA_DEFAULT_CPU_REQUESTS[vpa_name] = default_requests
                print("Updating the default CPU request cache for newly discovered VPA {}".format(vpa_name))
                print(ACTIVE_VPA_DEFAULT_CPU_REQUESTS)

            # Select VPAs per node (each node counted once per VPA).
            for node in set(vpa_nodes.values()):
                node_vpas.setdefault(node, []).append(vpa)

        print("Discovering VPAs running on the following nodes.")
        print(node_vpas)

        # Obtain the latest node cpu frequencies.
        CUR_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, LATEST_CPU_FREQUENCY_QUERY)
        print("Current node CPU frequencies\n {}".format(CUR_NODE_CPU_FREQUENCY))

        if CUR_NODE_CPU_FREQUENCY is None:
            # Same failure signal as at start-up; skip this pass instead of
            # feeding None into the comparison helpers, and retry next time.
            print("Prometheus Query {} at Endpoint {} failed.".format(LATEST_CPU_FREQUENCY_QUERY, prom_client.prom_address))
        elif CUR_NODE_CPU_FREQUENCY != LATEST_NODE_CPU_FREQUENCY:
            # Select nodes with frequency changes.
            nodes_with_frequency_changes = find_node_with_frequency_changes(CUR_NODE_CPU_FREQUENCY, LATEST_NODE_CPU_FREQUENCY)
            print("Discover nodes with frequency changes {}".format(nodes_with_frequency_changes))

            # Collect each impacted VPA once, even if it spans several changed nodes.
            vpas_to_update = {}
            for node in nodes_with_frequency_changes:
                if node not in node_vpas:
                    print("Frequency changes on node {} does not impact any vpa managed pods!".format(node))
                    continue
                for vpa in node_vpas[node]:
                    vpas_to_update[vpa["metadata"]["name"]] = vpa
            print("These VPAs {} are impacted by the following nodes with frequency changes {}.".format(vpas_to_update.keys(), nodes_with_frequency_changes))

            patch_failed = False
            for vpa in vpas_to_update.values():
                vpa_name = vpa["metadata"]["name"]
                vpa_namespace = vpa["metadata"]["namespace"]

                print("Recommend sizes according to current frequency for vpas on nodes with frequency changes!")
                recommendations = get_recommendation(vpa, corev1, CUR_NODE_CPU_FREQUENCY, MAX_NODE_CPU_FREQUENCY, ACTIVE_VPA_DEFAULT_CPU_REQUESTS[vpa_name])
                print("Recommendations for VPA {} are {}".format(vpa_name, recommendations))

                if not recommendations:
                    print("No new recommendations obtained, so skip updating the vpa object {}".format(vpa_name))
                    continue

                # Update the recommendations.
                body = {"status": {"recommendation": {"containerRecommendations": recommendations}}}

                # Update the VPA object, reusing the CustomObjectsApi client created at start-up.
                # API call doc: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CustomObjectsApi.md#patch_namespaced_custom_object
                try:
                    vpa_updated = crds.patch_namespaced_custom_object(group=DOMAIN, version="v1", plural=VPA_PLURAL,
                                                                      namespace=vpa_namespace, name=vpa_name,
                                                                      body=body)
                    print("Successfully patched VPA object with the recommendation: %s" %
                          vpa_updated['status']['recommendation']['containerRecommendations'])
                except ApiException as e:
                    patch_failed = True
                    print("Exception when calling CustomObjectsApi->patch_namespaced_custom_object: %s\n" % e)

            # Remember the frequencies that were just handled so the next pass
            # only reacts to new changes. Keep the old baseline when a patch
            # failed, so the same change is retried on the next pass.
            if not patch_failed:
                LATEST_NODE_CPU_FREQUENCY = CUR_NODE_CPU_FREQUENCY

        print("Sleeping for {} seconds".format(SLEEP_WINDOW))
        print("=====================================================================================================")
        time.sleep(SLEEP_WINDOW)