Skip to content

Commit ecf984d

Browse files
committed
add vcluster support
1 parent 6d2b995 commit ecf984d

File tree

7 files changed

+118
-35
lines changed

7 files changed

+118
-35
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,22 @@ Refer to `krr simple --help`, and look at the flags `--prometheus-url`, `--prome
400400
If you need help, contact us on Slack, email, or by opening a GitHub issue.
401401
</details>
402402

403+
<details>
404+
<summary>VCluster</summary>
405+
406+
KRR supports VCluster when Prometheus runs outside of the VCluster (on the physical cluster or in a centralized setup). Because VCluster renames pods on the physical cluster, you need to provide:
407+
408+
- `vcluster-namespace`: The namespace on the physical cluster where the VCluster is deployed
409+
- `vcluster-name` : The name of your VCluster (set during VCluster deployment)
410+
411+
Other parameters, such as the namespace selector and pod selector, work as expected.
412+
413+
```sh
414+
krr simple --vcluster-name my-vcluster-name --vcluster-namespace my-vcluster-namespace
415+
```
416+
417+
</details>
418+
403419
<details>
404420
<summary>Debug mode</summary>
405421
If you want to see additional debug logs:

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,5 @@ tzlocal==5.2 ; python_version >= "3.9" and python_full_version < "3.13"
5353
urllib3==1.26.19 ; python_version >= "3.9" and python_full_version < "3.13"
5454
websocket-client==1.7.0 ; python_version >= "3.9" and python_full_version < "3.13"
5555
zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.13"
56-
tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
56+
tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
57+
# hashlib is part of the Python standard library; it must not be listed as a pip requirement.

robusta_krr/core/integrations/prometheus/metrics/base.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from robusta_krr.core.models.config import settings
1919
from robusta_krr.core.models.objects import K8sObjectData
2020

21+
import hashlib
2122

2223
class PrometheusSeries(TypedDict):
2324
metric: dict[str, Any]
@@ -259,3 +260,25 @@ def combine_batches(self, results: list[PodsTimeData]) -> PodsTimeData:
259260
"""
260261

261262
return reduce(lambda x, y: x | y, results, {})
263+
264+
## Vcluster
265+
def get_vcluster_pod_real_name(self, vcluster_pod_name, vcluster_pod_namespace) -> str:
    """
    Translate a pod name as seen inside a vcluster into the name used on the physical cluster.

    When `settings.vcluster_name` is not set, the pod name is returned unchanged.
    Otherwise the name is built as `<pod>-x-<namespace>-x-<vcluster name>`. If that
    name is longer than 63 characters, it is cut to its first 52 characters and
    suffixed with `-` plus the first 10 hex digits of the SHA-256 of the full name.

    :param vcluster_pod_name: pod name as known inside the vcluster.
    :param vcluster_pod_namespace: namespace of the pod inside the vcluster.
    :return: the pod name to use in Prometheus selectors.
    """
    if settings.vcluster_name is None:
        return vcluster_pod_name

    full_name = "-x-".join((vcluster_pod_name, vcluster_pod_namespace, settings.vcluster_name))

    # 63 is presumably the Kubernetes name length limit the renaming has to respect.
    if len(full_name) <= 63:
        return full_name

    # The digest is only needed for over-long names, so it is computed after the length check.
    digest = hashlib.sha256(full_name.encode()).hexdigest()
    shortened_name = full_name[:52] + "-" + digest[:10]

    # vcluster's own name shortening collapses ".-" into "-", so a name truncated right
    # after a dot must be normalised the same way to match the real pod name.
    return shortened_name.replace(".-", "-")
278+
279+
280+
def get_pod_namespace(self, object_namespace) -> str:
    """
    Pick the namespace to use when querying Prometheus for an object's pods.

    Returns `settings.vcluster_namespace` when it is set; otherwise returns
    the object's own namespace unchanged.

    :param object_namespace: namespace of the scanned object.
    :return: the namespace to put in the Prometheus selector.
    """
    namespace_override = settings.vcluster_namespace
    if namespace_override is not None:
        return namespace_override
    return object_namespace
Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from robusta_krr.core.models.objects import K8sObjectData
22

33
from .base import PrometheusMetric, QueryType
4+
import logging
45

5-
6+
logger = logging.getLogger("krr")
7+
68
class CPULoader(PrometheusMetric):
79
"""
810
A metric loader for loading CPU usage metrics.
@@ -11,20 +13,24 @@ class CPULoader(PrometheusMetric):
1113
query_type: QueryType = QueryType.QueryRange
1214

1315
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
14-
pods_selector = "|".join(pod.name for pod in object.pods)
16+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
17+
pods_namespace = self.get_pod_namespace(object.namespace)
1518
cluster_label = self.get_prometheus_cluster_label()
16-
return f"""
19+
prom_query = f"""
1720
max(
18-
rate(
19-
container_cpu_usage_seconds_total{{
20-
namespace="{object.namespace}",
21-
pod=~"{pods_selector}",
22-
container="{object.container}"
23-
{cluster_label}
24-
}}[{step}]
25-
)
26-
) by (container, pod, job)
27-
"""
21+
rate(
22+
container_cpu_usage_seconds_total{{
23+
namespace="{pods_namespace}",
24+
pod=~"{pods_selector}",
25+
container="{object.container}"
26+
{cluster_label}
27+
}}[{step}]
28+
)
29+
) by (container, pod, job)
30+
"""
31+
logger.debug(f"{prom_query}")
32+
33+
return prom_query
2834

2935

3036
def PercentileCPULoader(percentile: float) -> type[PrometheusMetric]:
@@ -37,15 +43,16 @@ def PercentileCPULoader(percentile: float) -> type[PrometheusMetric]:
3743

3844
class PercentileCPULoader(PrometheusMetric):
3945
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
40-
pods_selector = "|".join(pod.name for pod in object.pods)
46+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
47+
pods_namespace = self.get_pod_namespace(object.namespace)
4148
cluster_label = self.get_prometheus_cluster_label()
42-
return f"""
49+
prom_query = f"""
4350
quantile_over_time(
4451
{round(percentile / 100, 2)},
4552
max(
4653
rate(
4754
container_cpu_usage_seconds_total{{
48-
namespace="{object.namespace}",
55+
namespace="{pods_namespace}",
4956
pod=~"{pods_selector}",
5057
container="{object.container}"
5158
{cluster_label}
@@ -55,6 +62,8 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
5562
[{duration}:{step}]
5663
)
5764
"""
65+
logger.debug(f"{prom_query}")
66+
return prom_query
5867

5968
return PercentileCPULoader
6069

@@ -65,13 +74,14 @@ class CPUAmountLoader(PrometheusMetric):
6574
"""
6675

6776
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
68-
pods_selector = "|".join(pod.name for pod in object.pods)
77+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
78+
pods_namespace = self.get_pod_namespace(object.namespace)
6979
cluster_label = self.get_prometheus_cluster_label()
70-
return f"""
80+
prom_query = f"""
7181
count_over_time(
7282
max(
7383
container_cpu_usage_seconds_total{{
74-
namespace="{object.namespace}",
84+
namespace="{pods_namespace}",
7585
pod=~"{pods_selector}",
7686
container="{object.container}"
7787
{cluster_label}
@@ -80,3 +90,5 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
8090
[{duration}:{step}]
8191
)
8292
"""
93+
logger.debug(f"{prom_query}")
94+
return prom_query
Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from robusta_krr.core.models.objects import K8sObjectData
22

33
from .base import PrometheusMetric, QueryType
4+
import logging
45

6+
logger = logging.getLogger("krr")
57

68
class MemoryLoader(PrometheusMetric):
79
"""
@@ -11,18 +13,21 @@ class MemoryLoader(PrometheusMetric):
1113
query_type: QueryType = QueryType.QueryRange
1214

1315
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
14-
pods_selector = "|".join(pod.name for pod in object.pods)
16+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
17+
pods_namespace = self.get_pod_namespace(object.namespace)
1518
cluster_label = self.get_prometheus_cluster_label()
16-
return f"""
19+
prom_query = f"""
1720
max(
1821
container_memory_working_set_bytes{{
19-
namespace="{object.namespace}",
22+
namespace="{pods_namespace}",
2023
pod=~"{pods_selector}",
2124
container="{object.container}"
2225
{cluster_label}
2326
}}
2427
) by (container, pod, job)
2528
"""
29+
logger.debug(f"{prom_query}")
30+
return prom_query
2631

2732

2833
class MaxMemoryLoader(PrometheusMetric):
@@ -31,13 +36,14 @@ class MaxMemoryLoader(PrometheusMetric):
3136
"""
3237

3338
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
34-
pods_selector = "|".join(pod.name for pod in object.pods)
39+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
40+
pods_namespace = self.get_pod_namespace(object.namespace)
3541
cluster_label = self.get_prometheus_cluster_label()
36-
return f"""
42+
prom_query = f"""
3743
max_over_time(
3844
max(
3945
container_memory_working_set_bytes{{
40-
namespace="{object.namespace}",
46+
namespace="{pods_namespace}",
4147
pod=~"{pods_selector}",
4248
container="{object.container}"
4349
{cluster_label}
@@ -46,21 +52,23 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
4652
[{duration}:{step}]
4753
)
4854
"""
49-
55+
logger.debug(f"{prom_query}")
56+
return prom_query
5057

5158
class MemoryAmountLoader(PrometheusMetric):
5259
"""
5360
A metric loader for loading memory points count.
5461
"""
5562

5663
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
57-
pods_selector = "|".join(pod.name for pod in object.pods)
64+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
65+
pods_namespace = self.get_pod_namespace(object.namespace)
5866
cluster_label = self.get_prometheus_cluster_label()
59-
return f"""
67+
prom_query = f"""
6068
count_over_time(
6169
max(
6270
container_memory_working_set_bytes{{
63-
namespace="{object.namespace}",
71+
namespace="{pods_namespace}",
6472
pod=~"{pods_selector}",
6573
container="{object.container}"
6674
{cluster_label}
@@ -69,7 +77,9 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
6977
[{duration}:{step}]
7078
)
7179
"""
72-
80+
logger.debug(f"{prom_query}")
81+
return prom_query
82+
7383
# TODO: Need to battle test if this one is correct.
7484
class MaxOOMKilledMemoryLoader(PrometheusMetric):
7585
"""
@@ -79,15 +89,16 @@ class MaxOOMKilledMemoryLoader(PrometheusMetric):
7989
warning_on_no_data = False
8090

8191
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
82-
pods_selector = "|".join(pod.name for pod in object.pods)
92+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
93+
pods_namespace = self.get_pod_namespace(object.namespace)
8394
cluster_label = self.get_prometheus_cluster_label()
84-
return f"""
95+
prom_query = f"""
8596
max_over_time(
8697
max(
8798
max(
8899
kube_pod_container_resource_limits{{
89100
resource="memory",
90-
namespace="{object.namespace}",
101+
namespace="{pods_namespace}",
91102
pod=~"{pods_selector}",
92103
container="{object.container}"
93104
{cluster_label}
@@ -97,7 +108,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
97108
max(
98109
kube_pod_container_status_last_terminated_reason{{
99110
reason="OOMKilled",
100-
namespace="{object.namespace}",
111+
namespace="{pods_namespace}",
101112
pod=~"{pods_selector}",
102113
container="{object.container}"
103114
{cluster_label}
@@ -107,3 +118,5 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
107118
[{duration}:{step}]
108119
)
109120
"""
121+
logger.debug(f"{prom_query}")
122+
return prom_query

robusta_krr/core/models/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ class Config(pd.BaseSettings):
7171
inside_cluster: bool = False
7272
_logging_console: Optional[Console] = pd.PrivateAttr(None)
7373

74+
# vcluster settings
75+
vcluster_name: Optional[str] = pd.Field(None)
76+
vcluster_namespace: Optional[str] = pd.Field(None)
77+
7478
def __init__(self, **kwargs: Any) -> None:
7579
super().__init__(**kwargs)
7680

robusta_krr/main.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ def run_strategy(
266266
help="Send to output to a slack channel, must have SLACK_BOT_TOKEN",
267267
rich_help_panel="Output Settings",
268268
),
269+
vcluster_namespace: str = typer.Option(
270+
None,
271+
"--vcluster-namespace",
272+
help="The vcluster namespace on physical cluster",
273+
rich_help_panel="VCluster Settings",
274+
),
275+
vcluster_name: str = typer.Option(
276+
None,
277+
"--vcluster-name",
278+
help="The vcluster name on physical cluster",
279+
rich_help_panel="VCluster Settings",
280+
),
269281
**strategy_args,
270282
) -> None:
271283
f"""Run KRR using the `{_strategy_name}` strategy"""
@@ -310,6 +322,8 @@ def run_strategy(
310322
show_severity=show_severity,
311323
strategy=_strategy_name,
312324
other_args=strategy_args,
325+
vcluster_namespace=vcluster_namespace,
326+
vcluster_name=vcluster_name,
313327
)
314328
Config.set_config(config)
315329
except ValidationError:

0 commit comments

Comments
 (0)