Skip to content

Commit 8f5cbeb

Browse files
committed
add vcluster support
Use f-strings, rename variables, add doc strings
1 parent 6d2b995 commit 8f5cbeb

File tree

7 files changed

+137
-37
lines changed

7 files changed

+137
-37
lines changed

README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
<div align="center">
1515
<h1 align="center">Kubernetes Resource Recommendations Based on Historical Data</h1>
1616
<h2 align="center">Get recommendations based on your existing data in Prometheus/Coralogix/Thanos/Mimir and more!</h2>
17-
<p align="center">
17+
<p align="center">
1818
<a href="#installation"><strong>Installation</strong></a>
1919
.
2020
<a href="#how-krr-works"><strong>How KRR works</strong></a>
@@ -202,7 +202,7 @@ Apart from running KRR as a CLI tool you can also run KRR inside your cluster. W
202202

203203
<img src="./images/ui_recommendation.png">
204204

205-
You can also run KRR in-cluster as a Kubernetes Job, if you don't want to view results easily in a <a href="https://platform.robusta.dev/signup/?benefits=krr&utm_source=github&utm_medium=krr-readme&utm_content=in-cluster-ui">UI</a>.
205+
You can also run KRR in-cluster as a Kubernetes Job, if you don't want to view results easily in a <a href="https://platform.robusta.dev/signup/?benefits=krr&utm_source=github&utm_medium=krr-readme&utm_content=in-cluster-ui">UI</a>.
206206

207207
```
208208
kubectl apply -f https://raw.githubusercontent.com/robusta-dev/krr/refs/heads/main/docs/krr-in-cluster/krr-in-cluster-job.yaml
@@ -400,6 +400,22 @@ Refer to `krr simple --help`, and look at the flags `--prometheus-url`, `--prome
400400
If you need help, contact us on Slack, email, or by opening a GitHub issue.
401401
</details>
402402

403+
<details>
404+
<summary>VCluster</summary>
405+
406+
KRR supports VCluster when Prometheus runs outside of the VCluster (on the physical cluster or in a centralized setup). Because VCluster renames pods on the physical cluster, you need to provide:
407+
408+
- `--vcluster-namespace`: The namespace on the physical cluster where the VCluster is deployed
409+
- `--vcluster-name`: The name of your VCluster (set during VCluster deployment)
410+
411+
Other parameters, such as the namespace selector and the pod selector, work as expected.
412+
413+
```sh
414+
krr simple --vcluster-name my-vcluster-name --vcluster-namespace my-vcluster-namespace
415+
```
416+
417+
</details>
418+
403419
<details>
404420
<summary>Debug mode</summary>
405421
If you want to see additional debug logs:

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,4 @@ tzlocal==5.2 ; python_version >= "3.9" and python_full_version < "3.13"
5353
urllib3==1.26.19 ; python_version >= "3.9" and python_full_version < "3.13"
5454
websocket-client==1.7.0 ; python_version >= "3.9" and python_full_version < "3.13"
5555
zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.13"
56-
tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
56+
tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13"

robusta_krr/core/integrations/prometheus/metrics/base.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import asyncio
55
import datetime
66
import enum
7+
import hashlib
78
from concurrent.futures import ThreadPoolExecutor
89
from functools import reduce
910
from typing import Any, Optional, TypedDict
@@ -259,3 +260,43 @@ def combine_batches(self, results: list[PodsTimeData]) -> PodsTimeData:
259260
"""
260261

261262
return reduce(lambda x, y: x | y, results, {})
263+
264+
## Vcluster
265+
def get_vcluster_pod_real_name(self, pod_name: str, pod_namespace: str) -> str:
    """
    Return the name a pod carries on the (host) cluster, which differs from its name inside the VCluster.

    When no VCluster name is configured (``None`` or an empty string), the pod name is returned as is.
    Otherwise the host name is built as ``<pod_name>-x-<pod_namespace>-x-<vcluster_name>``. If that
    exceeds 63 characters, it is shortened to its first 52 characters, a ``-``, and the first 10 hex
    digits of the SHA-256 of the full name.

    Args:
        pod_name (str): The pod name in the cluster krr is connected to
        pod_namespace (str): The pod namespace in the cluster krr is connected to

    Returns:
        str: the pod name in the host cluster.
    """

    # An empty name is treated like an unset one; otherwise the result would be "<pod>-x-<ns>-x-".
    if not settings.vcluster_name:
        return pod_name

    host_pod_name = f"{pod_name}-x-{pod_namespace}-x-{settings.vcluster_name}"
    if len(host_pod_name) <= 63:
        return host_pod_name

    digest = hashlib.sha256(host_pod_name.encode()).hexdigest()
    # If the 52-character prefix ends with a dot, joining it with "-" would give ".-";
    # collapse that to "-".
    # NOTE(review): intended to match vcluster's own name translation (SafeConcatName) -
    # confirm against the vcluster version in use.
    return f"{host_pod_name[:52]}-{digest[:10]}".replace(".-", "-")
286+
287+
def get_pod_namespace(self, pod_namespace: str) -> str:
    """
    Return the namespace a pod lives in on the (host) cluster, which differs from its namespace inside the VCluster.

    When no VCluster namespace is configured (``None`` or an empty string), the pod namespace is
    returned as is; otherwise the configured VCluster namespace is returned.

    Args:
        pod_namespace (str): The pod namespace in the cluster krr is connected to

    Returns:
        str: the pod namespace in the host cluster.
    """

    # `or` also covers an empty string, which would otherwise produce namespace="" in queries.
    return settings.vcluster_namespace or pod_namespace
Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from robusta_krr.core.models.objects import K8sObjectData
22

33
from .base import PrometheusMetric, QueryType
4+
import logging
45

5-
6+
logger = logging.getLogger("krr")
7+
68
class CPULoader(PrometheusMetric):
79
"""
810
A metric loader for loading CPU usage metrics.
@@ -11,20 +13,24 @@ class CPULoader(PrometheusMetric):
1113
query_type: QueryType = QueryType.QueryRange
1214

1315
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
14-
pods_selector = "|".join(pod.name for pod in object.pods)
16+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
17+
pods_namespace = self.get_pod_namespace(object.namespace)
1518
cluster_label = self.get_prometheus_cluster_label()
16-
return f"""
19+
prom_query = f"""
1720
max(
18-
rate(
19-
container_cpu_usage_seconds_total{{
20-
namespace="{object.namespace}",
21-
pod=~"{pods_selector}",
22-
container="{object.container}"
23-
{cluster_label}
24-
}}[{step}]
25-
)
26-
) by (container, pod, job)
27-
"""
21+
rate(
22+
container_cpu_usage_seconds_total{{
23+
namespace="{pods_namespace}",
24+
pod=~"{pods_selector}",
25+
container="{object.container}"
26+
{cluster_label}
27+
}}[{step}]
28+
)
29+
) by (container, pod, job)
30+
"""
31+
logger.debug(f"{prom_query}")
32+
33+
return prom_query
2834

2935

3036
def PercentileCPULoader(percentile: float) -> type[PrometheusMetric]:
@@ -37,15 +43,16 @@ def PercentileCPULoader(percentile: float) -> type[PrometheusMetric]:
3743

3844
class PercentileCPULoader(PrometheusMetric):
3945
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
40-
pods_selector = "|".join(pod.name for pod in object.pods)
46+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
47+
pods_namespace = self.get_pod_namespace(object.namespace)
4148
cluster_label = self.get_prometheus_cluster_label()
42-
return f"""
49+
prom_query = f"""
4350
quantile_over_time(
4451
{round(percentile / 100, 2)},
4552
max(
4653
rate(
4754
container_cpu_usage_seconds_total{{
48-
namespace="{object.namespace}",
55+
namespace="{pods_namespace}",
4956
pod=~"{pods_selector}",
5057
container="{object.container}"
5158
{cluster_label}
@@ -55,6 +62,8 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
5562
[{duration}:{step}]
5663
)
5764
"""
65+
logger.debug(f"{prom_query}")
66+
return prom_query
5867

5968
return PercentileCPULoader
6069

@@ -65,13 +74,14 @@ class CPUAmountLoader(PrometheusMetric):
6574
"""
6675

6776
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
68-
pods_selector = "|".join(pod.name for pod in object.pods)
77+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
78+
pods_namespace = self.get_pod_namespace(object.namespace)
6979
cluster_label = self.get_prometheus_cluster_label()
70-
return f"""
80+
prom_query = f"""
7181
count_over_time(
7282
max(
7383
container_cpu_usage_seconds_total{{
74-
namespace="{object.namespace}",
84+
namespace="{pods_namespace}",
7585
pod=~"{pods_selector}",
7686
container="{object.container}"
7787
{cluster_label}
@@ -80,3 +90,5 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
8090
[{duration}:{step}]
8191
)
8292
"""
93+
logger.debug(f"{prom_query}")
94+
return prom_query
Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from robusta_krr.core.models.objects import K8sObjectData
22

33
from .base import PrometheusMetric, QueryType
4+
import logging
45

6+
logger = logging.getLogger("krr")
57

68
class MemoryLoader(PrometheusMetric):
79
"""
@@ -11,18 +13,21 @@ class MemoryLoader(PrometheusMetric):
1113
query_type: QueryType = QueryType.QueryRange
1214

1315
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
14-
pods_selector = "|".join(pod.name for pod in object.pods)
16+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
17+
pods_namespace = self.get_pod_namespace(object.namespace)
1518
cluster_label = self.get_prometheus_cluster_label()
16-
return f"""
19+
prom_query = f"""
1720
max(
1821
container_memory_working_set_bytes{{
19-
namespace="{object.namespace}",
22+
namespace="{pods_namespace}",
2023
pod=~"{pods_selector}",
2124
container="{object.container}"
2225
{cluster_label}
2326
}}
2427
) by (container, pod, job)
2528
"""
29+
logger.debug(f"{prom_query}")
30+
return prom_query
2631

2732

2833
class MaxMemoryLoader(PrometheusMetric):
@@ -31,13 +36,14 @@ class MaxMemoryLoader(PrometheusMetric):
3136
"""
3237

3338
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
34-
pods_selector = "|".join(pod.name for pod in object.pods)
39+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
40+
pods_namespace = self.get_pod_namespace(object.namespace)
3541
cluster_label = self.get_prometheus_cluster_label()
36-
return f"""
42+
prom_query = f"""
3743
max_over_time(
3844
max(
3945
container_memory_working_set_bytes{{
40-
namespace="{object.namespace}",
46+
namespace="{pods_namespace}",
4147
pod=~"{pods_selector}",
4248
container="{object.container}"
4349
{cluster_label}
@@ -46,21 +52,23 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
4652
[{duration}:{step}]
4753
)
4854
"""
49-
55+
logger.debug(f"{prom_query}")
56+
return prom_query
5057

5158
class MemoryAmountLoader(PrometheusMetric):
5259
"""
5360
A metric loader for loading memory points count.
5461
"""
5562

5663
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
57-
pods_selector = "|".join(pod.name for pod in object.pods)
64+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
65+
pods_namespace = self.get_pod_namespace(object.namespace)
5866
cluster_label = self.get_prometheus_cluster_label()
59-
return f"""
67+
prom_query = f"""
6068
count_over_time(
6169
max(
6270
container_memory_working_set_bytes{{
63-
namespace="{object.namespace}",
71+
namespace="{pods_namespace}",
6472
pod=~"{pods_selector}",
6573
container="{object.container}"
6674
{cluster_label}
@@ -69,7 +77,9 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
6977
[{duration}:{step}]
7078
)
7179
"""
72-
80+
logger.debug(f"{prom_query}")
81+
return prom_query
82+
7383
# TODO: Need to battle test if this one is correct.
7484
class MaxOOMKilledMemoryLoader(PrometheusMetric):
7585
"""
@@ -79,15 +89,16 @@ class MaxOOMKilledMemoryLoader(PrometheusMetric):
7989
warning_on_no_data = False
8090

8191
def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
82-
pods_selector = "|".join(pod.name for pod in object.pods)
92+
pods_selector = "|".join(self.get_vcluster_pod_real_name(pod.name, object.namespace) for pod in object.pods)
93+
pods_namespace = self.get_pod_namespace(object.namespace)
8394
cluster_label = self.get_prometheus_cluster_label()
84-
return f"""
95+
prom_query = f"""
8596
max_over_time(
8697
max(
8798
max(
8899
kube_pod_container_resource_limits{{
89100
resource="memory",
90-
namespace="{object.namespace}",
101+
namespace="{pods_namespace}",
91102
pod=~"{pods_selector}",
92103
container="{object.container}"
93104
{cluster_label}
@@ -97,7 +108,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
97108
max(
98109
kube_pod_container_status_last_terminated_reason{{
99110
reason="OOMKilled",
100-
namespace="{object.namespace}",
111+
namespace="{pods_namespace}",
101112
pod=~"{pods_selector}",
102113
container="{object.container}"
103114
{cluster_label}
@@ -107,3 +118,5 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
107118
[{duration}:{step}]
108119
)
109120
"""
121+
logger.debug(f"{prom_query}")
122+
return prom_query

robusta_krr/core/models/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ class Config(pd.BaseSettings):
7171
inside_cluster: bool = False
7272
_logging_console: Optional[Console] = pd.PrivateAttr(None)
7373

74+
# vcluster settings
75+
vcluster_name: Optional[str] = pd.Field(None)
76+
vcluster_namespace: Optional[str] = pd.Field(None)
77+
7478
def __init__(self, **kwargs: Any) -> None:
7579
super().__init__(**kwargs)
7680

robusta_krr/main.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ def run_strategy(
266266
help="Send to output to a slack channel, must have SLACK_BOT_TOKEN",
267267
rich_help_panel="Output Settings",
268268
),
269+
vcluster_namespace: str = typer.Option(
270+
None,
271+
"--vcluster-namespace",
272+
help="The vcluster namespace on physical cluster",
273+
rich_help_panel="VCluster Settings",
274+
),
275+
vcluster_name: str = typer.Option(
276+
None,
277+
"--vcluster-name",
278+
help="The vcluster name on physical cluster",
279+
rich_help_panel="VCluster Settings",
280+
),
269281
**strategy_args,
270282
) -> None:
271283
f"""Run KRR using the `{_strategy_name}` strategy"""
@@ -310,6 +322,8 @@ def run_strategy(
310322
show_severity=show_severity,
311323
strategy=_strategy_name,
312324
other_args=strategy_args,
325+
vcluster_namespace=vcluster_namespace,
326+
vcluster_name=vcluster_name,
313327
)
314328
Config.set_config(config)
315329
except ValidationError:

0 commit comments

Comments
 (0)