Skip to content

Commit 46f5d39

Browse files
authored
Merge pull request #1007 from Matyro/Prometheus_Exporter_Update
Prometheus Exporter - added total energy & auto cleanup, renamed Exports
2 parents 222b697 + dfd51b3 commit 46f5d39

File tree

5 files changed

+67
-8
lines changed

5 files changed

+67
-8
lines changed

codecarbon/emissions_tracker.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import dataclasses
77
import os
88
import platform
9+
import re
910
import time
1011
import uuid
1112
from abc import ABC, abstractmethod
@@ -454,7 +455,16 @@ def _init_output_methods(self, *, api_key: str = None):
454455
self.run_id = uuid.uuid4()
455456

456457
if self._save_to_prometheus:
457-
self._output_handlers.append(PrometheusOutput(self._prometheus_url))
458+
self._output_handlers.append(
459+
PrometheusOutput(
460+
self._prometheus_url,
461+
job_name=re.sub(
462+
r"[^a-zA-Z0-9_-]",
463+
"_",
464+
f"{self._project_name}_{self._experiment_name}",
465+
),
466+
)
467+
)
458468

459469
if self._save_to_logfire:
460470
self._output_handlers.append(LogfireOutput())
@@ -707,6 +717,10 @@ def stop(self) -> Optional[float]:
707717

708718
self.final_emissions_data = emissions_data
709719
self.final_emissions = emissions_data.emissions
720+
721+
for handler in self._output_handlers:
722+
handler.exit()
723+
710724
return emissions_data.emissions
711725

712726
def _persist_data(

codecarbon/output_methods/base_output.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ def live_out(self, total: EmissionsData, delta: EmissionsData):
2222

2323
def task_out(self, data: List[TaskEmissionsData], experiment_name: str):
2424
pass
25+
26+
def exit(self):
    """
    Hook invoked on every output handler when the tracker stops.

    The base implementation intentionally does nothing; outputs that hold
    remote state override it to clean up.
    """
    return None

codecarbon/output_methods/metrics/metric_docs.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,22 @@ class MetricDocumentation:
5050
)
5151
cpu_energy_doc = MetricDocumentation(
5252
"codecarbon_cpu_energy",
53-
description="Energy used per CPU (kWh)",
53+
description="Energy used per CPU since last reading (kWh)",
5454
)
5555
gpu_energy_doc = MetricDocumentation(
5656
"codecarbon_gpu_energy",
57-
description="Energy used per GPU (kWh)",
57+
description="Energy used per GPU since last reading (kWh)",
5858
)
5959
ram_energy_doc = MetricDocumentation(
6060
"codecarbon_ram_energy",
61-
description="Energy used per RAM (kWh)",
61+
description="Energy used per RAM since last reading (kWh)",
6262
)
6363
energy_consumed_doc = MetricDocumentation(
6464
"codecarbon_energy_consumed",
65-
description="Sum of cpu_energy, gpu_energy and ram_energy (kW)",
65+
description="Sum of cpu_energy, gpu_energy and ram_energy (kWh)",
66+
)
67+
68+
energy_consumed_total_doc = MetricDocumentation(
69+
"codecarbon_energy_total",
70+
description="Accumulated cpu_energy, gpu_energy and ram_energy (kWh) since the start of the run",
6671
)

codecarbon/output_methods/metrics/prometheus.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
import dataclasses
22
import os
33

4-
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
4+
from prometheus_client import (
5+
CollectorRegistry,
6+
Counter,
7+
Gauge,
8+
delete_from_gateway,
9+
push_to_gateway,
10+
)
511
from prometheus_client.exposition import basic_auth_handler
612

713
from codecarbon.external.logger import logger
@@ -15,6 +21,7 @@
1521
emissions_doc,
1622
emissions_rate_doc,
1723
energy_consumed_doc,
24+
energy_consumed_total_doc,
1825
gpu_energy_doc,
1926
gpu_power_doc,
2027
ram_energy_doc,
@@ -60,6 +67,15 @@ def generate_gauge(metric_doc: MetricDocumentation):
6067
)
6168

6269

70+
def generate_counter(metric_doc: MetricDocumentation):
    """
    Create a Prometheus Counter described by ``metric_doc``.

    The counter takes its name and help text from ``metric_doc``, uses the
    shared ``labelnames`` and is attached to the shared ``registry``.
    """
    counter = Counter(
        name=metric_doc.name,
        documentation=metric_doc.description,
        labelnames=labelnames,
        registry=registry,
    )
    return counter
77+
78+
6379
duration_gauge = generate_gauge(duration_doc)
6480
emissions_gauge = generate_gauge(emissions_doc)
6581
emissions_rate_gauge = generate_gauge(emissions_rate_doc)
@@ -70,15 +86,26 @@ def generate_gauge(metric_doc: MetricDocumentation):
7086
gpu_energy_gauge = generate_gauge(gpu_energy_doc)
7187
ram_energy_gauge = generate_gauge(ram_energy_doc)
7288
energy_consumed_gauge = generate_gauge(energy_consumed_doc)
89+
energy_consumed_total = generate_counter(energy_consumed_total_doc)
7390

7491

7592
class PrometheusOutput(BaseOutput):
7693
"""
7794
Send emissions data to prometheus pushgateway
7895
"""
7996

80-
def __init__(self, prometheus_url: str):
97+
def __init__(self, prometheus_url: str, job_name: str = "codecarbon"):
    """
    Remember where to send metrics and under which job they are grouped.

    :param prometheus_url: gateway address handed to the pushgateway calls.
    :param job_name: job under which metrics are pushed and later deleted;
        defaults to "codecarbon".
    """
    self.job_name = job_name
    self.prometheus_url = prometheus_url
100+
101+
def exit(self):
    """
    Delete this job's metrics from the Prometheus Pushgateway.

    Without this cleanup the gateway keeps the job's metrics at their last
    pushed values; Prometheus is expected to have scraped them already.
    Any failure is logged and swallowed so that a gateway problem never
    propagates to the caller.
    """
    try:
        logger.info("Deleting metrics from Prometheus Pushgateway")
        delete_from_gateway(
            self.prometheus_url,
            job=self.job_name,
            # Authenticate the same way the push in add_emission() does;
            # otherwise a protected gateway accepts the pushes but rejects
            # the cleanup request.
            handler=self._auth_handler,
        )
    except Exception as e:
        logger.error(e, exc_info=True)
82109

83110
def out(self, total: EmissionsData, delta: EmissionsData):
84111
try:
@@ -121,10 +148,14 @@ def add_emission(self, carbon_emission: dict):
121148
]:
122149
gauge.labels(**labels).set(carbon_emission[emission_name])
123150

151+
# Update the total energy consumed counter
152+
# This is separate from the total values given to self.out(...)
153+
energy_consumed_total.labels(**labels).inc(carbon_emission["energy_consumed"])
154+
124155
# Send the new metric values
125156
push_to_gateway(
126157
self.prometheus_url,
127-
job="codecarbon",
158+
job=self.job_name,
128159
registry=registry,
129160
handler=self._auth_handler,
130161
)

tests/output_methods/metrics/test_prometheus.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ def test_out_method(self, mock_push_to_gateway):
5151
output = prometheus.PrometheusOutput("url")
5252
output.out(total=EMISSIONS_DATA, delta=EMISSIONS_DATA)
5353

54+
@patch("codecarbon.output_methods.metrics.prometheus.delete_from_gateway")
def test_exit_method(self, mock_delete):
    """exit() must issue exactly one delete for the configured gateway and job."""
    output = prometheus.PrometheusOutput("url", job_name="custom_job")
    output.exit()

    # Pin the contract (which gateway, which job) rather than the exact
    # keyword set, so optional transport arguments such as a timeout or an
    # auth handler do not break this test.
    mock_delete.assert_called_once()
    args, kwargs = mock_delete.call_args
    assert args == ("url",)
    assert kwargs["job"] == "custom_job"
59+
5460
@patch(
5561
"codecarbon.output_methods.metrics.prometheus.push_to_gateway",
5662
side_effect=Exception("Test error"),

0 commit comments

Comments
 (0)