Skip to content

Commit d97d910

Browse files
adding global_cache_ttl feature
Signed-off-by: Victor Amorim <victor.amorim@nubank.com.br>
1 parent 0fc87d7 commit d97d910

File tree

3 files changed

+222
-70
lines changed

3 files changed

+222
-70
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ set_timestamp | Optional. Boolean for whether to set the Prometheus metric times
8888
use_get_metric_data | Optional. Boolean (experimental) Use GetMetricData API to get metrics instead of GetMetricStatistics. Can be set globally and per metric.
8989
list_metrics_cache_ttl | Optional. Number of seconds to cache the result of calling the ListMetrics API. Defaults to 0 (no cache). Can be set globally and per metric.
9090
warn_on_empty_list_dimensions | Optional. Boolean Emit warning if the exporter cannot determine what metrics to request
91+
global_cache_ttl | Optional. Number of seconds to cache the result from /metrics. Any value greater than 0 means that the last result will be returned. Defaults to 0 (no cache). Can be set globally.
9192

9293

9394
The above config will export time series such as

src/main/java/io/prometheus/cloudwatch/CloudWatchCollector.java

Lines changed: 95 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,9 @@
11
package io.prometheus.cloudwatch;
22

3-
import static io.prometheus.cloudwatch.CachingDimensionSource.DimensionCacheConfig;
4-
53
import io.prometheus.client.Collector;
64
import io.prometheus.client.Collector.Describable;
75
import io.prometheus.client.Counter;
86
import io.prometheus.cloudwatch.DataGetter.MetricRuleData;
9-
import java.io.FileReader;
10-
import java.io.IOException;
11-
import java.io.Reader;
12-
import java.time.Duration;
13-
import java.util.ArrayList;
14-
import java.util.Arrays;
15-
import java.util.Collections;
16-
import java.util.HashMap;
17-
import java.util.HashSet;
18-
import java.util.List;
19-
import java.util.Map;
20-
import java.util.Map.Entry;
21-
import java.util.Set;
22-
import java.util.logging.Level;
23-
import java.util.logging.Logger;
247
import org.yaml.snakeyaml.LoaderOptions;
258
import org.yaml.snakeyaml.Yaml;
269
import org.yaml.snakeyaml.constructor.SafeConstructor;
@@ -32,15 +15,23 @@
3215
import software.amazon.awssdk.services.cloudwatch.model.Statistic;
3316
import software.amazon.awssdk.services.resourcegroupstaggingapi.ResourceGroupsTaggingApiClient;
3417
import software.amazon.awssdk.services.resourcegroupstaggingapi.ResourceGroupsTaggingApiClientBuilder;
35-
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.GetResourcesRequest;
36-
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.GetResourcesResponse;
37-
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.ResourceTagMapping;
38-
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.Tag;
39-
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.TagFilter;
18+
import software.amazon.awssdk.services.resourcegroupstaggingapi.model.*;
4019
import software.amazon.awssdk.services.sts.StsClient;
4120
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
4221
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
4322

23+
import java.io.FileReader;
24+
import java.io.IOException;
25+
import java.io.Reader;
26+
import java.time.Duration;
27+
import java.time.Instant;
28+
import java.util.*;
29+
import java.util.Map.Entry;
30+
import java.util.logging.Level;
31+
import java.util.logging.Logger;
32+
33+
import static io.prometheus.cloudwatch.CachingDimensionSource.DimensionCacheConfig;
34+
4435
public class CloudWatchCollector extends Collector implements Describable {
4536
private static final Logger LOGGER = Logger.getLogger(CloudWatchCollector.class.getName());
4637

@@ -50,11 +41,14 @@ static class ActiveConfig {
5041
ResourceGroupsTaggingApiClient taggingClient;
5142
DimensionSource dimensionSource;
5243

44+
Map<String, Object> globalConfig;
45+
5346
public ActiveConfig(ActiveConfig cfg) {
5447
this.rules = new ArrayList<>(cfg.rules);
5548
this.cloudWatchClient = cfg.cloudWatchClient;
5649
this.taggingClient = cfg.taggingClient;
5750
this.dimensionSource = cfg.dimensionSource;
51+
this.globalConfig = cfg.globalConfig;
5852
}
5953

6054
public ActiveConfig() {}
@@ -96,29 +90,32 @@ static class AWSTagSelect {
9690
"ReadThrottleEvents", "WriteThrottleEvents");
9791

9892
public CloudWatchCollector(Reader in) {
99-
loadConfig(in, null, null);
93+
loadConfig(in, null, null, null);
10094
}
10195

10296
public CloudWatchCollector(String yamlConfig) {
103-
this(yamlConfig, null, null);
97+
this(yamlConfig, null, null, null);
10498
}
10599

106100
/* For unittests. */
107101
protected CloudWatchCollector(
108102
String jsonConfig,
109103
CloudWatchClient cloudWatchClient,
110-
ResourceGroupsTaggingApiClient taggingClient) {
104+
ResourceGroupsTaggingApiClient taggingClient,
105+
Map<String, Object> globalConfig) {
111106
this(
112107
(Map<String, Object>) new Yaml(new SafeConstructor(new LoaderOptions())).load(jsonConfig),
113108
cloudWatchClient,
114-
taggingClient);
109+
taggingClient,
110+
globalConfig);
115111
}
116112

117113
private CloudWatchCollector(
118114
Map<String, Object> config,
119115
CloudWatchClient cloudWatchClient,
120-
ResourceGroupsTaggingApiClient taggingClient) {
121-
loadConfig(config, cloudWatchClient, taggingClient);
116+
ResourceGroupsTaggingApiClient taggingClient,
117+
Map<String, Object> globalConfig) {
118+
loadConfig(config, cloudWatchClient, taggingClient, globalConfig);
122119
}
123120

124121
@Override
@@ -129,26 +126,33 @@ public List<MetricFamilySamples> describe() {
129126
protected void reloadConfig() throws IOException {
130127
LOGGER.log(Level.INFO, "Reloading configuration");
131128
try (FileReader reader = new FileReader(WebServer.configFilePath); ) {
132-
loadConfig(reader, activeConfig.cloudWatchClient, activeConfig.taggingClient);
129+
loadConfig(reader, activeConfig.cloudWatchClient, activeConfig.taggingClient, activeConfig.globalConfig);
133130
}
134131
}
135132

136133
protected void loadConfig(
137-
Reader in, CloudWatchClient cloudWatchClient, ResourceGroupsTaggingApiClient taggingClient) {
134+
Reader in, CloudWatchClient cloudWatchClient, ResourceGroupsTaggingApiClient taggingClient, Map<String, Object> globalConfig) {
138135
loadConfig(
139136
(Map<String, Object>) new Yaml(new SafeConstructor(new LoaderOptions())).load(in),
140137
cloudWatchClient,
141-
taggingClient);
138+
taggingClient,
139+
globalConfig);
142140
}
143141

144142
private void loadConfig(
145143
Map<String, Object> config,
146144
CloudWatchClient cloudWatchClient,
147-
ResourceGroupsTaggingApiClient taggingClient) {
145+
ResourceGroupsTaggingApiClient taggingClient,
146+
Map<String, Object> globalConfig) {
148147
if (config == null) { // Yaml config empty, set config to empty map.
149148
config = new HashMap<>();
150149
}
151150

151+
if (globalConfig == null) { // Yaml config empty, set config to empty map.
152+
globalConfig = new HashMap<>();
153+
}
154+
155+
152156
int defaultPeriod = 60;
153157
if (config.containsKey("period_seconds")) {
154158
defaultPeriod = ((Number) config.get("period_seconds")).intValue();
@@ -178,6 +182,12 @@ private void loadConfig(
178182
Duration.ofSeconds(((Number) config.get("list_metrics_cache_ttl")).intValue());
179183
}
180184

185+
int defaultGlobalCacheSeconds = 0;
186+
if (config.containsKey("global_cache_ttl")) {
187+
defaultGlobalCacheSeconds = ((Number) config.get("global_cache_ttl")).intValue();
188+
}
189+
globalConfig.put("globalCacheSeconds", defaultGlobalCacheSeconds);
190+
181191
boolean defaultWarnOnMissingDimensions = false;
182192
if (config.containsKey("warn_on_empty_list_dimensions")) {
183193
defaultWarnOnMissingDimensions = (Boolean) config.get("warn_on_empty_list_dimensions");
@@ -331,19 +341,21 @@ private void loadConfig(
331341
dimensionSource = new CachingDimensionSource(dimensionSource, metricCacheConfig);
332342
}
333343

334-
loadConfig(rules, cloudWatchClient, taggingClient, dimensionSource);
344+
loadConfig(rules, cloudWatchClient, taggingClient, dimensionSource, globalConfig);
335345
}
336346

337347
private void loadConfig(
338-
ArrayList<MetricRule> rules,
339-
CloudWatchClient cloudWatchClient,
340-
ResourceGroupsTaggingApiClient taggingClient,
341-
DimensionSource dimensionSource) {
348+
ArrayList<MetricRule> rules,
349+
CloudWatchClient cloudWatchClient,
350+
ResourceGroupsTaggingApiClient taggingClient,
351+
DimensionSource dimensionSource,
352+
Map<String, Object> globalConfig) {
342353
synchronized (activeConfig) {
343354
activeConfig.cloudWatchClient = cloudWatchClient;
344355
activeConfig.taggingClient = taggingClient;
345356
activeConfig.rules = rules;
346357
activeConfig.dimensionSource = dimensionSource;
358+
activeConfig.globalConfig = globalConfig;
347359
}
348360
}
349361

@@ -633,11 +645,42 @@ private void scrape(List<MetricFamilySamples> mfs) {
633645
"AWS information available for resource",
634646
infoSamples));
635647
}
648+
private void updateCacheMetric(List<MetricFamilySamples> mfs, double value){
649+
List<MetricFamilySamples.Sample> samples = new ArrayList<>();
650+
MetricFamilySamples cacheMetric = null;
651+
for(MetricFamilySamples metric : mfs){
652+
if (metric.name.equals("cloudwatch_exporter_cached_answer")){
653+
cacheMetric = metric;
654+
break;
655+
}
656+
}
657+
658+
if(cacheMetric == null){
659+
cacheMetric = new MetricFamilySamples(
660+
"cloudwatch_exporter_cached_answer",
661+
Type.GAUGE,
662+
"Non-zero means this scrape was from cache",
663+
samples);
664+
mfs.add(cacheMetric);
665+
}else{
666+
cacheMetric.samples.clear();
667+
}
636668

669+
cacheMetric.samples.add(new MetricFamilySamples.Sample(
670+
"cloudwatch_exporter_cached_answer", new ArrayList<>(), new ArrayList<>(), value));
671+
}
672+
List<MetricFamilySamples> cachedMfs = new ArrayList<>();
637673
public List<MetricFamilySamples> collect() {
638674
long start = System.nanoTime();
639675
double error = 0;
640676
List<MetricFamilySamples> mfs = new ArrayList<>();
677+
678+
if (shouldCache() && shouldReturnFromCache()){
679+
LOGGER.log(Level.INFO, "Returning from cache");
680+
this.updateCacheMetric(this.cachedMfs, 1.0);
681+
return this.cachedMfs;
682+
}
683+
this.updateCacheMetric(mfs, 0.0);
641684
try {
642685
scrape(mfs);
643686
} catch (Exception e) {
@@ -668,8 +711,23 @@ public List<MetricFamilySamples> collect() {
668711
Type.GAUGE,
669712
"Non-zero if this scrape failed.",
670713
samples));
714+
if (shouldCache()){
715+
this.cachedMfs = mfs;
716+
}
717+
this.lastCall = Instant.now();
671718
return mfs;
672719
}
720+
public Instant lastCall;
721+
private boolean shouldCache() {
722+
return (int) this.activeConfig.globalConfig.get("globalCacheSeconds") > 0;
723+
}
724+
private boolean shouldReturnFromCache() {
725+
if (this.lastCall == null){
726+
return false;
727+
}
728+
Duration elapsedTime = Duration.between(lastCall, Instant.now());
729+
return elapsedTime.toSeconds() <= (int) this.activeConfig.globalConfig.get("globalCacheSeconds");
730+
}
673731

674732
private String extractResourceIdFromArn(String arn) {
675733
// ARN parsing is based on

0 commit comments

Comments
 (0)