Skip to content

Commit 99809cb

Browse files
[b455552691] Collect dynamic resource allocation of Cloudera services (#1028)
Create a new task to iterate over Cloudera hosts and collect dynamic resource allocation of their services calling timeseries endpoint. It queries memory, cpu and disk throughput statistics in hourly manner and stores the results in `service-resource-allocation.jsonl` file.
1 parent 9624a94 commit 99809cb

File tree

13 files changed

+1994
-14
lines changed

13 files changed

+1994
-14
lines changed

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/AbstractClouderaTimeSeriesTask.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import com.fasterxml.jackson.databind.JsonNode;
2020
import com.google.common.base.Preconditions;
21+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2122
import java.net.URI;
2223
import java.time.ZonedDateTime;
2324
import java.time.format.DateTimeFormatter;
@@ -33,12 +34,14 @@ abstract class AbstractClouderaTimeSeriesTask extends AbstractClouderaManagerTas
3334
private final ZonedDateTime startDate;
3435
private final ZonedDateTime endDate;
3536
private final TimeSeriesAggregation tsAggregation;
37+
private final TaskCategory taskCategory;
3638

3739
public AbstractClouderaTimeSeriesTask(
3840
@Nonnull String targetPath,
3941
@Nonnull ZonedDateTime startDate,
4042
@Nonnull ZonedDateTime endDate,
41-
@Nonnull TimeSeriesAggregation tsAggregation) {
43+
@Nonnull TimeSeriesAggregation tsAggregation,
44+
@Nonnull TaskCategory taskCategory) {
4245
super(targetPath);
4346
Preconditions.checkNotNull(targetPath, "Target path must be not null.");
4447
Preconditions.checkState(!targetPath.isEmpty(), "Target file path must be not empty.");
@@ -50,6 +53,13 @@ public AbstractClouderaTimeSeriesTask(
5053
this.startDate = startDate;
5154
this.endDate = endDate;
5255
this.tsAggregation = tsAggregation;
56+
this.taskCategory = taskCategory;
57+
}
58+
59+
@Nonnull
60+
@Override
61+
public TaskCategory getCategory() {
62+
return taskCategory;
5363
}
5464

5565
protected JsonNode requestTimeSeriesChart(ClouderaManagerHandle handle, String query)

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCPUChartTask.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.io.ByteSink;
2121
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2324
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2425
import java.io.Writer;
2526
import java.nio.charset.StandardCharsets;
@@ -40,13 +41,16 @@
4041
* language.
4142
*/
4243
public class ClouderaClusterCPUChartTask extends AbstractClouderaTimeSeriesTask {
43-
private static final Logger logger = LoggerFactory.getLogger(ClouderaCMFHostsTask.class);
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaClusterCPUChartTask.class);
4445
private static final String TS_CPU_QUERY_TEMPLATE =
4546
"SELECT cpu_percent_across_hosts WHERE entityName = \"%s\" AND category = CLUSTER";
4647

4748
public ClouderaClusterCPUChartTask(
48-
ZonedDateTime startDate, ZonedDateTime endDate, TimeSeriesAggregation tsAggregation) {
49-
super("cluster-cpu.jsonl", startDate, endDate, tsAggregation);
49+
ZonedDateTime startDate,
50+
ZonedDateTime endDate,
51+
TimeSeriesAggregation tsAggregation,
52+
TaskCategory taskCategory) {
53+
super("cluster-cpu.jsonl", startDate, endDate, tsAggregation, taskCategory);
5054
}
5155

5256
@Override

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRAMChartTask.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.io.ByteSink;
2121
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2324
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2425
import java.io.Writer;
2526
import java.nio.charset.StandardCharsets;
@@ -40,14 +41,17 @@
4041
*/
4142
public class ClouderaHostRAMChartTask extends AbstractClouderaTimeSeriesTask {
4243

43-
private static final Logger logger = LoggerFactory.getLogger(ClouderaCMFHostsTask.class);
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaHostRAMChartTask.class);
4445

4546
private static final String TS_RAM_QUERY_TEMPLATE =
4647
"select swap_used, physical_memory_used, physical_memory_total, physical_memory_cached, physical_memory_buffers where entityName = \"%s\"";
4748

4849
public ClouderaHostRAMChartTask(
49-
ZonedDateTime startDate, ZonedDateTime endDate, TimeSeriesAggregation tsAggregation) {
50-
super("host-ram.jsonl", startDate, endDate, tsAggregation);
50+
ZonedDateTime startDate,
51+
ZonedDateTime endDate,
52+
TimeSeriesAggregation tsAggregation,
53+
TaskCategory taskCategory) {
54+
super("host-ram.jsonl", startDate, endDate, tsAggregation, taskCategory);
5155
}
5256

5357
@Override

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerConnector.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
import static com.google.edwmigration.dumper.application.dumper.connector.Connector.validateDateRange;
2020
import static com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation.DAILY;
21+
import static com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation.HOURLY;
2122
import static com.google.edwmigration.dumper.application.dumper.task.TaskCategory.OPTIONAL;
23+
import static com.google.edwmigration.dumper.application.dumper.task.TaskCategory.REQUIRED;
2224

2325
import com.google.auto.service.AutoService;
2426
import com.google.common.base.Preconditions;
@@ -110,8 +112,9 @@ public void addTasksTo(@Nonnull List<? super Task<?>> out, @Nonnull ConnectorArg
110112
endDate = arguments.getEndDate();
111113
}
112114

113-
out.add(new ClouderaClusterCPUChartTask(startDate, endDate, DAILY));
114-
out.add(new ClouderaHostRAMChartTask(startDate, endDate, DAILY));
115+
out.add(new ClouderaClusterCPUChartTask(startDate, endDate, DAILY, REQUIRED));
116+
out.add(new ClouderaHostRAMChartTask(startDate, endDate, DAILY, REQUIRED));
117+
out.add(new ClouderaServiceResourceAllocationChartTask(startDate, endDate, HOURLY, OPTIONAL));
115118
out.add(new ClouderaYarnApplicationsTask(startDate, endDate, OPTIONAL));
116119
out.add(new ClouderaYarnApplicationTypeTask(startDate, endDate, OPTIONAL));
117120
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright 2022-2025 Google LLC
3+
* Copyright 2013-2021 CompilerWorks
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager;
18+
19+
import com.fasterxml.jackson.databind.JsonNode;
20+
import com.google.common.io.ByteSink;
21+
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
22+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
24+
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
25+
import java.io.Writer;
26+
import java.nio.charset.StandardCharsets;
27+
import java.time.ZonedDateTime;
28+
import java.util.ArrayList;
29+
import java.util.List;
30+
import javax.annotation.Nonnull;
31+
import org.slf4j.Logger;
32+
import org.slf4j.LoggerFactory;
33+
34+
/**
35+
* The task collects resource allocation metrics for <strong>all</strong> individual service roles
36+
* present on a given host from the Cloudera Manager <a
37+
* href="https://cldr2-aw-dl-gateway.cldr2-cd.svye-dcxb.a5.cloudera.site/static/apidocs/resource_TimeSeriesResource.html">TimeSeries
38+
* API</a>.
39+
*
40+
* <p>This class provides a comprehensive, component-by-component breakdown of resource consumption,
41+
* programmatically fetching the data that powers the stacked resource charts on a host's status
42+
* page in the Cloudera Manager UI (found under the "Charts" tab for a specific host).
43+
*
44+
* <p>Queries are written in the <a
45+
* href="https://docs.cloudera.com/documentation/enterprise/latest/topics/cm_dg_tsquery.html">tsquery</a>
46+
* language and are filtered by a specific {@code hostId}.
47+
*
48+
* <p>Key metrics collected include:
49+
*
50+
* <ul>
51+
* <li><b>Memory:</b> {@code mem_rss}
52+
* <li><b>CPU:</b> {@code cpu_user_rate} and {@code cpu_system_rate}
53+
* <li><b>Disk Throughput:</b> {@code read_bytes_rate} and {@code write_bytes_rate}
54+
* </ul>
55+
*/
56+
public class ClouderaServiceResourceAllocationChartTask extends AbstractClouderaTimeSeriesTask {
57+
58+
private static final Logger logger =
59+
LoggerFactory.getLogger(ClouderaServiceResourceAllocationChartTask.class);
60+
61+
private static final String SERVICE_RESOURCE_ALLOCATION_QUERY_TEMPLATE =
62+
"select mem_rss, cpu_user_rate, cpu_system_rate, read_bytes_rate, write_bytes_rate where category = \"ROLE\" AND hostId = \"%s\"";
63+
64+
public ClouderaServiceResourceAllocationChartTask(
65+
ZonedDateTime startDate,
66+
ZonedDateTime endDate,
67+
TimeSeriesAggregation tsAggregation,
68+
TaskCategory taskCategory) {
69+
super("service-resource-allocation.jsonl", startDate, endDate, tsAggregation, taskCategory);
70+
}
71+
72+
@Override
73+
protected void doRun(
74+
TaskRunContext context, @Nonnull ByteSink sink, @Nonnull ClouderaManagerHandle handle)
75+
throws Exception {
76+
List<ClouderaHostDTO> hosts = getHostsFromHandle(handle);
77+
78+
try (Writer writer = sink.asCharSink(StandardCharsets.UTF_8).openBufferedStream()) {
79+
for (ClouderaHostDTO host : hosts) {
80+
String resourceAllocationPerHostQuery =
81+
String.format(SERVICE_RESOURCE_ALLOCATION_QUERY_TEMPLATE, host.getId());
82+
logger.debug(
83+
"Execute service resource allocation charts query: [{}] for the host: [{}].",
84+
resourceAllocationPerHostQuery,
85+
host.getName());
86+
87+
JsonNode chartInJson = requestTimeSeriesChart(handle, resourceAllocationPerHostQuery);
88+
89+
writer.write(chartInJson.toString());
90+
writer.write('\n');
91+
}
92+
}
93+
}
94+
95+
private List<ClouderaHostDTO> getHostsFromHandle(@Nonnull ClouderaManagerHandle handle) {
96+
List<ClouderaHostDTO> hosts = handle.getHosts();
97+
if (hosts == null) {
98+
throw new MetadataDumperUsageException(
99+
"Cloudera hosts must be initialized before service resource allocation charts dumping.");
100+
}
101+
List<ClouderaHostDTO> validHosts = new ArrayList<>();
102+
for (ClouderaHostDTO host : hosts) {
103+
if (host.getId() == null) {
104+
logger.warn(
105+
"Cloudera host id is null for host [{}]. Skip resource allocation metrics for services belonging to this host.",
106+
host.getName());
107+
} else {
108+
validHosts.add(host);
109+
}
110+
}
111+
return validHosts;
112+
}
113+
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaYarnApplicationTypeTask.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@
4444
import org.slf4j.LoggerFactory;
4545

4646
public class ClouderaYarnApplicationTypeTask extends AbstractClouderaYarnApplicationTask {
47-
private static final Logger logger = LoggerFactory.getLogger(ClouderaYarnApplicationsTask.class);
47+
private static final Logger logger =
48+
LoggerFactory.getLogger(ClouderaYarnApplicationTypeTask.class);
4849

4950
private final ImmutableList<String> predefinedAppTypes =
5051
ImmutableList.of("MAPREDUCE", "SPARK", "Oozie Launcher");

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/AbstractClouderaTimeSeriesTaskTest.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import com.google.common.io.ByteSink;
2323
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
24+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2425
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2526
import java.io.IOException;
2627
import java.time.LocalDateTime;
@@ -39,7 +40,11 @@ public void doRun_missedAggregationParameter_throwsException() {
3940
NullPointerException.class,
4041
() ->
4142
new AbstractClouderaTimeSeriesTask(
42-
"some path", timeTravelDaysAgo(5), timeTravelDaysAgo(0), null) {
43+
"some path",
44+
timeTravelDaysAgo(5),
45+
timeTravelDaysAgo(0),
46+
null,
47+
TaskCategory.REQUIRED) {
4348
@Override
4449
protected void doRun(
4550
TaskRunContext context,
@@ -63,7 +68,8 @@ public void doRun_emptyDateRange_throwsException() throws IOException {
6368
"some path",
6469
timeTravelDaysAgo(5),
6570
timeTravelDaysAgo(8),
66-
TimeSeriesAggregation.DAILY) {
71+
TimeSeriesAggregation.DAILY,
72+
TaskCategory.REQUIRED) {
6773
@Override
6874
protected void doRun(
6975
TaskRunContext context,

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCPUChartTaskTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
4242
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
4343
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
44+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
4445
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
4546
import java.io.IOException;
4647
import java.io.Writer;
@@ -69,7 +70,10 @@
6970
public class ClouderaClusterCPUChartTaskTest {
7071
private final ClouderaClusterCPUChartTask task =
7172
new ClouderaClusterCPUChartTask(
72-
timeTravelDaysAgo(30), timeTravelDaysAgo(0), TimeSeriesAggregation.HOURLY);
73+
timeTravelDaysAgo(30),
74+
timeTravelDaysAgo(0),
75+
TimeSeriesAggregation.HOURLY,
76+
TaskCategory.REQUIRED);
7377
private ClouderaManagerHandle handle;
7478
private String servicesJson;
7579
private static WireMockServer server;

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRAMChartTaskTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
4242
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
4343
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
44+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
4445
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
4546
import java.io.IOException;
4647
import java.io.Writer;
@@ -66,7 +67,10 @@
6667
public class ClouderaHostRAMChartTaskTest {
6768
private final ClouderaHostRAMChartTask task =
6869
new ClouderaHostRAMChartTask(
69-
timeTravelDaysAgo(1), timeTravelDaysAgo(0), TimeSeriesAggregation.HOURLY);
70+
timeTravelDaysAgo(1),
71+
timeTravelDaysAgo(0),
72+
TimeSeriesAggregation.HOURLY,
73+
TaskCategory.REQUIRED);
7074
private ClouderaManagerHandle handle;
7175
private static WireMockServer server;
7276

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerConnectorTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public void addTasksTo_checkFilesCategory() throws Exception {
5555
ImmutableMap.of(
5656
"cluster-cpu.jsonl", TaskCategory.REQUIRED,
5757
"host-ram.jsonl", TaskCategory.REQUIRED,
58+
"service-resource-allocation.jsonl", TaskCategory.OPTIONAL,
5859
"yarn-applications.jsonl", TaskCategory.OPTIONAL,
5960
"yarn-application-types.jsonl", TaskCategory.OPTIONAL))
6061
.build();
@@ -87,6 +88,8 @@ public void addTasksTo_checkFileWasGeneratedByProperTask() throws Exception {
8788
"host-components.jsonl", ClouderaHostComponentsTask.class,
8889
"cluster-cpu.jsonl", ClouderaClusterCPUChartTask.class,
8990
"host-ram.jsonl", ClouderaHostRAMChartTask.class,
91+
"service-resource-allocation.jsonl",
92+
ClouderaServiceResourceAllocationChartTask.class,
9093
"yarn-applications.jsonl", ClouderaYarnApplicationsTask.class,
9194
"yarn-application-types.jsonl", ClouderaYarnApplicationTypeTask.class))
9295
.build();
@@ -128,6 +131,7 @@ public void addTasksTo_checkFilesCategoryWithCustomDateRange() throws Exception
128131
ImmutableMap.of(
129132
"cluster-cpu.jsonl", TaskCategory.REQUIRED,
130133
"host-ram.jsonl", TaskCategory.REQUIRED,
134+
"service-resource-allocation.jsonl", TaskCategory.OPTIONAL,
131135
"yarn-applications.jsonl", TaskCategory.OPTIONAL,
132136
"yarn-application-types.jsonl", TaskCategory.OPTIONAL))
133137
.build();
@@ -169,6 +173,8 @@ public void addTasksTo_checkFileWasGeneratedByProperTaskWithCustomDateRange() th
169173
ImmutableMap.of(
170174
"cluster-cpu.jsonl", ClouderaClusterCPUChartTask.class,
171175
"host-ram.jsonl", ClouderaHostRAMChartTask.class,
176+
"service-resource-allocation.jsonl",
177+
ClouderaServiceResourceAllocationChartTask.class,
172178
"yarn-applications.jsonl", ClouderaYarnApplicationsTask.class,
173179
"yarn-application-types.jsonl", ClouderaYarnApplicationTypeTask.class))
174180
.build();

0 commit comments

Comments
 (0)