Skip to content

Commit d4aa125

Browse files
[b455552691] Collect dynamic resource allocation of Cloudera services
Create a new task to iterate over Cloudera hosts and collect dynamic resource allocation of their services calling timeseries endpoint. It queries memory, cpu and disk I/O statistics in hourly manner and stores the results in `service-resource-allocation.jsonl` file.
1 parent 8980efb commit d4aa125

File tree

13 files changed

+1994
-14
lines changed

13 files changed

+1994
-14
lines changed

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/AbstractClouderaTimeSeriesTask.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import com.fasterxml.jackson.databind.JsonNode;
2020
import com.google.common.base.Preconditions;
21+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2122
import java.net.URI;
2223
import java.time.ZonedDateTime;
2324
import java.time.format.DateTimeFormatter;
@@ -33,12 +34,14 @@ abstract class AbstractClouderaTimeSeriesTask extends AbstractClouderaManagerTas
3334
private final ZonedDateTime startDate;
3435
private final ZonedDateTime endDate;
3536
private final TimeSeriesAggregation tsAggregation;
37+
private final TaskCategory taskCategory;
3638

3739
public AbstractClouderaTimeSeriesTask(
3840
@Nonnull String targetPath,
3941
@Nonnull ZonedDateTime startDate,
4042
@Nonnull ZonedDateTime endDate,
41-
@Nonnull TimeSeriesAggregation tsAggregation) {
43+
@Nonnull TimeSeriesAggregation tsAggregation,
44+
@Nonnull TaskCategory taskCategory) {
4245
super(targetPath);
4346
Preconditions.checkNotNull(targetPath, "Target path must be not null.");
4447
Preconditions.checkState(!targetPath.isEmpty(), "Target file path must be not empty.");
@@ -50,6 +53,13 @@ public AbstractClouderaTimeSeriesTask(
5053
this.startDate = startDate;
5154
this.endDate = endDate;
5255
this.tsAggregation = tsAggregation;
56+
this.taskCategory = taskCategory;
57+
}
58+
59+
@Nonnull
60+
@Override
61+
public TaskCategory getCategory() {
62+
return taskCategory;
5363
}
5464

5565
protected JsonNode requestTimeSeriesChart(ClouderaManagerHandle handle, String query)

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCPUChartTask.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.io.ByteSink;
2121
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2324
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2425
import java.io.Writer;
2526
import java.nio.charset.StandardCharsets;
@@ -40,13 +41,16 @@
4041
* language.
4142
*/
4243
public class ClouderaClusterCPUChartTask extends AbstractClouderaTimeSeriesTask {
43-
private static final Logger logger = LoggerFactory.getLogger(ClouderaCMFHostsTask.class);
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaClusterCPUChartTask.class);
4445
private static final String TS_CPU_QUERY_TEMPLATE =
4546
"SELECT cpu_percent_across_hosts WHERE entityName = \"%s\" AND category = CLUSTER";
4647

4748
public ClouderaClusterCPUChartTask(
48-
ZonedDateTime startDate, ZonedDateTime endDate, TimeSeriesAggregation tsAggregation) {
49-
super("cluster-cpu.jsonl", startDate, endDate, tsAggregation);
49+
ZonedDateTime startDate,
50+
ZonedDateTime endDate,
51+
TimeSeriesAggregation tsAggregation,
52+
TaskCategory taskCategory) {
53+
super("cluster-cpu.jsonl", startDate, endDate, tsAggregation, taskCategory);
5054
}
5155

5256
@Override

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRAMChartTask.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.io.ByteSink;
2121
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2222
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2324
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2425
import java.io.Writer;
2526
import java.nio.charset.StandardCharsets;
@@ -40,14 +41,17 @@
4041
*/
4142
public class ClouderaHostRAMChartTask extends AbstractClouderaTimeSeriesTask {
4243

43-
private static final Logger logger = LoggerFactory.getLogger(ClouderaCMFHostsTask.class);
44+
private static final Logger logger = LoggerFactory.getLogger(ClouderaHostRAMChartTask.class);
4445

4546
private static final String TS_RAM_QUERY_TEMPLATE =
4647
"select swap_used, physical_memory_used, physical_memory_total, physical_memory_cached, physical_memory_buffers where entityName = \"%s\"";
4748

4849
public ClouderaHostRAMChartTask(
49-
ZonedDateTime startDate, ZonedDateTime endDate, TimeSeriesAggregation tsAggregation) {
50-
super("host-ram.jsonl", startDate, endDate, tsAggregation);
50+
ZonedDateTime startDate,
51+
ZonedDateTime endDate,
52+
TimeSeriesAggregation tsAggregation,
53+
TaskCategory taskCategory) {
54+
super("host-ram.jsonl", startDate, endDate, tsAggregation, taskCategory);
5155
}
5256

5357
@Override

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerConnector.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
import static com.google.edwmigration.dumper.application.dumper.connector.Connector.validateDateRange;
2020
import static com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation.DAILY;
21+
import static com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation.HOURLY;
2122
import static com.google.edwmigration.dumper.application.dumper.task.TaskCategory.OPTIONAL;
23+
import static com.google.edwmigration.dumper.application.dumper.task.TaskCategory.REQUIRED;
2224

2325
import com.google.auto.service.AutoService;
2426
import com.google.common.base.Preconditions;
@@ -110,8 +112,9 @@ public void addTasksTo(@Nonnull List<? super Task<?>> out, @Nonnull ConnectorArg
110112
endDate = arguments.getEndDate();
111113
}
112114

113-
out.add(new ClouderaClusterCPUChartTask(startDate, endDate, DAILY));
114-
out.add(new ClouderaHostRAMChartTask(startDate, endDate, DAILY));
115+
out.add(new ClouderaClusterCPUChartTask(startDate, endDate, DAILY, REQUIRED));
116+
out.add(new ClouderaHostRAMChartTask(startDate, endDate, DAILY, REQUIRED));
117+
out.add(new ClouderaServiceResourceAllocationChartTask(startDate, endDate, HOURLY, OPTIONAL));
115118
out.add(new ClouderaYarnApplicationsTask(startDate, endDate, OPTIONAL));
116119
out.add(new ClouderaYarnApplicationTypeTask(startDate, endDate, OPTIONAL));
117120
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright 2022-2025 Google LLC
3+
* Copyright 2013-2021 CompilerWorks
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager;
18+
19+
import com.fasterxml.jackson.databind.JsonNode;
20+
import com.google.common.io.ByteSink;
21+
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
22+
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
23+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
24+
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
25+
import java.io.Writer;
26+
import java.nio.charset.StandardCharsets;
27+
import java.time.ZonedDateTime;
28+
import java.util.ArrayList;
29+
import java.util.List;
30+
import javax.annotation.Nonnull;
31+
import org.slf4j.Logger;
32+
import org.slf4j.LoggerFactory;
33+
34+
/**
35+
* The task collects resource allocation metrics for <strong>all</strong> individual service roles
36+
* present on a given host from the Cloudera Manager <a
37+
* href="https://cldr2-aw-dl-gateway.cldr2-cd.svye-dcxb.a5.cloudera.site/static/apidocs/resource_TimeSeriesResource.html">TimeSeries
38+
* API</a>.
39+
*
40+
* <p>This class provides a comprehensive, component-by-component breakdown of resource consumption,
41+
* programmatically fetching the data that powers the stacked resource charts on a host's status
42+
* page in the Cloudera Manager UI (found under the "Charts" tab for a specific host).
43+
*
44+
* <p>Queries are written in the <a
45+
* href="https://docs.cloudera.com/documentation/enterprise/latest/topics/cm_dg_tsquery.html">tsquery</a>
46+
* language and are filtered by a specific {@code hostId}.
47+
*
48+
* <p>Key metrics collected include:
49+
*
50+
* <ul>
51+
* <li><b>Memory:</b> {@code mem_rss}
52+
* <li><b>CPU:</b> {@code cpu_user_rate} and {@code cpu_system_rate}
53+
* <li><b>Disk I/O:</b> {@code read_ios_rate} and {@code write_ios_rate}
54+
* </ul>
55+
*/
56+
public class ClouderaServiceResourceAllocationChartTask extends AbstractClouderaTimeSeriesTask {
57+
58+
private static final Logger logger =
59+
LoggerFactory.getLogger(ClouderaServiceResourceAllocationChartTask.class);
60+
61+
private static final String SERVICE_RESOURCE_ALLOCATION_QUERY_TEMPLATE =
62+
"select mem_rss, cpu_user_rate, cpu_system_rate, read_ios_rate, write_ios_rate where category = \"ROLE\" AND hostId = \"%s\"";
63+
64+
public ClouderaServiceResourceAllocationChartTask(
65+
ZonedDateTime startDate,
66+
ZonedDateTime endDate,
67+
TimeSeriesAggregation tsAggregation,
68+
TaskCategory taskCategory) {
69+
super("service-resource-allocation.jsonl", startDate, endDate, tsAggregation, taskCategory);
70+
}
71+
72+
@Override
73+
protected void doRun(
74+
TaskRunContext context, @Nonnull ByteSink sink, @Nonnull ClouderaManagerHandle handle)
75+
throws Exception {
76+
List<ClouderaHostDTO> hosts = getHostsFromHandle(handle);
77+
78+
try (Writer writer = sink.asCharSink(StandardCharsets.UTF_8).openBufferedStream()) {
79+
for (ClouderaHostDTO host : hosts) {
80+
String resourceAllocationPerHostQuery =
81+
String.format(SERVICE_RESOURCE_ALLOCATION_QUERY_TEMPLATE, host.getId());
82+
logger.debug(
83+
"Execute service resource allocation charts query: [{}] for the host: [{}].",
84+
resourceAllocationPerHostQuery,
85+
host.getName());
86+
87+
JsonNode chartInJson = requestTimeSeriesChart(handle, resourceAllocationPerHostQuery);
88+
89+
writer.write(chartInJson.toString());
90+
writer.write('\n');
91+
}
92+
}
93+
}
94+
95+
private List<ClouderaHostDTO> getHostsFromHandle(@Nonnull ClouderaManagerHandle handle) {
96+
List<ClouderaHostDTO> hosts = handle.getHosts();
97+
if (hosts == null) {
98+
throw new MetadataDumperUsageException(
99+
"Cloudera hosts must be initialized before service resource allocation charts dumping.");
100+
}
101+
List<ClouderaHostDTO> validHosts = new ArrayList<>();
102+
for (ClouderaHostDTO host : hosts) {
103+
if (host.getId() == null) {
104+
logger.warn(
105+
"Cloudera host id is null for host [{}]. Skip resource allocation metrics for services belonging to this host.",
106+
host.getName());
107+
} else {
108+
validHosts.add(host);
109+
}
110+
}
111+
return validHosts;
112+
}
113+
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaYarnApplicationTypeTask.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@
4444
import org.slf4j.LoggerFactory;
4545

4646
public class ClouderaYarnApplicationTypeTask extends AbstractClouderaYarnApplicationTask {
47-
private static final Logger logger = LoggerFactory.getLogger(ClouderaYarnApplicationsTask.class);
47+
private static final Logger logger =
48+
LoggerFactory.getLogger(ClouderaYarnApplicationTypeTask.class);
4849

4950
private final ImmutableList<String> predefinedAppTypes =
5051
ImmutableList.of("MAPREDUCE", "SPARK", "Oozie Launcher");

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/AbstractClouderaTimeSeriesTaskTest.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import com.google.common.io.ByteSink;
2323
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
24+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
2425
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
2526
import java.io.IOException;
2627
import java.time.LocalDateTime;
@@ -39,7 +40,11 @@ public void doRun_missedAggregationParameter_throwsException() {
3940
NullPointerException.class,
4041
() ->
4142
new AbstractClouderaTimeSeriesTask(
42-
"some path", timeTravelDaysAgo(5), timeTravelDaysAgo(0), null) {
43+
"some path",
44+
timeTravelDaysAgo(5),
45+
timeTravelDaysAgo(0),
46+
null,
47+
TaskCategory.REQUIRED) {
4348
@Override
4449
protected void doRun(
4550
TaskRunContext context,
@@ -63,7 +68,8 @@ public void doRun_emptyDateRange_throwsException() throws IOException {
6368
"some path",
6469
timeTravelDaysAgo(5),
6570
timeTravelDaysAgo(8),
66-
TimeSeriesAggregation.DAILY) {
71+
TimeSeriesAggregation.DAILY,
72+
TaskCategory.REQUIRED) {
6773
@Override
6874
protected void doRun(
6975
TaskRunContext context,

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaClusterCPUChartTaskTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
4242
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
4343
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaClusterDTO;
44+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
4445
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
4546
import java.io.IOException;
4647
import java.io.Writer;
@@ -69,7 +70,10 @@
6970
public class ClouderaClusterCPUChartTaskTest {
7071
private final ClouderaClusterCPUChartTask task =
7172
new ClouderaClusterCPUChartTask(
72-
timeTravelDaysAgo(30), timeTravelDaysAgo(0), TimeSeriesAggregation.HOURLY);
73+
timeTravelDaysAgo(30),
74+
timeTravelDaysAgo(0),
75+
TimeSeriesAggregation.HOURLY,
76+
TaskCategory.REQUIRED);
7377
private ClouderaManagerHandle handle;
7478
private String servicesJson;
7579
private static WireMockServer server;

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaHostRAMChartTaskTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
4242
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.AbstractClouderaTimeSeriesTask.TimeSeriesAggregation;
4343
import com.google.edwmigration.dumper.application.dumper.connector.cloudera.manager.ClouderaManagerHandle.ClouderaHostDTO;
44+
import com.google.edwmigration.dumper.application.dumper.task.TaskCategory;
4445
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
4546
import java.io.IOException;
4647
import java.io.Writer;
@@ -66,7 +67,10 @@
6667
public class ClouderaHostRAMChartTaskTest {
6768
private final ClouderaHostRAMChartTask task =
6869
new ClouderaHostRAMChartTask(
69-
timeTravelDaysAgo(1), timeTravelDaysAgo(0), TimeSeriesAggregation.HOURLY);
70+
timeTravelDaysAgo(1),
71+
timeTravelDaysAgo(0),
72+
TimeSeriesAggregation.HOURLY,
73+
TaskCategory.REQUIRED);
7074
private ClouderaManagerHandle handle;
7175
private static WireMockServer server;
7276

dumper/app/src/test/java/com/google/edwmigration/dumper/application/dumper/connector/cloudera/manager/ClouderaManagerConnectorTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public void addTasksTo_checkFilesCategory() throws Exception {
5555
ImmutableMap.of(
5656
"cluster-cpu.jsonl", TaskCategory.REQUIRED,
5757
"host-ram.jsonl", TaskCategory.REQUIRED,
58+
"service-resource-allocation.jsonl", TaskCategory.OPTIONAL,
5859
"yarn-applications.jsonl", TaskCategory.OPTIONAL,
5960
"yarn-application-types.jsonl", TaskCategory.OPTIONAL))
6061
.build();
@@ -87,6 +88,8 @@ public void addTasksTo_checkFileWasGeneratedByProperTask() throws Exception {
8788
"host-components.jsonl", ClouderaHostComponentsTask.class,
8889
"cluster-cpu.jsonl", ClouderaClusterCPUChartTask.class,
8990
"host-ram.jsonl", ClouderaHostRAMChartTask.class,
91+
"service-resource-allocation.jsonl",
92+
ClouderaServiceResourceAllocationChartTask.class,
9093
"yarn-applications.jsonl", ClouderaYarnApplicationsTask.class,
9194
"yarn-application-types.jsonl", ClouderaYarnApplicationTypeTask.class))
9295
.build();
@@ -128,6 +131,7 @@ public void addTasksTo_checkFilesCategoryWithCustomDateRange() throws Exception
128131
ImmutableMap.of(
129132
"cluster-cpu.jsonl", TaskCategory.REQUIRED,
130133
"host-ram.jsonl", TaskCategory.REQUIRED,
134+
"service-resource-allocation.jsonl", TaskCategory.OPTIONAL,
131135
"yarn-applications.jsonl", TaskCategory.OPTIONAL,
132136
"yarn-application-types.jsonl", TaskCategory.OPTIONAL))
133137
.build();
@@ -169,6 +173,8 @@ public void addTasksTo_checkFileWasGeneratedByProperTaskWithCustomDateRange() th
169173
ImmutableMap.of(
170174
"cluster-cpu.jsonl", ClouderaClusterCPUChartTask.class,
171175
"host-ram.jsonl", ClouderaHostRAMChartTask.class,
176+
"service-resource-allocation.jsonl",
177+
ClouderaServiceResourceAllocationChartTask.class,
172178
"yarn-applications.jsonl", ClouderaYarnApplicationsTask.class,
173179
"yarn-application-types.jsonl", ClouderaYarnApplicationTypeTask.class))
174180
.build();

0 commit comments

Comments
 (0)