Skip to content

Commit 919bd18

Browse files
authored
HDFS-17641. Add badly distributed blocks metric (#7123)
1 parent 2ab33c6 commit 919bd18

File tree

23 files changed

+218
-42
lines changed

23 files changed

+218
-42
lines changed

hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

+1
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf
304304
| `StaleDataNodes` | Current number of DataNodes marked stale due to delayed heartbeat |
305305
| `NumStaleStorages` | Number of storages marked as content stale (after NameNode restart/failover before first block report is received) |
306306
| `MissingReplOneBlocks` | Current number of missing blocks with replication factor 1 |
307+
| `BadlyDistributedBlocks` | Current number of blocks that are badly distributed across racks. |
307308
| `HighestPriorityLowRedundancyReplicatedBlocks` | Current number of non-corrupt, low redundancy replicated blocks with the highest risk of loss (have 0 or 1 replica). Will be recovered with the highest priority. |
308309
| `HighestPriorityLowRedundancyECBlocks` | Current number of non-corrupt, low redundancy EC blocks with the highest risk of loss. Will be recovered with the highest priority. |
309310
| `NumFilesUnderConstruction` | Current number of files under construction |

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java

+20-6
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,28 @@ public final class ECBlockGroupStats {
3838
private final long missingBlockGroups;
3939
private final long bytesInFutureBlockGroups;
4040
private final long pendingDeletionBlocks;
41+
private final long badlyDistributedBlocks;
4142
private final Long highestPriorityLowRedundancyBlocks;
4243

4344
public ECBlockGroupStats(long lowRedundancyBlockGroups,
4445
long corruptBlockGroups, long missingBlockGroups,
45-
long bytesInFutureBlockGroups, long pendingDeletionBlocks) {
46+
long bytesInFutureBlockGroups, long pendingDeletionBlocks,
47+
long badlyDistributedBlocks) {
4648
this(lowRedundancyBlockGroups, corruptBlockGroups, missingBlockGroups,
47-
bytesInFutureBlockGroups, pendingDeletionBlocks, null);
49+
bytesInFutureBlockGroups, pendingDeletionBlocks,
50+
badlyDistributedBlocks, null);
4851
}
4952

5053
public ECBlockGroupStats(long lowRedundancyBlockGroups,
5154
long corruptBlockGroups, long missingBlockGroups,
5255
long bytesInFutureBlockGroups, long pendingDeletionBlocks,
53-
Long highestPriorityLowRedundancyBlocks) {
56+
long badlyDistributedBlocks, Long highestPriorityLowRedundancyBlocks) {
5457
this.lowRedundancyBlockGroups = lowRedundancyBlockGroups;
5558
this.corruptBlockGroups = corruptBlockGroups;
5659
this.missingBlockGroups = missingBlockGroups;
5760
this.bytesInFutureBlockGroups = bytesInFutureBlockGroups;
5861
this.pendingDeletionBlocks = pendingDeletionBlocks;
62+
this.badlyDistributedBlocks = badlyDistributedBlocks;
5963
this.highestPriorityLowRedundancyBlocks
6064
= highestPriorityLowRedundancyBlocks;
6165
}
@@ -80,6 +84,10 @@ public long getPendingDeletionBlocks() {
8084
return pendingDeletionBlocks;
8185
}
8286

87+
public long getBadlyDistributedBlocks() {
88+
return badlyDistributedBlocks;
89+
}
90+
8391
public boolean hasHighestPriorityLowRedundancyBlocks() {
8492
return getHighestPriorityLowRedundancyBlocks() != null;
8593
}
@@ -99,7 +107,8 @@ public String toString() {
99107
.append(", BytesInFutureBlockGroups=").append(
100108
getBytesInFutureBlockGroups())
101109
.append(", PendingDeletionBlocks=").append(
102-
getPendingDeletionBlocks());
110+
getPendingDeletionBlocks())
111+
.append(", BadlyDistributedBlocks=").append(getBadlyDistributedBlocks());
103112
if (hasHighestPriorityLowRedundancyBlocks()) {
104113
statsBuilder.append(", HighestPriorityLowRedundancyBlocks=")
105114
.append(getHighestPriorityLowRedundancyBlocks());
@@ -116,6 +125,7 @@ public int hashCode() {
116125
.append(missingBlockGroups)
117126
.append(bytesInFutureBlockGroups)
118127
.append(pendingDeletionBlocks)
128+
.append(badlyDistributedBlocks)
119129
.append(highestPriorityLowRedundancyBlocks)
120130
.toHashCode();
121131
}
@@ -135,6 +145,7 @@ public boolean equals(Object o) {
135145
.append(missingBlockGroups, other.missingBlockGroups)
136146
.append(bytesInFutureBlockGroups, other.bytesInFutureBlockGroups)
137147
.append(pendingDeletionBlocks, other.pendingDeletionBlocks)
148+
.append(badlyDistributedBlocks, other.badlyDistributedBlocks)
138149
.append(highestPriorityLowRedundancyBlocks,
139150
other.highestPriorityLowRedundancyBlocks)
140151
.isEquals();
@@ -151,6 +162,7 @@ public static ECBlockGroupStats merge(Collection<ECBlockGroupStats> stats) {
151162
long missingBlockGroups = 0;
152163
long bytesInFutureBlockGroups = 0;
153164
long pendingDeletionBlocks = 0;
165+
long badlyDistributedBlocks = 0;
154166
long highestPriorityLowRedundancyBlocks = 0;
155167
boolean hasHighestPriorityLowRedundancyBlocks = false;
156168

@@ -160,6 +172,7 @@ public static ECBlockGroupStats merge(Collection<ECBlockGroupStats> stats) {
160172
missingBlockGroups += stat.getMissingBlockGroups();
161173
bytesInFutureBlockGroups += stat.getBytesInFutureBlockGroups();
162174
pendingDeletionBlocks += stat.getPendingDeletionBlocks();
175+
badlyDistributedBlocks += stat.getBadlyDistributedBlocks();
163176
if (stat.hasHighestPriorityLowRedundancyBlocks()) {
164177
hasHighestPriorityLowRedundancyBlocks = true;
165178
highestPriorityLowRedundancyBlocks +=
@@ -169,9 +182,10 @@ public static ECBlockGroupStats merge(Collection<ECBlockGroupStats> stats) {
169182
if (hasHighestPriorityLowRedundancyBlocks) {
170183
return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups,
171184
missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks,
172-
highestPriorityLowRedundancyBlocks);
185+
badlyDistributedBlocks, highestPriorityLowRedundancyBlocks);
173186
}
174187
return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups,
175-
missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks);
188+
missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks,
189+
badlyDistributedBlocks);
176190
}
177191
}

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java

+17-6
Original file line numberDiff line numberDiff line change
@@ -37,27 +37,30 @@ public final class ReplicatedBlockStats {
3737
private final long missingReplicationOneBlocks;
3838
private final long bytesInFutureBlocks;
3939
private final long pendingDeletionBlocks;
40+
private final long badlyDistributedBlocks;
4041
private final Long highestPriorityLowRedundancyBlocks;
4142

4243
public ReplicatedBlockStats(long lowRedundancyBlocks,
4344
long corruptBlocks, long missingBlocks,
4445
long missingReplicationOneBlocks, long bytesInFutureBlocks,
45-
long pendingDeletionBlocks) {
46+
long pendingDeletionBlocks, long badlyDistributedBlocks) {
4647
this(lowRedundancyBlocks, corruptBlocks, missingBlocks,
4748
missingReplicationOneBlocks, bytesInFutureBlocks, pendingDeletionBlocks,
48-
null);
49+
badlyDistributedBlocks, null);
4950
}
5051

5152
public ReplicatedBlockStats(long lowRedundancyBlocks,
5253
long corruptBlocks, long missingBlocks,
5354
long missingReplicationOneBlocks, long bytesInFutureBlocks,
54-
long pendingDeletionBlocks, Long highestPriorityLowRedundancyBlocks) {
55+
long pendingDeletionBlocks, long badlyDistributedBlocks,
56+
Long highestPriorityLowRedundancyBlocks) {
5557
this.lowRedundancyBlocks = lowRedundancyBlocks;
5658
this.corruptBlocks = corruptBlocks;
5759
this.missingBlocks = missingBlocks;
5860
this.missingReplicationOneBlocks = missingReplicationOneBlocks;
5961
this.bytesInFutureBlocks = bytesInFutureBlocks;
6062
this.pendingDeletionBlocks = pendingDeletionBlocks;
63+
this.badlyDistributedBlocks = badlyDistributedBlocks;
6164
this.highestPriorityLowRedundancyBlocks
6265
= highestPriorityLowRedundancyBlocks;
6366
}
@@ -86,6 +89,10 @@ public long getPendingDeletionBlocks() {
8689
return pendingDeletionBlocks;
8790
}
8891

92+
public long getBadlyDistributedBlocks() {
93+
return badlyDistributedBlocks;
94+
}
95+
8996
public boolean hasHighestPriorityLowRedundancyBlocks() {
9097
return getHighestPriorityLowRedundancyBlocks() != null;
9198
}
@@ -94,6 +101,7 @@ public Long getHighestPriorityLowRedundancyBlocks(){
94101
return highestPriorityLowRedundancyBlocks;
95102
}
96103

104+
97105
@Override
98106
public String toString() {
99107
StringBuilder statsBuilder = new StringBuilder();
@@ -105,7 +113,8 @@ public String toString() {
105113
getMissingReplicationOneBlocks())
106114
.append(", BytesInFutureBlocks=").append(getBytesInFutureBlocks())
107115
.append(", PendingDeletionBlocks=").append(
108-
getPendingDeletionBlocks());
116+
getPendingDeletionBlocks())
117+
.append(", BadlyDistributedBlocks=").append(getBadlyDistributedBlocks());
109118
if (hasHighestPriorityLowRedundancyBlocks()) {
110119
statsBuilder.append(", HighestPriorityLowRedundancyBlocks=").append(
111120
getHighestPriorityLowRedundancyBlocks());
@@ -127,6 +136,7 @@ public static ReplicatedBlockStats merge(
127136
long missingReplicationOneBlocks = 0;
128137
long bytesInFutureBlocks = 0;
129138
long pendingDeletionBlocks = 0;
139+
long badlyDistributedBlocks = 0;
130140
long highestPriorityLowRedundancyBlocks = 0;
131141
boolean hasHighestPriorityLowRedundancyBlocks = false;
132142

@@ -138,6 +148,7 @@ public static ReplicatedBlockStats merge(
138148
missingReplicationOneBlocks += stat.getMissingReplicationOneBlocks();
139149
bytesInFutureBlocks += stat.getBytesInFutureBlocks();
140150
pendingDeletionBlocks += stat.getPendingDeletionBlocks();
151+
badlyDistributedBlocks += stat.getBadlyDistributedBlocks();
141152
if (stat.hasHighestPriorityLowRedundancyBlocks()) {
142153
hasHighestPriorityLowRedundancyBlocks = true;
143154
highestPriorityLowRedundancyBlocks +=
@@ -147,10 +158,10 @@ public static ReplicatedBlockStats merge(
147158
if (hasHighestPriorityLowRedundancyBlocks) {
148159
return new ReplicatedBlockStats(lowRedundancyBlocks, corruptBlocks,
149160
missingBlocks, missingReplicationOneBlocks, bytesInFutureBlocks,
150-
pendingDeletionBlocks, highestPriorityLowRedundancyBlocks);
161+
pendingDeletionBlocks, badlyDistributedBlocks, highestPriorityLowRedundancyBlocks);
151162
}
152163
return new ReplicatedBlockStats(lowRedundancyBlocks, corruptBlocks,
153164
missingBlocks, missingReplicationOneBlocks, bytesInFutureBlocks,
154-
pendingDeletionBlocks);
165+
pendingDeletionBlocks, badlyDistributedBlocks);
155166
}
156167
}

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java

+9-4
Original file line numberDiff line numberDiff line change
@@ -2037,13 +2037,13 @@ public static ReplicatedBlockStats convert(
20372037
return new ReplicatedBlockStats(res.getLowRedundancy(),
20382038
res.getCorruptBlocks(), res.getMissingBlocks(),
20392039
res.getMissingReplOneBlocks(), res.getBlocksInFuture(),
2040-
res.getPendingDeletionBlocks(),
2040+
res.getPendingDeletionBlocks(), res.getBadlyDistributedBlocks(),
20412041
res.getHighestPrioLowRedundancyBlocks());
20422042
}
20432043
return new ReplicatedBlockStats(res.getLowRedundancy(),
20442044
res.getCorruptBlocks(), res.getMissingBlocks(),
20452045
res.getMissingReplOneBlocks(), res.getBlocksInFuture(),
2046-
res.getPendingDeletionBlocks());
2046+
res.getPendingDeletionBlocks(), res.getBadlyDistributedBlocks());
20472047
}
20482048

20492049
public static ECBlockGroupStats convert(
@@ -2052,11 +2052,12 @@ public static ECBlockGroupStats convert(
20522052
return new ECBlockGroupStats(res.getLowRedundancy(),
20532053
res.getCorruptBlocks(), res.getMissingBlocks(),
20542054
res.getBlocksInFuture(), res.getPendingDeletionBlocks(),
2055-
res.getHighestPrioLowRedundancyBlocks());
2055+
res.getBadlyDistributedBlocks(), res.getHighestPrioLowRedundancyBlocks());
20562056
}
20572057
return new ECBlockGroupStats(res.getLowRedundancy(),
20582058
res.getCorruptBlocks(), res.getMissingBlocks(),
2059-
res.getBlocksInFuture(), res.getPendingDeletionBlocks());
2059+
res.getBlocksInFuture(), res.getPendingDeletionBlocks(),
2060+
res.getBadlyDistributedBlocks());
20602061
}
20612062

20622063
public static DatanodeReportTypeProto convert(DatanodeReportType t) {
@@ -2525,6 +2526,8 @@ public static GetFsReplicatedBlockStatsResponseProto convert(
25252526
replicatedBlockStats.getBytesInFutureBlocks());
25262527
result.setPendingDeletionBlocks(
25272528
replicatedBlockStats.getPendingDeletionBlocks());
2529+
result.setBadlyDistributedBlocks(
2530+
replicatedBlockStats.getBadlyDistributedBlocks());
25282531
if (replicatedBlockStats.hasHighestPriorityLowRedundancyBlocks()) {
25292532
result.setHighestPrioLowRedundancyBlocks(
25302533
replicatedBlockStats.getHighestPriorityLowRedundancyBlocks());
@@ -2544,6 +2547,8 @@ public static GetFsECBlockGroupStatsResponseProto convert(
25442547
ecBlockGroupStats.getBytesInFutureBlockGroups());
25452548
result.setPendingDeletionBlocks(
25462549
ecBlockGroupStats.getPendingDeletionBlocks());
2550+
result.setBadlyDistributedBlocks(
2551+
ecBlockGroupStats.getBadlyDistributedBlocks());
25472552
if (ecBlockGroupStats.hasHighestPriorityLowRedundancyBlocks()) {
25482553
result.setHighestPrioLowRedundancyBlocks(
25492554
ecBlockGroupStats.getHighestPriorityLowRedundancyBlocks());

hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto

+2
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ message GetFsReplicatedBlockStatsResponseProto {
372372
required uint64 blocks_in_future = 5;
373373
required uint64 pending_deletion_blocks = 6;
374374
optional uint64 highest_prio_low_redundancy_blocks = 7;
375+
optional uint64 badly_distributed_blocks = 8;
375376

376377
}
377378

@@ -385,6 +386,7 @@ message GetFsECBlockGroupStatsResponseProto {
385386
required uint64 blocks_in_future = 4;
386387
required uint64 pending_deletion_blocks = 5;
387388
optional uint64 highest_prio_low_redundancy_blocks = 6;
389+
optional uint64 badly_distributed_blocks = 7;
388390
}
389391

390392
enum DatanodeReportTypeProto { // type of the datanode report

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java

+7
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,13 @@ public interface FederationMBean {
326326
*/
327327
long getNumberOfMissingBlocksWithReplicationFactorOne();
328328

329+
/**
330+
* Gets the total number of badly distributed blocks.
331+
*
332+
* @return the total number of badly distributed blocks.
333+
*/
334+
long getNumberOfBadlyDistributedBlocks();
335+
329336
/**
330337
* Gets the total number of replicated low redundancy blocks on the cluster
331338
* with the highest risk of loss.

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java

+10
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,16 @@ public long getNumberOfMissingBlocksWithReplicationFactorOne() {
384384
return 0;
385385
}
386386

387+
@Override
388+
public long getNumberOfBadlyDistributedBlocks() {
389+
try {
390+
return getRBFMetrics().getNumberOfBadlyDistributedBlocks();
391+
} catch (IOException e) {
392+
LOG.debug("Failed to get number of badly distributed blocks", e);
393+
}
394+
return 0;
395+
}
396+
387397
@Override
388398
public long getHighestPriorityLowRedundancyReplicatedBlocks() {
389399
try {

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java

+6
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,12 @@ public long getHighestPriorityLowRedundancyReplicatedBlocks() {
756756
MembershipStats::getHighestPriorityLowRedundancyReplicatedBlocks);
757757
}
758758

759+
@Override
760+
public long getNumberOfBadlyDistributedBlocks() {
761+
return getNameserviceAggregatedLong(
762+
MembershipStats::getNumberOfBadlyDistributedBlocks);
763+
}
764+
759765
@Override
760766
public long getHighestPriorityLowRedundancyECBlocks() {
761767
return getNameserviceAggregatedLong(

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java

+2
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,8 @@ public boolean registerNamenode(NamenodeStatusReport report)
363363
report.getScheduledReplicationBlocks());
364364
stats.setNumberOfMissingBlocksWithReplicationFactorOne(
365365
report.getNumberOfMissingBlocksWithReplicationFactorOne());
366+
stats.setNumberOfBadlyDistributedBlocks(
367+
report.getNumberOfBadlyDistributedBlocks());
366368
stats.setHighestPriorityLowRedundancyReplicatedBlocks(
367369
report.getHighestPriorityLowRedundancyReplicatedBlocks());
368370
stats.setHighestPriorityLowRedundancyECBlocks(

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java

+15
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public class NamenodeStatusReport {
7373
private int corruptFilesCount = -1;
7474
private long scheduledReplicationBlocks = -1;
7575
private long numberOfMissingBlocksWithReplicationFactorOne = -1;
76+
private long numberOfBadlyDistributedBlocks = -1;
7677
private long highestPriorityLowRedundancyReplicatedBlocks = -1;
7778
private long highestPriorityLowRedundancyECBlocks = -1;
7879
private int pendingSPSPaths = -1;
@@ -394,18 +395,22 @@ public void setNamesystemInfo(long available, long total,
394395
* @param numCorruptFiles number of corrupt files.
395396
* @param numOfMissingBlocksWithReplicationFactorOne number of missing
396397
* blocks with rep one.
398+
* @param numOfBadlyDistributedBlocks number of badly distributed blocks
397399
* @param highestPriorityLowRedundancyRepBlocks number of high priority low
398400
* redundancy rep blocks.
399401
* @param highPriorityLowRedundancyECBlocks number of high priority low
400402
* redundancy EC blocks.
401403
*/
402404
public void setNamenodeInfo(int numCorruptFiles,
403405
long numOfMissingBlocksWithReplicationFactorOne,
406+
long numOfBadlyDistributedBlocks,
404407
long highestPriorityLowRedundancyRepBlocks,
405408
long highPriorityLowRedundancyECBlocks) {
406409
this.corruptFilesCount = numCorruptFiles;
407410
this.numberOfMissingBlocksWithReplicationFactorOne =
408411
numOfMissingBlocksWithReplicationFactorOne;
412+
this.numberOfBadlyDistributedBlocks =
413+
numOfBadlyDistributedBlocks;
409414
this.highestPriorityLowRedundancyReplicatedBlocks =
410415
highestPriorityLowRedundancyRepBlocks;
411416
this.highestPriorityLowRedundancyECBlocks =
@@ -441,6 +446,16 @@ public long getNumberOfMissingBlocksWithReplicationFactorOne() {
441446
return this.numberOfMissingBlocksWithReplicationFactorOne;
442447
}
443448

449+
/**
450+
* Gets the total number of badly distributed blocks.
451+
*
452+
* @return the total number of badly distributed blocks.
453+
*/
454+
public long getNumberOfBadlyDistributedBlocks() {
455+
return this.numberOfBadlyDistributedBlocks;
456+
}
457+
458+
444459
/**
445460
* Gets the total number of replicated low redundancy blocks on the cluster
446461
* with the highest risk of loss.

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,8 @@ private void populateNamenodeInfoMetrics(JSONArray aux, NamenodeStatusReport rep
543543
.optLong("NumberOfMissingBlocksWithReplicationFactorOne"),
544+
jsonObject.optLong("BadlyDistributedBlocks"),
544545
jsonObject
545546
.optLong("HighestPriorityLowRedundancyReplicatedBlocks"),
546547
jsonObject.optLong("HighestPriorityLowRedundancyECBlocks"));
547548
}
548549
}
549550
}
@@ -608,4 +609,4 @@ protected void serviceStop() throws Exception {
608609
}
609610
super.serviceStop();
610611
}
611-
}
612+
}

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ public abstract void setNumberOfMissingBlocksWithReplicationFactorOne(
122122

123123
public abstract long getNumberOfMissingBlocksWithReplicationFactorOne();
124124

125+
public abstract void setNumberOfBadlyDistributedBlocks(
126+
long blocks);
127+
128+
public abstract long getNumberOfBadlyDistributedBlocks();
129+
125130
public abstract void setHighestPriorityLowRedundancyReplicatedBlocks(
126131
long blocks);
127132

@@ -171,4 +176,4 @@ public long getDateCreated() {
171176
// We don't store this record directly
172177
return 0;
173178
}
174-
}
179+
}

0 commit comments

Comments
 (0)