Commit b484a40

HDFS-17755. Enhance refreshVolumes usability and code structure.
1 parent 126c3d4 commit b484a40

4 files changed: +177 -78 lines changed


hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 5 additions & 0 deletions
@@ -676,6 +676,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
   public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
 
+  // Used to dynamically refresh the conf of data dirs
+  public static final String DFS_DATANODE_DATA_DIR_TO_ADD_KEY = "dfs.datanode.data.dir.to.add";
+  public static final String DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY =
+      "dfs.datanode.data.dir.to.remove";
+
   //Following keys have no defaults
   public static final String DFS_DATANODE_DATA_DIR_KEY =
       HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_DATA_DIR_KEY;
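
The two new keys express a delta against the current dfs.datanode.data.dir value rather than a full replacement list. Below is a minimal sketch of the intended semantics, assuming a build that includes this change; the /data/... paths are hypothetical and used only for illustration:

// Hedged sketch: resolves the effective volumes from the delta keys.
// The directory paths are made up for illustration only.
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class DataDirDeltaSketch {
  public static void main(String[] args) {
    HdfsConfiguration conf = new HdfsConfiguration();
    // Volumes the DataNode is already configured with.
    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, "/data/1,/data/2");
    // New delta keys: comma-separated directories to add or remove.
    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_ADD_KEY, "/data/3");
    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY, "/data/1");
    // With this change, getStorageLocations(conf) folds both delta keys into
    // the raw dfs.datanode.data.dir list, so this prints storage locations
    // for /data/2 and /data/3 (see the getStorageLocations hunk below).
    System.out.println(DataNode.getStorageLocations(conf));
  }
}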

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

Lines changed: 114 additions & 77 deletions
@@ -36,6 +36,8 @@
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_ADD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT;
@@ -138,7 +140,6 @@
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.UUID;
-import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -351,6 +352,8 @@ public class DataNode extends ReconfigurableBase
       Collections.unmodifiableList(
           Arrays.asList(
               DFS_DATANODE_DATA_DIR_KEY,
+              DFS_DATANODE_DATA_DIR_TO_ADD_KEY,
+              DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY,
               DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
               DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
               DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY,
@@ -639,34 +642,10 @@ protected Configuration getNewConf() {
   public String reconfigurePropertyImpl(String property, String newVal)
       throws ReconfigurationException {
     switch (property) {
-    case DFS_DATANODE_DATA_DIR_KEY: {
-      IOException rootException = null;
-      try {
-        LOG.info("Reconfiguring {} to {}", property, newVal);
-        this.refreshVolumes(newVal);
-        return getConf().get(DFS_DATANODE_DATA_DIR_KEY);
-      } catch (IOException e) {
-        rootException = e;
-      } finally {
-        // Send a full block report to let NN acknowledge the volume changes.
-        try {
-          triggerBlockReport(
-              new BlockReportOptions.Factory().setIncremental(false).build());
-        } catch (IOException e) {
-          LOG.warn("Exception while sending the block report after refreshing"
-              + " volumes {} to {}", property, newVal, e);
-          if (rootException == null) {
-            rootException = e;
-          }
-        } finally {
-          if (rootException != null) {
-            throw new ReconfigurationException(property, newVal,
-                getConf().get(property), rootException);
-          }
-        }
-      }
-      break;
-    }
+    case DFS_DATANODE_DATA_DIR_KEY:
+    case DFS_DATANODE_DATA_DIR_TO_ADD_KEY:
+    case DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY:
+      return reconfDataDirsParameters(property, newVal);
     case DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY: {
       ReconfigurationException rootException = null;
       try {
@@ -1079,6 +1058,43 @@ private String reconfSlowIoWarningThresholdParameters(String property, String ne
     }
   }
 
+  private String reconfDataDirsParameters(String property, String newVal)
+      throws ReconfigurationException {
+    LOG.info("Reconfiguring {} to {}", property, newVal);
+    Set<String> allDataDirs = new HashSet<>();
+    List<StorageLocation> storageLocations = getStorageLocations(getConf());
+    for (StorageLocation location : storageLocations) {
+      allDataDirs.add(location.getNormalizedUri().getPath());
+    }
+    if (property.equals(DFS_DATANODE_DATA_DIR_TO_ADD_KEY)) {
+      Set<String> toAdd = new HashSet<>(Arrays.asList(newVal.split(",")));
+      allDataDirs.addAll(toAdd);
+    } else if (property.equals(DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY)) {
+      Set<String> toRemove = new HashSet<>(Arrays.asList(newVal.split(",")));
+      allDataDirs.removeAll(toRemove);
+    }
+    String newDirs = String.join(",", allDataDirs);
+    IOException rootException = null;
+    try {
+      this.refreshVolumes(newDirs);
+      return getConf().get(DFS_DATANODE_DATA_DIR_KEY);
+    } catch (IOException e) {
+      rootException = e;
+    } finally {
+      // Send a full block report to let NN acknowledge the volume changes.
+      try {
+        triggerBlockReport(new BlockReportOptions.Factory().setIncremental(false).build());
+      } catch (IOException e) {
+        LOG.warn("Exception while sending the block report after refreshing volumes {} to {}",
+            property, newVal, e);
+        if (rootException == null) {
+          rootException = e;
+        }
+      }
+    }
+    throw new ReconfigurationException(property, newVal, getConf().get(property), rootException);
+  }
+
   /**
    * Get a list of the keys of the re-configurable properties in configuration.
    */
@@ -1286,10 +1302,9 @@ private void refreshVolumes(String newVolumes) throws IOException {
     for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) {
       nsInfos.add(bpos.getNamespaceInfo());
     }
-    synchronized(this) {
+    synchronized (this) {
       Configuration conf = getConf();
       conf.set(DFS_DATANODE_DATA_DIR_KEY, newVolumes);
-      ExecutorService service = null;
       int numOldDataDirs = dataDirs.size();
       ChangedVolumes changedVolumes = parseChangedVolumes(newVolumes);
       StringBuilder errorMessageBuilder = new StringBuilder();
@@ -1305,49 +1320,9 @@ private void refreshVolumes(String newVolumes) throws IOException {
         throw new IOException("Attempt to remove all volumes.");
       }
       if (!changedVolumes.newLocations.isEmpty()) {
-        LOG.info("Adding new volumes: {}",
-            Joiner.on(",").join(changedVolumes.newLocations));
-
-        service = Executors
-            .newFixedThreadPool(changedVolumes.newLocations.size());
-        List<Future<IOException>> exceptions = Lists.newArrayList();
-
-        checkStorageState("refreshVolumes");
-        for (final StorageLocation location : changedVolumes.newLocations) {
-          exceptions.add(service.submit(new Callable<IOException>() {
-            @Override
-            public IOException call() {
-              try {
-                data.addVolume(location, nsInfos);
-              } catch (IOException e) {
-                return e;
-              }
-              return null;
-            }
-          }));
-        }
-
-        for (int i = 0; i < changedVolumes.newLocations.size(); i++) {
-          StorageLocation volume = changedVolumes.newLocations.get(i);
-          Future<IOException> ioExceptionFuture = exceptions.get(i);
-          try {
-            IOException ioe = ioExceptionFuture.get();
-            if (ioe != null) {
-              errorMessageBuilder.append(
-                  String.format("FAILED TO ADD: %s: %s%n",
-                      volume, ioe.getMessage()));
-              LOG.error("Failed to add volume: {}", volume, ioe);
-            } else {
-              effectiveVolumes.add(volume.toString());
-              LOG.info("Successfully added volume: {}", volume);
-            }
-          } catch (Exception e) {
-            errorMessageBuilder.append(
-                String.format("FAILED to ADD: %s: %s%n", volume,
-                    e.toString()));
-            LOG.error("Failed to add volume: {}", volume, e);
-          }
-        }
+        List<String> addedVolumes =
+            addVolumes(changedVolumes.newLocations, nsInfos, errorMessageBuilder);
+        effectiveVolumes.addAll(addedVolumes);
       }
 
       try {
@@ -1361,16 +1336,65 @@ public IOException call() {
           throw new IOException(errorMessageBuilder.toString());
         }
       } finally {
-        if (service != null) {
-          service.shutdown();
-        }
         conf.set(DFS_DATANODE_DATA_DIR_KEY,
             Joiner.on(",").join(effectiveVolumes));
         dataDirs = getStorageLocations(conf);
       }
     }
   }
 
+  /**
+   * Add volumes from DataNode.
+   *
+   * @param locations the StorageLocations of the volumes to be added.
+   * @throws IOException storage not yet initialized
+   */
+  private List<String> addVolumes(final List<StorageLocation> locations,
+      List<NamespaceInfo> nsInfos, StringBuilder errorMessageBuilder) throws IOException {
+    LOG.info("Adding new volumes: {}", Joiner.on(",").join(locations));
+    List<String> effectiveVolumes = new ArrayList<>();
+    ExecutorService service = null;
+    List<Future<IOException>> exceptions = Lists.newArrayList();
+    checkStorageState("refreshVolumes");
+    try {
+      service = Executors.newFixedThreadPool(locations.size());
+      for (final StorageLocation location : locations) {
+        exceptions.add(service.submit(() -> {
+          try {
+            data.addVolume(location, nsInfos);
+          } catch (IOException e) {
+            return e;
+          }
+          return null;
+        }));
+      }
+
+      for (int i = 0; i < locations.size(); i++) {
+        StorageLocation volume = locations.get(i);
+        Future<IOException> ioExceptionFuture = exceptions.get(i);
+        try {
+          IOException ioe = ioExceptionFuture.get();
+          if (ioe != null) {
+            errorMessageBuilder.append(
+                String.format("FAILED TO ADD: %s: %s%n", volume, ioe.getMessage()));
+            LOG.error("Failed to add volume: {}", volume, ioe);
+          } else {
+            effectiveVolumes.add(volume.toString());
+            LOG.info("Successfully added volume: {}", volume);
+          }
+        } catch (Exception e) {
+          errorMessageBuilder.append(String.format("FAILED to ADD: %s: %s%n", volume, e));
+          LOG.error("Failed to add volume: {}", volume, e);
+        }
+      }
+    } finally {
+      if (service != null) {
+        service.shutdown();
+      }
+    }
+    return effectiveVolumes;
+  }
+
   /**
    * Remove volumes from DataNode.
    * See {@link #removeVolumes(Collection, boolean)} for details.
@@ -3308,6 +3332,19 @@ private static boolean checkFileSystemWithConfigured(
   public static List<StorageLocation> getStorageLocations(Configuration conf) {
     Collection<String> rawLocations =
         conf.getTrimmedStringCollection(DFS_DATANODE_DATA_DIR_KEY);
+    // Compatible with local conf for service restarts
+    Collection<String> addedList =
+        conf.getTrimmedStringCollection(DFS_DATANODE_DATA_DIR_TO_ADD_KEY);
+    for (String location : addedList) {
+      if (!rawLocations.contains(location)) {
+        rawLocations.add(location);
+      }
+    }
+    Collection<String> removedList =
+        conf.getTrimmedStringCollection(DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY);
+    for (String location : removedList) {
+      rawLocations.remove(location);
+    }
     List<StorageLocation> locations =
         new ArrayList<StorageLocation>(rawLocations.size());
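
At runtime the delta keys travel through the standard DataNode reconfiguration path: reconfigurePropertyImpl maps both of them (and the original dfs.datanode.data.dir key) to reconfDataDirsParameters, which merges the delta into the current volume set, calls refreshVolumes, and writes the effective list back to dfs.datanode.data.dir. A minimal sketch of driving that path directly, in the style of the new test below; the /data/extra directory is hypothetical:

// Sketch only: assumes a live DataNode (for example from a MiniDFSCluster)
// running code that includes this change; /data/extra is an example path.
package org.apache.hadoop.hdfs.server.datanode;

import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public final class RefreshVolumesSketch {
  public static void addVolumeAtRuntime(DataNode dn) throws ReconfigurationException {
    // Request one additional directory; the DataNode merges it with the
    // current dfs.datanode.data.dir value and hands the result to refreshVolumes.
    dn.reconfigurePropertyImpl(
        DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_ADD_KEY, "/data/extra");
    // The effective volume list is written back to dfs.datanode.data.dir.
    System.out.println(dn.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
  }
}

Outside of tests, the same change would normally be applied through the generic reconfiguration flow (update the property, then run hdfs dfsadmin -reconfig datanode <host:ipc_port> start), which ends up in the same reconfigurePropertyImpl switch shown above.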

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java

Lines changed: 57 additions & 0 deletions
@@ -31,6 +31,9 @@
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_ADD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_READ_BANDWIDTHPERSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY;
@@ -61,9 +64,12 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import com.google.common.base.Joiner;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
+import java.nio.file.Files;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -78,9 +84,14 @@
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.BlockPoolSlice;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
 import org.apache.hadoop.test.LambdaTestUtils;
+
+import org.assertj.core.api.Assertions;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -948,4 +959,50 @@ public void testSlowIoWarningThresholdReconfiguration() throws Exception {
     }
   }
 
+  @Test
+  public void testRefreshVolumes() throws Exception {
+    final String dataDir = cluster.getDataDirectory();
+    List<String> testDirs = new ArrayList<>();
+    for (int i = 0; i < 3; i++) {
+      File newVolume = new File(dataDir, "test_vol" + i);
+      testDirs.add(newVolume.toString());
+      if (i == 0) {
+        // Create file, make addVolume() fail.
+        Files.createFile(newVolume.toPath());
+      }
+    }
+
+    // add new vol.
+    DataNode dn = cluster.getDataNodes().get(0);
+    String newValue = Joiner.on(",").join(testDirs);
+    LambdaTestUtils.intercept(ReconfigurationException.class,
+        "test_vol0",
+        () -> dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_TO_ADD_KEY, newValue));
+    String[] effectiveVolumes = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY).split(",");
+    Assertions.assertThat(4).isEqualTo(effectiveVolumes.length);
+    for (String volume : effectiveVolumes) {
+      Assertions.assertThat(volume).isNotIn(testDirs.get(0));
+    }
+
+    // remove vol.
+    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_TO_REMOVE_KEY, newValue);
+    effectiveVolumes = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY).split(",");
+    Assertions.assertThat(2).isEqualTo(effectiveVolumes.length);
+    for (String volume : effectiveVolumes) {
+      Assertions.assertThat(volume).isNotIn(testDirs.get(0), testDirs.get(1), testDirs.get(2));
+    }
+
+    // Make sure that test dir metadata are not left in memory.
+    FsDatasetSpi<?> dataset = dn.getFSDataset();
+    for (FsVolumeSpi volume : dataset.getVolumeList()) {
+      Assertions.assertThat(volume.getBaseURI().getPath())
+          .isNotIn(testDirs.get(0), testDirs.get(1), testDirs.get(2));
+    }
+    DataStorage storage = dn.getStorage();
+    for (int i = 0; i < storage.getNumStorageDirs(); i++) {
+      Storage.StorageDirectory storageDir = storage.getStorageDir(i);
+      Assertions.assertThat(storageDir.getRoot().toString())
+          .isNotIn(testDirs.get(0), testDirs.get(1), testDirs.get(2));
+    }
+  }
 }

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java

Lines changed: 1 addition & 1 deletion
@@ -350,7 +350,7 @@ public void testDataNodeGetReconfigurableProperties() throws IOException, Interr
     final List<String> outs = Lists.newArrayList();
     final List<String> errs = Lists.newArrayList();
     getReconfigurableProperties("datanode", address, outs, errs);
-    assertEquals(26, outs.size());
+    assertEquals(28, outs.size());
     assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1));
   }
