Skip to content

Commit bdc0de5

Browse files
committed
Integrate backup WAL cleanup logic with the delete command
1 parent 80e65ce commit bdc0de5

File tree

3 files changed

+352
-0
lines changed

3 files changed

+352
-0
lines changed

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
package org.apache.hadoop.hbase.backup.impl;
1919

2020
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
21+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
22+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
2123
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
2224
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BACKUP_LIST_DESC;
2325
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH;
@@ -47,18 +49,26 @@
4749
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_WORKERS_DESC;
4850
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME;
4951
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME_DESC;
52+
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
53+
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
5054

5155
import java.io.IOException;
5256
import java.net.URI;
57+
import java.text.ParseException;
58+
import java.text.SimpleDateFormat;
5359
import java.util.ArrayList;
60+
import java.util.Collections;
61+
import java.util.HashSet;
5462
import java.util.List;
5563
import java.util.Map;
5664
import java.util.Optional;
5765
import java.util.Set;
66+
import java.util.TimeZone;
5867
import java.util.concurrent.TimeUnit;
5968
import org.apache.commons.lang3.StringUtils;
6069
import org.apache.hadoop.conf.Configuration;
6170
import org.apache.hadoop.conf.Configured;
71+
import org.apache.hadoop.fs.FileStatus;
6272
import org.apache.hadoop.fs.FileSystem;
6373
import org.apache.hadoop.fs.Path;
6474
import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -71,6 +81,7 @@
7181
import org.apache.hadoop.hbase.backup.BackupRestoreConstants.BackupCommand;
7282
import org.apache.hadoop.hbase.backup.BackupType;
7383
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
84+
import org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager;
7485
import org.apache.hadoop.hbase.backup.util.BackupSet;
7586
import org.apache.hadoop.hbase.backup.util.BackupUtils;
7687
import org.apache.hadoop.hbase.client.Connection;
@@ -649,6 +660,8 @@ public void execute() throws IOException {
649660
} else if (cmdline.hasOption(OPTION_LIST)) {
650661
executeDeleteListOfBackups(cmdline, isForceDelete);
651662
}
663+
664+
cleanUpUnusedBackupWALs();
652665
}
653666

654667
private void executeDeleteOlderThan(CommandLine cmdline, boolean isForceDelete)
@@ -876,6 +889,140 @@ private boolean canAnyOtherBackupCover(List<BackupInfo> allBackups, BackupInfo c
876889
return false;
877890
}
878891

892+
/**
893+
* Cleans up Write-Ahead Logs (WALs) that are no longer required for PITR after a successful
894+
* backup deletion.
895+
*/
896+
private void cleanUpUnusedBackupWALs() throws IOException {
897+
Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
898+
String backupWalDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
899+
900+
if (backupWalDir == null || backupWalDir.isEmpty()) {
901+
System.out.println("No WAL directory specified for continuous backup. Skipping cleanup.");
902+
return;
903+
}
904+
905+
try (Connection conn = ConnectionFactory.createConnection(conf);
906+
BackupSystemTable sysTable = new BackupSystemTable(conn)) {
907+
// Get list of tables under continuous backup
908+
Map<TableName, Long> continuousBackupTables = sysTable.getContinuousBackupTableSet();
909+
if (continuousBackupTables.isEmpty()) {
910+
System.out.println("No continuous backups configured. Skipping WAL cleanup.");
911+
return;
912+
}
913+
914+
// Find the earliest timestamp after which WALs are still needed
915+
long cutoffTimestamp = determineWALCleanupCutoffTime(sysTable);
916+
if (cutoffTimestamp == 0) {
917+
System.err.println("ERROR: No valid full backup found. Skipping WAL cleanup.");
918+
return;
919+
}
920+
921+
// Update metadata before actual cleanup to avoid inconsistencies
922+
updateBackupTableStartTimes(sysTable, cutoffTimestamp);
923+
924+
// Delete WAL files older than cutoff timestamp
925+
deleteOldWALFiles(conf, backupWalDir, cutoffTimestamp);
926+
927+
}
928+
}
929+
930+
/**
931+
* Determines the cutoff time for cleaning WAL files.
932+
* @param sysTable Backup system table
933+
* @return cutoff timestamp or 0 if not found
934+
*/
935+
private long determineWALCleanupCutoffTime(BackupSystemTable sysTable) throws IOException {
936+
List<BackupInfo> backupInfos = sysTable.getBackupInfos(BackupState.COMPLETE);
937+
Collections.reverse(backupInfos); // Start from oldest
938+
939+
for (BackupInfo backupInfo : backupInfos) {
940+
if (BackupType.FULL.equals(backupInfo.getType())) {
941+
return backupInfo.getStartTs();
942+
}
943+
}
944+
return 0;
945+
}
946+
947+
/**
948+
* Updates the start time for continuous backups if older than cutoff timestamp.
949+
* @param sysTable Backup system table
950+
* @param cutoffTimestamp Timestamp before which WALs are no longer needed
951+
*/
952+
private void updateBackupTableStartTimes(BackupSystemTable sysTable, long cutoffTimestamp)
953+
throws IOException {
954+
955+
Map<TableName, Long> backupTables = sysTable.getContinuousBackupTableSet();
956+
Set<TableName> tablesToUpdate = new HashSet<>();
957+
958+
for (Map.Entry<TableName, Long> entry : backupTables.entrySet()) {
959+
if (entry.getValue() < cutoffTimestamp) {
960+
tablesToUpdate.add(entry.getKey());
961+
}
962+
}
963+
964+
if (!tablesToUpdate.isEmpty()) {
965+
sysTable.updateContinuousBackupTableSet(tablesToUpdate, cutoffTimestamp);
966+
}
967+
}
968+
969+
/**
970+
* Cleans up old WAL and bulk-loaded files based on the determined cutoff timestamp.
971+
*/
972+
private void deleteOldWALFiles(Configuration conf, String backupWalDir, long cutoffTime)
973+
throws IOException {
974+
System.out.println("Starting WAL cleanup in backup directory: " + backupWalDir
975+
+ " with cutoff time: " + cutoffTime);
976+
977+
BackupFileSystemManager manager =
978+
new BackupFileSystemManager(CONTINUOUS_BACKUP_REPLICATION_PEER, conf, backupWalDir);
979+
FileSystem fs = manager.getBackupFs();
980+
Path walDir = manager.getWalsDir();
981+
Path bulkloadDir = manager.getBulkLoadFilesDir();
982+
983+
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
984+
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
985+
986+
System.out.println("Listing directories under: " + walDir);
987+
988+
FileStatus[] directories = fs.listStatus(walDir);
989+
990+
for (FileStatus dirStatus : directories) {
991+
if (!dirStatus.isDirectory()) {
992+
continue; // Skip files, we only want directories
993+
}
994+
995+
Path dirPath = dirStatus.getPath();
996+
String dirName = dirPath.getName();
997+
998+
try {
999+
long dayStart = parseDayDirectory(dirName, dateFormat);
1000+
System.out
1001+
.println("Checking WAL directory: " + dirName + " (Start Time: " + dayStart + ")");
1002+
1003+
// If WAL files of that day are older than cutoff time, delete them
1004+
if (dayStart + ONE_DAY_IN_MILLISECONDS - 1 < cutoffTime) {
1005+
System.out.println("Deleting outdated WAL directory: " + dirPath);
1006+
fs.delete(dirPath, true);
1007+
fs.delete(new Path(bulkloadDir, dirName), true);
1008+
}
1009+
} catch (ParseException e) {
1010+
System.out.println("WARNING: Failed to parse directory name '" + dirName
1011+
+ "'. Skipping. Error: " + e.getMessage());
1012+
} catch (IOException e) {
1013+
System.out.println("WARNING: Failed to delete directory '" + dirPath
1014+
+ "'. Skipping. Error: " + e.getMessage());
1015+
}
1016+
}
1017+
1018+
System.out.println("Completed WAL cleanup for backup directory: " + backupWalDir);
1019+
}
1020+
1021+
/**
 * Parses a per-day WAL directory name into its epoch-millisecond start time.
 * @param dayDir directory name expected to match the supplied formatter's pattern
 * @param dateFormat formatter for the day-directory naming scheme (caller configures time zone)
 * @return parsed timestamp in milliseconds since the epoch
 * @throws ParseException if {@code dayDir} does not match the expected date format
 */
private long parseDayDirectory(String dayDir, SimpleDateFormat dateFormat)
  throws ParseException {
  return dateFormat.parse(dayDir).getTime();
}
1025+
8791026
@Override
8801027
protected void printUsage() {
8811028
System.out.println(DELETE_CMD_USAGE);

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,27 @@ public void addContinuousBackupTableSet(Set<TableName> tables, long startTimesta
10561056
}
10571057
}
10581058

1059+
/**
1060+
* Updates the system table with the new start timestamps for continuous backup tables.
1061+
* @param tablesToUpdate The set of tables that need their start timestamps updated.
1062+
* @param newStartTimestamp The new start timestamp to be set.
1063+
*/
1064+
public void updateContinuousBackupTableSet(Set<TableName> tablesToUpdate, long newStartTimestamp)
1065+
throws IOException {
1066+
try (Table table = connection.getTable(tableName)) {
1067+
Put put = new Put(rowkey(CONTINUOUS_BACKUP_SET));
1068+
1069+
for (TableName tableName : tablesToUpdate) {
1070+
put.addColumn(BackupSystemTable.META_FAMILY, Bytes.toBytes(tableName.getNameAsString()),
1071+
Bytes.toBytes(newStartTimestamp));
1072+
}
1073+
1074+
table.put(put);
1075+
LOG.info("Successfully updated start timestamps for {} tables in the backup system table.",
1076+
tablesToUpdate.size());
1077+
}
1078+
}
1079+
10591080
/**
10601081
* Removes tables from the global continuous backup set. Only removes entries that currently exist
10611082
* in the backup system table.

0 commit comments

Comments
 (0)