diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java index 9f4946b3575..7b401e49c59 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java @@ -26,6 +26,7 @@ import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.spi.common.ServiceEnvironment; /** @@ -94,6 +95,12 @@ public interface PlanningParameters { */ TableId getTableId(); + /** + * @return the tablet for which a compaction is being planned + * @since 2.1.4 + */ + TabletId getTabletId(); + ServiceEnvironment getServiceEnvironment(); CompactionKind getKind(); diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java index 6c60d5095cf..a0d8a7872fc 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java @@ -28,12 +28,14 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.spi.common.ServiceEnvironment; +import org.apache.accumulo.core.util.NumUtil; import org.apache.accumulo.core.util.compaction.CompactionJobPrioritizer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -402,18 +404,34 @@ private Collection findFilesToCompactWithLowerRatio(PlanningPar } if (found.isEmpty() && lowRatio == 1.0) { - // in this case the data must be really skewed, operator intervention may be needed. + var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact); + var excludedBecauseMaxSize = candidates.size() - examinedFiles.size(); + var tabletId = params.getTabletId(); + log.warn( - "Attempted to lower compaction ration from {} to {} for {} because there are {} files " - + "and the max tablet files is {}, however no set of files to compact were found.", - params.getRatio(), highRatio, params.getTableId(), params.getCandidates().size(), - maxTabletFiles); + "Unable to plan compaction for {} that has too many files. {}:{} num_files:{} " + + "excluded_large_files:{} max_compaction_size:{} ratio_search_range:{},{} ", + tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles, candidates.size(), + excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), highRatio, + params.getRatio()); + if (log.isDebugEnabled()) { + var sizesOfExamined = examinedFiles.stream() + .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize())) + .collect(Collectors.toList()); + HashSet excludedFiles = new HashSet<>(candidates); + examinedFiles.forEach(excludedFiles::remove); + var sizesOfExcluded = excludedFiles.stream() + .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize())) + .collect(Collectors.toList()); + log.debug("Failed planning details for {} examined_file_sizes:{} excluded_file_sizes:{}", + tabletId, sizesOfExamined, sizesOfExcluded); + } } log.info( "For {} found {} files to compact lowering compaction ratio from {} to {} because the tablet " + "exceeded {} files, it had {}", - params.getTableId(), found.size(), params.getRatio(), lowRatio, maxTabletFiles, + params.getTabletId(), found.size(), params.getRatio(), lowRatio, maxTabletFiles, params.getCandidates().size()); return found; @@ -482,15 +500,18 @@ private Set getExpected(Collection compacting) { return sortedFiles.subList(0, numToCompact); } - static Collection findDataFilesToCompact(Set files, - double ratio, int maxFilesToCompact, long maxSizeToCompact) { - if (files.size() <= 1) { - return Collections.emptySet(); - } - + /** + * @return a list of the smallest files where the sum of the sizes is less than maxSizeToCompact + */ + static List sortAndLimitByMaxSize(Set files, + long maxSizeToCompact) { // sort files from smallest to largest. So position 0 has the smallest file. List sortedFiles = sortByFileSize(files); + if (maxSizeToCompact == Long.MAX_VALUE) { + return sortedFiles; + } + int maxSizeIndex = sortedFiles.size(); long sum = 0; for (int i = 0; i < sortedFiles.size(); i++) { @@ -502,10 +523,22 @@ static Collection findDataFilesToCompact(Set f } if (maxSizeIndex < sortedFiles.size()) { - sortedFiles = sortedFiles.subList(0, maxSizeIndex); - if (sortedFiles.size() <= 1) { - return Collections.emptySet(); - } + return sortedFiles.subList(0, maxSizeIndex); + } else { + return sortedFiles; + } + } + + static Collection findDataFilesToCompact(Set files, + double ratio, int maxFilesToCompact, long maxSizeToCompact) { + + if (files.size() <= 1) { + return Collections.emptySet(); + } + + List sortedFiles = sortAndLimitByMaxSize(files, maxSizeToCompact); + if (sortedFiles.size() <= 1) { + return Collections.emptySet(); } int windowStart = 0; diff --git a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java index f3186701ad5..4e5f1c4d86c 100644 --- a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java +++ b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java @@ -45,6 +45,9 @@ import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.dataImpl.TabletIdImpl; import org.apache.accumulo.core.spi.common.ServiceEnvironment; import org.apache.accumulo.core.spi.common.ServiceEnvironment.Configuration; import org.apache.accumulo.core.spi.compaction.CompactionPlan.Builder; @@ -754,6 +757,11 @@ public TableId getTableId() { return TableId.of("42"); } + @Override + public TabletId getTabletId() { + return new TabletIdImpl(new KeyExtent(getTableId(), null, null)); + } + @Override public ServiceEnvironment getServiceEnvironment() { ServiceEnvironment senv = EasyMock.createMock(ServiceEnvironment.class); diff --git a/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java b/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java index 0473fdcaa6d..92526465ca9 100644 --- a/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java +++ b/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java @@ -27,7 +27,6 @@ import org.junit.jupiter.api.Test; public class NumUtilTest { - @Test public void testBigNumberForSize() { Locale.setDefault(Locale.US); diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java index 49d56447956..2d609182533 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java @@ -47,7 +47,9 @@ import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.dataImpl.TabletIdImpl; import org.apache.accumulo.core.spi.common.ServiceEnvironment; import org.apache.accumulo.core.spi.compaction.CompactionExecutorId; import org.apache.accumulo.core.spi.compaction.CompactionJob; @@ -247,6 +249,11 @@ public TableId getTableId() { return comp.getTableId(); } + @Override + public TabletId getTabletId() { + return new TabletIdImpl(comp.getExtent()); + } + @Override public ServiceEnvironment getServiceEnvironment() { return senv;