28
28
import java .util .List ;
29
29
import java .util .Objects ;
30
30
import java .util .Set ;
31
+ import java .util .stream .Collectors ;
31
32
32
33
import org .apache .accumulo .core .client .TableNotFoundException ;
33
34
import org .apache .accumulo .core .client .admin .compaction .CompactableFile ;
34
35
import org .apache .accumulo .core .conf .ConfigurationTypeHelper ;
35
36
import org .apache .accumulo .core .conf .Property ;
36
37
import org .apache .accumulo .core .spi .common .ServiceEnvironment ;
38
+ import org .apache .accumulo .core .util .NumUtil ;
37
39
import org .apache .accumulo .core .util .compaction .CompactionJobPrioritizer ;
38
40
import org .slf4j .Logger ;
39
41
import org .slf4j .LoggerFactory ;
@@ -402,18 +404,34 @@ private Collection<CompactableFile> findFilesToCompactWithLowerRatio(PlanningPar
402
404
}
403
405
404
406
if (found .isEmpty () && lowRatio == 1.0 ) {
405
- // in this case the data must be really skewed, operator intervention may be needed.
407
+ var examinedFiles = sortAndLimitByMaxSize (candidates , maxSizeToCompact );
408
+ var excludedBecauseMaxSize = candidates .size () - examinedFiles .size ();
409
+ var tabletId = params .getTabletId ();
410
+
406
411
log .warn (
407
- "Attempted to lower compaction ration from {} to {} for {} because there are {} files "
408
- + "and the max tablet files is {}, however no set of files to compact were found." ,
409
- params .getRatio (), highRatio , params .getTableId (), params .getCandidates ().size (),
410
- maxTabletFiles );
412
+ "Unable to plan compaction for {} that has too many files. {}:{} num_files:{} "
413
+ + "excluded_large_files:{} max_compaction_size:{} ratio_search_range:{},{} " ,
414
+ tabletId , Property .TABLE_FILE_MAX .getKey (), maxTabletFiles , candidates .size (),
415
+ excludedBecauseMaxSize , NumUtil .bigNumberForSize (maxSizeToCompact ), highRatio ,
416
+ params .getRatio ());
417
+ if (log .isDebugEnabled ()) {
418
+ var sizesOfExamined = examinedFiles .stream ()
419
+ .map (compactableFile -> NumUtil .bigNumberForSize (compactableFile .getEstimatedSize ()))
420
+ .collect (Collectors .toList ());
421
+ HashSet <CompactableFile > excludedFiles = new HashSet <>(candidates );
422
+ examinedFiles .forEach (excludedFiles ::remove );
423
+ var sizesOfExcluded = excludedFiles .stream ()
424
+ .map (compactableFile -> NumUtil .bigNumberForSize (compactableFile .getEstimatedSize ()))
425
+ .collect (Collectors .toList ());
426
+ log .debug ("Failed planning details for {} examined_file_sizes:{} excluded_file_sizes:{}" ,
427
+ tabletId , sizesOfExamined , sizesOfExcluded );
428
+ }
411
429
}
412
430
413
431
log .info (
414
432
"For {} found {} files to compact lowering compaction ratio from {} to {} because the tablet "
415
433
+ "exceeded {} files, it had {}" ,
416
- params .getTableId (), found .size (), params .getRatio (), lowRatio , maxTabletFiles ,
434
+ params .getTabletId (), found .size (), params .getRatio (), lowRatio , maxTabletFiles ,
417
435
params .getCandidates ().size ());
418
436
419
437
return found ;
@@ -482,15 +500,18 @@ private Set<CompactableFile> getExpected(Collection<CompactionJob> compacting) {
482
500
return sortedFiles .subList (0 , numToCompact );
483
501
}
484
502
485
- static Collection <CompactableFile > findDataFilesToCompact (Set <CompactableFile > files ,
486
- double ratio , int maxFilesToCompact , long maxSizeToCompact ) {
487
- if (files .size () <= 1 ) {
488
- return Collections .emptySet ();
489
- }
490
-
503
+ /**
504
+ * @return smallest files, in ascending size order, whose total size fits within maxSizeToCompact
505
+ */
506
+ static List <CompactableFile > sortAndLimitByMaxSize (Set <CompactableFile > files ,
507
+ long maxSizeToCompact ) {
491
508
// sort files from smallest to largest. So position 0 has the smallest file.
492
509
List <CompactableFile > sortedFiles = sortByFileSize (files );
493
510
511
+ if (maxSizeToCompact == Long .MAX_VALUE ) {
512
+ return sortedFiles ;
513
+ }
514
+
494
515
int maxSizeIndex = sortedFiles .size ();
495
516
long sum = 0 ;
496
517
for (int i = 0 ; i < sortedFiles .size (); i ++) {
@@ -502,10 +523,22 @@ static Collection<CompactableFile> findDataFilesToCompact(Set<CompactableFile> f
502
523
}
503
524
504
525
if (maxSizeIndex < sortedFiles .size ()) {
505
- sortedFiles = sortedFiles .subList (0 , maxSizeIndex );
506
- if (sortedFiles .size () <= 1 ) {
507
- return Collections .emptySet ();
508
- }
526
+ return sortedFiles .subList (0 , maxSizeIndex );
527
+ } else {
528
+ return sortedFiles ;
529
+ }
530
+ }
531
+
532
+ static Collection <CompactableFile > findDataFilesToCompact (Set <CompactableFile > files ,
533
+ double ratio , int maxFilesToCompact , long maxSizeToCompact ) {
534
+
535
+ if (files .size () <= 1 ) {
536
+ return Collections .emptySet ();
537
+ }
538
+
539
+ List <CompactableFile > sortedFiles = sortAndLimitByMaxSize (files , maxSizeToCompact );
540
+ if (sortedFiles .size () <= 1 ) {
541
+ return Collections .emptySet ();
509
542
}
510
543
511
544
int windowStart = 0 ;
0 commit comments