diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/BalanceManager.java b/server/manager/src/main/java/org/apache/accumulo/manager/BalanceManager.java new file mode 100644 index 00000000000..ceb4c631ee0 --- /dev/null +++ b/server/manager/src/main/java/org/apache/accumulo/manager/BalanceManager.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.accumulo.manager; + +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.Collectors; + +import org.apache.accumulo.core.conf.Property; +import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl; +import org.apache.accumulo.core.manager.balancer.BalanceParamsImpl; +import org.apache.accumulo.core.manager.balancer.TServerStatusImpl; +import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; +import org.apache.accumulo.core.manager.thrift.TableInfo; +import org.apache.accumulo.core.manager.thrift.TabletServerStatus; +import org.apache.accumulo.core.metadata.SystemTables; +import org.apache.accumulo.core.metadata.TServerInstance; +import org.apache.accumulo.core.metadata.schema.Ample; +import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metrics.MetricsProducer; +import org.apache.accumulo.core.spi.balancer.BalancerEnvironment; +import org.apache.accumulo.core.spi.balancer.TableLoadBalancer; +import org.apache.accumulo.core.spi.balancer.TabletBalancer; +import org.apache.accumulo.core.spi.balancer.data.TServerStatus; +import org.apache.accumulo.core.spi.balancer.data.TabletMigration; +import org.apache.accumulo.core.spi.balancer.data.TabletServerId; +import org.apache.accumulo.manager.metrics.BalancerMetrics; +import org.apache.accumulo.server.ServerContext; +import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; +import org.apache.accumulo.server.manager.state.UnassignedTablet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; + +public class BalanceManager { + + private static final 
Logger log = LoggerFactory.getLogger(BalanceManager.class); + + private final Manager manager; + protected volatile TabletBalancer tabletBalancer; + private final BalancerEnvironment balancerEnvironment; + private final BalancerMetrics balancerMetrics = new BalancerMetrics(); + private final Object balancedNotifier = new Object(); + + BalanceManager(Manager manager) { + this.manager = manager; + this.balancerEnvironment = new BalancerEnvironmentImpl(manager.getContext()); + initializeBalancer(); + } + + private void initializeBalancer() { + var localTabletBalancer = + Property.createInstanceFromPropertyName(getContext().getConfiguration(), + Property.MANAGER_TABLET_BALANCER, TabletBalancer.class, new TableLoadBalancer()); + localTabletBalancer.init(balancerEnvironment); + tabletBalancer = localTabletBalancer; + log.info("Setup new balancer instance {}", tabletBalancer.getClass().getName()); + } + + void propertyChanged(String property) { + if (property.equals(Property.MANAGER_TABLET_BALANCER.getKey())) { + initializeBalancer(); + } + } + + private ServerContext getContext() { + return manager.getContext(); + } + + MetricsProducer getMetrics() { + return balancerMetrics; + } + + /** + * balanceTablets() balances tables by DataLevel. 
Return the current set of migrations partitioned + * by DataLevel + */ + private Map> partitionMigrations() { + final Map> partitionedMigrations = + new EnumMap<>(Ample.DataLevel.class); + for (Ample.DataLevel dl : Ample.DataLevel.values()) { + Set extents = new HashSet<>(); + // prev row needed for the extent + try ( + var tabletsMetadata = getContext() + .getAmple().readTablets().forLevel(dl).fetch(TabletMetadata.ColumnType.PREV_ROW, + TabletMetadata.ColumnType.LOCATION, TabletMetadata.ColumnType.MIGRATION) + .build()) { + // filter out migrations that are awaiting cleanup + tabletsMetadata.stream() + .filter(tm -> tm.getMigration() != null && !manager.shouldCleanupMigration(tm)) + .forEach(tm -> extents.add(tm.getExtent())); + } + partitionedMigrations.put(dl, extents); + } + return partitionedMigrations; + } + + /** + * Given the current tserverStatus map and a DataLevel, return a view of the tserverStatus map + * that only contains entries for tables in the DataLevel + */ + private SortedMap createTServerStatusView( + final Ample.DataLevel dl, final SortedMap status) { + final SortedMap tserverStatusForLevel = new TreeMap<>(); + status.forEach((tsi, tss) -> { + final TabletServerStatus copy = tss.deepCopy(); + final Map oldTableMap = copy.getTableMap(); + final Map newTableMap = + new HashMap<>(dl == Ample.DataLevel.USER ? 
oldTableMap.size() : 1);
+      if (dl == Ample.DataLevel.ROOT) {
+        // ROOT level: keep only the root table entry, if this server reports one.
+        if (oldTableMap.containsKey(SystemTables.ROOT.tableId().canonical())) {
+          newTableMap.put(SystemTables.ROOT.tableId().canonical(),
+              oldTableMap.get(SystemTables.ROOT.tableId().canonical()));
+        }
+      } else if (dl == Ample.DataLevel.METADATA) {
+        // METADATA level: keep only the metadata table entry, if this server reports one.
+        if (oldTableMap.containsKey(SystemTables.METADATA.tableId().canonical())) {
+          newTableMap.put(SystemTables.METADATA.tableId().canonical(),
+              oldTableMap.get(SystemTables.METADATA.tableId().canonical()));
+        }
+      } else if (dl == Ample.DataLevel.USER) {
+        if (!oldTableMap.containsKey(SystemTables.METADATA.tableId().canonical())
+            && !oldTableMap.containsKey(SystemTables.ROOT.tableId().canonical())) {
+          // Neither the root nor the metadata table is present, so copy everything in one go.
+          newTableMap.putAll(oldTableMap);
+        } else {
+          // Copy every entry except those of the root and metadata tables.
+          oldTableMap.forEach((table, info) -> {
+            if (!table.equals(SystemTables.ROOT.tableId().canonical())
+                && !table.equals(SystemTables.METADATA.tableId().canonical())) {
+              newTableMap.put(table, info);
+            }
+          });
+        }
+      } else {
+        throw new IllegalArgumentException("Unhandled DataLevel value: " + dl);
+      }
+      copy.setTableMap(newTableMap);
+      tserverStatusForLevel.put(tsi, copy);
+    });
+    return tserverStatusForLevel;
+  }
+
+  /**
+   * Returns the table name to table id mapping for the tables that belong to the given level.
+   *
+   * @param dataLevel level to list tables for
+   * @return a single-entry map for ROOT and METADATA; for USER an unmodifiable map of all
+   *         qualified table names minus the system tables that are not at the USER level
+   * @throws IllegalArgumentException if the level is not one of the handled values
+   */
+  private Map<String,TableId> getTablesForLevel(Ample.DataLevel dataLevel) {
+    switch (dataLevel) {
+      case ROOT:
+        return Map.of(SystemTables.ROOT.tableName(), SystemTables.ROOT.tableId());
+      case METADATA:
+        return Map.of(SystemTables.METADATA.tableName(), SystemTables.METADATA.tableId());
+      case USER: {
+        Map<String,TableId> userTables = getContext().createQualifiedTableNameToIdMap();
+        for (var accumuloTable : SystemTables.values()) {
+          if (Ample.DataLevel.of(accumuloTable.tableId()) != Ample.DataLevel.USER) {
+            userTables.remove(accumuloTable.tableName());
+          }
+        }
+        return Collections.unmodifiableMap(userTables);
+      }
+      default:
+        throw new IllegalArgumentException("Unknown data level " + dataLevel);
+    }
+  }
+
+  /**
+   * Filters the migrations proposed by the balancer down to those that are well formed: tablet,
+   * source and destination are set, the tablet belongs to the level being processed, and both
+   * servers are in the current server set. Rejected migrations are logged and dropped.
+   *
+   * @param current tablet servers the balancer was given for this level
+   * @param migrations migrations proposed by the balancer
+   * @param level level currently being balanced
+   * @return the migrations that passed every check
+   */
+  private List<TabletMigration> checkMigrationSanity(Set<TabletServerId> current,
+      List<TabletMigration> migrations, Ample.DataLevel level) {
+    return migrations.stream().filter(m 
-> { + boolean includeMigration = false; + if (m.getTablet() == null) { + log.error("Balancer gave back a null tablet {}", m); + } else if (Ample.DataLevel.of(m.getTablet().getTable()) != level) { + log.warn( + "Balancer wants to move a tablet ({}) outside of the current processing level ({}), " + + "ignoring and should be processed at the correct level ({})", + m.getTablet(), level, Ample.DataLevel.of(m.getTablet().getTable())); + } else if (m.getNewTabletServer() == null) { + log.error("Balancer did not set the destination {}", m); + } else if (m.getOldTabletServer() == null) { + log.error("Balancer did not set the source {}", m); + } else if (!current.contains(m.getOldTabletServer())) { + log.warn("Balancer wants to move a tablet from a server that is not current: {}", m); + } else if (!current.contains(m.getNewTabletServer())) { + log.warn("Balancer wants to move a tablet to a server that is not current: {}", m); + } else { + includeMigration = true; + } + return includeMigration; + }).collect(Collectors.toList()); + } + + long balanceTablets() { + + final int tabletsNotHosted = manager.notHosted(); + BalanceParamsImpl params = null; + long wait = 0; + long totalMigrationsOut = 0; + final Map> partitionedMigrations = partitionMigrations(); + int levelsCompleted = 0; + + for (Ample.DataLevel dl : Ample.DataLevel.values()) { + if (dl == Ample.DataLevel.USER && tabletsNotHosted > 0) { + log.debug("not balancing user tablets because there are {} unhosted tablets", + tabletsNotHosted); + continue; + } + + if ((dl == Ample.DataLevel.METADATA || dl == Ample.DataLevel.USER) + && !partitionedMigrations.get(Ample.DataLevel.ROOT).isEmpty()) { + log.debug("Not balancing {} because {} has migrations", dl, Ample.DataLevel.ROOT); + continue; + } + + if (dl == Ample.DataLevel.USER + && !partitionedMigrations.get(Ample.DataLevel.METADATA).isEmpty()) { + log.debug("Not balancing {} because {} has migrations", dl, Ample.DataLevel.METADATA); + continue; + } + + // Create a view of 
the tserver status such that it only contains the tables + // for this level in the tableMap. + SortedMap tserverStatusForLevel = + createTServerStatusView(dl, manager.tserverStatus); + // Construct the Thrift variant of the map above for the BalancerParams + final SortedMap tserverStatusForBalancerLevel = new TreeMap<>(); + tserverStatusForLevel.forEach((tsi, status) -> tserverStatusForBalancerLevel + .put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status))); + + log.debug("Balancing for tables at level {}", dl); + + SortedMap statusForBalancerLevel = + tserverStatusForBalancerLevel; + params = + BalanceParamsImpl.fromThrift(statusForBalancerLevel, manager.tServerGroupingForBalancer, + tserverStatusForLevel, partitionedMigrations.get(dl), dl, getTablesForLevel(dl)); + wait = Math.max(tabletBalancer.balance(params), wait); + long migrationsOutForLevel = 0; + try (var tabletsMutator = getContext().getAmple().conditionallyMutateTablets(result -> {})) { + for (TabletMigration m : checkMigrationSanity(statusForBalancerLevel.keySet(), + params.migrationsOut(), dl)) { + final KeyExtent ke = KeyExtent.fromTabletId(m.getTablet()); + if (partitionedMigrations.get(dl).contains(ke)) { + log.warn("balancer requested migration more than once, skipping {}", m); + continue; + } + migrationsOutForLevel++; + var migration = TabletServerIdImpl.toThrift(m.getNewTabletServer()); + tabletsMutator.mutateTablet(ke).requireAbsentOperation() + .requireCurrentLocationNotEqualTo(migration).putMigration(migration) + .submit(tm -> false); + log.debug("migration {}", m); + } + } + totalMigrationsOut += migrationsOutForLevel; + + // increment this at end of loop to signal complete run w/o any continue + levelsCompleted++; + } + final long totalMigrations = + totalMigrationsOut + partitionedMigrations.values().stream().mapToLong(Set::size).sum(); + balancerMetrics.assignMigratingCount(() -> totalMigrations); + + if (totalMigrationsOut == 0 && levelsCompleted == 
Ample.DataLevel.values().length) { + synchronized (balancedNotifier) { + balancedNotifier.notifyAll(); + } + } else if (totalMigrationsOut > 0) { + manager.nextEvent.event("Migrating %d more tablets, %d total", totalMigrationsOut, + totalMigrations); + } + return wait; + } + + @SuppressFBWarnings(value = "UW_UNCOND_WAIT", justification = "TODO needs triage") + public void waitForBalance() { + synchronized (balancedNotifier) { + long eventCounter; + do { + eventCounter = manager.nextEvent.waitForEvents(0, 0); + try { + balancedNotifier.wait(); + } catch (InterruptedException e) { + log.debug(e.toString(), e); + } + } while (manager.displayUnassigned() > 0 || manager.numMigrations() > 0 + || eventCounter != manager.nextEvent.waitForEvents(0, 0)); + } + } + + void getAssignments(SortedMap currentStatus, + Map> currentTServerGroups, + Map unassigned, Map assignedOut) { + AssignmentParamsImpl params = + AssignmentParamsImpl.fromThrift(currentStatus, currentTServerGroups, + unassigned.entrySet().stream().collect(HashMap::new, + (m, e) -> m.put(e.getKey(), + e.getValue().getLastLocation() == null ? 
null + : e.getValue().getLastLocation().getServerInstance()), + Map::putAll), + assignedOut); + tabletBalancer.getAssignments(params); + } + + public TabletBalancer getBalancer() { + return tabletBalancer; + } +} diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 80102708990..94a35a125b7 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -34,7 +34,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -44,7 +43,6 @@ import java.util.Optional; import java.util.Set; import java.util.SortedMap; -import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.CountDownLatch; @@ -56,7 +54,6 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; -import java.util.stream.Collectors; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.cli.ConfigOpts; @@ -89,10 +86,6 @@ import org.apache.accumulo.core.lock.ServiceLockPaths.AddressSelector; import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; import org.apache.accumulo.core.lock.ServiceLockSupport.HAServiceLockWatcher; -import org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl; -import org.apache.accumulo.core.manager.balancer.BalanceParamsImpl; -import org.apache.accumulo.core.manager.balancer.TServerStatusImpl; -import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.manager.thrift.BulkImportState; import org.apache.accumulo.core.manager.thrift.ManagerGoalState; @@ 
-106,12 +99,6 @@ import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metrics.MetricsInfo; import org.apache.accumulo.core.metrics.MetricsProducer; -import org.apache.accumulo.core.spi.balancer.BalancerEnvironment; -import org.apache.accumulo.core.spi.balancer.TableLoadBalancer; -import org.apache.accumulo.core.spi.balancer.TabletBalancer; -import org.apache.accumulo.core.spi.balancer.data.TServerStatus; -import org.apache.accumulo.core.spi.balancer.data.TabletMigration; -import org.apache.accumulo.core.spi.balancer.data.TabletServerId; import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.core.util.Retry; import org.apache.accumulo.core.util.Timer; @@ -121,7 +108,6 @@ import org.apache.accumulo.core.zookeeper.ZcStat; import org.apache.accumulo.manager.compaction.coordinator.CompactionCoordinator; import org.apache.accumulo.manager.merge.FindMergeableRangeTask; -import org.apache.accumulo.manager.metrics.BalancerMetrics; import org.apache.accumulo.manager.metrics.ManagerMetrics; import org.apache.accumulo.manager.recovery.RecoveryManager; import org.apache.accumulo.manager.split.Splitter; @@ -136,11 +122,9 @@ import org.apache.accumulo.server.manager.LiveTServerSet; import org.apache.accumulo.server.manager.LiveTServerSet.LiveTServersSnapshot; import org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection; -import org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl; import org.apache.accumulo.server.manager.state.DeadServerList; import org.apache.accumulo.server.manager.state.TabletServerState; import org.apache.accumulo.server.manager.state.TabletStateStore; -import org.apache.accumulo.server.manager.state.UnassignedTablet; import org.apache.accumulo.server.rpc.ServerAddress; import org.apache.accumulo.server.rpc.TServerUtils; import org.apache.accumulo.server.rpc.ThriftProcessorTypes; @@ -166,7 +150,6 @@ import com.google.common.util.concurrent.RateLimiter; import 
com.google.common.util.concurrent.Uninterruptibles; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.micrometer.core.instrument.MeterRegistry; import io.opentelemetry.api.trace.Span; import io.opentelemetry.context.Scope; @@ -190,7 +173,6 @@ public class Manager extends AbstractServer implements LiveTServerSet.Listener { private static final int MAX_BAD_STATUS_COUNT = 3; private static final double MAX_SHUTDOWNS_PER_SEC = 10D / 60D; - private final Object balancedNotifier = new Object(); final LiveTServerSet tserverSet; private final List watchers = new ArrayList<>(); final Map badServers = @@ -207,9 +189,7 @@ public class Manager extends AbstractServer implements LiveTServerSet.Listener { ServiceLock managerLock = null; private TServer clientService = null; - protected volatile TabletBalancer tabletBalancer; - private final BalancerEnvironment balancerEnvironment; - private final BalancerMetrics balancerMetrics = new BalancerMetrics(); + private final BalanceManager balanceManager; private ManagerState state = ManagerState.INITIAL; @@ -237,6 +217,10 @@ public synchronized ManagerState getManagerState() { return state; } + public BalanceManager getBalanceManager() { + return balanceManager; + } + public Map> getCompactionHints(DataLevel level) { Predicate tablePredicate = (tableId) -> DataLevel.of(tableId) == level; Map allConfig; @@ -376,7 +360,7 @@ private int nonMetaDataTabletsAssignedOrHosted() { - assignedOrHosted(SystemTables.ROOT.tableId()); } - private int notHosted() { + int notHosted() { int result = 0; for (TabletGroupWatcher watcher : watchers) { for (TableCounts counts : watcher.getStats().values()) { @@ -452,7 +436,7 @@ protected Manager(ConfigOpts opts, Function ser super(ServerId.Type.MANAGER, opts, serverContextFactory, args); ServerContext context = super.getContext(); upgradeCoordinator = new UpgradeCoordinator(context); - balancerEnvironment = new BalancerEnvironmentImpl(context); + balanceManager = new BalanceManager(this); 
AccumuloConfiguration aconf = context.getConfiguration(); @@ -460,7 +444,6 @@ protected Manager(ConfigOpts opts, Function ser log.info("Instance {}", context.getInstanceID()); timeKeeper = new ManagerTime(this, aconf); tserverSet = new LiveTServerSet(context, this); - initializeBalancer(); final long tokenLifetime = aconf.getTimeInMillis(Property.GENERAL_DELEGATION_TOKEN_LIFETIME); @@ -521,10 +504,6 @@ public Splitter getSplitter() { return splitter; } - public MetricsProducer getBalancerMetrics() { - return balancerMetrics; - } - public UpgradeCoordinator.UpgradeStatus getUpgradeStatus() { return upgradeCoordinator.getStatus(); } @@ -580,7 +559,7 @@ public void run() { } } - private boolean shouldCleanupMigration(TabletMetadata tabletMetadata) { + boolean shouldCleanupMigration(TabletMetadata tabletMetadata) { var tableState = getContext().getTableManager().getTableState(tabletMetadata.getTableId()); var migration = tabletMetadata.getMigration(); Preconditions.checkState(migration != null, @@ -632,30 +611,6 @@ public void run() { } - /** - * balanceTablets() balances tables by DataLevel. 
Return the current set of migrations partitioned - * by DataLevel - */ - private Map> partitionMigrations() { - final Map> partitionedMigrations = new EnumMap<>(DataLevel.class); - for (DataLevel dl : DataLevel.values()) { - Set extents = new HashSet<>(); - // prev row needed for the extent - try ( - var tabletsMetadata = getContext() - .getAmple().readTablets().forLevel(dl).fetch(TabletMetadata.ColumnType.PREV_ROW, - TabletMetadata.ColumnType.MIGRATION, TabletMetadata.ColumnType.LOCATION) - .build()) { - // filter out migrations that are awaiting cleanup - tabletsMetadata.stream() - .filter(tm -> tm.getMigration() != null && !shouldCleanupMigration(tm)) - .forEach(tm -> extents.add(tm.getExtent())); - } - partitionedMigrations.put(dl, extents); - } - return partitionedMigrations; - } - private class StatusThread implements Runnable { private boolean goodStats() { @@ -800,7 +755,7 @@ private long updateStatus() { return DEFAULT_WAIT_FOR_WATCHER; } } - return balanceTablets(); + return balanceManager.balanceTablets(); } return DEFAULT_WAIT_FOR_WATCHER; } @@ -832,177 +787,6 @@ private void checkForHeldServer(SortedMap ts badServers.putIfAbsent(instance, new AtomicInteger(1)); } } - - /** - * Given the current tserverStatus map and a DataLevel, return a view of the tserverStatus map - * that only contains entries for tables in the DataLevel - */ - private SortedMap createTServerStatusView( - final DataLevel dl, final SortedMap status) { - final SortedMap tserverStatusForLevel = new TreeMap<>(); - status.forEach((tsi, tss) -> { - final TabletServerStatus copy = tss.deepCopy(); - final Map oldTableMap = copy.getTableMap(); - final Map newTableMap = - new HashMap<>(dl == DataLevel.USER ? 
oldTableMap.size() : 1); - if (dl == DataLevel.ROOT) { - if (oldTableMap.containsKey(SystemTables.ROOT.tableId().canonical())) { - newTableMap.put(SystemTables.ROOT.tableId().canonical(), - oldTableMap.get(SystemTables.ROOT.tableId().canonical())); - } - } else if (dl == DataLevel.METADATA) { - if (oldTableMap.containsKey(SystemTables.METADATA.tableId().canonical())) { - newTableMap.put(SystemTables.METADATA.tableId().canonical(), - oldTableMap.get(SystemTables.METADATA.tableId().canonical())); - } - } else if (dl == DataLevel.USER) { - if (!oldTableMap.containsKey(SystemTables.METADATA.tableId().canonical()) - && !oldTableMap.containsKey(SystemTables.ROOT.tableId().canonical())) { - newTableMap.putAll(oldTableMap); - } else { - oldTableMap.forEach((table, info) -> { - if (!table.equals(SystemTables.ROOT.tableId().canonical()) - && !table.equals(SystemTables.METADATA.tableId().canonical())) { - newTableMap.put(table, info); - } - }); - } - } else { - throw new IllegalArgumentException("Unhandled DataLevel value: " + dl); - } - copy.setTableMap(newTableMap); - tserverStatusForLevel.put(tsi, copy); - }); - return tserverStatusForLevel; - } - - private Map getTablesForLevel(DataLevel dataLevel) { - switch (dataLevel) { - case ROOT: - return Map.of(SystemTables.ROOT.tableName(), SystemTables.ROOT.tableId()); - case METADATA: - return Map.of(SystemTables.METADATA.tableName(), SystemTables.METADATA.tableId()); - case USER: { - Map userTables = getContext().createQualifiedTableNameToIdMap(); - for (var accumuloTable : SystemTables.values()) { - if (DataLevel.of(accumuloTable.tableId()) != DataLevel.USER) { - userTables.remove(accumuloTable.tableName()); - } - } - return Collections.unmodifiableMap(userTables); - } - default: - throw new IllegalArgumentException("Unknown data level " + dataLevel); - } - } - - private long balanceTablets() { - - final int tabletsNotHosted = notHosted(); - BalanceParamsImpl params = null; - long wait = 0; - long totalMigrationsOut = 0; - 
final Map> partitionedMigrations = partitionMigrations(); - int levelsCompleted = 0; - - for (DataLevel dl : DataLevel.values()) { - if (dl == DataLevel.USER && tabletsNotHosted > 0) { - log.debug("not balancing user tablets because there are {} unhosted tablets", - tabletsNotHosted); - continue; - } - - if ((dl == DataLevel.METADATA || dl == DataLevel.USER) - && !partitionedMigrations.get(DataLevel.ROOT).isEmpty()) { - log.debug("Not balancing {} because {} has migrations", dl, DataLevel.ROOT); - continue; - } - - if (dl == DataLevel.USER && !partitionedMigrations.get(DataLevel.METADATA).isEmpty()) { - log.debug("Not balancing {} because {} has migrations", dl, DataLevel.METADATA); - continue; - } - - // Create a view of the tserver status such that it only contains the tables - // for this level in the tableMap. - SortedMap tserverStatusForLevel = - createTServerStatusView(dl, tserverStatus); - // Construct the Thrift variant of the map above for the BalancerParams - final SortedMap tserverStatusForBalancerLevel = - new TreeMap<>(); - tserverStatusForLevel.forEach((tsi, status) -> tserverStatusForBalancerLevel - .put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status))); - - log.debug("Balancing for tables at level {}", dl); - - SortedMap statusForBalancerLevel = - tserverStatusForBalancerLevel; - params = BalanceParamsImpl.fromThrift(statusForBalancerLevel, tServerGroupingForBalancer, - tserverStatusForLevel, partitionedMigrations.get(dl), dl, getTablesForLevel(dl)); - wait = Math.max(tabletBalancer.balance(params), wait); - long migrationsOutForLevel = 0; - try ( - var tabletsMutator = getContext().getAmple().conditionallyMutateTablets(result -> {})) { - for (TabletMigration m : checkMigrationSanity(statusForBalancerLevel.keySet(), - params.migrationsOut(), dl)) { - final KeyExtent ke = KeyExtent.fromTabletId(m.getTablet()); - if (partitionedMigrations.get(dl).contains(ke)) { - log.warn("balancer requested migration more than once, skipping {}", 
m); - continue; - } - migrationsOutForLevel++; - var migration = TabletServerIdImpl.toThrift(m.getNewTabletServer()); - tabletsMutator.mutateTablet(ke).requireAbsentOperation() - .requireCurrentLocationNotEqualTo(migration).putMigration(migration) - .submit(tm -> false); - log.debug("migration {}", m); - } - } - totalMigrationsOut += migrationsOutForLevel; - - // increment this at end of loop to signal complete run w/o any continue - levelsCompleted++; - } - final long totalMigrations = - totalMigrationsOut + partitionedMigrations.values().stream().mapToLong(Set::size).sum(); - balancerMetrics.assignMigratingCount(() -> totalMigrations); - - if (totalMigrationsOut == 0 && levelsCompleted == DataLevel.values().length) { - synchronized (balancedNotifier) { - balancedNotifier.notifyAll(); - } - } else if (totalMigrationsOut > 0) { - nextEvent.event("Migrating %d more tablets, %d total", totalMigrationsOut, totalMigrations); - } - return wait; - } - - private List checkMigrationSanity(Set current, - List migrations, DataLevel level) { - return migrations.stream().filter(m -> { - boolean includeMigration = false; - if (m.getTablet() == null) { - log.error("Balancer gave back a null tablet {}", m); - } else if (DataLevel.of(m.getTablet().getTable()) != level) { - log.warn( - "Balancer wants to move a tablet ({}) outside of the current processing level ({}), " - + "ignoring and should be processed at the correct level ({})", - m.getTablet(), level, DataLevel.of(m.getTablet().getTable())); - } else if (m.getNewTabletServer() == null) { - log.error("Balancer did not set the destination {}", m); - } else if (m.getOldTabletServer() == null) { - log.error("Balancer did not set the source {}", m); - } else if (!current.contains(m.getOldTabletServer())) { - log.warn("Balancer wants to move a tablet from a server that is not current: {}", m); - } else if (!current.contains(m.getNewTabletServer())) { - log.warn("Balancer wants to move a tablet to a server that is not current: {}", 
m); - } else { - includeMigration = true; - } - return includeMigration; - }).collect(Collectors.toList()); - } - } private SortedMap @@ -1196,7 +980,7 @@ public void process(WatchedEvent event) { MetricsInfo metricsInfo = getContext().getMetricsInfo(); ManagerMetrics managerMetrics = new ManagerMetrics(getConfiguration(), this); var producers = managerMetrics.getProducers(getConfiguration(), this); - producers.add(balancerMetrics); + producers.add(balanceManager.getMetrics()); final TabletGroupWatcher userTableTGW = new TabletGroupWatcher(this, this.userTabletStore, null, managerMetrics) { @@ -1734,22 +1518,6 @@ public void assignedTablet(KeyExtent extent) { } } - @SuppressFBWarnings(value = "UW_UNCOND_WAIT", justification = "TODO needs triage") - public void waitForBalance() { - synchronized (balancedNotifier) { - long eventCounter; - do { - eventCounter = nextEvent.waitForEvents(0, 0); - try { - balancedNotifier.wait(); - } catch (InterruptedException e) { - log.debug(e.toString(), e); - } - } while (displayUnassigned() > 0 || numMigrations() > 0 - || eventCounter != nextEvent.waitForEvents(0, 0)); - } - } - public ManagerMonitorInfo getManagerMonitorInfo() { final ManagerMonitorInfo result = new ManagerMonitorInfo(); @@ -1818,44 +1586,6 @@ public boolean isUpgrading() { return upgradeCoordinator.getStatus() != UpgradeCoordinator.UpgradeStatus.COMPLETE; } - void initializeBalancer() { - var localTabletBalancer = Property.createInstanceFromPropertyName(getConfiguration(), - Property.MANAGER_TABLET_BALANCER, TabletBalancer.class, new TableLoadBalancer()); - localTabletBalancer.init(balancerEnvironment); - tabletBalancer = localTabletBalancer; - } - - Class getBalancerClass() { - return tabletBalancer.getClass(); - } - - void getAssignments(SortedMap currentStatus, - Map> currentTServerGroups, - Map unassigned, Map assignedOut) { - AssignmentParamsImpl params = - AssignmentParamsImpl.fromThrift(currentStatus, currentTServerGroups, - 
unassigned.entrySet().stream().collect(HashMap::new, - (m, e) -> m.put(e.getKey(), - e.getValue().getLastLocation() == null ? null - : e.getValue().getLastLocation().getServerInstance()), - Map::putAll), - assignedOut); - tabletBalancer.getAssignments(params); - } - - public TabletStateStore getTabletStateStore(DataLevel level) { - switch (level) { - case METADATA: - return this.metadataTabletStore; - case ROOT: - return this.rootTabletStore; - case USER: - return this.userTabletStore; - default: - throw new IllegalStateException("Unhandled DataLevel value: " + level); - } - } - @Override public void registerMetrics(MeterRegistry registry) { super.registerMetrics(registry); @@ -1873,7 +1603,7 @@ public ServiceLock getLock() { return managerLock; } - private long numMigrations() { + long numMigrations() { long count = 0; for (DataLevel dl : DataLevel.values()) { // prev row needed for the extent diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java b/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java index a0172872c0e..2318becedf9 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java @@ -51,8 +51,6 @@ import org.apache.accumulo.core.clientImpl.thrift.ThriftNotActiveServiceException; import org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException; import org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException; -import org.apache.accumulo.core.conf.DeprecatedPropertyUtil; -import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.dataImpl.KeyExtent; @@ -555,17 +553,12 @@ private void alterTableProperty(TCredentials c, String tableName, String propert } private void updatePlugins(String property) { 
- // resolve without warning; any warnings should have already occurred - String resolved = DeprecatedPropertyUtil.getReplacementName(property, (log, replacement) -> {}); - if (resolved.equals(Property.MANAGER_TABLET_BALANCER.getKey())) { - manager.initializeBalancer(); - log.info("tablet balancer changed to {}", manager.getBalancerClass().getName()); - } + manager.getBalanceManager().propertyChanged(property); } @Override public void waitForBalance(TInfo tinfo) { - manager.waitForBalance(); + manager.getBalanceManager().waitForBalance(); } @Override diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 46af91364f3..86b1f1372b4 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -532,8 +532,8 @@ private TableMgmtStats manageTablets(Iterator iter, // This is final because nothing in this method should change the goal. All computation of the // goal should be done in TabletGoalState.compute() so that all parts of the Accumulo code // will compute a consistent goal. 
- final TabletGoalState goal = - TabletGoalState.compute(tm, state, manager.tabletBalancer, tableMgmtParams); + final TabletGoalState goal = TabletGoalState.compute(tm, state, + manager.getBalanceManager().getBalancer(), tableMgmtParams); final Set actions = mti.getActions(); @@ -968,8 +968,8 @@ private void getAssignmentsFromBalancer(TabletLists tLists, Map unassigned) { if (!tLists.destinations.isEmpty()) { Map assignedOut = new HashMap<>(); - manager.getAssignments(tLists.destinations, tLists.currentTServerGrouping, unassigned, - assignedOut); + manager.getBalanceManager().getAssignments(tLists.destinations, tLists.currentTServerGrouping, + unassigned, assignedOut); for (Entry assignment : assignedOut.entrySet()) { if (unassigned.containsKey(assignment.getKey())) { if (assignment.getValue() != null) {