apache
diff --git a/Diff for: ‎docs/layouts/shortcodes/generated/auto_scaler_configuration.html
+12 b/Diff for: ‎docs/layouts/shortcodes/generated/auto_scaler_configuration.html
+12
diff --git a/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingExecutor.java
+109-3 b/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingExecutor.java
+109-3
diff --git a/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingMetricCollector.java
+12-3 b/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingMetricCollector.java
+12-3
diff --git a/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/config/AutoScalerOptions.java
+17 b/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/config/AutoScalerOptions.java
+17
diff --git a/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/topology/JobTopology.java
+14-4 b/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/topology/JobTopology.java
+14-4
diff --git a/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/topology/VertexInfo.java
+21-2 b/Diff for: ‎flink-autoscaler/src/main/java/org/apache/flink/autoscaler/topology/VertexInfo.java
+21-2
@@ -116,6 +116,18 @@
             <td>Double</td>
             <td>Percentage threshold for switching to observed from busy time based true processing rate if the measurement is off by at least the configured fraction. For example 0.15 means we switch to observed if the busy time based computation is at least 15% higher during catchup.</td>
         </tr>
+        <tr>
+            <td><h5>job.autoscaler.quota.cpu</h5></td>
+            <td style="word-wrap: break-word;">(none)</td>
+            <td>Double</td>
+            <td>Quota of the CPU count. When scaling would go beyond this number the scaling is not going to happen.</td>
+        </tr>
+        <tr>
+            <td><h5>job.autoscaler.quota.memory</h5></td>
+            <td style="word-wrap: break-word;">(none)</td>
+            <td>MemorySize</td>
+            <td>Quota of the memory size. When scaling would go beyond this number the scaling is not going to happen.</td>
+        </tr>
         <tr>
             <td><h5>job.autoscaler.restart.time</h5></td>
             <td style="word-wrap: break-word;">5 min</td>
 
@@ -33,6 +33,7 @@
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.configuration.MemorySize;
 import org.apache.flink.configuration.TaskManagerOptions;
+import org.apache.flink.runtime.instance.SlotSharingGroupId;
 import org.apache.flink.runtime.jobgraph.JobVertexID;
 
 import org.slf4j.Logger;
@@ -67,6 +68,9 @@ public class ScalingExecutor<KEY, Context extends JobAutoScalerContext<KEY>> {
     public static final String HEAP_USAGE_MESSAGE =
             "Heap Usage %s is above the allowed limit for scaling operations. Please adjust the available memory manually.";
 
+    public static final String RESOURCE_QUOTA_REACHED_MESSAGE =
+            "Resource usage is above the allowed limit for scaling operations. Please adjust the resource quota manually.";
+
     private static final Logger LOG = LoggerFactory.getLogger(ScalingExecutor.class);
 
     private final JobVertexScaler<KEY, Context> jobVertexScaler;
@@ -129,8 +133,10 @@ public boolean scaleResource(
                         scalingSummaries,
                         autoScalerEventHandler);
 
-        if (scalingWouldExceedClusterResources(
-                configOverrides.newConfigWithOverrides(conf),
+        var memoryTuningEnabled = conf.get(AutoScalerOptions.MEMORY_TUNING_ENABLED);
+        if (scalingWouldExceedMaxResources(
+                memoryTuningEnabled ? configOverrides.newConfigWithOverrides(conf) : conf,
+                jobTopology,
                 evaluatedMetrics,
                 scalingSummaries,
                 context)) {
@@ -280,6 +286,29 @@ private boolean isJobUnderMemoryPressure(
         return false;
     }
 
+    /**
+     * Decides whether the proposed scaling has to be blocked because of resource limits.
+     *
+     * <p>The cluster resource check runs first; the resource quota check is only evaluated when
+     * the cluster check did not already report a violation. A quota violation additionally
+     * triggers a {@code ResourceQuotaReached} warning event.
+     *
+     * @param tunedConfig configuration handed to both checks and used for the event interval
+     * @param jobTopology topology handed to the quota check
+     * @param evaluatedMetrics metrics handed to the cluster resource check
+     * @param scalingSummaries scaling decisions per vertex
+     * @param ctx job context
+     * @return true if either check reports that the scaling would exceed its limit
+     */
+    private boolean scalingWouldExceedMaxResources(
+            Configuration tunedConfig,
+            JobTopology jobTopology,
+            EvaluatedMetrics evaluatedMetrics,
+            Map<JobVertexID, ScalingSummary> scalingSummaries,
+            Context ctx) {
+        boolean exceedsCluster =
+                scalingWouldExceedClusterResources(
+                        tunedConfig, evaluatedMetrics, scalingSummaries, ctx);
+        if (exceedsCluster) {
+            return true;
+        }
+
+        boolean exceedsQuota =
+                scalingWouldExceedResourceQuota(tunedConfig, jobTopology, scalingSummaries, ctx);
+        if (!exceedsQuota) {
+            return false;
+        }
+
+        // Quota violated: report it through the event handler before blocking the scaling.
+        var eventInterval = tunedConfig.get(SCALING_EVENT_INTERVAL);
+        autoScalerEventHandler.handleEvent(
+                ctx,
+                AutoScalerEventHandler.Type.Warning,
+                "ResourceQuotaReached",
+                RESOURCE_QUOTA_REACHED_MESSAGE,
+                null,
+                eventInterval);
+        return true;
+    }
+
     private boolean scalingWouldExceedClusterResources(
             Configuration tunedConfig,
             EvaluatedMetrics evaluatedMetrics,
@@ -306,7 +335,7 @@ private boolean scalingWouldExceedClusterResources(
                 ResourceCheckUtils.estimateNumTaskSlotsAfterRescale(
                         evaluatedMetrics.getVertexMetrics(), scalingSummaries, numTaskSlotsUsed);
 
-        int taskSlotsPerTm = ctx.getConfiguration().get(TaskManagerOptions.NUM_TASK_SLOTS);
+        int taskSlotsPerTm = tunedConfig.get(TaskManagerOptions.NUM_TASK_SLOTS);
 
         int currentNumTms = (int) Math.ceil(numTaskSlotsUsed / (double) taskSlotsPerTm);
         int newNumTms = (int) Math.ceil(numTaskSlotsAfterRescale / (double) taskSlotsPerTm);
@@ -315,6 +344,83 @@ private boolean scalingWouldExceedClusterResources(
                 currentNumTms, newNumTms, taskManagerCpu, taskManagerMemory);
     }
 
+    /**
+     * Checks whether applying the given scaling decisions would push the estimated resource usage
+     * of the job above the configured CPU or memory quota.
+     *
+     * <p>The slot demand is estimated per slot sharing group as the highest parallelism among the
+     * group's vertices that have a scaling summary, summed over all groups. The slot demand is
+     * converted into a TaskManager count (rounded up, since a partially used TaskManager still
+     * has to be allocated) and multiplied by the per-TaskManager CPU and memory.
+     *
+     * @param tunedConfig configuration providing the quotas and the number of slots per
+     *     TaskManager; also passed to the TaskManager memory lookup
+     * @param jobTopology topology providing the slot sharing group mapping; may be {@code null}
+     * @param scalingSummaries current and new parallelism of the vertices being scaled
+     * @param ctx job context; supplies the TaskManager CPU and is passed to the memory lookup
+     * @return {@code true} if a configured quota would be exceeded; {@code false} otherwise,
+     *     including when no slot sharing information is available, no quota is configured, or
+     *     the estimated TaskManager count does not grow
+     */
+    protected static boolean scalingWouldExceedResourceQuota(
+            Configuration tunedConfig,
+            JobTopology jobTopology,
+            Map<JobVertexID, ScalingSummary> scalingSummaries,
+            JobAutoScalerContext<?> ctx) {
+
+        if (jobTopology == null || jobTopology.getSlotSharingGroupMapping().isEmpty()) {
+            return false;
+        }
+
+        var cpuQuota = tunedConfig.getOptional(AutoScalerOptions.CPU_QUOTA);
+        var memoryQuota = tunedConfig.getOptional(AutoScalerOptions.MEMORY_QUOTA);
+        var tmMemory = MemoryTuning.getTotalMemory(tunedConfig, ctx);
+        // An unknown TaskManager CPU is treated as 0, so the CPU quota can never trip then.
+        var tmCpu = ctx.getTaskManagerCpu().orElse(0.);
+
+        if (!cpuQuota.isPresent() && !memoryQuota.isPresent()) {
+            return false;
+        }
+
+        // NOTE(review): vertices without a scaling summary are ignored here, so a group whose
+        // largest vertex is not being scaled is underestimated - confirm this is intended.
+        int currentTotalSlots = 0;
+        int newTotalSlots = 0;
+        for (var vertices : jobTopology.getSlotSharingGroupMapping().values()) {
+            int currentMax = 0;
+            int newMax = 0;
+            for (var vertex : vertices) {
+                var summary = scalingSummaries.get(vertex);
+                if (summary != null) {
+                    currentMax = Math.max(currentMax, summary.getCurrentParallelism());
+                    newMax = Math.max(newMax, summary.getNewParallelism());
+                }
+            }
+            currentTotalSlots += currentMax;
+            newTotalSlots += newMax;
+        }
+
+        int numSlotsPerTm = tunedConfig.get(TaskManagerOptions.NUM_TASK_SLOTS);
+        // Round up: truncating division would drop a partially filled TaskManager and
+        // underestimate the resources compared against the quota.
+        int currentNumTms = (int) Math.ceil(currentTotalSlots / (double) numSlotsPerTm);
+        int newNumTms = (int) Math.ceil(newTotalSlots / (double) numSlotsPerTm);
+
+        if (newNumTms <= currentNumTms) {
+            LOG.debug(
+                    "Skipping quota check due to new resource allocation is less or equals than the current");
+            return false;
+        }
+
+        if (cpuQuota.isPresent()) {
+            LOG.debug("CPU resource quota is {}, checking limits", cpuQuota.get());
+            double totalCPU = tmCpu * newNumTms;
+            if (totalCPU > cpuQuota.get()) {
+                LOG.info("CPU resource quota reached with value: {}", totalCPU);
+                return true;
+            }
+        }
+
+        if (memoryQuota.isPresent()) {
+            LOG.debug("Memory resource quota is {}, checking limits", memoryQuota.get());
+            long totalMemory = tmMemory.getBytes() * newNumTms;
+            if (totalMemory > memoryQuota.get().getBytes()) {
+                LOG.info(
+                        "Memory resource quota reached with value: {}",
+                        new MemorySize(totalMemory));
+                return true;
+            }
+        }
+
+        return false;
+    }
+
     private static Map<String, String> getVertexParallelismOverrides(
             Map<JobVertexID, Map<ScalingMetric, EvaluatedScalingMetric>> evaluatedMetrics,
             Map<JobVertexID, ScalingSummary> summaries) {
 
@@ -210,7 +210,15 @@ protected JobTopology getJobTopology(
     @VisibleForTesting
     @SneakyThrows
     protected JobTopology getJobTopology(JobDetailsInfo jobDetailsInfo) {
-        Map<JobVertexID, Integer> maxParallelismMap =
+        var slotSharingGroupIdMap =
+                jobDetailsInfo.getJobVertexInfos().stream()
+                        .filter(e -> e.getSlotSharingGroupId() != null)
+                        .collect(
+                                Collectors.toMap(
+                                        JobDetailsInfo.JobVertexDetailsInfo::getJobVertexID,
+                                        JobDetailsInfo.JobVertexDetailsInfo
+                                                ::getSlotSharingGroupId));
+        var maxParallelismMap =
                 jobDetailsInfo.getJobVertexInfos().stream()
                         .collect(
                                 Collectors.toMap(
@@ -235,7 +243,8 @@ protected JobTopology getJobTopology(JobDetailsInfo jobDetailsInfo) {
                                     d.getJobVertexID(), IOMetrics.from(d.getJobVertexMetrics()));
                         });
 
-        return JobTopology.fromJsonPlan(json, maxParallelismMap, metrics, finished);
+        return JobTopology.fromJsonPlan(
+                json, slotSharingGroupIdMap, maxParallelismMap, metrics, finished);
     }
 
     private void updateKafkaSourceMaxParallelisms(Context ctx, JobID jobId, JobTopology topology)
@@ -254,7 +263,7 @@ private void updateKafkaSourceMaxParallelisms(Context ctx, JobID jobId, JobTopol
                                 "Updating source {} max parallelism based on available partitions to {}",
                                 sourceVertex,
                                 numPartitions);
-                        topology.updateMaxParallelism(sourceVertex, (int) numPartitions);
+                        topology.get(sourceVertex).updateMaxParallelism((int) numPartitions);
                     }
                 }
             }
 
@@ -20,6 +20,7 @@
 import org.apache.flink.autoscaler.metrics.MetricAggregator;
 import org.apache.flink.configuration.ConfigOption;
 import org.apache.flink.configuration.ConfigOptions;
+import org.apache.flink.configuration.MemorySize;
 
 import java.time.Duration;
 import java.util.List;
@@ -327,4 +328,20 @@ private static ConfigOptions.OptionBuilder autoScalerConfig(String key) {
                     .defaultValue(Duration.ofSeconds(10))
                     .withFallbackKeys(oldOperatorConfigKey("flink.rest-client.timeout"))
                     .withDescription("The timeout for waiting the flink rest client to return.");
+
+    /** Optional memory quota for scaling decisions; has no default value. */
+    public static final ConfigOption<MemorySize> MEMORY_QUOTA =
+            autoScalerConfig("quota.memory")
+                    .memoryType()
+                    .noDefaultValue()
+                    .withFallbackKeys(oldOperatorConfigKey("quota.memory"))
+                    .withDescription(
+                            "Quota of the memory size. When scaling would go beyond this number the scaling is not going to happen.");
+
+    /** Optional CPU count quota for scaling decisions; has no default value. */
+    public static final ConfigOption<Double> CPU_QUOTA =
+            autoScalerConfig("quota.cpu")
+                    .doubleType()
+                    .noDefaultValue()
+                    .withFallbackKeys(oldOperatorConfigKey("quota.cpu"))
+                    .withDescription(
+                            "Quota of the CPU count. When scaling would go beyond this number the scaling is not going to happen.");
 }
@@ -17,6 +17,7 @@
 
 package org.apache.flink.autoscaler.topology;
 
+import org.apache.flink.runtime.instance.SlotSharingGroupId;
 import org.apache.flink.runtime.jobgraph.JobVertexID;
 
 import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableMap;
@@ -48,6 +49,7 @@ public class JobTopology {
     private static final ObjectMapper objectMapper = new ObjectMapper();
 
     @Getter private final Map<JobVertexID, VertexInfo> vertexInfos;
+    @Getter private final Map<SlotSharingGroupId, Set<JobVertexID>> slotSharingGroupMapping;
     @Getter private final Set<JobVertexID> finishedVertices;
     @Getter private final List<JobVertexID> verticesInTopologicalOrder;
 
@@ -66,6 +68,7 @@ public JobTopology(Set<VertexInfo> vertexInfo) {
                 ImmutableMap.copyOf(
                         vertexInfo.stream().collect(Collectors.toMap(VertexInfo::getId, v -> v)));
 
+        Map<SlotSharingGroupId, Set<JobVertexID>> vertexSlotSharingGroupMapping = new HashMap<>();
         var finishedVertices = ImmutableSet.<JobVertexID>builder();
 
         vertexInfo.forEach(
@@ -79,12 +82,21 @@ public JobTopology(Set<VertexInfo> vertexInfo) {
                                             vertexOutputs
                                                     .computeIfAbsent(inputId, id -> new HashMap<>())
                                                     .put(vertexId, shipStrategy));
+
+                    var slotSharingGroupId = info.getSlotSharingGroupId();
+                    if (slotSharingGroupId != null) {
+                        vertexSlotSharingGroupMapping
+                                .computeIfAbsent(slotSharingGroupId, id -> new HashSet<>())
+                                .add(vertexId);
+                    }
+
                     if (info.isFinished()) {
                         finishedVertices.add(vertexId);
                     }
                 });
         vertexOutputs.forEach((v, outputs) -> vertexInfos.get(v).setOutputs(outputs));
 
+        this.slotSharingGroupMapping = ImmutableMap.copyOf(vertexSlotSharingGroupMapping);
         this.finishedVertices = finishedVertices.build();
         this.verticesInTopologicalOrder = returnVerticesInTopologicalOrder();
     }
@@ -97,10 +109,6 @@ public boolean isSource(JobVertexID jobVertexID) {
         return get(jobVertexID).getInputs().isEmpty();
     }
 
-    public void updateMaxParallelism(JobVertexID vertexID, int maxParallelism) {
-        get(vertexID).updateMaxParallelism(maxParallelism);
-    }
-
     private List<JobVertexID> returnVerticesInTopologicalOrder() {
         List<JobVertexID> sorted = new ArrayList<>(vertexInfos.size());
 
@@ -134,6 +142,7 @@ private List<JobVertexID> returnVerticesInTopologicalOrder() {
 
     public static JobTopology fromJsonPlan(
             String jsonPlan,
+            Map<JobVertexID, SlotSharingGroupId> slotSharingGroupIdMap,
             Map<JobVertexID, Integer> maxParallelismMap,
             Map<JobVertexID, IOMetrics> metrics,
             Set<JobVertexID> finishedVertices)
@@ -151,6 +160,7 @@ public static JobTopology fromJsonPlan(
             vertexInfo.add(
                     new VertexInfo(
                             vertexId,
+                            slotSharingGroupIdMap.get(vertexId),
                             inputs,
                             node.get("parallelism").asInt(),
                             maxParallelismMap.get(vertexId),
 
@@ -18,9 +18,12 @@
 package org.apache.flink.autoscaler.topology;
 
 import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.instance.SlotSharingGroupId;
 import org.apache.flink.runtime.jobgraph.JobVertexID;
 
+import lombok.AccessLevel;
 import lombok.Data;
+import lombok.Setter;
 
 import java.util.Map;
 
@@ -33,11 +36,14 @@ public class VertexInfo {
     // All input vertices and the ship_strategy
     private final Map<JobVertexID, ShipStrategy> inputs;
 
+    private final SlotSharingGroupId slotSharingGroupId;
+
     // All output vertices and the ship_strategy
     private Map<JobVertexID, ShipStrategy> outputs;
 
     private final int parallelism;
 
+    @Setter(AccessLevel.NONE)
     private int maxParallelism;
 
     private final int originalMaxParallelism;
@@ -48,12 +54,14 @@ public class VertexInfo {
 
     public VertexInfo(
             JobVertexID id,
+            SlotSharingGroupId slotSharingGroupId,
             Map<JobVertexID, ShipStrategy> inputs,
             int parallelism,
             int maxParallelism,
             boolean finished,
             IOMetrics ioMetrics) {
         this.id = id;
+        this.slotSharingGroupId = slotSharingGroupId;
         this.inputs = inputs;
         this.parallelism = parallelism;
         this.maxParallelism = maxParallelism;
@@ -69,7 +77,18 @@ public VertexInfo(
             int parallelism,
             int maxParallelism,
             IOMetrics ioMetrics) {
-        this(id, inputs, parallelism, maxParallelism, false, ioMetrics);
+        this(id, null, inputs, parallelism, maxParallelism, false, ioMetrics);
+    }
+
+    /**
+     * Test-only convenience constructor that creates a vertex without a slot sharing group: it
+     * delegates to the full constructor with {@code null} as the slot sharing group id.
+     */
+    @VisibleForTesting
+    public VertexInfo(
+            JobVertexID id,
+            Map<JobVertexID, ShipStrategy> inputs,
+            int parallelism,
+            int maxParallelism,
+            boolean finished,
+            IOMetrics ioMetrics) {
+        this(id, null, inputs, parallelism, maxParallelism, finished, ioMetrics);
     }
 
     @VisibleForTesting
@@ -82,6 +101,6 @@ public VertexInfo(
     }
 
+    /**
+     * Updates the max parallelism of this vertex, capping the requested value at the original max
+     * parallelism.
+     *
+     * @param maxParallelism requested max parallelism
+     */
     public void updateMaxParallelism(int maxParallelism) {
-        setMaxParallelism(Math.min(originalMaxParallelism, maxParallelism));
+        this.maxParallelism = Math.min(originalMaxParallelism, maxParallelism);
     }
 }