Skip to content

Commit 3523ef0

Browse files
Merge pull request #21 from datazip-inc/feat/prevent-optimizer-getting-killed
feat: prevent optimizer getting killed
1 parent 16d712f commit 3523ef0

2 files changed

Lines changed: 6 additions & 1 deletion

File tree

amoro-format-iceberg/src/main/java/org/apache/amoro/table/TableProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ private TableProperties() {}
9090

9191
public static final String SELF_OPTIMIZING_EXECUTE_RETRY_NUMBER =
9292
"self-optimizing.execute.num-retries";
93-
public static final int SELF_OPTIMIZING_EXECUTE_RETRY_NUMBER_DEFAULT = 5;
93+
public static final int SELF_OPTIMIZING_EXECUTE_RETRY_NUMBER_DEFAULT = 0;
9494

9595
public static final String SELF_OPTIMIZING_TARGET_SIZE = "self-optimizing.target-size";
9696
public static final long SELF_OPTIMIZING_TARGET_SIZE_DEFAULT = 134217728; // 128 MB

local-test/config.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ containers:
171171
spark-conf.spark.driver.memory: "1g"
172172
spark-conf.spark.executor.memory: "1g"
173173
spark-conf.spark.executor.cores: "1"
174+
# Configs to keep the Spark context from failing when executors fail
175+
# Number of retries Spark allows per task = spark-conf.spark.task.maxFailures - 1
176+
spark-conf.spark.executor.maxNumFailures: "100"
177+
spark-conf.spark.executor.failuresValidityInterval: "60s"
178+
spark-conf.spark.task.maxFailures: "4"
174179
# AWS/MinIO credentials for S3FileIO (Spark optimizer pods need these)
175180
spark-conf.spark.kubernetes.driver.secretKeyRef.AWS_ACCESS_KEY_ID: "fusion-s3-credentials:access-key-id"
176181
spark-conf.spark.kubernetes.driver.secretKeyRef.AWS_SECRET_ACCESS_KEY: "fusion-s3-credentials:secret-access-key"

0 commit comments

Comments
 (0)