
Commit 58c6ded

materialize-iceberg: do not limit result set size
We must handle arbitrarily large result sets for load queries, and Spark's default 1 GiB limit on collected result size is not compatible with that requirement. This change removes the result set size limit by setting spark.driver.maxResultSize=0. The Spark driver itself may still OOM if the result set is too large; in that case the user will need to configure higher limits for the EMR Application.
1 parent: 9beaea7

File tree

1 file changed (+1, -1 lines)


materialize-iceberg/emr.go

Lines changed: 1 addition & 1 deletion
@@ -145,7 +145,7 @@ func (e *emrClient) runJob(ctx context.Context, input any, entryPointUri, pyFile
 			ExecutionRoleArn: aws.String(e.cfg.ExecutionRoleArn),
 			JobDriver: &emrTypes.JobDriverMemberSparkSubmit{
 				Value: emrTypes.SparkSubmit{
-					SparkSubmitParameters: aws.String(fmt.Sprintf("--py-files %s", pyFilesCommonURI)),
+					SparkSubmitParameters: aws.String(fmt.Sprintf("--py-files %s --conf spark.driver.maxResultSize=0", pyFilesCommonURI)),
 					EntryPoint:            aws.String(entryPointUri),
 					EntryPointArguments:   args,
 				},
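The one-line change above only affects how the spark-submit parameter string is assembled. A minimal standalone sketch of that string construction (the `buildSparkSubmitParameters` helper is hypothetical, introduced here for illustration; the real code inlines the `fmt.Sprintf` call shown in the diff):

```go
package main

import (
	"fmt"
	"strings"
)

// buildSparkSubmitParameters mirrors the diff: alongside --py-files, it passes
// --conf spark.driver.maxResultSize=0, which tells Spark not to cap the total
// size of results collected at the driver (0 means unlimited).
func buildSparkSubmitParameters(pyFilesURI string) string {
	return fmt.Sprintf("--py-files %s --conf spark.driver.maxResultSize=0", pyFilesURI)
}

func main() {
	// Example S3 URI; in the real job this comes from pyFilesCommonURI.
	params := buildSparkSubmitParameters("s3://bucket/common.zip")
	fmt.Println(params)

	if !strings.Contains(params, "spark.driver.maxResultSize=0") {
		panic("maxResultSize override missing from spark-submit parameters")
	}
}
```

Note that lifting the driver-side cap does not raise the driver's memory; a result set larger than the driver heap will still OOM, which is why the commit message points users at the EMR Application limits.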
