Skip to content

Commit 224b8e9

Browse files
authored
Merge branch 'main' into rebase_iceberg
2 parents 2fc99ed + bae22ec commit 224b8e9

File tree

1,426 files changed

+373084
-122
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,426 files changed

+373084
-122
lines changed

.github/workflows/util/check.sh

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,30 @@
1515
# limitations under the License.
1616

1717
export BASE_COMMIT=$1
18+
19+
RESULT=0
20+
1821
./.github/workflows/util/check.py header branch
1922
if [ $? -ne 0 ]; then
2023
./.github/workflows/util/check.py header branch --fix
2124
echo -e "\n==== Apply using:"
2225
echo "patch -p1 \<<EOF"
2326
git --no-pager diff
2427
echo "EOF"
25-
false
28+
RESULT=1
29+
fi
30+
31+
# Check that shell scripts use #!/usr/bin/env bash instead of #!/bin/bash
32+
BAD_SHEBANGS=$(git diff --relative --name-only --diff-filter='ACM' "$BASE_COMMIT" -- '*.sh' | while read -r f; do
33+
[ -f "$f" ] && head -1 "$f" | grep -q '^#!/bin/bash' && echo "$f"
34+
done)
35+
36+
if [ -n "$BAD_SHEBANGS" ]; then
37+
echo -e "\n==== The following scripts use #!/bin/bash instead of #!/usr/bin/env bash:"
38+
echo "$BAD_SHEBANGS"
39+
echo "Please replace '#!/bin/bash' with '#!/usr/bin/env bash' for portability."
40+
RESULT=1
2641
fi
2742
43+
exit $RESULT
44+

.github/workflows/util/install-flink-resources.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22
# Licensed to the Apache Software Foundation (ASF) under one or more
33
# contributor license agreements. See the NOTICE file distributed with
44
# this work for additional information regarding copyright ownership.

.github/workflows/util/install-resources.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22
# Licensed to the Apache Software Foundation (ASF) under one or more
33
# contributor license agreements. See the NOTICE file distributed with
44
# this work for additional information regarding copyright ownership.

backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -551,12 +551,6 @@ object VeloxConfig extends ConfigRegistry {
551551
.stringConf
552552
.createWithDefault("legacy")
553553

554-
val AWS_S3_CONNECT_TIMEOUT =
555-
buildConf("spark.gluten.velox.fs.s3a.connect.timeout")
556-
.doc("Timeout for AWS s3 connection.")
557-
.stringConf
558-
.createWithDefault("200s")
559-
560554
val VELOX_ORC_SCAN_ENABLED =
561555
buildConf("spark.gluten.sql.columnar.backend.velox.orc.scan.enabled")
562556
.doc("Enable velox orc scan. If disabled, vanilla spark orc scan will be used.")

backends-velox/src/main/scala/org/apache/gluten/execution/ColumnarCollectLimitExec.scala

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ package org.apache.gluten.execution
1919
import org.apache.gluten.columnarbatch.ColumnarBatches
2020
import org.apache.gluten.columnarbatch.VeloxColumnarBatches
2121

22+
import org.apache.spark.sql.catalyst.InternalRow
2223
import org.apache.spark.sql.execution.SparkPlan
2324
import org.apache.spark.sql.vectorized.ColumnarBatch
2425

@@ -96,6 +97,24 @@ case class ColumnarCollectLimitExec(
9697
}
9798
}
9899

100+
override def executeCollect(): Array[InternalRow] = {
101+
val rowsRdd = child.executeColumnar().mapPartitions {
102+
it =>
103+
val rows = VeloxColumnarToRowExec.toRowIterator(it)
104+
rows.map(_.copy())
105+
}
106+
107+
if (limit >= 0) {
108+
if (offset > 0) {
109+
rowsRdd.take(limit).drop(offset)
110+
} else {
111+
rowsRdd.take(limit)
112+
}
113+
} else {
114+
rowsRdd.collect().drop(offset)
115+
}
116+
}
117+
99118
override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
100119
copy(child = newChild)
101120
}

backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,15 @@ case class VeloxColumnarToRowExec(child: SparkPlan) extends ColumnarToRowExecBas
9292
VeloxColumnarToRowExec.toRowIterator(_, numOutputRows, numInputBatches, convertTime))
9393
}
9494

95+
override def executeCollect(): Array[InternalRow] = {
96+
child match {
97+
case l: ColumnarCollectLimitExec =>
98+
l.executeCollect()
99+
case _ =>
100+
super.executeCollect()
101+
}
102+
}
103+
95104
protected def withNewChildInternal(newChild: SparkPlan): VeloxColumnarToRowExec =
96105
copy(child = newChild)
97106
}

cpp/velox/compute/VeloxBackend.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -314,7 +314,7 @@ void VeloxBackend::initConnector(const std::shared_ptr<velox::config::ConfigBase
314314
ioThreads >= 0,
315315
kVeloxIOThreads + " was set to negative number " + std::to_string(ioThreads) + ", this should not happen.");
316316
if (ioThreads > 0) {
317-
ioExecutor_ = std::make_unique<folly::IOThreadPoolExecutor>(ioThreads);
317+
ioExecutor_ = std::make_unique<folly::CPUThreadPoolExecutor>(ioThreads);
318318
}
319319
velox::connector::registerConnector(
320320
std::make_shared<velox::connector::hive::HiveConnector>(kHiveConnectorId, hiveConf, ioExecutor_.get()));

cpp/velox/compute/VeloxBackend.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ class VeloxBackend {
9090
std::shared_ptr<facebook::velox::cache::AsyncDataCache> asyncDataCache_;
9191

9292
std::unique_ptr<folly::IOThreadPoolExecutor> ssdCacheExecutor_;
93-
std::unique_ptr<folly::IOThreadPoolExecutor> ioExecutor_;
93+
std::unique_ptr<folly::CPUThreadPoolExecutor> ioExecutor_;
9494
std::shared_ptr<facebook::velox::memory::MmapAllocator> cacheAllocator_;
9595

9696
std::string cachePathPrefix_;

cpp/velox/utils/ConfigExtractor.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ void getS3HiveConfig(
6363
{S3Config::Keys::kPathStyleAccess, std::make_pair("path.style.access", "false")},
6464
{S3Config::Keys::kMaxAttempts, std::make_pair("retry.limit", std::nullopt)},
6565
{S3Config::Keys::kRetryMode, std::make_pair("retry.mode", "legacy")},
66-
{S3Config::Keys::kMaxConnections, std::make_pair("connection.maximum", "15")},
66+
{S3Config::Keys::kMaxConnections, std::make_pair("connection.maximum", "25")},
6767
{S3Config::Keys::kSocketTimeout, std::make_pair("connection.timeout", "200s")},
6868
{S3Config::Keys::kConnectTimeout, std::make_pair("connection.establish.timeout", "30s")},
6969
{S3Config::Keys::kUseInstanceCredentials, std::make_pair("instance.credentials", "false")},

dev/bloop-test.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22
# Licensed to the Apache Software Foundation (ASF) under one or more
33
# contributor license agreements. See the NOTICE file distributed with
44
# this work for additional information regarding copyright ownership.

0 commit comments

Comments
 (0)