[VL] Re-enable background IO threads by default (apache#7845)
zhztheplayer authored Nov 8, 2024
1 parent 6cf278c commit 3e6263e
Showing 3 changed files with 9 additions and 52 deletions.
42 changes: 0 additions & 42 deletions .github/workflows/velox_backend.yml
@@ -249,48 +249,6 @@ jobs:
             --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
             --extra-conf=spark.gluten.ras.enabled=true
 
-  run-tpc-test-ubuntu-iothreads:
-    needs: build-native-lib-centos-7
-    strategy:
-      fail-fast: false
-      matrix:
-        spark: [ "spark-3.5" ]
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@v2
-      - name: Download All Native Artifacts
-        uses: actions/download-artifact@v3
-        with:
-          name: velox-native-lib-centos-7-${{github.sha}}
-          path: ./cpp/build/releases/
-      - name: Download All Arrow Jar Artifacts
-        uses: actions/download-artifact@v3
-        with:
-          name: arrow-jars-centos-7-${{github.sha}}
-          path: /home/runner/.m2/repository/org/apache/arrow/
-      - name: Setup java and maven
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y openjdk-8-jdk maven
-      - name: Set environment variables
-        run: |
-          echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
-      - name: Build for Spark ${{ matrix.spark }}
-        run: |
-          cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
-          cd $GITHUB_WORKSPACE/tools/gluten-it
-          $MVN_CMD clean install -P${{ matrix.spark }}
-      - name: Build and run TPC-H / TPC-DS
-        run: |
-          cd $GITHUB_WORKSPACE/tools/gluten-it
-          GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
-            --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
-            --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=16
-          GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
-            --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
-            --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=16
-
   run-tpc-test-ubuntu-oom:
     needs: build-native-lib-centos-7
     strategy:
1 change: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ public Object ask(Object message) throws Exception {
   private static SparkConf newSparkConf() {
     final SparkConf conf = new SparkConf();
     conf.set(GlutenConfig.SPARK_OFFHEAP_SIZE_KEY(), "1g");
+    conf.set(GlutenConfig.COLUMNAR_VELOX_CONNECTOR_IO_THREADS(), "0");
     return conf;
   }
 }
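The test above now pins spark.gluten.sql.columnar.backend.velox.IOThreads to 0 because the option no longer defaults to zero. An application can pin the value the same way through ordinary Spark configuration. The sketch below is illustrative only, not part of the commit: the object name, master, and app name are arbitrary, and Gluten's other required settings (plugin class, off-heap memory, and so on) are omitted.

import org.apache.spark.sql.SparkSession

// Hypothetical example, not Gluten code; it only shows how the key can be pinned.
object PinIoThreadsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[4]") // arbitrary choice for the sketch
      .appName("velox-io-threads-example") // arbitrary name
      // "0" disables the connector IO thread pool, as the updated test does;
      // any positive value fixes the pool size instead of following task slots.
      .config("spark.gluten.sql.columnar.backend.velox.IOThreads", "0")
      .getOrCreate()

    spark.range(10).count() // placeholder workload
    spark.stop()
  }
}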
18 changes: 8 additions & 10 deletions shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -291,7 +291,7 @@ class GlutenConfig(conf: SQLConf) extends Logging {
   def veloxSsdODirectEnabled: Boolean = conf.getConf(COLUMNAR_VELOX_SSD_ODIRECT_ENABLED)
 
   def veloxConnectorIOThreads: Int = {
-    conf.getConf(COLUMNAR_VELOX_CONNECTOR_IO_THREADS)
+    conf.getConf(COLUMNAR_VELOX_CONNECTOR_IO_THREADS).getOrElse(numTaskSlotsPerExecutor)
   }
 
   def veloxSplitPreloadPerDriver: Integer = conf.getConf(COLUMNAR_VELOX_SPLIT_PRELOAD_PER_DRIVER)
@@ -759,7 +759,9 @@ object GlutenConfig {
       (AWS_S3_RETRY_MODE.key, AWS_S3_RETRY_MODE.defaultValueString),
       (
         COLUMNAR_VELOX_CONNECTOR_IO_THREADS.key,
-        COLUMNAR_VELOX_CONNECTOR_IO_THREADS.defaultValueString),
+        conf.getOrElse(
+          NUM_TASK_SLOTS_PER_EXECUTOR.key,
+          NUM_TASK_SLOTS_PER_EXECUTOR.defaultValueString)),
       (COLUMNAR_SHUFFLE_CODEC.key, ""),
       (COLUMNAR_SHUFFLE_CODEC_BACKEND.key, ""),
       ("spark.hadoop.input.connect.timeout", "180000"),
@@ -1449,19 +1451,15 @@ object GlutenConfig {
       .booleanConf
       .createWithDefault(false)
 
-  // FIXME: May cause issues when toggled on. Examples:
-  // https://github.com/apache/incubator-gluten/issues/7161
-  // https://github.com/facebookincubator/velox/issues/10173
   val COLUMNAR_VELOX_CONNECTOR_IO_THREADS =
     buildStaticConf("spark.gluten.sql.columnar.backend.velox.IOThreads")
       .internal()
       .doc(
-        "Experimental: The Size of the IO thread pool in the Connector." +
-          " This thread pool is used for split preloading and DirectBufferedInput." +
-          " The option is experimental. Toggling on it (setting a non-zero value) may cause some" +
-          " unexpected issues when application reaches some certain conditions.")
+        "The Size of the IO thread pool in the Connector. " +
+          "This thread pool is used for split preloading and DirectBufferedInput. " +
+          "By default, the value is the same as the maximum task slots per Spark executor.")
       .intConf
-      .createWithDefault(0)
+      .createOptional
 
   val COLUMNAR_VELOX_ASYNC_TIMEOUT =
     buildStaticConf("spark.gluten.sql.columnar.backend.velox.asyncTimeoutOnTaskStopping")
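With the entry now built via .createOptional, veloxConnectorIOThreads resolves in two steps: use spark.gluten.sql.columnar.backend.velox.IOThreads when the user set it, otherwise fall back to the executor's task slot count (numTaskSlotsPerExecutor). Below is a minimal sketch of that resolution using plain Option values; the object and method names are illustrative stand-ins, not Gluten's config classes.

// Hypothetical helper, not part of Gluten; it only mirrors the fallback logic above.
object IoThreadsFallbackSketch {
  // userSetting stands in for conf.getConf(COLUMNAR_VELOX_CONNECTOR_IO_THREADS),
  // which is None when spark.gluten.sql.columnar.backend.velox.IOThreads is unset.
  def resolveIoThreads(userSetting: Option[Int], numTaskSlotsPerExecutor: Int): Int =
    userSetting.getOrElse(numTaskSlotsPerExecutor)

  def main(args: Array[String]): Unit = {
    println(resolveIoThreads(None, numTaskSlotsPerExecutor = 8))     // 8: follows task slots
    println(resolveIoThreads(Some(0), numTaskSlotsPerExecutor = 8))  // 0: pool disabled explicitly
    println(resolveIoThreads(Some(16), numTaskSlotsPerExecutor = 8)) // 16: fixed pool size
  }
}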
