diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 3ebf43e48564..7dbb164a0b57 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -459,7 +459,7 @@ jobs: -d=PARTIAL_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ -d=PARTIAL_MODE:CACHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ -d=PARTIAL_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory + - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes a crash with "free: invalid size". 
run: | docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala index a9c586c5d10c..64c5308e6794 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala @@ -19,7 +19,7 @@ package io.glutenproject.execution import org.apache.spark.SparkConf import org.apache.spark.sql.{Row, TestUtils} -abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite { +abstract class VeloxTPCHTableSupport extends VeloxWholeStageTransformerSuite { protected val rootPath: String = getClass.getResource("/").getPath override protected val backend: String = "velox" override protected val resourcePath: String = "/tpch-data-parquet-velox" @@ -32,11 +32,6 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite { // TODO: result comparison is not supported currently. 
protected val queriesResults: String = rootPath + "queries-output" - override def beforeAll(): Unit = { - super.beforeAll() - createTPCHNotNullTables() - } - override protected def sparkConf: SparkConf = { super.sparkConf .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") @@ -47,6 +42,13 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite { .set("spark.sql.autoBroadcastJoinThreshold", "-1") } + override def beforeAll(): Unit = { + super.beforeAll() + createTPCHNotNullTables() + } +} + +abstract class VeloxTPCHSuite extends VeloxTPCHTableSupport { test("TPC-H q1") { runTPCHQuery(1, veloxTPCHQueries, queriesResults, compareResult = false, noFallBack = false) { _ => @@ -195,6 +197,19 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite { } } +class VeloxTPCHDistinctSpill extends VeloxTPCHTableSupport { + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.memory.offHeap.size", "50m") + .set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger distinct spill early + } + + test("distinct spill") { + val df = spark.sql("select count(distinct *) from lineitem limit 1") + TestUtils.compareAnswers(df.collect(), Seq(Row(60175))) + } +} + class VeloxTPCHV1Suite extends VeloxTPCHSuite { override protected def sparkConf: SparkConf = { super.sparkConf