[VL] Activate CI test for distinct aggregation spill (#3762)
zhztheplayer authored Nov 22, 2023
1 parent 5ab5646 commit c799238
Showing 2 changed files with 22 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/velox_be.yml
@@ -459,7 +459,7 @@ jobs:
-d=PARTIAL_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=PARTIAL_MODE:CACHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
-d=PARTIAL_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0'
- name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory
- name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size".
run: |
docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \
GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
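
For reference, the -d=PARTIAL_MODE dimensions above only toggle Velox partial-aggregation settings. Below is a minimal, hypothetical sketch of how the FLUSHED dimension could be reproduced in a standalone SparkSession: the four spark.gluten.* values are copied verbatim from the workflow line, while the plugin class, the off-heap settings, and the object name are assumptions and not part of this commit.

// Sketch only: standalone SparkSession mirroring the PARTIAL_MODE:FLUSHED dimension above.
// The spark.gluten.* values are taken verbatim from the workflow; everything else is assumed.
import org.apache.spark.sql.SparkSession

object PartialModeFlushedSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("partial-mode-flushed-sketch")
      .config("spark.plugins", "io.glutenproject.GlutenPlugin") // assumed Gluten plugin class for this version
      .config("spark.memory.offHeap.enabled", "true")           // assumed: Gluten runs on off-heap memory
      .config("spark.memory.offHeap.size", "2g")                 // assumed size for a local run
      .config("spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio", "0.05")
      .config("spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio", "0.1")
      .config("spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct", "100")
      .config("spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows", "0")
      .getOrCreate()

    // With these ratios the Velox partial aggregation is expected to flush early,
    // which is the intent of the FLUSHED dimension. Run any aggregation query here.
    spark.stop()
  }
}
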
@@ -19,7 +19,7 @@ package io.glutenproject.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Row, TestUtils}

abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite {
abstract class VeloxTPCHTableSupport extends VeloxWholeStageTransformerSuite {
  protected val rootPath: String = getClass.getResource("/").getPath
  override protected val backend: String = "velox"
  override protected val resourcePath: String = "/tpch-data-parquet-velox"
@@ -32,11 +32,6 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite {
  // TODO: result comparison is not supported currently.
  protected val queriesResults: String = rootPath + "queries-output"

  override def beforeAll(): Unit = {
    super.beforeAll()
    createTPCHNotNullTables()
  }

  override protected def sparkConf: SparkConf = {
    super.sparkConf
      .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager")
@@ -47,6 +42,13 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite {
.set("spark.sql.autoBroadcastJoinThreshold", "-1")
}

override def beforeAll(): Unit = {
super.beforeAll()
createTPCHNotNullTables()
}
}

abstract class VeloxTPCHSuite extends VeloxTPCHTableSupport {
test("TPC-H q1") {
runTPCHQuery(1, veloxTPCHQueries, queriesResults, compareResult = false, noFallBack = false) {
_ =>
@@ -195,6 +197,19 @@ abstract class VeloxTPCHSuite extends VeloxWholeStageTransformerSuite {
  }
}

class VeloxTPCHDistinctSpill extends VeloxTPCHTableSupport {
  override protected def sparkConf: SparkConf = {
    super.sparkConf
      .set("spark.memory.offHeap.size", "50m")
      .set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger distinct spill early
  }

  test("distinct spill") {
    val df = spark.sql("select count(distinct *) from lineitem limit 1")
    TestUtils.compareAnswers(df.collect(), Seq(Row(60175)))
  }
}

class VeloxTPCHV1Suite extends VeloxTPCHSuite {
  override protected def sparkConf: SparkConf = {
    super.sparkConf
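
To make the new test concrete, here is a minimal, hypothetical sketch of reproducing the distinct-spill scenario outside the test harness. The two memory settings mirror VeloxTPCHDistinctSpill above; the plugin class, the off-heap toggle, the Parquet path, and the object name are assumptions and not part of this commit.

// Sketch only: standalone reproduction of the "distinct spill" test above.
// The memory settings mirror VeloxTPCHDistinctSpill; everything else is assumed.
import org.apache.spark.sql.SparkSession

object DistinctSpillSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("velox-distinct-spill-sketch")
      .config("spark.plugins", "io.glutenproject.GlutenPlugin")      // assumed Gluten plugin class
      .config("spark.memory.offHeap.enabled", "true")                // assumed: off-heap must be enabled
      .config("spark.memory.offHeap.size", "50m")                    // small pool, as in the suite
      .config("spark.gluten.memory.overAcquiredMemoryRatio", "0.9")  // as in the suite, to trigger spill early
      .getOrCreate()

    // Hypothetical path to a TPC-H lineitem Parquet dataset; the suite uses its
    // bundled /tpch-data-parquet-velox sample, for which it expects a count of 60175.
    spark.read.parquet("/path/to/tpch/lineitem").createOrReplaceTempView("lineitem")
    spark.sql("select count(distinct *) from lineitem limit 1").show()
    spark.stop()
  }
}
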
