Skip to content

Commit

Permalink
[HOTFIX][CH] Using sparkMurmurHash3_32 function instead of the murmur…
Browse files Browse the repository at this point in the history
…Hash3_32 for the shuffle hash algorithm (#3613)

The CH (ClickHouse) murmurHash3_32 function is not compatible with the vanilla Spark murmur hash function, so the sparkMurmurHash3_32 function is used instead.
  • Loading branch information
zzcclp authored Nov 3, 2023
1 parent 8ee2bac commit 13a0a63
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ object CHBackendSettings extends BackendSettingsApi with Logging {
private val GLUTEN_CLICKHOUSE_SHUFFLE_HASH_ALGORITHM: String =
GlutenConfig.GLUTEN_CONFIG_PREFIX + CHBackend.BACKEND_NAME +
".shuffle.hash.algorithm"
// valid values are: cityHash64 or murmurHash3_32
// valid values are: cityHash64 or sparkMurmurHash3_32
private val GLUTEN_CLICKHOUSE_SHUFFLE_HASH_ALGORITHM_DEFAULT = "cityHash64"
lazy val shuffleHashAlgorithm: String = {
def shuffleHashAlgorithm: String = {
val algorithm = SparkEnv.get.conf.get(
CHBackendSettings.GLUTEN_CLICKHOUSE_SHUFFLE_HASH_ALGORITHM,
CHBackendSettings.GLUTEN_CLICKHOUSE_SHUFFLE_HASH_ALGORITHM_DEFAULT
)
if (!algorithm.equals("cityHash64") && !algorithm.equals("murmurHash3_32")) {
if (!algorithm.equals("cityHash64") && !algorithm.equals("sparkMurmurHash3_32")) {
CHBackendSettings.GLUTEN_CLICKHOUSE_SHUFFLE_HASH_ALGORITHM_DEFAULT
} else {
algorithm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
.set("spark.sql.autoBroadcastJoinThreshold", "10MB")
.set("spark.gluten.sql.columnar.backend.ch.use.v2", "false")
.set("spark.sql.adaptive.enabled", "true")
.set("spark.gluten.sql.columnar.backend.ch.shuffle.hash.algorithm", "murmurHash3_32")
.set("spark.gluten.sql.columnar.backend.ch.shuffle.hash.algorithm", "sparkMurmurHash3_32")
}

override protected def createTPCHNotNullTables(): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class GlutenClickHouseTPCHParquetAQESuite
.set("spark.gluten.sql.columnar.backend.ch.use.v2", "false")
.set("spark.sql.adaptive.enabled", "true")
.set("spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format", "true")
.set("spark.gluten.sql.columnar.backend.ch.shuffle.hash.algorithm", "murmurHash3_32")
.set("spark.gluten.sql.columnar.backend.ch.shuffle.hash.algorithm", "sparkMurmurHash3_32")
}

override protected def createTPCHNotNullTables(): Unit = {
Expand Down

0 comments on commit 13a0a63

Please sign in to comment.