Skip to content

Commit

Permalink
[VL] Enable Spark functions for translate, add_months, `array_min…
Browse files Browse the repository at this point in the history
…` & `array_max` (#3687)
  • Loading branch information
PHILO-HE authored Nov 14, 2023
1 parent 7cf715b commit 5dabb5b
Show file tree
Hide file tree
Showing 12 changed files with 192 additions and 11 deletions.
4 changes: 0 additions & 4 deletions cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,11 @@ static const std::unordered_set<std::string> kBlackList = {
"json_array_length",
"from_unixtime",
"repeat",
"translate",
"add_months",
"date_format",
"trunc",
"sequence",
"posexplode",
"arrays_overlap",
"array_min",
"array_max",
"approx_percentile"};

bool validateColNames(const ::substrait::NamedStruct& schema) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("WeekDay")
.exclude("WeekOfYear")
.exclude("DateFormat")
.exclude("Gluten - DateFormat")
.exclude("Hour")
.exclude("Minute")
.exclude("date add interval")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
.exclude("DateFormat")
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenStringFunctionsSuite]
enableSuite[GlutenRegexpExpressionsSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ class GlutenSQLQueryTestSuite
"current_database_catalog.sql",
"date.sql",
"datetime-formatting-invalid.sql",
"datetime-formatting-legacy.sql",
"datetime-formatting.sql",
// Velox had different handling for some illegal cases.
// "datetime-formatting-legacy.sql",
// "datetime-formatting.sql",
"datetime-legacy.sql",
"datetime-parsing-invalid.sql",
"datetime-parsing-legacy.sql",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, TimeZoneUTC}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DateType, IntegerType, StringType}
import org.apache.spark.sql.types.{DateType, IntegerType, StringType, TimestampType}
import org.apache.spark.unsafe.types.UTF8String

import java.sql.{Date, Timestamp}
Expand Down Expand Up @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
GenerateUnsafeProjection.generate(
ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil)
}

// Modified based on vanilla spark to explicitly set timezone in config.
test(GlutenTestConstants.GLUTEN_TEST + "DateFormat") {
  // Checks the year ("y") and hour-of-day ("H") patterns for both suite fixtures: `d` cast to
  // a timestamp, and `ts`. A single zone id feeds both the session timezone config and the
  // timezone carried by the expressions, so the two can never disagree.
  //
  // parserPolicy:     value for the legacy time parser policy config.
  // zoneId:           zone id used for the session config and for the expressions.
  // expectedHourOfTs: expected "H" output for `ts` when rendered in `zoneId`.
  def checkYearAndHour(parserPolicy: String, zoneId: String, expectedHourOfTs: String): Unit = {
    val zoneOpt = Option(zoneId)
    withSQLConf(
      SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy,
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId) {
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("y"), zoneOpt),
        "2015")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), zoneOpt), "2013")
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("H"), zoneOpt),
        "0")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), zoneOpt), expectedHourOfTs)
    }
  }

  Seq("legacy", "corrected").foreach {
    legacyParserPolicy =>
      // A null timestamp or a null format string must both evaluate to null.
      withSQLConf(
        SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy,
        SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
        checkEvaluation(
          DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT),
          null)
        checkEvaluation(
          DateFormatClass(
            Cast(Literal(d), TimestampType, UTC_OPT),
            Literal.create(null, StringType),
            UTC_OPT),
          null)
      }

      checkYearAndHour(legacyParserPolicy, UTC_OPT.get, "13")
      checkYearAndHour(legacyParserPolicy, PST.getId, "5")
      // The JST case must run with the session timezone set to JST as well; configuring a
      // different zone here would leave the config-driven path unverified for JST.
      checkYearAndHour(legacyParserPolicy, JST.getId, "22")
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("WeekDay")
.exclude("WeekOfYear")
.exclude("DateFormat")
.exclude("Gluten - DateFormat")
.exclude("Hour")
.exclude("Minute")
.exclude("date add interval")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
.exclude("DateFormat")
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenHashExpressionsSuite]
enableSuite[GlutenIntervalExpressionsSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,9 @@ class GlutenSQLQueryTestSuite
"current_database_catalog.sql",
"date.sql",
"datetime-formatting-invalid.sql",
"datetime-formatting-legacy.sql",
"datetime-formatting.sql",
// Velox had different handling for some illegal cases.
// "datetime-formatting-legacy.sql",
// "datetime-formatting.sql",
"datetime-legacy.sql",
"datetime-parsing-invalid.sql",
"datetime-parsing-legacy.sql",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
GenerateUnsafeProjection.generate(
ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil)
}

// Modified based on vanilla spark to explicitly set timezone in config.
test(GLUTEN_TEST + "DateFormat") {
  // Checks the year ("y") and hour-of-day ("H") patterns for both suite fixtures: `d` cast to
  // a timestamp, and `ts`. A single zone id feeds both the session timezone config and the
  // timezone carried by the expressions, so the two can never disagree.
  //
  // parserPolicy:     value for the legacy time parser policy config.
  // zoneId:           zone id used for the session config and for the expressions.
  // expectedHourOfTs: expected "H" output for `ts` when rendered in `zoneId`.
  def checkYearAndHour(parserPolicy: String, zoneId: String, expectedHourOfTs: String): Unit = {
    val zoneOpt = Option(zoneId)
    withSQLConf(
      SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy,
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId) {
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("y"), zoneOpt),
        "2015")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), zoneOpt), "2013")
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("H"), zoneOpt),
        "0")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), zoneOpt), expectedHourOfTs)
    }
  }

  Seq("legacy", "corrected").foreach {
    legacyParserPolicy =>
      // A null timestamp or a null format string must both evaluate to null.
      withSQLConf(
        SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy,
        SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
        checkEvaluation(
          DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT),
          null)
        checkEvaluation(
          DateFormatClass(
            Cast(Literal(d), TimestampType, UTC_OPT),
            Literal.create(null, StringType),
            UTC_OPT),
          null)
      }

      checkYearAndHour(legacyParserPolicy, UTC_OPT.get, "13")
      checkYearAndHour(legacyParserPolicy, PST.getId, "5")
      // The JST case must run with the session timezone set to JST as well; configuring a
      // different zone here would leave the config-driven path unverified for JST.
      checkYearAndHour(legacyParserPolicy, JST.getId, "22")
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
.exclude("DateFormat")
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenHashExpressionsSuite]
enableSuite[GlutenIntervalExpressionsSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,9 @@ class GlutenSQLQueryTestSuite
"current_database_catalog.sql",
// "date.sql",
// "datetime-formatting-invalid.sql",
"datetime-formatting-legacy.sql",
"datetime-formatting.sql",
// Velox had different handling for some illegal cases.
// "datetime-formatting-legacy.sql",
// "datetime-formatting.sql",
// "datetime-legacy.sql",
// "datetime-parsing-invalid.sql",
"datetime-parsing-legacy.sql",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
GenerateUnsafeProjection.generate(
ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil)
}

// Modified based on vanilla spark to explicitly set timezone in config.
test(GLUTEN_TEST + "DateFormat") {
  // Checks the year ("y") and hour-of-day ("H") patterns for both suite fixtures: `d` cast to
  // a timestamp, and `ts`. A single zone id feeds both the session timezone config and the
  // timezone carried by the expressions, so the two can never disagree.
  //
  // parserPolicy:     value for the legacy time parser policy config.
  // zoneId:           zone id used for the session config and for the expressions.
  // expectedHourOfTs: expected "H" output for `ts` when rendered in `zoneId`.
  def checkYearAndHour(parserPolicy: String, zoneId: String, expectedHourOfTs: String): Unit = {
    val zoneOpt = Option(zoneId)
    withSQLConf(
      SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy,
      SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId) {
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("y"), zoneOpt),
        "2015")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), zoneOpt), "2013")
      checkEvaluation(
        DateFormatClass(Cast(Literal(d), TimestampType, zoneOpt), Literal("H"), zoneOpt),
        "0")
      checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), zoneOpt), expectedHourOfTs)
    }
  }

  Seq("legacy", "corrected").foreach {
    legacyParserPolicy =>
      // A null timestamp or a null format string must both evaluate to null.
      withSQLConf(
        SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy,
        SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
        checkEvaluation(
          DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT),
          null)
        checkEvaluation(
          DateFormatClass(
            Cast(Literal(d), TimestampType, UTC_OPT),
            Literal.create(null, StringType),
            UTC_OPT),
          null)
      }

      checkYearAndHour(legacyParserPolicy, UTC_OPT.get, "13")
      checkYearAndHour(legacyParserPolicy, PST.getId, "5")
      // The JST case must run with the session timezone set to JST as well; configuring a
      // different zone here would leave the config-driven path unverified for JST.
      checkYearAndHour(legacyParserPolicy, JST.getId, "22")
  }
}
}

0 comments on commit 5dabb5b

Please sign in to comment.