From 5dabb5b66bd180cfc541c3ab652a863630481608 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 14 Nov 2023 08:46:21 +0800 Subject: [PATCH] [VL] Enable Spark functions for `translate`, `add_months`, `array_min` & `array_max` (#3687) --- .../SubstraitToVeloxPlanValidator.cc | 4 -- .../clickhouse/ClickHouseTestSettings.scala | 1 + .../utils/velox/VeloxTestSettings.scala | 2 + .../spark/sql/GlutenSQLQueryTestSuite.scala | 5 +- .../GlutenDateExpressionsSuite.scala | 60 ++++++++++++++++++- .../clickhouse/ClickHouseTestSettings.scala | 1 + .../utils/velox/VeloxTestSettings.scala | 2 + .../spark/sql/GlutenSQLQueryTestSuite.scala | 5 +- .../GlutenDateExpressionsSuite.scala | 58 ++++++++++++++++++ .../utils/velox/VeloxTestSettings.scala | 2 + .../spark/sql/GlutenSQLQueryTestSuite.scala | 5 +- .../GlutenDateExpressionsSuite.scala | 58 ++++++++++++++++++ 12 files changed, 192 insertions(+), 11 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index f80d2e1cb5cb..2a083426aaac 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -45,15 +45,11 @@ static const std::unordered_set kBlackList = { "json_array_length", "from_unixtime", "repeat", - "translate", - "add_months", "date_format", "trunc", "sequence", "posexplode", "arrays_overlap", - "array_min", - "array_max", "approx_percentile"}; bool validateColNames(const ::substrait::NamedStruct& schema) { diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 3efe82eae11f..dc15e2fee1c2 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -678,6 +678,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("WeekDay") .exclude("WeekOfYear") .exclude("DateFormat") + .exclude("Gluten - DateFormat") .exclude("Hour") .exclude("Minute") .exclude("date add interval") diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 22ec7643833c..5ff651b05fa9 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -207,6 +207,8 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") + // Replaced by a gluten test to pass timezone through config. + .exclude("DateFormat") enableSuite[GlutenDecimalExpressionSuite] enableSuite[GlutenStringFunctionsSuite] enableSuite[GlutenRegexpExpressionsSuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 3da1f13d538e..24a44b802d68 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -240,8 +240,9 @@ class GlutenSQLQueryTestSuite "current_database_catalog.sql", "date.sql", "datetime-formatting-invalid.sql", - "datetime-formatting-legacy.sql", - "datetime-formatting.sql", + // Velox had different handling for some illegal cases. +// "datetime-formatting-legacy.sql", +// "datetime-formatting.sql", "datetime-legacy.sql", "datetime-parsing-invalid.sql", "datetime-parsing-legacy.sql", diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala index 79e26e336ab2..fcc8912996bb 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, TimeZoneUTC} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DateType, IntegerType, StringType} +import org.apache.spark.sql.types.{DateType, IntegerType, StringType, TimestampType} import org.apache.spark.unsafe.types.UTF8String import java.sql.{Date, Timestamp} @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr GenerateUnsafeProjection.generate( ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil) } + + // Modified based on vanilla spark to explicitly set timezone in config. + test(GlutenTestConstants.GLUTEN_TEST + "DateFormat") { + val PST_OPT = Option(PST.getId) + val JST_OPT = Option(JST.getId) + + Seq("legacy", "corrected").foreach { + legacyParserPolicy => + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) { + checkEvaluation( + DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT), + null) + checkEvaluation( + DateFormatClass( + Cast(Literal(d), TimestampType, UTC_OPT), + Literal.create(null, StringType), + UTC_OPT), + null) + + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("y"), UTC_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), UTC_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("H"), UTC_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), UTC_OPT), "13") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("y"), PST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), PST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("H"), PST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), PST_OPT), "5") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("y"), JST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), JST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("H"), JST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), JST_OPT), "22") + } + } + } } diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 38678b2ad0d5..d40a9735e4cc 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -721,6 +721,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("WeekDay") .exclude("WeekOfYear") .exclude("DateFormat") + .exclude("Gluten - DateFormat") .exclude("Hour") .exclude("Minute") .exclude("date add interval") diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 7d5c2f7588e5..b22ffad87599 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -146,6 +146,8 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") + // Replaced by a gluten test to pass timezone through config. + .exclude("DateFormat") enableSuite[GlutenDecimalExpressionSuite] enableSuite[GlutenHashExpressionsSuite] enableSuite[GlutenIntervalExpressionsSuite] diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 666bbe3aadff..90046c0b15b2 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -242,8 +242,9 @@ class GlutenSQLQueryTestSuite "current_database_catalog.sql", "date.sql", "datetime-formatting-invalid.sql", - "datetime-formatting-legacy.sql", - "datetime-formatting.sql", + // Velox had different handling for some illegal cases. +// "datetime-formatting-legacy.sql", +// "datetime-formatting.sql", "datetime-legacy.sql", "datetime-parsing-invalid.sql", "datetime-parsing-legacy.sql", diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala index 5d24d7e20439..b599a277214b 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr GenerateUnsafeProjection.generate( ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil) } + + // Modified based on vanilla spark to explicitly set timezone in config. + test(GLUTEN_TEST + "DateFormat") { + val PST_OPT = Option(PST.getId) + val JST_OPT = Option(JST.getId) + + Seq("legacy", "corrected").foreach { + legacyParserPolicy => + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) { + checkEvaluation( + DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT), + null) + checkEvaluation( + DateFormatClass( + Cast(Literal(d), TimestampType, UTC_OPT), + Literal.create(null, StringType), + UTC_OPT), + null) + + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("y"), UTC_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), UTC_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("H"), UTC_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), UTC_OPT), "13") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("y"), PST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), PST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("H"), PST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), PST_OPT), "5") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("y"), JST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), JST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("H"), JST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), JST_OPT), "22") + } + } + } } diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 5640092f7511..731da2305b79 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -153,6 +153,8 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_unix_timestamp") // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") + // Replaced by a gluten test to pass timezone through config. + .exclude("DateFormat") enableSuite[GlutenDecimalExpressionSuite] enableSuite[GlutenHashExpressionsSuite] enableSuite[GlutenIntervalExpressionsSuite] diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 8513c8c0e208..803010a6c584 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -245,8 +245,9 @@ class GlutenSQLQueryTestSuite "current_database_catalog.sql", // "date.sql", // "datetime-formatting-invalid.sql", - "datetime-formatting-legacy.sql", - "datetime-formatting.sql", + // Velox had different handling for some illegal cases. +// "datetime-formatting-legacy.sql", +// "datetime-formatting.sql", // "datetime-legacy.sql", // "datetime-parsing-invalid.sql", "datetime-parsing-legacy.sql", diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala index 5d24d7e20439..b599a277214b 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala @@ -285,4 +285,62 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr GenerateUnsafeProjection.generate( ToUnixTimestamp(Literal("2015-07-24"), Literal("\""), UTC_OPT) :: Nil) } + + // Modified based on vanilla spark to explicitly set timezone in config. + test(GLUTEN_TEST + "DateFormat") { + val PST_OPT = Option(PST.getId) + val JST_OPT = Option(JST.getId) + + Seq("legacy", "corrected").foreach { + legacyParserPolicy => + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) { + checkEvaluation( + DateFormatClass(Literal.create(null, TimestampType), Literal("y"), UTC_OPT), + null) + checkEvaluation( + DateFormatClass( + Cast(Literal(d), TimestampType, UTC_OPT), + Literal.create(null, StringType), + UTC_OPT), + null) + + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("y"), UTC_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), UTC_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, UTC_OPT), Literal("H"), UTC_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), UTC_OPT), "13") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("y"), PST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), PST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, PST_OPT), Literal("H"), PST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), PST_OPT), "5") + } + + withSQLConf( + SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy, + SQLConf.SESSION_LOCAL_TIMEZONE.key -> PST_OPT.get) { + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("y"), JST_OPT), + "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), JST_OPT), "2013") + checkEvaluation( + DateFormatClass(Cast(Literal(d), TimestampType, JST_OPT), Literal("H"), JST_OPT), + "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), JST_OPT), "22") + } + } + } }