From 8f49d3852ec3edd7b347e6782bf6a0a3218ce069 Mon Sep 17 00:00:00 2001 From: Jia Ke Date: Tue, 24 Oct 2023 07:35:06 +0000 Subject: [PATCH] Pass the compile --- .../velox/VeloxFormatWriterInjects.scala | 20 +- .../clickhouse/ClickHouseTestSettings.scala | 226 +----------------- .../utils/velox/VeloxTestSettings.scala | 39 +-- .../spark/sql/GlutenSQLQueryTestSuite.scala | 7 +- .../expressions/GlutenAnsiCastSuite.scala | 112 --------- .../expressions/GlutenCastSuite.scala | 6 +- ... GlutenDataSourceV2SQLSuiteV1Filter.scala} | 4 +- .../GlutenDataSourceV2SQLSuiteV2Filter.scala | 23 ++ .../parquet/GlutenParquetFilterSuite.scala | 7 +- .../GlutenSessionExtensionSuite.scala | 12 +- .../statistics/SparkFunctionStatistics.scala | 19 +- .../execution/FileSourceScanExecShim.scala | 3 +- 12 files changed, 70 insertions(+), 408 deletions(-) delete mode 100644 gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenAnsiCastSuite.scala rename gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/{GlutenDataSourceV2SQLSuite.scala => GlutenDataSourceV2SQLSuiteV1Filter.scala} (88%) create mode 100644 gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV2Filter.scala diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala index 1a69417ec7f9b..482a6a8495cec 100644 --- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala +++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxFormatWriterInjects.scala @@ -34,26 +34,18 @@ import org.apache.spark.util.TaskResources import com.google.common.base.Preconditions import org.apache.arrow.c.ArrowSchema -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.fs.FileStatus import org.apache.hadoop.mapreduce.TaskAttemptContext import java.io.IOException trait VeloxFormatWriterInjects extends GlutenFormatWriterInjectsBase { def createOutputWriter( - filePath: String, + path: String, dataSchema: StructType, context: TaskAttemptContext, nativeConf: java.util.Map[String, String]): OutputWriter = { - // Create the hdfs path if not existed. - val hdfsSchema = "hdfs://" - if (filePath.startsWith(hdfsSchema)) { - val fs = FileSystem.get(context.getConfiguration) - val hdfsPath = new Path(filePath) - if (!fs.exists(hdfsPath)) { - fs.mkdirs(hdfsPath) - } - } + val originPath = path val arrowSchema = SparkArrowUtil.toArrowSchema(dataSchema, SQLConf.get.sessionLocalTimeZone) @@ -64,7 +56,7 @@ trait VeloxFormatWriterInjects extends GlutenFormatWriterInjectsBase { try { ArrowAbiUtil.exportSchema(allocator, arrowSchema, cSchema) dsHandle = datasourceJniWrapper.nativeInitDatasource( - filePath, + originPath, cSchema.memoryAddress(), NativeMemoryManagers.contextInstance("VeloxWriter").getNativeInstanceHandle, nativeConf) @@ -82,7 +74,7 @@ trait VeloxFormatWriterInjects extends GlutenFormatWriterInjectsBase { arrowSchema, allocator, datasourceJniWrapper, - filePath) + originPath) new OutputWriter { override def write(row: InternalRow): Unit = { @@ -99,7 +91,7 @@ trait VeloxFormatWriterInjects extends GlutenFormatWriterInjectsBase { // Do NOT add override keyword for compatibility on spark 3.1. 
def path(): String = { - filePath + originPath } } } diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index b6e14df51e4cf..733d301a7cb3c 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -60,7 +60,9 @@ class ClickHouseTestSettings extends BackendTestSettings { false // nativeDoValidate failed due to spark conf cleanup case "GlutenDataSourceV2FunctionSuite" => false // nativeDoValidate failed due to spark conf cleanup - case "GlutenDataSourceV2SQLSuite" => + case "GlutenDataSourceV2SQLSuiteV1Filter" => + false // nativeDoValidate failed due to spark conf cleanup + case "GlutenDataSourceV2SQLSuiteV2Filter" => false // nativeDoValidate failed due to spark conf cleanup case "GlutenMetadataColumnSuite" => false // nativeDoValidate failed due to spark conf cleanup case "GlutenQueryCompilationErrorsDSv2Suite" => @@ -443,112 +445,6 @@ class ClickHouseTestSettings extends BackendTestSettings { enableSuite[GlutenUnwrapCastInComparisonEndToEndSuite].exclude("cases when literal is max") enableSuite[GlutenXPathFunctionsSuite] enableSuite[QueryTestSuite] - enableSuite[GlutenAnsiCastSuiteWithAnsiModeOff] - .exclude("null cast") - .exclude("cast string to date") - .exclude("cast string to timestamp") - .exclude("cast from boolean") - .exclude("cast from int") - .exclude("cast from long") - .exclude("cast from float") - .exclude("cast from double") - .exclude("cast from timestamp") - .exclude("data type casting") - .exclude("cast and add") - .exclude("from decimal") - .exclude("cast from array") - .exclude("cast from map") - .exclude("cast from struct") - .exclude("cast struct with a timestamp field") - .exclude("cast between string and interval") - .exclude("cast string to boolean") - .exclude("SPARK-20302 cast with same structure") - .exclude("SPARK-22500: cast for struct should not generate codes beyond 64KB") - .exclude("SPARK-27671: cast from nested null type in struct") - .exclude("Process Infinity, -Infinity, NaN in case insensitive manner") - .exclude("SPARK-22825 Cast array to string") - .exclude("SPARK-33291: Cast array with null elements to string") - .exclude("SPARK-22973 Cast map to string") - .exclude("SPARK-22981 Cast struct to string") - .exclude("SPARK-33291: Cast struct with null elements to string") - .exclude("SPARK-34667: cast year-month interval to string") - .exclude("SPARK-34668: cast day-time interval to string") - .exclude("SPARK-35698: cast timestamp without time zone to string") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35716: cast timestamp without time zone to date type") - .exclude("SPARK-35718: cast date type to timestamp without timezone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - .exclude("SPARK-35720: cast string to timestamp without timezone") - .exclude("SPARK-35112: Cast string to day-time interval") - .exclude("SPARK-35111: Cast string to year-month interval") - .exclude("SPARK-35820: Support cast DayTimeIntervalType in different fields") - .exclude("SPARK-35819: Support cast YearMonthIntervalType in different fields") - .exclude("SPARK-35768: Take into account year-month interval fields in cast") - .exclude("SPARK-35735: Take into 
account day-time interval fields in cast") - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") - .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("cast a timestamp before the epoch 1970-01-01 00:00:00Z") - .exclude("cast from array III") - .exclude("cast from map II") - .exclude("cast from map III") - .exclude("cast from struct II") - .exclude("cast from struct III") - enableSuite[GlutenAnsiCastSuiteWithAnsiModeOn] - .exclude("null cast") - .exclude("cast string to date") - .exclude("cast string to timestamp") - .exclude("cast from boolean") - .exclude("cast from int") - .exclude("cast from long") - .exclude("cast from float") - .exclude("cast from double") - .exclude("cast from timestamp") - .exclude("data type casting") - .exclude("cast and add") - .exclude("from decimal") - .exclude("cast from array") - .exclude("cast from map") - .exclude("cast from struct") - .exclude("cast struct with a timestamp field") - .exclude("cast between string and interval") - .exclude("cast string to boolean") - .exclude("SPARK-20302 cast with same structure") - .exclude("SPARK-22500: cast for struct should not generate codes beyond 64KB") - .exclude("SPARK-27671: cast from nested null type in struct") - .exclude("Process Infinity, -Infinity, NaN in case insensitive manner") - .exclude("SPARK-22825 Cast array to string") - .exclude("SPARK-33291: Cast array with null elements to string") - .exclude("SPARK-22973 Cast map to string") - .exclude("SPARK-22981 Cast struct to string") - .exclude("SPARK-33291: Cast struct with null elements to string") - .exclude("SPARK-34667: cast year-month interval to string") - .exclude("SPARK-34668: cast day-time interval to string") - .exclude("SPARK-35698: cast timestamp without time zone to string") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35716: cast timestamp without time zone to date type") - .exclude("SPARK-35718: cast date type to timestamp without timezone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - .exclude("SPARK-35720: cast string to timestamp without timezone") - .exclude("SPARK-35112: Cast string to day-time interval") - .exclude("SPARK-35111: Cast string to year-month interval") - .exclude("SPARK-35820: Support cast DayTimeIntervalType in different fields") - .exclude("SPARK-35819: Support cast YearMonthIntervalType in different fields") - .exclude("SPARK-35768: Take into account year-month interval fields in cast") - .exclude("SPARK-35735: Take into account day-time interval fields in cast") - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") - .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("cast a timestamp before the epoch 1970-01-01 00:00:00Z") - .exclude("cast from array III") - .exclude("cast from map II") - .exclude("cast from map III") - .exclude("cast from struct II") - .exclude("cast from struct III") 
enableSuite[GlutenArithmeticExpressionSuite] .exclude("- (UnaryMinus)") .exclude("/ (Divide) basic") @@ -613,59 +509,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-36924: Cast IntegralType to DayTimeIntervalType") .exclude("SPARK-36924: Cast YearMonthIntervalType to IntegralType") .exclude("SPARK-36924: Cast IntegralType to YearMonthIntervalType") - enableSuite[GlutenCastSuiteWithAnsiModeOn] - .exclude("null cast") - .exclude("cast string to date") - .exclude("cast string to timestamp") - .exclude("cast from boolean") - .exclude("cast from int") - .exclude("cast from long") - .exclude("cast from float") - .exclude("cast from double") - .exclude("cast from timestamp") - .exclude("data type casting") - .exclude("cast and add") - .exclude("from decimal") - .exclude("cast from array") - .exclude("cast from map") - .exclude("cast from struct") - .exclude("cast struct with a timestamp field") - .exclude("cast between string and interval") - .exclude("cast string to boolean") - .exclude("SPARK-20302 cast with same structure") - .exclude("SPARK-22500: cast for struct should not generate codes beyond 64KB") - .exclude("SPARK-27671: cast from nested null type in struct") - .exclude("Process Infinity, -Infinity, NaN in case insensitive manner") - .exclude("SPARK-22825 Cast array to string") - .exclude("SPARK-33291: Cast array with null elements to string") - .exclude("SPARK-22973 Cast map to string") - .exclude("SPARK-22981 Cast struct to string") - .exclude("SPARK-33291: Cast struct with null elements to string") - .exclude("SPARK-34667: cast year-month interval to string") - .exclude("SPARK-34668: cast day-time interval to string") - .exclude("SPARK-35698: cast timestamp without time zone to string") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35716: cast timestamp without time zone to date type") - .exclude("SPARK-35718: cast date type to timestamp without timezone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - .exclude("SPARK-35720: cast string to timestamp without timezone") - .exclude("SPARK-35112: Cast string to day-time interval") - .exclude("SPARK-35111: Cast string to year-month interval") - .exclude("SPARK-35820: Support cast DayTimeIntervalType in different fields") - .exclude("SPARK-35819: Support cast YearMonthIntervalType in different fields") - .exclude("SPARK-35768: Take into account year-month interval fields in cast") - .exclude("SPARK-35735: Take into account day-time interval fields in cast") - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") - .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("cast a timestamp before the epoch 1970-01-01 00:00:00Z") - .exclude("cast from array III") - .exclude("cast from map II") - .exclude("cast from map III") - .exclude("cast from struct II") - .exclude("cast from struct III") enableSuite[GlutenCollectionExpressionsSuite] .exclude("Array and Map Size") .exclude("MapEntries") @@ -910,69 +753,10 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("ParseUrl") .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") 
.exclude("Sentences") - enableSuite[GlutenTryCastSuite] - .exclude("null cast") - .exclude("cast string to date") - .exclude("cast string to timestamp") - .exclude("cast from boolean") - .exclude("cast from int") - .exclude("cast from long") - .exclude("cast from float") - .exclude("cast from double") - .exclude("cast from timestamp") - .exclude("data type casting") - .exclude("cast and add") - .exclude("from decimal") - .exclude("cast from array") - .exclude("cast from map") - .exclude("cast from struct") - .exclude("cast struct with a timestamp field") - .exclude("cast between string and interval") - .exclude("cast string to boolean") - .exclude("SPARK-20302 cast with same structure") - .exclude("SPARK-22500: cast for struct should not generate codes beyond 64KB") - .exclude("SPARK-27671: cast from nested null type in struct") - .exclude("Process Infinity, -Infinity, NaN in case insensitive manner") - .exclude("SPARK-22825 Cast array to string") - .exclude("SPARK-33291: Cast array with null elements to string") - .exclude("SPARK-22973 Cast map to string") - .exclude("SPARK-22981 Cast struct to string") - .exclude("SPARK-33291: Cast struct with null elements to string") - .exclude("SPARK-34667: cast year-month interval to string") - .exclude("SPARK-34668: cast day-time interval to string") - .exclude("SPARK-35698: cast timestamp without time zone to string") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35716: cast timestamp without time zone to date type") - .exclude("SPARK-35718: cast date type to timestamp without timezone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - .exclude("SPARK-35720: cast string to timestamp without timezone") - .exclude("SPARK-35112: Cast string to day-time interval") - .exclude("SPARK-35111: Cast string to year-month interval") - .exclude("SPARK-35820: Support cast DayTimeIntervalType in different fields") - .exclude("SPARK-35819: Support cast YearMonthIntervalType in different fields") - .exclude("SPARK-35768: Take into account year-month interval fields in cast") - .exclude("SPARK-35735: Take into account day-time interval fields in cast") - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to decimal type") - .exclude("cast from invalid string to numeric should throw NumberFormatException") - .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("ANSI mode: cast string to boolean with parse error") - .exclude("cast from timestamp II") - .exclude("cast a timestamp before the epoch 1970-01-01 00:00:00Z II") - .exclude("cast a timestamp before the epoch 1970-01-01 00:00:00Z") - .exclude("cast from map II") - .exclude("cast from struct II") - .exclude("ANSI mode: cast string to timestamp with parse error") - .exclude("ANSI mode: cast string to date with parse error") - .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") - .exclude("SPARK-35720: cast invalid string input to timestamp without time zone") - .exclude("Gluten - SPARK-35698: cast timestamp without time zone to string") enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite] 
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite] - enableSuite[GlutenDataSourceV2SQLSuite] + enableSuite[GlutenDataSourceV2SQLSuiteV1Filter] + enableSuite[GlutenDataSourceV2SQLSuiteV2Filter] enableSuite[GlutenDataSourceV2Suite] .exclude("partitioning reporting") .exclude("SPARK-33267: push down with condition 'in (..., null)' should not throw NPE") diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 4cf2768369f74..cb16d13b42247 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -19,8 +19,8 @@ package io.glutenproject.utils.velox import io.glutenproject.utils.BackendTestSettings import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{GlutenAnsiCastSuiteWithAnsiModeOff, GlutenAnsiCastSuiteWithAnsiModeOn, GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCastSuiteWithAnsiModeOn, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite} -import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuite, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite} +import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite} +import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter, GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite} import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite} import 
org.apache.spark.sql.execution.{FallbackStrategiesSuite, GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite, GlutenExchangeSuite, GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite, GlutenSameResultSuite, GlutenSortSuite, GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite} import org.apache.spark.sql.execution.adaptive.GlutenAdaptiveQueryExecSuite @@ -57,7 +57,8 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenDataSourceV2DataFrameSuite] enableSuite[GlutenDataSourceV2FunctionSuite] enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite] - enableSuite[GlutenDataSourceV2SQLSuite] + enableSuite[GlutenDataSourceV2SQLSuiteV1Filter] + enableSuite[GlutenDataSourceV2SQLSuiteV2Filter] enableSuite[GlutenDataSourceV2Suite] // Rewrite the following test in GlutenDataSourceV2Suite. .exclude("partitioning reporting") @@ -81,38 +82,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude( "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") enableSuite[GlutenQueryParsingErrorsSuite] - enableSuite[GlutenAnsiCastSuiteWithAnsiModeOff] - .exclude( - "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. - ) - .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - - enableSuite[GlutenAnsiCastSuiteWithAnsiModeOn] - .exclude( - "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. - ) -// .exclude("Fast fail for cast string type to decimal type in ansi mode") - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - - enableSuite[GlutenCastSuiteWithAnsiModeOn] - .exclude( - "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. - ) - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") - enableSuite[GlutenTryCastSuite] - .exclude( - // array/map/struct not supported yet. 
- "cast from invalid string array to numeric array should throw NumberFormatException", - "cast from array II", - "cast from map II", - "cast from struct II" - ) - .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone") - .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone") enableSuite[GlutenArithmeticExpressionSuite] .exclude( "% (Remainder)" // Velox will throw exception when right is zero, need fallback diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 666bbe3aadff5..5b782bcf3b3ae 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -132,7 +132,8 @@ class GlutenSQLQueryTestSuite import IntegratedUDFTestUtils._ - private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + override protected val regenerateGoldenFiles: Boolean = + System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" // FIXME it's not needed to install Spark in testing since the following code only fetchs // some resource files from source folder @@ -507,7 +508,7 @@ class GlutenSQLQueryTestSuite /* Do nothing */ } case udfTestCase: UDFTest - if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && !shouldTestScalarPandasUDFs => + if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && !shouldTestPandasUDFs => ignore( s"${testCase.name} is skipped because pyspark," + s"pandas and/or pyarrow were not available in [$pythonExec].") { @@ -715,7 +716,7 @@ class GlutenSQLQueryTestSuite if udfTestCase.udf.isInstanceOf[TestPythonUDF] && shouldTestPythonUDFs => s"${testCase.name}${System.lineSeparator()}Python: $pythonVer${System.lineSeparator()}" case udfTestCase: UDFTest - if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestScalarPandasUDFs => + if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestPandasUDFs => s"${testCase.name}${System.lineSeparator()}" + s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}" case _ => diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenAnsiCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenAnsiCastSuite.scala deleted file mode 100644 index f92072eb9d5b9..0000000000000 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenAnsiCastSuite.scala +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.catalyst.expressions - -import org.apache.spark.sql.{GlutenTestConstants, GlutenTestsTrait} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DataType, StringType} - -import java.time.LocalDateTime - -class GlutenCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase with GlutenTestsTrait { - - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) - } - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) - } - - override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { - v match { - case lit: Expression => Cast(lit, targetType, timeZoneId) - case _ => Cast(Literal(v), targetType, timeZoneId) - } - } - - override def setConfigurationHint: String = - s"set ${SQLConf.ANSI_ENABLED.key} as false" -} - -class GlutenAnsiCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase with GlutenTestsTrait { - - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) - } - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) - } - - override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { - v match { - case lit: Expression => AnsiCast(lit, targetType, timeZoneId) - case _ => AnsiCast(Literal(v), targetType, timeZoneId) - } - } - - override def setConfigurationHint: String = - s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + - s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" -} - -class GlutenAnsiCastSuiteWithAnsiModeOff extends AnsiCastSuiteBase with GlutenTestsTrait { - - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConf(SQLConf.ANSI_ENABLED, false) - } - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) - } - - override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { - v match { - case lit: Expression => AnsiCast(lit, targetType, timeZoneId) - case _ => AnsiCast(Literal(v), targetType, timeZoneId) - } - } - - override def setConfigurationHint: String = - s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + - s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" -} - -class GlutenTryCastSuite extends TryCastSuite with GlutenTestsTrait { - - private val specialTs = Seq( - "0001-01-01T00:00:00", // the fist timestamp of Common Era - "1582-10-15T23:59:59", // the cutover date from Julian to Gregorian calendar - "1970-01-01T00:00:00", // the epoch timestamp - "9999-12-31T23:59:59" // the last supported timestamp according to SQL standard - ) - - test( - GlutenTestConstants.GLUTEN_TEST + - "SPARK-35698: cast timestamp without time zone to string") { - specialTs.foreach { - s => checkEvaluation(cast(LocalDateTime.parse(s), StringType), s.replace("T", " ")) - } - } -} diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala index 0debb5826b51b..6d330cf025977 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala @@ -21,8 +21,8 @@ import org.apache.spark.sql.types._ import java.sql.Date -class GlutenCastSuite extends CastSuite with GlutenTestsTrait { - override def 
cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { +class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait { + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): Cast = { v match { case lit: Expression => logDebug(s"Cast from: ${lit.dataType.typeName}, to: ${targetType.typeName}") @@ -74,4 +74,6 @@ class GlutenCastSuite extends CastSuite with GlutenTestsTrait { val d = Date.valueOf("1970-01-01") checkEvaluation(cast(d, DateType), d) } + + override protected def evalMode: EvalMode.Value = EvalMode.LEGACY } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV1Filter.scala similarity index 88% rename from gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuite.scala rename to gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV1Filter.scala index c3666f80fae65..ff76180086802 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV1Filter.scala @@ -18,4 +18,6 @@ package org.apache.spark.sql.connector import org.apache.spark.sql._ -class GlutenDataSourceV2SQLSuite extends DataSourceV2SQLSuite with GlutenSQLTestsBaseTrait {} +class GlutenDataSourceV2SQLSuiteV1Filter + extends DataSourceV2SQLSuiteV1Filter + with GlutenSQLTestsBaseTrait {} diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV2Filter.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV2Filter.scala new file mode 100644 index 0000000000000..7e02fc07cec04 --- /dev/null +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/connector/GlutenDataSourceV2SQLSuiteV2Filter.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.connector + +import org.apache.spark.sql._ + +class GlutenDataSourceV2SQLSuiteV2Filter + extends DataSourceV2SQLSuiteV2Filter + with GlutenSQLTestsBaseTrait {} diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala index 707e2b311b63f..8bb57e0755e93 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala @@ -34,10 +34,9 @@ import org.apache.spark.tags.ExtendedSQLTest import org.apache.spark.util.Utils import org.apache.hadoop.fs.Path -import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate} +import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators} import org.apache.parquet.filter2.predicate.FilterApi._ -import org.apache.parquet.filter2.predicate.Operators -import org.apache.parquet.filter2.predicate.Operators.{Column => _, _} +import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt, GtEq, Lt, LtEq, NotEq} import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat, ParquetOutputFormat} import org.apache.parquet.hadoop.util.HadoopInputFile @@ -419,7 +418,7 @@ class GlutenParquetV2FilterSuite extends GltuenParquetFilterSuite with GlutenSQL case PhysicalOperation( _, filters, - DataSourceV2ScanRelation(_, scan: ParquetScan, _, None)) => + DataSourceV2ScanRelation(_, scan: ParquetScan, _, None, None)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") val sourceFilters = filters.flatMap(DataSourceStrategy.translateFilter(_, true)).toArray val pushedFilters = scan.pushedFilters diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index d5e6ed7cff353..847f066bf4d65 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -31,9 +31,15 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { } test("test gluten extensions") { - assert(spark.sessionState.queryStagePrepRules.contains(FallbackOnANSIMode(spark))) - assert(spark.sessionState.queryStagePrepRules.contains(FallbackMultiCodegens(spark))) - assert(spark.sessionState.queryStagePrepRules.contains(FallbackBroadcastExchange(spark))) + assert( + spark.sessionState.adaptiveRulesHolder.queryStagePrepRules + .contains(FallbackOnANSIMode(spark))) + assert( + spark.sessionState.adaptiveRulesHolder.queryStagePrepRules + .contains(FallbackMultiCodegens(spark))) + assert( + spark.sessionState.adaptiveRulesHolder.queryStagePrepRules + .contains(FallbackBroadcastExchange(spark))) assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/statistics/SparkFunctionStatistics.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/statistics/SparkFunctionStatistics.scala index c8e0d245df95d..dd9524c8686e6 100644 --- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/statistics/SparkFunctionStatistics.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/statistics/SparkFunctionStatistics.scala @@ -21,7 +21,6 @@ import io.glutenproject.extension.GlutenPlan import io.glutenproject.utils.{BackendTestUtils, SystemParameters} import org.apache.spark.sql.{GlutenTestConstants, QueryTest, SparkSession} -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ConvertToLocalRelation, NullPropagation} import org.apache.spark.sql.execution.{ProjectExec, SparkPlan} import org.apache.spark.sql.internal.SQLConf @@ -89,17 +88,15 @@ class SparkFunctionStatistics extends QueryTest { initializeSession val functionRegistry = spark.sessionState.functionRegistry val sparkBuiltInFunctions = functionRegistry.listFunction() - // According to expressionsForTimestampNTZSupport in FunctionRegistry.scala, - // these functions are registered only for testing, not available for end users. + // expressionsForTimestampNTZSupport is deleted in spark 3.4 FunctionRegistry.scala. // Other functions like current_database is NOT necessarily offloaded to native. - val ignoreFunctions = FunctionRegistry.expressionsForTimestampNTZSupport.keySet ++ - Seq( - "get_fake_app_name", - "current_catalog", - "current_database", - "spark_partition_id", - "current_user", - "current_timezone") + val ignoreFunctions = Seq( + "get_fake_app_name", + "current_catalog", + "current_database", + "spark_partition_id", + "current_user", + "current_timezone") val supportedFunctions = new java.util.ArrayList[String]() val unsupportedFunctions = new java.util.ArrayList[String]() val needInspectFunctions = new java.util.ArrayList[String]() diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala index 6230cedbd13b8..0a62c41a69df9 100644 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala +++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala @@ -84,8 +84,7 @@ class FileSourceScanExecShim( } } - @transient override protected lazy val dynamicallySelectedPartitions - : Array[PartitionDirectory] = { + @transient override lazy val dynamicallySelectedPartitions: Array[PartitionDirectory] = { val dynamicPartitionFilters = partitionFilters.filter(isDynamicPruningFilter) val selected = if (dynamicPartitionFilters.nonEmpty) {
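On the final hunk: the shim previously declared `dynamicallySelectedPartitions` as `protected`, which stops compiling once the member it overrides becomes public in Spark 3.4's scan-node parent (my reading of why the patch drops the modifier, given the commit's goal of making the code compile). Scala rejects any override that narrows visibility, which a small example reproduces; `FileScanLike` and `ShimSketch` below are hypothetical stand-ins, not Spark classes:

```scala
trait FileScanLike {
  // Public on the parent, as in Spark 3.4.
  lazy val selectedPartitions: Array[String] = Array.empty
}

class ShimSketch extends FileScanLike {
  // Writing `override protected lazy val selectedPartitions ...` here fails with
  // "weaker access privileges in overriding", so the override must stay public.
  override lazy val selectedPartitions: Array[String] = Array("date=2023-10-24")
}
```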