diff --git a/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala b/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala
index f0c4cb7cf059b..a99e244a17ab4 100644
--- a/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala
+++ b/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, PlanExp
 import org.apache.spark.sql.catalyst.plans.physical.Distribution
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
@@ -64,7 +65,8 @@ trait SparkShims {

   def splitFiles(
       sparkSession: SparkSession,
-      file: FileStatusWithMetadata,
+      // Hack -- the type changes in a non-backwards compatible way
+      file: Object,
       filePath: Path,
       isSplitable: Boolean,
       maxSplitBytes: Long,
diff --git a/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala
index cc0d7ef928d62..3e36c999064bc 100644
--- a/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala
+++ b/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala
@@ -37,10 +37,10 @@ import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap

-import org.apache.hadoop.fs.Path
+import org.apache.hadoop.fs._

 class Spark34Shims extends SparkShims {
   override def getShimDescriptor: ShimDescriptor = SparkShimProvider.DESCRIPTOR
@@ -51,9 +51,8 @@ class Spark34Shims extends SparkShims {
     ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil
   }

-  override def structFromAttributes(
-      attrs: Seq[Attribute]): StructType = {
-    StructType.fromAttributes(attrs)
+  override def structFromAttributes(attrs: Seq[Attribute]): StructType = {
+    StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable, a.metadata)))
   }

   override def expressionMappings: Seq[Sig] = {
@@ -142,15 +141,15 @@ class Spark34Shims extends SparkShims {
       cause = null)
   }
   override def splitFiles(
-      sparkSession: SparkSession,
-      file: FileStatusWithMetadata,
-      filePath: Path,
-      isSplitable: Boolean,
-      maxSplitBytes: Long,
-      partitionValues: InternalRow): Seq[PartitionedFile] = {
+    sparkSession: SparkSession,
+    file: Object,
+    filePath: Path,
+    isSplitable: Boolean,
+    maxSplitBytes: Long,
+    partitionValues: InternalRow): Seq[PartitionedFile] = {
     PartitionedFileUtil.splitFiles(
       sparkSession,
-      file,
+      file.asInstanceOf[FileStatus],
       filePath,
       isSplitable,
       maxSplitBytes,
diff --git a/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala
index ff643339d1374..6237eaa63e2f0 100644
--- a/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala
+++ b/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala
@@ -138,11 +138,16 @@ class Spark35Shims extends SparkShims {
   }
   override def splitFiles(
       sparkSession: SparkSession,
-      file: FileStatusWithMetadata,
+      file: Object,
       filePath: Path,
       isSplitable: Boolean,
       maxSplitBytes: Long,
       partitionValues: InternalRow): Seq[PartitionedFile] = {
-    PartitionedFileUtil.splitFiles(sparkSession, file, isSplitable, maxSplitBytes, partitionValues)
+    PartitionedFileUtil.splitFiles(
+      sparkSession,
+      file.asInstanceOf[FileStatusWithMetadata],
+      isSplitable,
+      maxSplitBytes,
+      partitionValues)
   }
 }
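
The trade-off this diff makes is that PartitionedFileUtil.splitFiles receives a FileStatus in Spark 3.4 but a FileStatusWithMetadata in Spark 3.5, so the shared SparkShims trait widens the parameter to Object and each version-specific shim downcasts it to the type its Spark version expects. The following minimal sketch illustrates that erase-and-downcast pattern in isolation; every name in it is hypothetical and stands in for the Gluten/Spark types, it is not their API.

// Hypothetical, self-contained sketch of the shim pattern used in the diff.
trait FileShim {
  // Object stands in for the type that differs between Spark versions
  // (FileStatus in 3.4 vs. FileStatusWithMetadata in 3.5).
  def fileLength(file: Object): Long
}

// "Spark 3.4"-style handle: a plain path-and-length record.
final case class PlainFileStatus(path: String, length: Long)

class PlainShim extends FileShim {
  override def fileLength(file: Object): Long =
    file.asInstanceOf[PlainFileStatus].length
}

// "Spark 3.5"-style handle: the same record plus extra metadata.
final case class FileStatusWithMeta(path: String, length: Long, metadata: Map[String, String])

class MetaShim extends FileShim {
  override def fileLength(file: Object): Long =
    file.asInstanceOf[FileStatusWithMeta].length
}

object ShimDemo extends App {
  // Callers in the common module only ever see Object; the cast happens
  // inside whichever shim was loaded for the running Spark version, so a
  // mismatched handle fails at runtime rather than at compile time.
  val shim: FileShim = new MetaShim
  println(shim.fileLength(FileStatusWithMeta("/tmp/part-00000", 128L, Map.empty)))
}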