GoogleCloudDataproc · mutianf · Oct 7, 2024 · Oct 7, 2024
diff --git a/examples/java-maven/pom.xml b/examples/java-maven/pom.xml
@@ -25,24 +25,24 @@
     <version>0.0.1-SNAPSHOT</version>
 
     <properties>
-        <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.1.2</spark.version>
+        <scala.binary.version>2.13</scala.binary.version>
+        <spark.version>3.3.0</spark.version>
         <reload4j.version>1.7.36</reload4j.version>
-        <spark.bigtable.version>0.1.0</spark.bigtable.version>
+        <spark.bigtable.version>0.2.2-SNAPSHOT</spark.bigtable.version>
         <maven.compiler.source>1.8</maven.compiler.source>
         <maven.compiler.target>1.8</maven.compiler.target>
     </properties>
 
     <dependencies>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-sql_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>provided</scope>
         </dependency>

diff --git a/pom.xml b/pom.xml
@@ -71,8 +71,8 @@
     <gpg.skip>true</gpg.skip>
     <nexus.remote.skip>false</nexus.remote.skip>
 
-    <scala.version>2.12.14</scala.version>
-    <scala.binary.version>2.12</scala.binary.version>
+    <scala.version>2.13.1</scala.version>
+    <scala.binary.version>2.13</scala.binary.version>
     <spark.version>3.3.3</spark.version>
     <bigtable.java.version>2.42.0</bigtable.java.version>
     <bigtable.java.emulator.version>0.175.0</bigtable.java.emulator.version>
@@ -90,7 +90,7 @@
     <slf4j.version>1.7.36</slf4j.version>
     <slf4j-reload4j.version>1.7.36</slf4j-reload4j.version>
     <reload4j.version>1.2.24</reload4j.version>
-    <scala-parser-combinators.version>1.1.2</scala-parser-combinators.version>
+    <scala-parser-combinators.version>2.0.0</scala-parser-combinators.version>
     <commons-lang.version>2.6</commons-lang.version>
     <openlineage.version>1.22.0</openlineage.version>
   </properties>

diff --git a/spark-bigtable_2.12-it/pom.xml b/spark-bigtable_2.12-it/pom.xml
@@ -14,21 +14,20 @@
  See the License for the specific language governing permissions and
  limitations under the License.
  -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
     <groupId>com.google.cloud.spark.bigtable</groupId>
     <artifactId>spark-bigtable-connector</artifactId>
-    <version>0.2.1</version>  <!-- ${NEXT_VERSION_FLAG} -->
+    <version>0.2.2-SNAPSHOT</version>  <!-- ${NEXT_VERSION_FLAG} -->
     <relativePath>../</relativePath>
   </parent>
 
   <groupId>com.google.cloud.spark.bigtable</groupId>
   <artifactId>spark-bigtable_2.12-it</artifactId>
   <name>Google Bigtable - Spark Connector Integration Tests</name>
-  <version>0.2.1</version>  <!-- ${NEXT_VERSION_FLAG} -->
+  <version>0.2.2-SNAPSHOT</version>  <!-- ${NEXT_VERSION_FLAG} -->
 
   <dependencies>
     <dependency>
@@ -52,7 +51,7 @@
     <dependency>
       <groupId>com.google.cloud.spark.bigtable</groupId>
       <artifactId>spark-bigtable_2.12</artifactId>
-      <version>0.2.1</version>  <!-- ${NEXT_VERSION_FLAG} -->
+      <version>0.2.2-SNAPSHOT</version>  <!-- ${NEXT_VERSION_FLAG} -->
     </dependency>
 
     <dependency>
@@ -64,14 +63,16 @@
 
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.binary.version}</artifactId>
+<!--      <artifactId>spark-core_${scala.binary.version}</artifactId>-->
+      <artifactId>spark-core_2.13</artifactId>
       <version>${spark.version}</version>
       <scope>provided</scope>
       <!-- TODO: consider excluding log4j slf4j-log4j12 if needed. -->
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+<!--      <artifactId>spark-sql_${scala.binary.version}</artifactId>-->
+      <artifactId>spark-sql_2.13</artifactId>
       <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
@@ -97,7 +98,8 @@
     </dependency>
     <dependency>
       <groupId>io.openlineage</groupId>
-      <artifactId>openlineage-spark_${scala.binary.version}</artifactId>
+<!--      <artifactId>openlineage-spark_${scala.binary.version}</artifactId>-->
+      <artifactId>openlineage-spark_2.13</artifactId>
       <version>${openlineage.version}</version>
       <scope>test</scope>
     </dependency>

diff --git a/spark-bigtable_2.12/pom.xml b/spark-bigtable_2.12/pom.xml
@@ -14,21 +14,20 @@
  See the License for the specific language governing permissions and
  limitations under the License.
  -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
     <groupId>com.google.cloud.spark.bigtable</groupId>
     <artifactId>spark-bigtable-connector</artifactId>
-    <version>0.2.1</version>  <!-- ${NEXT_VERSION_FLAG} -->
+    <version>0.2.2-SNAPSHOT</version>  <!-- ${NEXT_VERSION_FLAG} -->
     <relativePath>../</relativePath>
   </parent>
 
   <groupId>com.google.cloud.spark.bigtable</groupId>
   <artifactId>spark-bigtable_2.12</artifactId>
   <name>Google Bigtable - Apache Spark Connector</name>
-  <version>0.2.1</version>  <!-- ${NEXT_VERSION_FLAG} -->
+  <version>0.2.2-SNAPSHOT</version>  <!-- ${NEXT_VERSION_FLAG} -->
 
   <dependencies>
     <dependency>
@@ -45,7 +44,8 @@
     <!-- To fix the "NoClassDefFoundError: scala/util/parsing/json/JSON$" error with Spark 3.4. -->
     <dependency>
       <groupId>org.scala-lang.modules</groupId>
-      <artifactId>scala-parser-combinators_${scala.binary.version}</artifactId>
+<!--      <artifactId>scala-parser-combinators_${scala.binary.version}</artifactId>-->
+      <artifactId>scala-parser-combinators_2.13</artifactId>
       <version>${scala-parser-combinators.version}</version>
     </dependency>
 
@@ -58,7 +58,8 @@
 
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.binary.version}</artifactId>
+<!--      <artifactId>spark-core_${scala.binary.version}</artifactId>-->
+      <artifactId>spark-core_2.13</artifactId>
       <version>${spark.version}</version>
       <scope>provided</scope>
       <exclusions>
@@ -83,7 +84,8 @@
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+<!--      <artifactId>spark-sql_${scala.binary.version}</artifactId>-->
+      <artifactId>spark-sql_2.13</artifactId>
       <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
@@ -96,7 +98,8 @@
 
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala.binary.version}</artifactId>
+<!--      <artifactId>scalatest_${scala.binary.version}</artifactId>-->
+      <artifactId>scalatest_2.13</artifactId>
       <version>${scalatest.version}</version>
       <scope>test</scope>
       <exclusions>
@@ -200,17 +203,13 @@
             </goals>
             <configuration>
               <transformers>
-                <transformer
-                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
-                <transformer
-                        implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
                   <addHeader>false</addHeader>
                 </transformer>
-                <transformer
-                        implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
                 </transformer>
-                <transformer
-                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                   <manifestEntries>
                     <Main-Class>${app.main.class}</Main-Class>
                     <X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK>

diff --git a/...nectors/src/main/scala/com/google/cloud/spark/bigtable/datasources/SchemaConverters.scala b/...nectors/src/main/scala/com/google/cloud/spark/bigtable/datasources/SchemaConverters.scala
@@ -33,7 +33,8 @@ import java.nio.ByteBuffer
 import java.sql.Timestamp
 import java.util
 import java.util.HashMap
-import scala.collection.JavaConversions._
+import java.util.stream.Collectors
+import scala.collection.JavaConverters._
 
 @InterfaceAudience.Private
 abstract class AvroException(msg: String) extends Exception(msg)
@@ -69,10 +70,12 @@ object SchemaConverters {
       case ENUM    => SchemaType(StringType, nullable = false)
 
       case RECORD =>
-        val fields = avroSchema.getFields.map { f =>
-          val schemaType = toSqlType(f.schema())
-          StructField(f.name, schemaType.dataType, schemaType.nullable)
-        }
+
+        val fields = avroSchema.getFields.stream.map(
+          f => {
+            val schemaType = toSqlType(f.schema())
+            StructField(f.name, schemaType.dataType, schemaType.nullable)
+          }).collect(Collectors.toList[StructField])
 
         SchemaType(StructType(fields), nullable = false)
 
@@ -95,18 +98,18 @@ object SchemaConverters {
         )
 
       case UNION =>
-        if (avroSchema.getTypes.exists(_.getType == NULL)) {
+        if (avroSchema.getTypes.stream.anyMatch(_.getType == NULL)) {
           // In case of a union with null, eliminate it and make a recursive call
           val remainingUnionTypes =
-            avroSchema.getTypes.filterNot(_.getType == NULL)
+            avroSchema.getTypes.stream.filter(_.getType != NULL).collect(Collectors.toList[Schema])
           if (remainingUnionTypes.size == 1) {
             toSqlType(remainingUnionTypes.get(0)).copy(nullable = true)
           } else {
             toSqlType(Schema.createUnion(remainingUnionTypes))
               .copy(nullable = true)
           }
         } else
-          avroSchema.getTypes.map(_.getType) match {
+          avroSchema.getTypes.asScala.map(_.getType).toSeq match { // Scala Seq so the Seq(..) patterns apply
             case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) =>
               SchemaType(LongType, nullable = false)
             case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) =>
@@ -182,7 +185,7 @@ object SchemaConverters {
           }
       case RECORD =>
         val fieldConverters =
-          schema.getFields.map(f => createConverterToSQL(f.schema))
+          schema.getFields.stream.map(f => createConverterToSQL(f.schema)).collect(Collectors.toList[Any => Any])
         (item: Any) =>
           if (item == null) {
             null
@@ -191,7 +194,7 @@ object SchemaConverters {
             val converted = new Array[Any](fieldConverters.size)
             var idx = 0
             while (idx < fieldConverters.size) {
-              converted(idx) = fieldConverters.apply(idx)(record.get(idx))
+              converted(idx) = fieldConverters.get(idx)(record.get(idx))
               idx += 1
             }
             Row.fromSeq(converted.toSeq)
@@ -203,10 +206,10 @@ object SchemaConverters {
             null
           } else {
             try {
-              item.asInstanceOf[GenericData.Array[Any]].map(elementConverter)
+              item.asInstanceOf[GenericData.Array[Any]].asScala.map(elementConverter) // eager Scala collection, as before
             } catch {
               case e: Throwable =>
-                item.asInstanceOf[util.ArrayList[Any]].map(elementConverter)
+                item.asInstanceOf[util.ArrayList[Any]].asScala.map(elementConverter)
             }
           }
       case MAP =>
@@ -217,19 +220,18 @@ object SchemaConverters {
           } else {
             item
               .asInstanceOf[HashMap[Any, Any]]
-              .map(x => (x._1.toString, valueConverter(x._2)))
-              .toMap
+              .asScala.map { case (k, v) => (k.toString, valueConverter(v)) }.toMap
           }
       case UNION =>
-        if (schema.getTypes.exists(_.getType == NULL)) {
-          val remainingUnionTypes = schema.getTypes.filterNot(_.getType == NULL)
+        if (schema.getTypes.stream.anyMatch(_.getType == NULL)) {
+          val remainingUnionTypes = schema.getTypes.stream.filter(_.getType != NULL).collect(Collectors.toList[Schema])
           if (remainingUnionTypes.size == 1) {
             createConverterToSQL(remainingUnionTypes.get(0))
           } else {
             createConverterToSQL(Schema.createUnion(remainingUnionTypes))
           }
         } else
-          schema.getTypes.map(_.getType) match {
+          schema.getTypes.asScala.map(_.getType).toSeq match { // Scala Seq so the Seq(..) patterns apply
             case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) =>
               (item: Any) => {
                 item match {