Skip to content

Commit

Permalink
CDAP-14442 removed parquet, and orc dependencies
Browse files Browse the repository at this point in the history
Removed parquet and orc dependencies, as the formats have been
moved to plugins. Did not remove avro since it is used by the
BigQuery plugins.

Removed exports for avro, parquet, and orc. Avro is now a private
dependency, used only within the source and not required by
the program.

Also fixed an NPE that would happen if the BigQuery table does not
exist when hitting the Get Schema button.
  • Loading branch information
albertshau committed Nov 5, 2018
1 parent 05b09b4 commit 242309f
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 51 deletions.
49 changes: 4 additions & 45 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<cdap.version>5.1.0</cdap.version>
<hydrator.version>2.1.0</hydrator.version>
<hydrator.version>2.1.1-SNAPSHOT</hydrator.version>
<guava.version>20.0</guava.version>
<slf4j.version>1.7.5</slf4j.version>
<junit.version>4.12</junit.version>
Expand All @@ -67,11 +67,9 @@
<google.cloud.spanner.version>0.53.0-beta</google.cloud.spanner.version>
<spark.version>1.6.1</spark.version>
<google.cloud.speech.version>0.54.0-beta</google.cloud.speech.version>
<parquet.avro.version>1.8.1</parquet.avro.version>
<google.protobuf.java.version>3.4.0</google.protobuf.java.version>
<google.cloud.pubsub.version>1.36.0</google.cloud.pubsub.version>
<orc.mapreduce.version>1.1.0</orc.mapreduce.version>
<avro.mapred.version>1.7.7</avro.mapred.version>
<avro.version>1.7.7</avro.version>
<jackson.core.version>2.8.11.1</jackson.core.version>
</properties>

Expand All @@ -97,7 +95,7 @@
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.7.7</version>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>co.cask.cdap</groupId>
Expand Down Expand Up @@ -265,17 +263,6 @@
<artifactId>google-cloud-speech</artifactId>
<version>${google.cloud.speech.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>${parquet.avro.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
Expand All @@ -286,34 +273,11 @@
<artifactId>google-cloud-pubsub</artifactId>
<version>${google.cloud.pubsub.version}</version>
</dependency>
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-mapreduce</artifactId>
<version>${orc.mapreduce.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<classifier>hadoop2</classifier>
<version>${avro.mapred.version}</version>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
Expand Down Expand Up @@ -524,12 +488,7 @@
<!--Only @Plugin classes in the export packages will be included as plugin-->
<_exportcontents>
co.cask.gcp.*;
co.cask.hydrator.format.*;
com.google.cloud.hadoop.*;
org.apache.avro.mapred.*;
org.apache.avro.mapreduce;
org.apache.parquet.avro.*;
org.apache.parquet.hadoop.*;
org.apache.spark.streaming.pubsub*;
</_exportcontents>
</instructions>
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/co/cask/gcp/bigquery/BigQuerySource.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,16 +165,16 @@ public void onRunFinish(boolean succeeded, BatchSourceContext context) {
@Path("getSchema")
public Schema getSchema(BigQuerySourceConfig request) throws Exception {
String dataset = request.getDataset();
String table = request.getTable();
String tableName = request.getTable();
String project = request.getDatasetProject();
Table bqTable = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, table);
Table table = BigQueryUtils.getBigQueryTable(request.getServiceAccountFilePath(), project, dataset, tableName);
if (table == null) {
// Table does not exist
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist",
project, dataset, table));
project, dataset, tableName));
}

com.google.cloud.bigquery.Schema bgSchema = bqTable.getDefinition().getSchema();
com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();
if (bgSchema == null) {
throw new IllegalArgumentException(String.format("Cannot read from table '%s:%s.%s' because it has no schema.",
project, dataset, table));
Expand All @@ -195,7 +195,7 @@ private void validateOutputSchema() throws IOException {
if (table == null) {
// Table does not exist
throw new IllegalArgumentException(String.format("BigQuery table '%s:%s.%s' does not exist.",
project, dataset, table));
project, dataset, tableName));
}

com.google.cloud.bigquery.Schema bgSchema = table.getDefinition().getSchema();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/co/cask/gcp/spanner/sink/SpannerSink.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@
import com.google.cloud.spanner.Spanner;
import com.google.cloud.spanner.SpannerException;
import com.google.cloud.spanner.Statement;
import com.google.common.base.Strings;
import com.google.spanner.admin.database.v1.CreateDatabaseMetadata;
import com.google.spanner.admin.database.v1.UpdateDatabaseDdlMetadata;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.parquet.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down

0 comments on commit 242309f

Please sign in to comment.