From b9473c47c7746543d0ee315afac30488191d1a3f Mon Sep 17 00:00:00 2001
From: Emmanuel T Odeke
Date: Mon, 18 Sep 2023 23:55:42 -0700
Subject: [PATCH] SpannerScanner: add option to disableDataboost

Allows Databoost to be disabled; it is on by default given the point of
this connector. However, there is something to be said about
compatibility so that by default most users who haven't enabled
Databoost can still use it, but that's to be discussed for later.

Fixes #68
---
 .../com/google/cloud/spark/spanner/SpannerScanner.java | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java b/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
index eb9229f3..26ea9867 100644
--- a/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
+++ b/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
@@ -89,6 +89,13 @@ public InputPartition[] planInputPartitions() {
     if (filters.length > 0) {
       sqlStmt += " WHERE " + SparkFilterUtils.getCompiledFilter(true, filters);
     }
+
+    // Data Boost is enabled by default, since taking advantage of it is the point of this
+    // integration; it is turned off only when the "disableDataboost" option parses as true.
+    // Please see https://github.com/GoogleCloudDataproc/spark-spanner-connector/issues/68
+    Object disableDataboost = this.opts.get("disableDataboost");
+    boolean enableDataboost = !Boolean.parseBoolean(String.valueOf(disableDataboost));
+
     try (BatchReadOnlyTransaction txn =
         batchClient.batchClient.batchReadOnlyTransaction(TimestampBound.strong())) {
       String mapAsJSON = SpannerUtils.serializeMap(this.opts);
@@ -96,7 +103,7 @@ public InputPartition[] planInputPartitions() {
           txn.partitionQuery(
               PartitionOptions.getDefaultInstance(),
               Statement.of(sqlStmt),
-              Options.dataBoostEnabled(true));
+              Options.dataBoostEnabled(enableDataboost));
 
       List parts =
           Streams.mapWithIndex(