From d15c645d932c26ddf631815f16e15c3459105934 Mon Sep 17 00:00:00 2001
From: Emmanuel T Odeke
Date: Mon, 18 Sep 2023 23:55:42 -0700
Subject: [PATCH] SpannerScanner: add option to disableDataboost

Allows Databoost to be disabled; it is on by default given the point
of this connector. However, there is something to be said about
compatibility so that by default most users who haven't enabled
Databoost can still use it, but that's to be discussed for later.

Fixes #68
---
 .../com/google/cloud/spark/spanner/SpannerScanner.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java b/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
index eb9229f3..9f337b35 100644
--- a/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
+++ b/spark-3.1-spanner-lib/src/main/java/com/google/cloud/spark/spanner/SpannerScanner.java
@@ -89,6 +89,12 @@ public InputPartition[] planInputPartitions() {
     if (filters.length > 0) {
       sqlStmt += " WHERE " + SparkFilterUtils.getCompiledFilter(true, filters);
     }
+
+    // Data Boost is enabled by default, since using it is the main point of this
+    // integration; set "disableDataboost" to "true" to opt out (value-compared via equals).
+    // Please see https://github.com/GoogleCloudDataproc/spark-spanner-connector/issues/68
+    boolean enableDataboost = !"true".equals(this.opts.get("disableDataboost"));
+
     try (BatchReadOnlyTransaction txn =
         batchClient.batchClient.batchReadOnlyTransaction(TimestampBound.strong())) {
       String mapAsJSON = SpannerUtils.serializeMap(this.opts);
@@ -96,7 +102,7 @@ public InputPartition[] planInputPartitions() {
           txn.partitionQuery(
               PartitionOptions.getDefaultInstance(),
               Statement.of(sqlStmt),
-              Options.dataBoostEnabled(true));
+              Options.dataBoostEnabled(enableDataboost));
 
       List parts =
           Streams.mapWithIndex(