-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Added native support to read/write parquet files from GCS URIs (#…
- Loading branch information
1 parent
483a72f
commit 7480812
Showing
6 changed files
with
213 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
extensions/s3/src/main/java/io/deephaven/extensions/s3/GCSSeekableChannelProvider.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// | ||
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending | ||
// | ||
package io.deephaven.extensions.s3; | ||
|
||
import io.deephaven.internal.log.LoggerFactory; | ||
import io.deephaven.io.logger.Logger; | ||
import io.deephaven.util.channel.CompletableOutputStream; | ||
import io.deephaven.util.channel.SeekableChannelContext; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.nio.channels.SeekableByteChannel; | ||
import java.util.stream.Stream; | ||
|
||
import static io.deephaven.extensions.s3.GCSSeekableChannelProviderPlugin.GCS_URI_SCHEME; | ||
import static io.deephaven.extensions.s3.S3SeekableChannelProviderPlugin.S3_URI_SCHEME; | ||
|
||
final class GCSSeekableChannelProvider extends S3SeekableChannelProvider { | ||
|
||
private static final Logger log = LoggerFactory.getLogger(GCSSeekableChannelProvider.class); | ||
|
||
GCSSeekableChannelProvider(@NotNull final S3Instructions s3Instructions) { | ||
super(s3Instructions); | ||
} | ||
|
||
@Override | ||
public boolean exists(@NotNull final URI uri) { | ||
return super.exists(gcsToS3Uri(uri)); | ||
} | ||
|
||
@Override | ||
public SeekableByteChannel getReadChannel( | ||
@NotNull final SeekableChannelContext channelContext, | ||
@NotNull final URI uri) { | ||
return super.getReadChannel(channelContext, gcsToS3Uri(uri)); | ||
} | ||
|
||
@Override | ||
public CompletableOutputStream getOutputStream(@NotNull final URI uri, final int bufferSizeHint) { | ||
return super.getOutputStream(gcsToS3Uri(uri), bufferSizeHint); | ||
} | ||
|
||
@Override | ||
public Stream<URI> list(@NotNull final URI directory) { | ||
if (log.isDebugEnabled()) { | ||
log.debug().append("Fetching child URIs for directory: ").append(directory.toString()).endl(); | ||
} | ||
return createStream(gcsToS3Uri(directory), false, GCS_URI_SCHEME); | ||
} | ||
|
||
@Override | ||
public Stream<URI> walk(@NotNull final URI directory) { | ||
if (log.isDebugEnabled()) { | ||
log.debug().append("Performing recursive traversal from directory: ").append(directory.toString()).endl(); | ||
} | ||
return createStream(gcsToS3Uri(directory), true, GCS_URI_SCHEME); | ||
} | ||
|
||
private static URI gcsToS3Uri(@NotNull final URI uri) { | ||
try { | ||
if (S3_URI_SCHEME.equals(uri.getScheme())) { | ||
return uri; | ||
} | ||
return new URI(S3_URI_SCHEME, uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), | ||
uri.getQuery(), uri.getFragment()); | ||
} catch (final URISyntaxException e) { | ||
throw new IllegalArgumentException("Failed to convert GCS URI " + uri + " to s3 URI", e); | ||
} | ||
} | ||
} |
63 changes: 63 additions & 0 deletions
63
extensions/s3/src/main/java/io/deephaven/extensions/s3/GCSSeekableChannelProviderPlugin.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// | ||
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending | ||
// | ||
package io.deephaven.extensions.s3; | ||
|
||
import com.google.auto.service.AutoService; | ||
import io.deephaven.util.channel.SeekableChannelsProvider; | ||
import io.deephaven.util.channel.SeekableChannelsProviderPlugin; | ||
import org.jetbrains.annotations.NotNull; | ||
import org.jetbrains.annotations.Nullable; | ||
|
||
import java.net.URI; | ||
|
||
/** | ||
* {@link SeekableChannelsProviderPlugin} implementation used for reading files from Google Cloud Storage. | ||
*/ | ||
@AutoService(SeekableChannelsProviderPlugin.class) | ||
public final class GCSSeekableChannelProviderPlugin implements SeekableChannelsProviderPlugin { | ||
|
||
static final String GCS_URI_SCHEME = "gs"; | ||
|
||
private static final String ENDPOINT_OVERRIDE_SUFFIX = ".googleapis.com"; | ||
private static final URI DEFAULT_ENDPOINT_OVERRIDE = URI.create("https://storage.googleapis.com"); | ||
private static final S3Instructions DEFAULT_INSTRUCTIONS = | ||
S3Instructions.builder().endpointOverride(DEFAULT_ENDPOINT_OVERRIDE).build(); | ||
|
||
@Override | ||
public boolean isCompatible(@NotNull final URI uri, @Nullable final Object config) { | ||
return GCS_URI_SCHEME.equals(uri.getScheme()); | ||
} | ||
|
||
@Override | ||
public SeekableChannelsProvider createProvider(@NotNull final URI uri, @Nullable final Object config) { | ||
if (!isCompatible(uri, config)) { | ||
throw new IllegalArgumentException("Arguments not compatible, provided uri " + uri); | ||
} | ||
return new GCSSeekableChannelProvider(s3Instructions(config)); | ||
} | ||
|
||
/** | ||
* Get the S3Instructions from the config object, or use the default if the config is null. | ||
*/ | ||
private static S3Instructions s3Instructions(@Nullable final Object config) { | ||
if (config == null) { | ||
return DEFAULT_INSTRUCTIONS; | ||
} | ||
if (!(config instanceof S3Instructions)) { | ||
throw new IllegalArgumentException("Only S3Instructions are valid when reading GCS URIs, " + | ||
"provided config instance of class " + config.getClass().getName()); | ||
} | ||
final S3Instructions s3Instructions = (S3Instructions) config; | ||
if (s3Instructions.endpointOverride().isEmpty()) { | ||
return s3Instructions.withEndpointOverride(DEFAULT_ENDPOINT_OVERRIDE); | ||
} | ||
if (!(s3Instructions.endpointOverride().get()).toString().endsWith(ENDPOINT_OVERRIDE_SUFFIX)) { | ||
throw new IllegalArgumentException("Provided endpoint override=(" + | ||
s3Instructions.endpointOverride().get() + " not supported when reading GCS URIs, must end with " + | ||
ENDPOINT_OVERRIDE_SUFFIX); | ||
} | ||
return s3Instructions; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters