From a5c56ba765e84aa5d531f376542dc8003bffb8c7 Mon Sep 17 00:00:00 2001 From: Brandon Neth Date: Tue, 13 Aug 2024 14:20:51 -0700 Subject: [PATCH 1/4] tests and implementation for supporting different compression algorithms --- Signed-off-by: Brandon Neth --- modules/packages/Zarr.chpl | 31 ++++++--- .../packages/Zarr/ZarrCompressors.chpl | 66 +++++++++++++++++++ .../packages/Zarr/ZarrCompressors.good | 6 ++ 3 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 test/library/packages/Zarr/ZarrCompressors.chpl create mode 100644 test/library/packages/Zarr/ZarrCompressors.good diff --git a/modules/packages/Zarr.chpl b/modules/packages/Zarr.chpl index 237dd0188510..54edc6aec699 100644 --- a/modules/packages/Zarr.chpl +++ b/modules/packages/Zarr.chpl @@ -83,6 +83,7 @@ module Zarr { var chunks: list(int); var dtype: string; var shape: list(int); + var compressor: string; } /* Unused until support is added for v3.0 stores */ @@ -141,6 +142,10 @@ module Zarr { throw new Error("Expected entries of type %s. Found %s".format(dtype:string, chplType)); } + private proc validateCompressor(compressor) throws { + const supportedCompressors = ["blosclz", "lz4", "lz4hc", "zlib", "zstd"]; + if supportedCompressors.find(compressor) == -1 then throw new Error("Unsupported compressor: %s".format(compressor)); + } private proc buildChunkPath(directoryPath: string, delimiter: string, const chunkIndex: ?dimCount * int) { var indexStrings: dimCount*string; @@ -277,7 +282,7 @@ module Zarr { :throws Error: If the compression fails */ - proc writeChunk(param dimCount, chunkPath: string, chunkDomain: domain(dimCount), ref arraySlice: [] ?t, bloscLevel: int(32) = 9) throws { + proc writeChunk(param dimCount, chunkPath: string, chunkDomain: domain(dimCount), ref arraySlice: [] ?t, bloscLevel: int(32) = 9, compressor: string="blosclz") throws { var s: stopwatch; // bloscLevel must be between 0 and 9 @@ -305,7 +310,7 @@ module Zarr { var bytesCompressed = blosc_compress_ctx(_bloscLevel, 0, c_sizeof(t), (copyOut.size*c_sizeof(t)) : c_size_t, c_ptrTo(copyOut), compressedBuffer, ((copyOut.size + 16) * c_sizeof(t)) : c_size_t, - "blosclz", 0 : c_size_t, 1 : c_size_t); + compressor.c_str(), 0 : c_size_t, 1 : c_size_t); if bytesCompressed == 0 then throw new Error("Failed to compress bytes"); if zarrProfiling then times["Compression"].add(s.elapsed()); @@ -404,14 +409,18 @@ module Zarr { :arg bloscLevel: Compression level to use. 0 indicates no compression, 9 (default) indicates maximum compression. + + :arg compressor: Compression algorithm to use. Supported values are "blosclz" (default), + "lz4", "lz4hc", "zlib", and "zstd". */ - proc writeZarrArray(directoryPath: string, const ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscLevel: int(32) = 9) throws { + proc writeZarrArray(directoryPath: string, const ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscLevel: int(32) = 9, compressor="blosclz") throws { // Create the metadata record that is written before the chunks var shape, chunks: list(int); for size in A.shape do shape.pushBack(size); for size in chunkShape do chunks.pushBack(size); - const md: zarrMetadataV2 = new zarrMetadataV2(2, chunks, dtypeString(dtype), shape); + validateCompressor(compressor); + const md: zarrMetadataV2 = new zarrMetadataV2(2, chunks, dtypeString(dtype), shape, compressor); // Clear the directory before writing if exists(directoryPath) then rmTree(directoryPath); @@ -454,7 +463,7 @@ module Zarr { ref thisChunkSlice = hereA.localSlice(thisChunkHere); const chunkPath = buildChunkPath(directoryPath, ".", chunkIndex); locks[chunkIndex].writeEF(true); - writeChunk(dimCount, chunkPath, thisChunkDomain, thisChunkSlice, bloscLevel=bloscLevel); + writeChunk(dimCount, chunkPath, thisChunkDomain, thisChunkSlice, bloscLevel=bloscLevel, compressor=compressor); locks[chunkIndex].readFE(); } } @@ -521,14 +530,18 @@ module Zarr { :arg bloscLevel: Compression level to use. 0 indicates no compression, 9 (default) indicates maximum compression. + + :arg compressor: Compression algorithm to use. Supported values are "blosclz" (default), + "lz4", "lz4hc", "zlib", and "zstd". */ - proc writeZarrArrayLocal(directoryPath: string, ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscLevel: int(32) = 9) throws { + proc writeZarrArrayLocal(directoryPath: string, ref A: [?domainType] ?dtype, chunkShape: ?dimCount*int, bloscLevel: int(32) = 9, compressor="blosclz") throws { // Create the metadata record that is written before the chunks var shape, chunks: list(int); for size in A.shape do shape.pushBack(size); for size in chunkShape do chunks.pushBack(size); - const md: zarrMetadataV2 = new zarrMetadataV2(2, chunks, dtypeString(dtype), shape); + validateCompressor(compressor); + const md: zarrMetadataV2 = new zarrMetadataV2(2, chunks, dtypeString(dtype), shape, compressor); // Clear the directory before writing if exists(directoryPath) then rmTree(directoryPath); @@ -555,7 +568,7 @@ module Zarr { const chunkForDomain = D[chunkBounds]; ref chunkData = normA[chunkForDomain]; const chunkPath = buildChunkPath(directoryPath, ".", chunkIndex); - writeChunk(dimCount, chunkPath, chunkBounds, chunkData, bloscLevel=bloscLevel); + writeChunk(dimCount, chunkPath, chunkBounds, chunkData, bloscLevel=bloscLevel, compressor=compressor); } blosc_destroy(); @@ -593,7 +606,7 @@ module Zarr { const chunkPath = buildChunkPath(directoryPath, ".", chunkIndex); blosc_init(); - writeChunk(dimCount, chunkPath, chunkData.domain, chunkData); + writeChunk(dimCount, chunkPath, chunkData.domain, chunkData, compressor=md.compressor); blosc_destroy(); } diff --git a/test/library/packages/Zarr/ZarrCompressors.chpl b/test/library/packages/Zarr/ZarrCompressors.chpl new file mode 100644 index 000000000000..d88539e4bfb5 --- /dev/null +++ b/test/library/packages/Zarr/ZarrCompressors.chpl @@ -0,0 +1,66 @@ +use Zarr; +use IO; +use FileSystem; +use BlockDist; +use Random; + +proc localTest(compressor: string) { + const N = 20; + const D: domain(2) = {0.. Date: Tue, 13 Aug 2024 14:53:23 -0700 Subject: [PATCH 2/4] fix module index text --- Signed-off-by: Brandon Neth --- modules/packages/Zarr.chpl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/packages/Zarr.chpl b/modules/packages/Zarr.chpl index 54edc6aec699..0bfe8b1dc0e1 100644 --- a/modules/packages/Zarr.chpl +++ b/modules/packages/Zarr.chpl @@ -17,11 +17,11 @@ * limitations under the License. */ -/* - Support for distributed reading and writing of Zarr stores. Support is - limited to v2 Zarr arrays stored on local filesystems. NFS is not supported. - The module uses c-blosc to compress and decompress chunks. Zarr - specification: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html +/* Support for reading and writing of Zarr stores. + + Support is limited to v2 Zarr arrays stored on local filesystems. NFS + is not supported. The module uses c-blosc to compress and decompress chunks. + Zarr specification: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html */ module Zarr { use IO; From 4424ef704bf164c4d920c31b726f7bbb899e8634 Mon Sep 17 00:00:00 2001 From: Brandon Neth Date: Wed, 14 Aug 2024 09:49:26 -0700 Subject: [PATCH 3/4] reviewer feedback --- Signed-off-by: Brandon Neth --- modules/packages/Zarr.chpl | 6 +++++- test/library/packages/Zarr/ZarrCompressors.chpl | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/packages/Zarr.chpl b/modules/packages/Zarr.chpl index 0bfe8b1dc0e1..f839e9203c03 100644 --- a/modules/packages/Zarr.chpl +++ b/modules/packages/Zarr.chpl @@ -144,7 +144,11 @@ module Zarr { private proc validateCompressor(compressor) throws { const supportedCompressors = ["blosclz", "lz4", "lz4hc", "zlib", "zstd"]; - if supportedCompressors.find(compressor) == -1 then throw new Error("Unsupported compressor: %s".format(compressor)); + if supportedCompressors.find(compressor) == -1 { + throw new IllegalArgumentError("Unsupported compressor: %s.".format(compressor) + + " Supported compressors are: blosclz, lz4, lz4hc, zlib, and zstd."); + } + } private proc buildChunkPath(directoryPath: string, delimiter: string, const chunkIndex: ?dimCount * int) { diff --git a/test/library/packages/Zarr/ZarrCompressors.chpl b/test/library/packages/Zarr/ZarrCompressors.chpl index d88539e4bfb5..1d20843395b9 100644 --- a/test/library/packages/Zarr/ZarrCompressors.chpl +++ b/test/library/packages/Zarr/ZarrCompressors.chpl @@ -42,13 +42,13 @@ proc testUnsupportedCompressor() { writeZarrArrayLocal("TestUnsupportedCompressor", A, (7,), compressor="unsupported"); assert(false, "Expected an error for unsupported compressor"); } catch e { - assert(e.message() == "Unsupported compressor: unsupported"); + assert(e.message() == "Unsupported compressor: unsupported. Supported compressors are: blosclz, lz4, lz4hc, zlib, and zstd.", e.message()); } try { writeZarrArray("TestUnsupportedCompressor", A, (7,), compressor="unsupported"); assert(false, "Expected an error for unsupported compressor"); } catch e { - assert(e.message() == "Unsupported compressor: unsupported"); + assert(e.message() == "Unsupported compressor: unsupported. Supported compressors are: blosclz, lz4, lz4hc, zlib, and zstd.", e.message()); } } From 9e5d38eeeb6d0dc6f2ed87d8c2a3ee6ad273fdfb Mon Sep 17 00:00:00 2001 From: Brandon Neth Date: Wed, 14 Aug 2024 09:58:39 -0700 Subject: [PATCH 4/4] trailing space --- Signed-off-by: Brandon Neth --- modules/packages/Zarr.chpl | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/packages/Zarr.chpl b/modules/packages/Zarr.chpl index f839e9203c03..15f529b04678 100644 --- a/modules/packages/Zarr.chpl +++ b/modules/packages/Zarr.chpl @@ -148,7 +148,6 @@ module Zarr { throw new IllegalArgumentError("Unsupported compressor: %s.".format(compressor) + " Supported compressors are: blosclz, lz4, lz4hc, zlib, and zstd."); } - } private proc buildChunkPath(directoryPath: string, delimiter: string, const chunkIndex: ?dimCount * int) {