Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CRC32c] Support AbstractVector{UInt8} as input #56164

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions stdlib/CRC32c/src/CRC32c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ See [`CRC32c.crc32c`](@ref) for more information.
"""
module CRC32c

import Base.FastContiguousSubArray
import Base: DenseBytes

export crc32c
Expand All @@ -16,9 +15,9 @@ export crc32c
crc32c(data, crc::UInt32=0x00000000)

Compute the CRC-32c checksum of the given `data`, which can be
an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass
a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
can be used to compute a checksum on data divided into chunks: performing
an `Array{UInt8}`, a contiguous subarray thereof, an `AbstractVector{UInt8}`, or a `String`.
Optionally, you can pass a starting `crc` integer to be mixed in with the checksum.
The `crc` parameter can be used to compute a checksum on data divided into chunks: performing
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
(Technically, a little-endian checksum is computed.)

Expand All @@ -30,11 +29,26 @@ calling [`take!`](@ref).

For a `String`, note that the result is specific to the UTF-8 encoding
(a different checksum would be obtained from a different Unicode encoding).
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
Copy link
Contributor Author

@nhz2 nhz2 Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't work if `a` has padding or is multidimensional. I updated the docstring to note that `a` must not have padding, and that `vec` should be used in addition to `reinterpret(UInt8, a)`.

To checksum an `a::AbstractArray` of some other bitstype without padding,
you can do `crc32c(vec(reinterpret(UInt8,a)))`,
but note that the result may be endian-dependent.
"""
function crc32c end

function crc32c(a::AbstractVector{UInt8}, crc::UInt32=0x00000000)
    # Generic byte vectors are checksummed by staging them, block by block, in a
    # scratch `Memory{UInt8}` buffer that is handed to `Base.unsafe_crc32c`.
    # The block size is 24576 = 8192*3, since that is the threshold for the
    # 3-way parallel SIMD code in the underlying jl_crc32c C function.
    blocksize = 24576
    stop = lastindex(a)
    remaining = length(a)
    scratch = Memory{UInt8}(undef, Int(min(remaining, blocksize)))
    while remaining > 0
        chunk = min(remaining, blocksize)
        # index of the first byte of `a` that has not been consumed yet
        from = stop - remaining + 1
        copyto!(scratch, 1, a, from, chunk)
        crc = Base.unsafe_crc32c(scratch, chunk % Csize_t, crc)
        remaining -= chunk
    end
    return crc
end

function crc32c(a::DenseBytes, crc::UInt32=0x00000000)
Base._crc32c(a, crc)
Expand All @@ -51,6 +65,5 @@ mixed with a starting `crc` integer. If `nb` is not supplied, then
"""
crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) = Base._crc32c(io, nb, crc)
# `IOStream <: IO`, so this method also serves file streams; a separate `IOStream`
# method forwarding to the very same `Base._crc32c(io, crc)` call would be redundant.
crc32c(io::IO, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is redundant with the method above.


end
32 changes: 30 additions & 2 deletions stdlib/CRC32c/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,23 @@
using Test, Random
using CRC32c

const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
using .Main.OffsetArrays: Origin

isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
using .Main.FillArrays: Fill

function test_crc32c(crc32c)
# CRC32c checksum (test data generated from @andrewcooke's CRC.jl package)
for (n,crc) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
s = String(UInt8[1:n;])
ss = SubString(String(UInt8[0:(n+1);]), 2:(n+1))
@test crc32c(UInt8[1:n;]) == crc == crc32c(s) == crc32c(ss)
@test crc == crc32c(UInt8(1):UInt8(n))
m = Memory{UInt8}(undef, n)
m .= 1:n
@test crc == crc32c(m)
end

# test that crc parameter is equivalent to checksum of concatenated data,
Expand Down Expand Up @@ -50,9 +61,24 @@ function test_crc32c(crc32c)
LONG = 8192 # from crc32c.c
SHORT = 256 # from crc32c.c
n = LONG*3+SHORT*3+SHORT*2+64+7
big = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
bigg = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;])
for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)]
@test crc == crc32c(@view big[1+offset:end])
@test crc == crc32c(@view bigg[1+offset:end])
end

# test crc of AbstractVector{UInt8}
@test crc32c(Origin(0)(b"hello")) == crc32c(b"hello")
weird_vectors = [
view(rand(UInt8, 300000), 1:2:300000),
vec(reinterpret(UInt8, collect(Int64(1):Int64(4)))),
vec(reinterpret(UInt8, Int64(1):Int64(4))),
view([0x01, 0x02], UInt(1):UInt(2)),
Fill(0x00, UInt(100)),
Fill(0x00, big(100)),
reinterpret(UInt8, BitVector((true, false, true, false))),
]
for a in weird_vectors
@test crc32c(a) == crc32c(collect(a))
end
end
unsafe_crc32c_sw(a, n, crc) =
Expand All @@ -64,6 +90,8 @@ function crc32c_sw(s::Union{String, SubString{String}}, crc::UInt32=0x00000000)
unsafe_crc32c_sw(s, sizeof(s), crc)
end

# Software CRC-32c of an arbitrary byte vector: copy `a` into a dense `Vector{UInt8}`
# and forward to the `crc32c_sw` method for dense arrays (presumably defined elsewhere
# in this file -- not visible here). The starting `crc` is passed along so that
# chained checksums such as `crc32c_sw(b, crc32c_sw(a))` are honored for these inputs.
crc32c_sw(a::AbstractVector{UInt8}, crc::UInt32=0x00000000) =
    crc32c_sw(copyto!(Vector{UInt8}(undef, length(a)), a), crc)
function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000)
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
buf = Vector{UInt8}(undef, min(nb, 24576))
Expand Down