diff --git a/src/DSP.jl b/src/DSP.jl index 66e0f1b12..f66f64c91 100644 --- a/src/DSP.jl +++ b/src/DSP.jl @@ -5,7 +5,7 @@ using LinearAlgebra: mul!, rmul! using IterTools: subsets using Compat: Compat -export conv, deconv, filt, filt!, xcorr +export conv, conv!, deconv, filt, filt!, xcorr # This function has methods added in `periodograms` but is not exported, # so we define it here so one can do `DSP.allocate_output` instead of diff --git a/src/dspbase.jl b/src/dspbase.jl index 5fb8880a0..696d73ed4 100644 --- a/src/dspbase.jl +++ b/src/dspbase.jl @@ -483,16 +483,16 @@ end # Assumes u is larger than, or the same size as, v # nfft should be greater than or equal to 2*sv-1 function unsafe_conv_kern_os!(out, + output_indices, u::AbstractArray{<:Any, N}, v, - su, - sv, - sout, nffts) where N + sout = size(out) + su = size(u) + sv = size(v) u_start = first.(axes(u)) - out_axes = axes(out) - out_start = first.(out_axes) - out_stop = last.(out_axes) + out_start = Tuple(first(output_indices)) + out_stop = Tuple(last(output_indices)) ideal_save_blocksize = nffts .- sv .+ 1 # Number of samples that are "missing" if the output is smaller than the # valid portion of the convolution @@ -502,7 +502,7 @@ function unsafe_conv_kern_os!(out, nblocks = cld.(sout, save_blocksize) # Pre-allocation - tdbuff, fdbuff, p, ip = os_prepare_conv(u, nffts) + tdbuff, fdbuff, p, ip = os_prepare_conv(out, nffts) tdbuff_axes = axes(tdbuff) # Transform the smaller filter @@ -603,14 +603,13 @@ function unsafe_conv_kern_os!(out, out end -function _conv_kern_fft!(out, - u::AbstractArray{T, N}, - v::AbstractArray{T, N}, - su, - sv, - outsize, - nffts) where {T<:Real, N} - padded = _zeropad(u, nffts) +function _conv_kern_fft!(out::AbstractArray{T, N}, + output_indices, + u::AbstractArray{<:Real, N}, + v::AbstractArray{<:Real, N}) where {T<:Real, N} + outsize = size(output_indices) + nffts = nextfastfft(outsize) + padded = _zeropad!(similar(u, T, nffts), u) p = plan_rfft(padded) uf = p * padded _zeropad!(padded, v) @@ -618,13 +617,15 @@ function _conv_kern_fft!(out, uf .*= vf raw_out = irfft(uf, nffts[1]) copyto!(out, - CartesianIndices(out), + output_indices, raw_out, CartesianIndices(UnitRange.(1, outsize))) end -function _conv_kern_fft!(out, u, v, su, sv, outsize, nffts) - upad = _zeropad(u, nffts) - vpad = _zeropad(v, nffts) +function _conv_kern_fft!(out::AbstractArray{T}, output_indices, u, v) where {T} + outsize = size(output_indices) + nffts = nextfastfft(outsize) + upad = _zeropad!(similar(u, T, nffts), u) + vpad = _zeropad!(similar(v, T, nffts), v) p! = plan_fft!(upad) ip! = inv(p!) p! * upad # Operates in place on upad @@ -632,72 +633,78 @@ function _conv_kern_fft!(out, u, v, su, sv, outsize, nffts) upad .*= vpad ip! * upad copyto!(out, - CartesianIndices(out), + output_indices, upad, CartesianIndices(UnitRange.(1, outsize))) end -# v should be smaller than u for good performance -function _conv_fft!(out, u, v, su, sv, outsize) - os_nffts = map(optimalfftfiltlength, sv, su) - if any(os_nffts .< outsize) - unsafe_conv_kern_os!(out, u, v, su, sv, outsize, os_nffts) - else - nffts = nextfastfft(outsize) - _conv_kern_fft!(out, u, v, su, sv, outsize, nffts) +function _conv_td!(out, output_indices, u::AbstractArray{<:Number, N}, v::AbstractArray{<:Number, N}) where {N} + index_offset = first(CartesianIndices(u)) + first(CartesianIndices(v)) - first(output_indices) + checkbounds(out, output_indices) + fill!(out, zero(eltype(out))) + for m in CartesianIndices(u), n in CartesianIndices(v) + @inbounds out[n+m - index_offset] = muladd(u[m], v[n], out[n+m - index_offset]) end + return out end +const FFTTypes = Union{Float32, Float64, ComplexF32, ComplexF64} + +function conv!( + out::AbstractArray{T, N}, + u::AbstractArray{<:Number, N}, + v::AbstractArray{<:Number, N}; + algorithm=:auto +) where {T<:Number, N} + calc_index_offset(ao::Base.OneTo, au::Base.OneTo, av::Base.OneTo) = 1 + calc_index_offset(ao::Base.OneTo, au, av) = # first(au) + first(av) - 1 + throw(ArgumentError("output must have offset axes if the input has")) + calc_index_offset(ao, au::Base.OneTo, av::Base.OneTo) = # 2 + throw(ArgumentError("output must not have offset axes if none of the inputs has")) + calc_index_offset(ao, au, av) = 0 + output_indices = CartesianIndices(map(axes(out), axes(u), axes(v)) do ao, au, av + return (first(au)+first(av) : last(au)+last(av)) .- calc_index_offset(ao, au, av) + end) -# For arrays with weird offsets -function _conv_similar(u, outsize, axesu, axesv) - out_offsets = first.(axesu) .+ first.(axesv) - out_axes = UnitRange.(out_offsets, out_offsets .+ outsize .- 1) - similar(u, out_axes) -end -function _conv_similar( - u, outsize, ::NTuple{<:Any, Base.OneTo{Int}}, ::NTuple{<:Any, Base.OneTo{Int}} -) - similar(u, outsize) -end -_conv_similar(u, v, outsize) = _conv_similar(u, outsize, axes(u), axes(v)) - -# Does convolution, will not switch argument order -function _conv!(out, u, v, su, sv, outsize) - # TODO: Add spatial / time domain algorithm - _conv_fft!(out, u, v, su, sv, outsize) -end - -# Does convolution, will not switch argument order -function _conv(u, v, su, sv) - outsize = su .+ sv .- 1 - out = _conv_similar(u, v, outsize) - _conv!(out, u, v, su, sv, outsize) -end - -function _conv_td(u::AbstractArray{<:Number, N}, v::AbstractArray{<:Number, N}) where {N} - output_indices = CartesianIndices(map(axes(u), axes(v)) do au, av - r = (first(au)+first(av)):(last(au)+last(av)) - if au isa Base.OneTo && av isa Base.OneTo - return r + if algorithm===:auto + algorithm = T <: FFTTypes ? :fast : :direct + end + if algorithm===:fast + if length(u) * length(v) < 2^16 # TODO: better heuristic + algorithm = :direct else - return Base.IdentityUnitRange(r) + algorithm = :fft end - end) - return [ - sum(u[m] * v[n-m] - for m in CartesianIndices(ntuple(Val(N)) do d - max(firstindex(u,d),n[d]-lastindex(v,d)):min(lastindex(u,d), n[d]-firstindex(v,d)) - end) - ) - for n in output_indices - ] + end + if algorithm===:direct + return _conv_td!(out, output_indices, u, v) + else + if output_indices != CartesianIndices(out) + fill!(out, zero(eltype(out))) + end + os_nffts = length(u) >= length(v) ? map(optimalfftfiltlength, size(v), size(u)) : map(optimalfftfiltlength, size(u), size(v)) + if algorithm===:fft + if any(os_nffts .< size(output_indices)) + algorithm = :fft_overlapsave + else + algorithm = :fft_simple + end + end + if algorithm === :fft_overlapsave + # v should be smaller than u for good performance + if length(u) >= length(v) + return unsafe_conv_kern_os!(out, output_indices, u, v, os_nffts) + else + return unsafe_conv_kern_os!(out, output_indices, v, u, os_nffts) + end + elseif algorithm === :fft_simple + return _conv_kern_fft!(out, output_indices, u, v) + else + throw(ArgumentError("algorithm must be :auto, :fast, :direct, :fft, :fft_simple, or :fft_overlapsave")) + end + end end -# We use this type definition for clarity -const RealOrComplexFloat = Union{AbstractFloat, Complex{T} where T<:AbstractFloat} - -# May switch argument order """ conv(u,v) @@ -705,46 +712,24 @@ Convolution of two arrays. Uses either FFT convolution or overlap-save, depending on the size of the input. `u` and `v` can be N-dimensional arrays, with arbitrary indexing offsets, but their axes must be a `UnitRange`. """ -function conv(u::AbstractArray{T, N}, - v::AbstractArray{T, N}) where {T<:RealOrComplexFloat, N} - su = size(u) - sv = size(v) - if length(u) >= length(v) - _conv(u, v, su, sv) - else - _conv(v, u, sv, su) - end -end - -function conv(u::AbstractArray{<:RealOrComplexFloat, N}, - v::AbstractArray{<:RealOrComplexFloat, N}) where N - fu, fv = promote(u, v) - conv(fu, fv) -end - -conv(u::AbstractArray{<:Integer, N}, v::AbstractArray{<:Integer, N}) where {N} = - _conv_td(u, v) - -conv(u::AbstractArray{<:Number, N}, v::AbstractArray{<:Number, N}) where {N} = - conv(float(u), float(v)) - -function conv(u::AbstractArray{<:Number, N}, - v::AbstractArray{<:RealOrComplexFloat, N}) where N - conv(float(u), v) -end - -function conv(u::AbstractArray{<:RealOrComplexFloat, N}, - v::AbstractArray{<:Number, N}) where N - conv(u, float(v)) +function conv( + u::AbstractArray{Tu, N}, v::AbstractArray{Tv, N}; kwargs... +) where {Tu<:Number, Tv<:Number, N} + T = promote_type(Tu, Tv) + out_axis(au, av) = (first(au)+first(av)):(last(au)+last(av)) + out_axis(au::Base.OneTo, av::Base.OneTo) = Base.OneTo(last(au) + last(av) - 1) + out_axes = map(out_axis, axes(u), axes(v)) + out = similar(u, T, out_axes) + return conv!(out, u, v; kwargs...) end function conv(A::AbstractArray{<:Number, M}, - B::AbstractArray{<:Number, N}) where {M, N} + B::AbstractArray{<:Number, N}; kwargs...) where {M, N} if (M < N) - conv(cat(A, dims=N)::AbstractArray{eltype(A), N}, B) + conv(cat(A, dims=N)::AbstractArray{eltype(A), N}, B; kwargs...) else @assert M > N - conv(A, cat(B, dims=M)::AbstractArray{eltype(B), M}) + conv(A, cat(B, dims=M)::AbstractArray{eltype(B), M}; kwargs...) end end diff --git a/test/dsp.jl b/test/dsp.jl index 26db336c0..e1bf55d13 100644 --- a/test/dsp.jl +++ b/test/dsp.jl @@ -2,7 +2,7 @@ # TODO: parameterize conv tests using Test, OffsetArrays using DSP: filt, filt!, deconv, conv, xcorr, - optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft!, _conv_similar, + optimalfftfiltlength, unsafe_conv_kern_os!, _conv_kern_fft! nextfastfft @@ -59,6 +59,10 @@ end @test conv(f32a, b) ≈ fexp @test conv(fb, a) ≈ fexp + u = rand(190) + v = rand(200) + @test conv(u, v; algorithm=:direct) ≈ conv(u, v; algorithm=:fft_simple) ≈ conv(u, v; algorithm=:fft_overlapsave) + # issue #410 n = 314159265 @test conv([n], [n]) == [n^2] @@ -69,6 +73,32 @@ end offset_arr_f = OffsetVector{Float64}(undef, -1:2) offset_arr_f[:] = fa @test conv(offset_arr_f, 1:3) ≈ OffsetVector(fexp, 0:5) + + for M in [10, 200], N in [10, 200], T in [Float64, ComplexF64] + u = rand(T, M) + v = rand(T, N) + u_off = OffsetVector(u, 23) + v_off = OffsetVector(v, -42) + @test conv(u, v; algorithm=:direct) ≈ conv(u, v; algorithm=:fft_simple) ≈ conv(u, v; algorithm=:fft_overlapsave) + @test conv(u_off, v_off; algorithm=:direct) ≈ conv(u_off, v_off; algorithm=:fft_simple) ≈ conv(u_off, v_off; algorithm=:fft_overlapsave) + @test conv(u, v) ≈ conv(u_off, v_off)[23-42+2:23-42+N+M] + + for algorithm in [:direct, :fft_simple, :fft_overlapsave] + # pre-allocated non-offset output larger than necessary + out = ones(T, M+N+10) + conv!(out, u, v; algorithm) + @test out[1:M+N-1] == conv(u, v; algorithm) + @test all(iszero, out[M+N:end]) + + # pre-allocated output with offset larger than necessary + out = OffsetVector(ones(T, M+N+10), 23-42-5) + conv!(out, u_off, v_off; algorithm) + @test out[23-42+2:23-42+N+M] ≈ conv(u, v; algorithm) + @test all(iszero, out[begin:23-42+1]) + @test all(iszero, out[23-42+N+M+1:end]) + end + end + # Issue #352 @test conv([1//2, 1//3, 1//4], [1, 2]) ≈ [1//2, 4//3, 11//12, 1//2] # Non-numerical arrays should not be convolved @@ -113,6 +143,10 @@ end @test conv(f32a, b) ≈ fexp @test conv(fb, a) ≈ fexp + u = rand(10, 20) + v = rand(10, 10) + @test conv(u, v; algorithm=:direct) ≈ conv(u, v; algorithm=:fft) + offset_arr = OffsetMatrix{Int}(undef, -1:1, -1:1) offset_arr[:] = a @test conv(offset_arr, b) == OffsetArray(expectation, 0:3, 0:3) @@ -197,11 +231,10 @@ end su, u = os_test_data(T, nu, N) sv, v = os_test_data(T, nv, N) sout = su .+ sv .- 1 - out = _conv_similar(u, sout, axes(u), axes(v)) - unsafe_conv_kern_os!(out, u, v, su, sv, sout, nffts) + out = similar(u, T, sout) + unsafe_conv_kern_os!(out, CartesianIndices(out), u, v, nffts) os_out = copy(out) - fft_nfft = nextfastfft(sout) - _conv_kern_fft!(out, u, v, su, sv, sout, fft_nfft) + _conv_kern_fft!(out, CartesianIndices(out), u, v) @test out ≈ os_out end Ns = [1, 2, 3]