Fix ConvTranspose output padding on AMDGPU
This commit applies the output padding for ConvTranspose layers on
AMDGPU devices. Tests are added to ensure the output size matches that
of the same ConvTranspose layer run on the CPU.

In addition, this fixes the following test failure on AMDGPU:

  Convolution: Error During Test at /var/lib/buildkite-agent/builds/amdgpu1-luraess-com/julialang/flux-dot-jl/test/ext_amdgpu/basic.jl:27
      Got exception outside of a @test
      MethodError: no method matching ConvTranspose(::typeof(identity), ::ROCArray{Float32, 3, AMDGPU.Runtime.Mem.HIPBuffer}, ::ROCArray{Float32, 1, AMDGPU.Runtime.Mem.HIPBuffer}, ::Tuple{Int64}, ::Tuple{Int64, Int64}, ::Tuple{Int64}, ::Int64)
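
As a rough illustration (not part of the commit message), the failing path is simply a ConvTranspose constructed with outpad and moved to an AMDGPU device. A minimal sketch, assuming a functional AMDGPU setup and using illustrative sizes:

    using Flux, AMDGPU

    m = ConvTranspose((3,), 3 => 6, stride = 2, outpad = 1)  # 1D transposed conv with output padding
    x = randn(Float32, 10, 3, 2)                             # (width, channels, batch)
    md, xd = Flux.gpu.((m, x))                               # this move previously raised the MethodError above
    @assert size(md(xd)) == size(m(x))                       # GPU output size now matches the CPU layer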
paulnovo committed Jul 27, 2024
1 parent 36abc73 commit eba195b
Showing 3 changed files with 18 additions and 3 deletions.
ext/FluxAMDGPUExt/conv.jl (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,7 @@ function Flux.conv_transpose_dims(c::ConvTranspose, x::T) where T <: ROCArray
     # Calculate size of "input", from ∇conv_data()'s perspective...
     combined_pad = (c.pad[1:2:end] .+ c.pad[2:2:end])
     I = (size(x)[1:end - 2] .- 1) .* c.stride .+ 1 .+
-        (size(c.weight)[1:end - 2] .- 1) .* c.dilation .- combined_pad
+        (size(c.weight)[1:end - 2] .- 1) .* c.dilation .- combined_pad .+ c.outpad
     C_in = size(c.weight)[end - 1] * c.groups
     batch_size = size(x)[end]

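As a quick sanity check of the updated size formula (illustrative, not part of the commit): with pad = 0, dilation = 1, stride = 3, a length-10 input, a size-3 kernel, and outpad = 1, the computed spatial extent is (10 - 1) * 3 + 1 + (3 - 1) * 1 - 0 + 1 = 31, matching what the CPU layer produces:

    using Flux

    m = ConvTranspose((3,), 3 => 6, stride = 3, outpad = 1)  # pad = 0, dilation = 1 by default
    x = randn(Float32, 10, 3, 2)
    @assert size(m(x), 1) == (10 - 1) * 3 + 1 + (3 - 1) * 1 - 0 + 1  # == 31
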
ext/FluxAMDGPUExt/functor.jl (7 changes: 5 additions & 2 deletions)
@@ -81,6 +81,9 @@ function _amd(id::Union{Nothing, Int}, x)
     fmap(x -> Adapt.adapt(FluxAMDGPUAdaptor(id), x), x; exclude=_exclude)
 end
 
+_other_args(m::Conv) = (m.stride, m.pad, m.dilation, m.groups)
+_other_args(m::ConvTranspose) = (m.stride, m.pad, m.outpad, m.dilation, m.groups)
+
 # CPU -> GPU
 
 function Adapt.adapt_structure(to::FluxAMDGPUAdaptor, m::CPU_CONV)
@@ -89,7 +92,7 @@ function Adapt.adapt_structure(to::FluxAMDGPUAdaptor, m::CPU_CONV)
         Adapt.adapt(to, m.σ),
         Adapt.adapt(to, flipped_weight),
         Adapt.adapt(to, m.bias),
-        m.stride, m.pad, m.dilation, m.groups)
+        _other_args(m)...)
 end
 
 # Don't adapt again.
@@ -102,7 +105,7 @@ function Adapt.adapt_structure(to::FluxCPUAdaptor, m::AMDGPU_CONV)
     dims = ntuple(i -> i, ndims(m.weight) - 2)
     _conv_basetype(m)(
         Adapt.adapt(to, m.σ), reverse(Adapt.adapt(to, m.weight); dims),
-        Adapt.adapt(to, m.bias), m.stride, m.pad, m.dilation, m.groups)
+        Adapt.adapt(to, m.bias), _other_args(m)...)
 end
 
 function Flux.get_device(::Val{:AMDGPU}, id::Int) # id should start from 0
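One way to see the effect of the new _other_args dispatch (illustrative, not part of the commit): round-tripping a ConvTranspose through the two adapt_structure methods above should preserve its outpad field. A minimal sketch, assuming a functional AMDGPU setup (without one, gpu falls back to the CPU and the check passes trivially):

    using Flux, AMDGPU

    m  = ConvTranspose((3, 5), 3 => 6, stride = 3, outpad = (1, 0))
    mc = m |> Flux.gpu |> Flux.cpu   # exercises the CPU -> GPU and GPU -> CPU adaptors
    @assert mc.outpad == m.outpad    # outpad survives the round trip
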
test/ext_amdgpu/basic.jl (12 changes: 12 additions & 0 deletions)
@@ -46,6 +46,18 @@ end
     end
 end
 
+@testset "ConvTranspose output padding" begin
+    x = randn(Float32, 10, 11, 3, 2)
+    m = ConvTranspose((3, 5), 3=>6, stride=3, outpad=(1, 0))
+    md, xd = Flux.gpu.((m, x))
+    @test size(m(x)) == size(md(xd))
+
+    x = randn(Float32, 10, 11, 12, 3, 2)
+    m = ConvTranspose((3, 5, 3), 3=>6, stride=3, outpad=(1, 0, 1))
+    md, xd = Flux.gpu.((m, x))
+    @test size(m(x)) == size(md(xd))
+end
+
 @testset "Chain(Conv)" begin
     m = Chain(Conv((3, 3), 3 => 3)) |> f32
     x = rand(Float32, 10, 10, 3, 2)
