Skip to content

conv_bias_act broken pullback #329

Open
@carterjgreen

Description

@carterjgreen

I was trying to test whether using conv_bias_act on the GPU would speed up an implementation. When trying to take the gradient, I get the following error:

ERROR: LoadError: MethodError: no method matching iterate(::ErrorException)
Closest candidates are:
  iterate(::Union{LinRange, StepRangeLen}) at range.jl:664
  iterate(::Union{LinRange, StepRangeLen}, ::Int64) at range.jl:664
  iterate(::T) where T<:Union{Base.KeySet{var"#s79", var"#s78"} where {var"#s79", var"#s78"<:Dict}, Base.ValueIterator{var"#s77"} where var"#s77"<:Dict} at dict.jl:693

The same error occurs on CPU and GPU with 1D and 2D convolutions. I wrote a "MWE" that slaps together a bunch of things to work in an environment that has Flux and CUDA installed.

Details Versions
[052768ef] CUDA v3.3.0
[587475ba] Flux v0.12.4
[872c559c] NNlib v0.7.22
[a00861dc] NNlibCUDA v0.1.3

MWE

using Flux, CUDA
using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims, 
                  cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
                  cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
                  cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
                  cudnnConvolutionBackwardBias, CUDNN_ACTIVATION_IDENTITY,
                  CUDNN_ACTIVATION_RELU

# Element types accepted by the cuDNN-backed paths below.
const CUDNNFloat = Union{Float16,Float32,Float64}

# From https://github.com/FluxML/Flux.jl/pull/1302
# Forward pass for Conv that routes through the fused conv_bias_act kernel
# instead of the usual `σ.(conv(x, w, cdims) .+ b)` broadcast.
function (c::Conv)(x::AbstractArray)
    # Reshape the bias vector so it broadcasts over the spatial dims and batch:
    # (1, ..., 1, channels, 1) with one leading 1 per spatial dimension.
    spatial_ones = ntuple(_ -> 1, length(c.stride))
    b = reshape(c.bias, spatial_ones..., :, 1)
    cdims = DenseConvDims(x, c.weight;
                          stride=c.stride, padding=c.pad, dilation=c.dilation)
    return conv_bias_act(x, c.weight, cdims, b, c.σ)
end

# From https://github.com/FluxML/Flux.jl/pull/1302
# With an all-zero bias (Flux.Zeros) the bias add is a no-op, so the generic
# method just applies the activation to a plain convolution.
NNlib.conv_bias_act(x, w, cdims::DenseConvDims, b::Flux.Zeros, σ) = σ.(conv(x, w, cdims))

# GPU path: cuDNN needs a real device array for the bias, so materialize the
# zero bias on the GPU and re-dispatch to the CuArray method.
function NNlib.conv_bias_act(x::CuArray, w::CuArray{T}, cdims::DenseConvDims, b::Flux.Zeros, σ) where T
    device_bias = gpu(collect(b))
    return conv_bias_act(x, w, cdims, device_bias, σ)
end

# https://github.com/FluxML/NNlibCUDA.jl/blob/master/src/cudnn/conv.jl#L51
"""
    NNlib.conv_bias_act!(y, x, w, cdims, bias, σ=identity;
                         z=y, alpha=1, beta=0, algo=-1)

Fused convolution + bias + activation via cuDNN, writing the result into `y`.

cuDNN's fused kernel only supports `relu` and `identity`; any other activation
`σ` is applied afterwards by broadcasting over `y`. The `algo` keyword is
deprecated — cuDNN now picks the fastest algorithm automatically.
"""
function NNlib.conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, 
                            cdims::DenseConvDims, bias::DenseCuArray{T}, σ=identity;
                            z::DenseCuArray{T}=y, alpha=1, beta=0, algo=-1) where T<:CUDNNFloat
    if algo != -1
        @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
    end
    d = cudnnConvolutionDescriptor(cdims, x)
    # BUG FIX: the ternary's result was never assigned, leaving `activation`
    # undefined below, and a stray trailing `)` made the line a syntax error.
    # Only relu and identity are fused by cudnnConvolutionForward!.
    activation = σ === NNlib.relu ? CUDA.CUDNN.CUDNN_ACTIVATION_RELU :
                                    CUDA.CUDNN.CUDNN_ACTIVATION_IDENTITY
    cudnnConvolutionForward!(y, w, x, d; z, bias, activation, alpha, beta)
    # BUG FIX: the `∉` operator was lost in transit (`σ  (nothing, identity)`);
    # restore it so non-fused activations are applied exactly once.
    if activation === CUDA.CUDNN.CUDNN_ACTIVATION_IDENTITY && σ ∉ (nothing, identity)
        y = σ.(y)
    end
    return y
end

# CPU reproduction with a 1D convolution.
function oned_test()
    # Sequential-MNIST-sized input: (length, channels, batch).
    input = randn(Float32, 782, 1, 32)
    layer = Conv((3,), 1 => 2, relu)
    layer(input)  # forward pass sanity check
    loss() = sum(abs2, layer(input))
    return gradient(loss, Flux.params(layer))
end

# GPU reproduction with a 1D convolution.
function oned_test_gpu()
    # Sequential-MNIST-sized input: (length, channels, batch), on device.
    input = CUDA.randn(Float32, 782, 1, 32)
    layer = gpu(Conv((3,), 1 => 2, relu))
    layer(input)  # forward pass sanity check
    loss() = sum(abs2, layer(input))
    return gradient(loss, Flux.params(layer))
end

# CPU reproduction with a 2D convolution.
function twod_test()
    # MNIST-sized input: (height, width, channels, batch).
    input = randn(Float32, 28, 28, 1, 32)
    layer = Conv((3, 3), 1 => 2, relu)
    layer(input)  # forward pass sanity check
    loss() = sum(abs2, layer(input))
    return gradient(loss, Flux.params(layer))
end

# GPU reproduction with a 2D convolution.
function twod_test_gpu()
    # MNIST-sized input: (height, width, channels, batch), on device.
    input = CUDA.randn(Float32, 28, 28, 1, 32)
    layer = gpu(Conv((3, 3), 1 => 2, relu))
    layer(input)  # forward pass sanity check
    loss() = sum(abs2, layer(input))
    return gradient(loss, Flux.params(layer))
end

# Reproduce the failure on CPU with 1D and 2D convolutions...
oned_test()
twod_test()

# ...and on GPU. Each call raises the same MethodError from the pullback.
oned_test_gpu()
twod_test_gpu()

Each of these fails with the same error.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions