Skip to content

GPU tests fail on GTX970 and P100 #57

Closed
@leios

Description

@leios

I could not get the tests to work on my GTX970 GPU. There seems to be an issue with the following constructor:

"""
    DistanceVecNeighborFinder(; nb_matrix, matrix_14, n_steps, dist_cutoff)

Keyword constructor for `DistanceVecNeighborFinder`.

Builds two `n_atoms × n_atoms` index matrices, where `n_atoms = size(nb_matrix, 1)`:
`is[i, j] == i` and `js[i, j] == j`. When `nb_matrix` is a `CuArray`, `is`, `js` and
`matrix_14` are passed through `cu`; otherwise they are left as built.

# Keywords
- `nb_matrix`: square matrix whose first dimension gives the atom count (required).
- `matrix_14`: defaults to `falses(size(nb_matrix))`.
- `n_steps`: defaults to `10`; stored unchanged.
- `dist_cutoff`: required; stored unchanged.

# Returns
A `DistanceVecNeighborFinder` parameterised on the types of `dist_cutoff`,
`nb_matrix` and `is`.
"""
function DistanceVecNeighborFinder(;
                                nb_matrix,
                                matrix_14=falses(size(nb_matrix)),
                                n_steps=10,
                                dist_cutoff)
    n_atoms = size(nb_matrix, 1)
    # NOTE: `is` and `js` are dense n_atoms × n_atoms integer matrices, so their
    # memory footprint grows quadratically with the number of atoms.
    if isa(nb_matrix, CuArray)
        # Column j of `is` is 1:n_atoms, i.e. is[i, j] == i.
        is = cu(hcat([collect(1:n_atoms) for i in 1:n_atoms]...))
        # Transposed copy, i.e. js[i, j] == j; allocates a second full matrix.
        js = cu(permutedims(is, (2, 1)))
        m14 = cu(matrix_14)
    else
        is = hcat([collect(1:n_atoms) for i in 1:n_atoms]...)
        js = permutedims(is, (2, 1))
        m14 = matrix_14
    end
    return DistanceVecNeighborFinder{typeof(dist_cutoff), typeof(nb_matrix), typeof(is)}(
            nb_matrix, m14, n_steps, dist_cutoff, is, js)
end

Specifically, the failure occurs when this constructor is called in test/protein.jl.

I think `permutedims` doesn't work on a `CuArray`, so I tried keeping it as an array, but eventually ran into an issue with turning `is` into an array for the `DistanceVecNeighborFinder`. I tried a bunch of different variations, so I'll just leave the unchanged error here:

OpenMM protein comparison: Error During Test at /home/leios/projects/Molly.jl/test/protein.jl:54
  Got exception outside of a @test
  MethodError: no method matching iterate(::Nothing)
  Closest candidates are:
    iterate(::Union{LinRange, StepRangeLen}) at ~/builds/julia-1.7.1/share/julia/base/range.jl:826
    iterate(::Union{LinRange, StepRangeLen}, ::Integer) at ~/builds/julia-1.7.1/share/julia/base/range.jl:826
    iterate(::T) where T<:Union{Base.KeySet{<:Any, <:Dict}, Base.ValueIterator{<:Dict}} at ~/builds/julia-1.7.1/share/julia/base/dict.jl:695
    ...
  Stacktrace:
    [1] indexed_iterate(I::Nothing, i::Int64)
      @ Base ./tuple.jl:92
    [2] CUDA.MemoryInfo()
      @ CUDA ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:155
    [3] OutOfGPUMemoryError (repeats 2 times)
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:199 [inlined]
    [4] throw_api_error(res::CUDA.cudaError_enum)
      @ CUDA ~/.julia/packages/CUDA/VWaZ6/lib/cudadrv/error.jl:89
    [5] macro expansion
      @ ~/.julia/packages/CUDA/VWaZ6/lib/cudadrv/error.jl:101 [inlined]
    [6] cuMemAlloc_v2(dptr::Base.RefValue{CuPtr{Nothing}}, bytesize::Int64)
      @ CUDA ~/.julia/packages/CUDA/VWaZ6/lib/utils/call.jl:26
    [7] #alloc#1
      @ ~/.julia/packages/CUDA/VWaZ6/lib/cudadrv/memory.jl:86 [inlined]
    [8] macro expansion
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:41 [inlined]
    [9] macro expansion
      @ ./timing.jl:299 [inlined]
   [10] actual_alloc(bytes::Int64; async::Bool, stream::CuStream)
      @ CUDA ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:39
   [11] macro expansion
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:224 [inlined]
   [12] macro expansion
      @ ./timing.jl:299 [inlined]
   [13] #_alloc#204
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:305 [inlined]
   [14] #alloc#203
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:291 [inlined]
   [15] alloc
      @ ~/.julia/packages/CUDA/VWaZ6/src/pool.jl:287 [inlined]
   [16] CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}(#unused#::UndefInitializer, dims::Tuple{Int64, Int64})
      @ CUDA ~/.julia/packages/CUDA/VWaZ6/src/array.jl:42
   [17] similar
      @ ~/.julia/packages/CUDA/VWaZ6/src/array.jl:164 [inlined]
   [18] permutedims(B::CuArray{Int64, 2, CUDA.Mem.DeviceBuffer}, perm::Tuple{Int64, Int64})
      @ Base ./multidimensional.jl:1503
   [19] DistanceVecNeighborFinder(; nb_matrix::CuArray{Bool, 2, CUDA.Mem.DeviceBuffer}, matrix_14::CuArray{Bool, 2, CUDA.Mem.DeviceBuffer}, n_steps::Int64, dist_cutoff::Quantity{Float64, 𝐋, Unitful.FreeUnits{(nm,), 𝐋, nothing}})
      @ Molly ~/projects/Molly.jl/src/neighbors.jl:134
   [20] System(coord_file::String, force_field::OpenMMForceField{Float64, Quantity{Float64, 𝐌, Unitful.FreeUnits{(u,), 𝐌, nothing}}, Quantity{Float64, 𝐋, Unitful.FreeUnits{(nm,), 𝐋, nothing}}, Quantity{Float64, 𝐋^2 𝐌 𝐍^-1 𝐓^-2, Unitful.FreeUnits{(kJ, mol^-1), 𝐋^2 𝐌 𝐍^-1 𝐓^-2, nothing}}, Quantity{Float64, 𝐌 𝐍^-1 𝐓^-2, Unitful.FreeUnits{(kJ, nm^-2, mol^-1), 𝐌 𝐍^-1 𝐓^-2, nothing}}}; velocities::CuArray{SVector{3, Quantity{Float64, 𝐋 𝐓^-1, Unitful.FreeUnits{(nm, ps^-1), 𝐋 𝐓^-1, nothing}}}, 1, CUDA.Mem.DeviceBuffer}, box_size::Nothing, loggers::Dict{Any, Any}, units::Bool, gpu::Bool, gpu_diff_safe::Bool, dist_cutoff::Quantity{Float64, 𝐋, Unitful.FreeUnits{(nm,), 𝐋, nothing}}, nl_dist::Quantity{Float64, 𝐋, Unitful.FreeUnits{(nm,), 𝐋, nothing}}, rename_terminal_res::Bool)
      @ Molly ~/projects/Molly.jl/src/setup.jl:678
   [21] macro expansion
      @ ~/projects/Molly.jl/test/protein.jl:156 [inlined]
   [22] macro expansion
      @ ~/builds/julia-1.7.1/share/julia/stdlib/v1.7/Test/src/Test.jl:1283 [inlined]
   [23] top-level scope
      @ ~/projects/Molly.jl/test/protein.jl:55
   [24] include(fname::String)
      @ Base.MainInclude ./client.jl:451
   [25] top-level scope
      @ ~/projects/Molly.jl/test/runtests.jl:71
   [26] include(fname::String)
      @ Base.MainInclude ./client.jl:451
   [27] top-level scope
      @ none:6
   [28] eval
      @ ./boot.jl:373 [inlined]
   [29] exec_options(opts::Base.JLOptions)
      @ Base ./client.jl:268
   [30] _start()
      @ Base ./client.jl:495

This could be related to #16, but I felt it was different enough to warrant a separate issue.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions