Skip to content

Use invariant.load for ldg #2655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 33 additions & 25 deletions src/device/pointer.jl
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,29 +15,41 @@ const Local = 5
end


## ldg
# pointerref_ldg(ptr, i, Val(align)): load the element of type `T` at 1-based
# index `i` through `ptr`.
#
# NOTE(review): the lines below appear to interleave two implementations, with
# indentation and +/- markers lost: a removed `@eval` loop over `LDGTypes` that
# called an `llvm.nvvm.ldg.global.*` intrinsic, and an added `@generated`
# function that emits a plain load tagged with invariant-load metadata.
# Comments tagged "(old)" / "(new)" record that reading; confirm against the
# actual change before relying on it.
#
# (new) Generated function: the body sees the argument *types* (`ptr` and `I`
# are types here) and returns the expression built by
# `LLVM.Interop.call_function` at the end.
@inline @generated function pointerref_ldg(ptr::LLVMPtr{T,A}, i::I, ::Val{align}) where {T,A,I,align}
# (new) Zero-size element types: return `T.instance` without emitting a load.
sizeof(T) == 0 && return T.instance
# (new) Create an LLVM context for the code generation below
# (presumably disposed when the block exits — confirm `LLVM.@dispose` semantics).
LLVM.@dispose ctx=LLVM.Context() begin
# (new) LLVM type of the loaded element.
eltyp = convert(LLVM.LLVMType, T)

# (old) Element types for which a dedicated method was defined by the loop below.
const LDGTypes = (UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64,
Float32, Float64)
# (new) LLVM types of the index argument and of the pointer argument.
T_idx = convert(LLVM.LLVMType, I)
T_ptr = convert(LLVM.LLVMType, ptr)

# TODO: this functionality should throw <sm_32
# NOTE: CUDA 8.0 supports more caching modifiers, but those aren't supported by LLVM yet
# (old) Define one `pointerref_ldg` method per entry of `LDGTypes`.
for T in LDGTypes
# (old) Class letter used in the intrinsic name: `:i` for `Integer` subtypes,
# `:f` for `AbstractFloat` subtypes.
class = if T <: Integer
:i
elseif T <: AbstractFloat
:f
end
# TODO: p class
width = sizeof(T)*8 # in bits
# (old) Class letter followed by the bit width, e.g. Symbol(:i, 32) == :i32.
typ = Symbol(class, width)
# (new) Pointer-to-element type in address space `A`; used as the bitcast target
# on the typed-pointer path below.
T_typed_ptr = LLVM.PointerType(eltyp, A)

# (old) Name of the intrinsic invoked through `@typed_ccall` below.
intr = "llvm.nvvm.ldg.global.$class.$typ.p1$typ"
# (old) Method for `AS.Global` pointers to `$T`: turns the 1-based index into a
# byte offset from `base_ptr` and calls the intrinsic with the alignment.
@eval @inline function pointerref_ldg(base_ptr::LLVMPtr{$T,AS.Global}, i::Integer,
::Val{align}) where align
offset = i-one(i) # in elements
ptr = base_ptr + offset*sizeof($T)
@typed_ccall($intr, llvmcall, $T, (LLVMPtr{$T,AS.Global}, Int32), ptr, Val(align))
# (new) The LLVM function takes (pointer, index) and returns the element type.
# create a function
param_types = [T_ptr, T_idx]
llvm_f, _ = LLVM.Interop.create_function(eltyp, param_types)

# generate IR
LLVM.@dispose builder=LLVM.IRBuilder() begin
entry = LLVM.BasicBlock(llvm_f, "entry")
LLVM.position!(builder, entry)
# (new) Compute the element address with an inbounds GEP on the index parameter.
# When the context uses typed pointers, the incoming pointer is first bitcast to
# `T_typed_ptr`; otherwise the GEP is applied to the parameter directly.
ptr = if LLVM.supports_typed_pointers(ctx)
typed_ptr = LLVM.bitcast!(builder, LLVM.parameters(llvm_f)[1], T_typed_ptr)
LLVM.inbounds_gep!(builder, eltyp, typed_ptr, [LLVM.parameters(llvm_f)[2]])
else
LLVM.inbounds_gep!(builder, eltyp, LLVM.parameters(llvm_f)[1], [LLVM.parameters(llvm_f)[2]])
end
# (new) Emit the load of one element.
ld = LLVM.load!(builder, eltyp, ptr)
# (new) Attach address-space TBAA metadata, only for non-zero address spaces.
if A != 0
LLVM.metadata(ld)[LLVM.MD_tbaa] = LLVM.Interop.tbaa_addrspace(A)
end
# (new) Apply the alignment taken from the `Val{align}` type parameter.
LLVM.alignment!(ld, align)
# (new) Tag the load with invariant-load metadata.
# NOTE(review): LLVM's LangRef describes `!invariant.load` as referencing a
# metadata node with no entries; this node is built from a one-element vector
# holding `nothing` — confirm that this produces the intended node.
LLVM.metadata(ld)[LLVM.MD_invariant_load] = LLVM.MDNode(LLVM.Metadata[nothing])

LLVM.ret!(builder, ld)
end

# (new) Build the expression that calls the generated LLVM function, passing
# `ptr` and the zero-based index `i-one(I)`.
LLVM.Interop.call_function(llvm_f, T, Tuple{LLVMPtr{T,A}, I}, :ptr, :(i-one(I)))
# NOTE(review): these two `end`s presumably close the inner definition and its
# enclosing block on both the old and the new side — confirm.
end
end

Expand Down Expand Up @@ -66,9 +78,5 @@ end

export unsafe_cached_load

# NOTE(review): the lines below appear to interleave removed and added diff
# lines with indentation and +/- markers lost; confirm against the actual
# change before relying on the "(old)" / "(new)" reading recorded here.
#
# (old) Method restricted to `AS.Global` pointers whose element type is one of
# `LDGTypes`; forwards to `pointerref_ldg`. `i` defaults to 1 and `align` to
# `Val(1)`.
unsafe_cached_load(p::LLVMPtr{<:Union{LDGTypes...},AS.Global}, i::Integer=1, align::Val=Val(1)) =
pointerref_ldg(p, i, align)
# NOTE: fall back to normal unsafe_load for unsupported types. we could be smarter here,
# e.g. destruct/load/reconstruct, but that's too complicated for what it's worth.
# Method accepting any `LLVMPtr`, with the same defaults as above. Two candidate
# bodies follow the signature: the `unsafe_load` line is presumably the (old)
# fallback body and the `pointerref_ldg` line the (new) body that replaces it,
# so that every `LLVMPtr` is routed through `pointerref_ldg`.
unsafe_cached_load(p::LLVMPtr, i::Integer=1, align::Val=Val(1)) =
unsafe_load(p, i, align)
pointerref_ldg(p, i, align)