Register regression on 1.9

On Julia `#master`:

```julia
julia> main()
CUDA.registers(kernel) = 40
CUDA.memory(kernel) = (local = 648, shared = 0, constant = 0)
CUDA.maxthreads(kernel) = 1024
```

Compared with Julia `1.8.2`:

```julia
julia> main()
CUDA.registers(kernel) = 4
CUDA.memory(kernel) = (local = 0, shared = 0, constant = 0)
CUDA.maxthreads(kernel) = 1024
```

---

Minimal working example (MWE):

```julia
using CUDA

# Produce a scrambled sample value for `index`.
#
# The raw value comes from `sobol(index, dim)` and is then passed, together with
# `seed`, through `nested_uniform_scramble_base2`. `dim` defaults to 0.
function ld_random_val(index::UInt32, seed::UInt32, dim::UInt32 = UInt32(0))
    raw_sample = sobol(index, dim)
    return nested_uniform_scramble_base2(raw_sample, seed)
end

# Scramble `x` with `seed`: reverse the bits of `x`, run the result through
# `laine_karras_permutation`, then reverse the bits again.
function nested_uniform_scramble_base2(x::UInt32, seed::UInt32)
    flipped = reverse_bits(x)
    permuted = laine_karras_permutation(flipped, seed)
    return reverse_bits(permuted)
end

# Return `x` with the order of its 32 bits reversed (bit 0 <-> bit 31, ...).
#
# Works by swapping neighbouring groups of increasing width: single bits,
# bit pairs, nibbles, bytes and finally the two 16-bit halves.
function reverse_bits(x::UInt32)
    # Swap adjacent single bits.
    upper = (x & 0xaaaaaaaa) >> 1
    lower = (x & 0x55555555) << 1
    x = upper | lower

    # Swap adjacent bit pairs.
    upper = (x & 0xcccccccc) >> 2
    lower = (x & 0x33333333) << 2
    x = upper | lower

    # Swap adjacent nibbles.
    upper = (x & 0xf0f0f0f0) >> 4
    lower = (x & 0x0f0f0f0f) << 4
    x = upper | lower

    # Swap adjacent bytes.
    upper = (x & 0xff00ff00) >> 8
    lower = (x & 0x00ff00ff) << 8
    x = upper | lower

    # Swap the two 16-bit halves.
    return (x >> 16) | (x << 16)
end

# Mix `x` with `seed` using wrapping UInt32 arithmetic.
#
# The seed is added first; then four rounds of `h = h xor (h * constant)` are
# applied, each with a different even 32-bit constant. Returns the mixed UInt32.
function laine_karras_permutation(x::UInt32, seed::UInt32)
    h = x + seed  # UInt32 addition wraps around on overflow
    h = h ⊻ (h * 0x6c50b47c)
    h = h ⊻ (h * 0xb82f1e52)
    h = h ⊻ (h * 0xc7afe638)
    h = h ⊻ (h * 0x8d22f6e6)
    return h
end

# Compute a 32-bit value for sample `index` in dimension `dim` by XOR-ing
# together the table entries selected by the set bits of `index`.
#
# `dim` is zero-based and selects one of the five 32-entry rows of the table
# below, so it must be in 0:4; a larger value indexes past the end of the tuple.
# NOTE(review): the table presumably holds Sobol direction numbers, judging by
# the function and variable names — confirm against the original reference.
function sobol(index::UInt32, dim::UInt32)
    # Five rows (one per supported dimension) of 32 UInt32 entries each,
    # one entry per bit position of `index`.
    directions = (
        (
        0x80000000, 0x40000000, 0x20000000, 0x10000000,
        0x08000000, 0x04000000, 0x02000000, 0x01000000,
        0x00800000, 0x00400000, 0x00200000, 0x00100000,
        0x00080000, 0x00040000, 0x00020000, 0x00010000,
        0x00008000, 0x00004000, 0x00002000, 0x00001000,
        0x00000800, 0x00000400, 0x00000200, 0x00000100,
        0x00000080, 0x00000040, 0x00000020, 0x00000010,
        0x00000008, 0x00000004, 0x00000002, 0x00000001,
        ), (
        0x80000000, 0xc0000000, 0xa0000000, 0xf0000000,
        0x88000000, 0xcc000000, 0xaa000000, 0xff000000,
        0x80800000, 0xc0c00000, 0xa0a00000, 0xf0f00000,
        0x88880000, 0xcccc0000, 0xaaaa0000, 0xffff0000,
        0x80008000, 0xc000c000, 0xa000a000, 0xf000f000,
        0x88008800, 0xcc00cc00, 0xaa00aa00, 0xff00ff00,
        0x80808080, 0xc0c0c0c0, 0xa0a0a0a0, 0xf0f0f0f0,
        0x88888888, 0xcccccccc, 0xaaaaaaaa, 0xffffffff,
        ), (
        0x80000000, 0xc0000000, 0x60000000, 0x90000000,
        0xe8000000, 0x5c000000, 0x8e000000, 0xc5000000,
        0x68800000, 0x9cc00000, 0xee600000, 0x55900000,
        0x80680000, 0xc09c0000, 0x60ee0000, 0x90550000,
        0xe8808000, 0x5cc0c000, 0x8e606000, 0xc5909000,
        0x6868e800, 0x9c9c5c00, 0xeeee8e00, 0x5555c500,
        0x8000e880, 0xc0005cc0, 0x60008e60, 0x9000c590,
        0xe8006868, 0x5c009c9c, 0x8e00eeee, 0xc5005555,
        ), (
        0x80000000, 0xc0000000, 0x20000000, 0x50000000,
        0xf8000000, 0x74000000, 0xa2000000, 0x93000000,
        0xd8800000, 0x25400000, 0x59e00000, 0xe6d00000,
        0x78080000, 0xb40c0000, 0x82020000, 0xc3050000,
        0x208f8000, 0x51474000, 0xfbea2000, 0x75d93000,
        0xa0858800, 0x914e5400, 0xdbe79e00, 0x25db6d00,
        0x58800080, 0xe54000c0, 0x79e00020, 0xb6d00050,
        0x800800f8, 0xc00c0074, 0x200200a2, 0x50050093,
        ), (
        0x80000000, 0x40000000, 0x20000000, 0xb0000000,
        0xf8000000, 0xdc000000, 0x7a000000, 0x9d000000,
        0x5a800000, 0x2fc00000, 0xa1600000, 0xf0b00000,
        0xda880000, 0x6fc40000, 0x81620000, 0x40bb0000,
        0x22878000, 0xb3c9c000, 0xfb65a000, 0xddb2d000,
        0x78022800, 0x9c0b3c00, 0x5a0fb600, 0x2d0ddb00,
        0xa2878080, 0xf3c9c040, 0xdb65a020, 0x6db2d0b0,
        0x800228f8, 0x400b3cdc, 0x200fb67a, 0xb00ddb9d,
        ),
    )

    # Accumulator for the XOR of all selected entries.
    x::UInt32 = 0
    for bit in UInt32(0):UInt32(31)
        # `mask` is 1 when bit number `bit` of `index` is set, otherwise 0.
        mask = (index >> bit) & UInt32(1)
        # Multiplying by `mask` contributes either the entry or zero, avoiding a
        # branch. The `+ 0x1` offsets turn the zero-based `dim` and `bit` into
        # Julia's one-based tuple indices.
        x ⊻= mask * directions[dim + 0x1][bit + 0x1]
    end
    x
end

# Kernel body used for the measurement: feeds the calling thread's x index into
# `ld_random_val` with a fixed seed and discards the result.
function f()
    # The `::UInt32` declaration converts the value of `threadIdx().x` to UInt32.
    i::UInt32 = threadIdx().x
    # Return value is intentionally unused; `dim` takes its default of 0.
    ld_random_val(i, 0xdeadbeef)
    return nothing
end

# Build a kernel object for `f` and print its resource usage.
#
# `launch=false` is passed to `@cuda`, so the kernel is only prepared here and
# is not launched by this call. Each `@show` prints the queried expression
# together with its value, which is the output quoted at the top of this report.
function main()
    kernel = CUDA.@cuda launch=false f()
    @show CUDA.registers(kernel)
    @show CUDA.memory(kernel)
    @show CUDA.maxthreads(kernel)
end
# main()
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Register regression on 1.9 #1673

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Register regression on 1.9 #1673

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions