Open
Description
Found in: https://github.com/JuliaData/DataFrames.jl/actions/runs/8635821381/job/23674443172
I can reproduce it by starting Julia with `julia --check-bounds=yes -t 4`
and running the following code:
@testset "CategoricalArray thread safety" begin
# These tests do not actually trigger multithreading bugs,
# but at least they check that the code that disables multithreading
# with CategoricalArray when levels are different works
Random.seed!(35)
df = DataFrame(x=rand(1:10, 100),
y=categorical(rand(10:15, 100)),
z=categorical(rand(0:20, 100)))
df.y2 = reverse(df.y) # Same levels
gd = groupby(df, :x)
@test combine(gd, :y => (y -> y[1]) => :res) ==
combine(gd, [:y, :y2] => ((y, x) -> y[1]) => :res) ==
combine(gd, [:y, :x] => ((y, x) -> y[1]) => :res) ==
combine(gd, [:y, :z] => ((y, z) -> y[1]) => :res) ==
combine(gd, :y => (y -> unwrap(y[1])) => :res)
@test combine(gd, [:x, :y, :y2] =>
((x, y, y2) -> x[1] <= 5 ? y[1] : y2[1]) => :res) ==
combine(gd, [:x, :y, :y2] =>
((x, y, y2) -> x[1] <= 5 ? unwrap(y[1]) : unwrap(y2[1])) => :res)
@test combine(gd, [:x, :y, :z] =>
((x, y, z) -> x[1] <= 5 ? y[1] : z[1]) => :res) ==
combine(gd, [:x, :y, :z] =>
((x, y, z) -> x[1] <= 5 ? unwrap(y[1]) : unwrap(z[1])) => :res)
end
Note that, unfortunately, the error does not occur on every run.
The relevant part of the error is:
nested task error: BoundsError: attempt to access 7-element Vector{UInt32} at index [14]
Stacktrace:
[1] throw_boundserror(A::Vector{UInt32}, I::Tuple{Int64})
@ Base .\essentials.jl:14
[2] getindex
@ .\essentials.jl:891 [inlined]
[3] update_refs!(A::CategoricalVector{Int64, UInt32, Int64, CategoricalValue{Int64, UInt32}, Union{}}, newlevels::Vector{Int64})
@ CategoricalArrays \CategoricalArrays\0yLZN\src\array.jl:472
[4] merge_pools!(A::CategoricalVector{Int64, UInt32, Int64, CategoricalValue{Int64, UInt32}, Union{}}, B::CategoricalValue{Int64, UInt32}; updaterefs::Bool, updatepool::Bool)
@ CategoricalArrays \CategoricalArrays\0yLZN\src\array.jl:489
[5] merge_pools!
@ \CategoricalArrays\0yLZN\src\array.jl:477 [inlined]
[6] setindex!
@ \CategoricalArrays\0yLZN\src\array.jl:500 [inlined]
[7] fill_row!(row::@NamedTuple{x1::CategoricalValue{Int64, UInt32}}, outcols::Tuple{CategoricalVector{Int64, UInt32, Int64, CategoricalValue{Int64, UInt32}, Union{}}}, i::Int64, colstart::Int64, colnames::Tuple{Symbol})
Metadata
Assignees
Labels
No labels
Activity