I am profiling my code and have run into trouble ensuring the type stability of code that uses CuArrays. When I call `mapreduce` with two multidimensional `CuArray`s, `@code_warntype` fails to infer the output type (it reports `Body::Any`). One multidimensional `CuArray` works, two one-dimensional `CuArray`s work, and two multidimensional `Array`s work.
Why does this happen? Also, is `@code_warntype` useful when working with CUDA.jl, or will things like this pop up often?
Here is an MWE:
julia> @code_warntype mapreduce((a,b)->a, +, CUDA.zeros(20,20), CUDA.zeros(20,20))
MethodInstance for mapreduce(::var"#57#58", ::typeof(+), ::CuArray{Float32, 2, CUDA.DeviceMemory}, ::CuArray{Float32, 2, CUDA.DeviceMemory})
from mapreduce(f, op, A::AnyGPUArray, As::Union{Base.Broadcast.Broadcasted, AbstractArray}...; dims, init) @ GPUArrays ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:28
Arguments
#self#::Core.Const(mapreduce)
f::Core.Const(var"#57#58"())
op::Core.Const(+)
A::CuArray{Float32, 2, CUDA.DeviceMemory}
As::Tuple{CuArray{Float32, 2, CUDA.DeviceMemory}}
Body::Any
1 ─ %1 = GPUArrays.:(var"#mapreduce#40")::Core.Const(GPUArrays.var"#mapreduce#40")
│ %2 = Core.tuple(GPUArrays.:(:), GPUArrays.nothing, #self#, f, op, A)::Tuple{Colon, Nothing, typeof(mapreduce), var"#57#58", typeof(+), CuArray{Float32, 2, CUDA.DeviceMemory}}
│ %3 = Core._apply_iterate(Base.iterate, %1, %2, As)::Any
└── return %3
julia> @code_warntype mapreduce((a,b)->a, +, CUDA.zeros(20), CUDA.zeros(20))
MethodInstance for mapreduce(::var"#59#60", ::typeof(+), ::CuArray{Float32, 1, CUDA.DeviceMemory}, ::CuArray{Float32, 1, CUDA.DeviceMemory})
from mapreduce(f, op, A::AnyGPUArray, As::Union{Base.Broadcast.Broadcasted, AbstractArray}...; dims, init) @ GPUArrays ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:28
Arguments
#self#::Core.Const(mapreduce)
f::Core.Const(var"#59#60"())
op::Core.Const(+)
A::CuArray{Float32, 1, CUDA.DeviceMemory}
As::Tuple{CuArray{Float32, 1, CUDA.DeviceMemory}}
Body::Float32
1 ─ %1 = GPUArrays.:(var"#mapreduce#40")::Core.Const(GPUArrays.var"#mapreduce#40")
│ %2 = Core.tuple(GPUArrays.:(:), GPUArrays.nothing, #self#, f, op, A)::Tuple{Colon, Nothing, typeof(mapreduce), var"#59#60", typeof(+), CuArray{Float32, 1, CUDA.DeviceMemory}}
│ %3 = Core._apply_iterate(Base.iterate, %1, %2, As)::Float32
└── return %3
julia> @code_warntype mapreduce((a)->a, +, CUDA.zeros(20,20))
MethodInstance for mapreduce(::var"#61#62", ::typeof(+), ::CuArray{Float32, 2, CUDA.DeviceMemory})
from mapreduce(f, op, A::AnyGPUArray, As::Union{Base.Broadcast.Broadcasted, AbstractArray}...; dims, init) @ GPUArrays ~/.julia/packages/GPUArrays/qt4ax/src/host/mapreduce.jl:28
Arguments
#self#::Core.Const(mapreduce)
f::Core.Const(var"#61#62"())
op::Core.Const(+)
A::CuArray{Float32, 2, CUDA.DeviceMemory}
As::Tuple{}
Body::Float32
1 ─ %1 = GPUArrays.:(var"#mapreduce#40")::Core.Const(GPUArrays.var"#mapreduce#40")
│ %2 = Core.tuple(GPUArrays.:(:), GPUArrays.nothing, #self#, f, op, A)::Tuple{Colon, Nothing, typeof(mapreduce), var"#61#62", typeof(+), CuArray{Float32, 2, CUDA.DeviceMemory}}
│ %3 = Core._apply_iterate(Base.iterate, %1, %2, As)::Float32
└── return %3
julia> @code_warntype mapreduce((a,b)->a+b, +, zeros(Float32,20,20), zeros(Float32,20,20))
MethodInstance for mapreduce(::var"#63#64", ::typeof(+), ::Matrix{Float32}, ::Matrix{Float32})
from mapreduce(f, op, A::Union{Base.AbstractBroadcasted, AbstractArray}...; kw...) @ Base reducedim.jl:359
Arguments
#self#::Core.Const(mapreduce)
f::Core.Const(var"#63#64"())
op::Core.Const(+)
A::Tuple{Matrix{Float32}, Matrix{Float32}}
Body::Float32
1 ─ %1 = Base.:(var"#mapreduce#822")::Core.Const(Base.var"#mapreduce#822")
│ %2 = Core.NamedTuple()::Core.Const(NamedTuple())
│ %3 = Base.pairs(%2)::Core.Const(Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}())
│ %4 = Core.tuple(%3, #self#, f, op)::Core.Const((Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}(), mapreduce, var"#63#64"(), +))
│ %5 = Core._apply_iterate(Base.iterate, %1, %4, A)::Float32
└── return %5