Hello. I use RTX 3070. Why is there no difference?
using CUDA, BenchmarkTools
function add!(X::AbstractVector{T}) where T
return X .+= T(1)
end
A = CUDA.zeros(Float32, 10_000_000);
B = CUDA.zeros(Float64, 10_000_000);
@btime add!(A); # -> 5.250 μs (37 allocations: 1.09 KiB)
@btime add!(B); # -> 5.267 μs (37 allocations: 1.09 KiB)