Hi,
I am comparing the performance of three functions that implement X .+= 1,
where X is a 2D Array of floats:
- shift2D_1D! splits the 2D loop across two nested functions,
- shift2DLoop! uses a single function with a 2D loop nest,
- shift2DNative! uses the native broadcast operation (.+=).
shift2D_1D! performs noticeably worse than the other two. Is there a way to improve this?
In particular, is the signature of the inner function shift1D!, which acts on a SubArray, OK?
Results : (Julia 0.6)
GFlops=10.829836198727493 (shift2D_1D!)
GFlops=18.726591760299627 (shift2DLoop!)
GFlops=17.362785762515674 (shift2DNative!)
Thank you for your help.
Laurent
using BenchmarkTools
# Implementation #1: shift1D! applied column by column from shift2D_1D!
"""
    shift1D!(x::AbstractArray{T,1}) where T<:Real

Add one to every element of the vector `x` in place and return `nothing`.

The loop runs over `eachindex(x)` rather than `1:length(x)`, so the
`@inbounds` access stays valid for any `AbstractArray` the signature accepts,
including ones whose indices do not start at 1.
"""
function shift1D!(x::AbstractArray{T,1}) where T<:Real
    one_T = one(T)
    # Iterations are independent, so the loop is a valid `@simd` candidate.
    @simd for i in eachindex(x)
        @inbounds x[i] += one_T
    end
    return nothing
end
"""
    shift2D_1D!(x2D::Array{T,2}) where T<:Real

Add one to every element of `x2D` in place by handing each column, as a view,
to `shift1D!`. Returns `nothing`.
"""
function shift2D_1D!(x2D::Array{T,2}) where T<:Real
    ncols = size(x2D, 2)
    for col in 1:ncols
        # A view aliases the column, so the update lands in `x2D` itself.
        column = @view x2D[:, col]
        shift1D!(column)
    end
    return nothing
end
# Implementation #2 Nested Loops impl for X2D+=1
"""
    shift2DLoop!(x2D::Array{T,2}) where T<:Real

Add one to every element of the matrix `x2D` in place using an explicit
two-level loop nest, and return `nothing`.
"""
function shift2DLoop!(x2D::Array{T,2}) where T<:Real
    nx, ny = size(x2D)
    one_T = one(T)
    # Columns outside, rows inside: the inner loop walks the first index,
    # which is the contiguous one in Julia's column-major layout.
    for j in 1:ny
        # `@simd` is only specified for innermost loops, so it is applied
        # here and not to the outer loop.
        @simd for i in 1:nx
            @inbounds x2D[i, j] += one_T
        end
    end
    return nothing
end
# Implementation #3 native Julia broadcast op for X2D+=1
"""
    shift2DNative!(x2D::Array{T,2}) where T<:Real

Add one to every element of `x2D` in place through Julia's broadcast
machinery and return the updated array.
"""
function shift2DNative!(x2D::Array{T,2}) where T<:Real
    increment = T(1)
    # In-place broadcast: `x2D` is both an input and the destination.
    return broadcast!(+, x2D, x2D, increment)
end
# A function to evaluate the performances
"""
    testShift(shiftFunction, T::Type, n::Integer)

Time `shiftFunction` on an `n`-by-`n` zero matrix of element type `T` and
print the result as `GFlops=<rate> (<function name>)`.

The rate is `n*n` (one addition per element) divided by `t * 1e9`, where `t`
is the value returned by `@belapsed` (a time in seconds, per BenchmarkTools).
Returns `nothing`.
"""
function testShift(shiftFunction, T::Type, n::Integer)
    x = zeros(T, n, n)
    # One `@belapsed` measurement is all that is reported, so no separate
    # `@benchmark` run (whose result would be thrown away) is performed.
    t = @belapsed $shiftFunction($x)
    print("GFlops=", n*n/(t*1.e9), " (", string(shiftFunction), ")\n")
    return nothing
end
# Benchmark each implementation on a 200x200 Float32 matrix, in a fixed order.
for shiftFunction in (shift2D_1D!, shift2DLoop!, shift2DNative!)
    testShift(shiftFunction, Float32, 200)
end