I tried a few versions. I did not use a proper benchmarking tool, just @time, but maybe the results are still interesting to some of you.
My main surprise was how fast the vectorised version is: it is faster than a loop without @inbounds.
Here are my results. I wanted to attach the code, but I cannot attach Julia files, so the code is pasted below.
Kind regards, z.
> include("perf1.jl")
:perf1
> test()
zeroing with logical indexing : 0.581205 seconds (9 allocations: 11.925 MiB)
zeroing vectorized : 0.145941 seconds
zeroing loop branch : 0.438066 seconds
zeroing loop no branch : 0.195619 seconds
zeroing loop no branch inbounds : 0.145681 seconds
> test()
zeroing with logical indexing : 0.583505 seconds (9 allocations: 11.925 MiB)
zeroing vectorized : 0.150736 seconds
zeroing loop branch : 0.439501 seconds
zeroing loop no branch : 0.177093 seconds
zeroing loop no branch inbounds : 0.145801 seconds
> test()
zeroing with logical indexing : 0.566651 seconds (9 allocations: 11.925 MiB)
zeroing vectorized : 0.147940 seconds
zeroing loop branch : 0.440264 seconds
zeroing loop no branch : 0.204290 seconds
zeroing loop no branch inbounds : 0.146339 seconds
And the code:
# Logical Indexing
# Zero, in place, every element of `A` whose matching element of `B` is nonzero,
# selecting the targets through a boolean mask. Returns `A`.
function zeroingLogicalIndexing(A, B)
    # Storing one scalar into many locations needs the broadcasting form `.=`;
    # plain `A[mask] = 0` is rejected on Julia 1.x.
    # NOTE(review): this mask selects the NONZERO entries of `B`, whereas the
    # loop and broadcast kernels in this file clear `A` where `B` IS zero —
    # confirm which polarity is intended.
    A[B .!= 0] .= 0
    return A
end
# Times `zeroingLogicalIndexing` on a 10000×10000 problem with `@time`, after one
# warm-up call on a 100×100 problem so the timed call is not the first call.
function testLogicalIndexing()
    print("zeroing with logical indexing :")
    n1 = 100
    n2 = 100
    A = rand(n1, n2)
    B = rand(0:1, n1, n2) * 1.0
    # Warm-up call. The callee name was previously misspelled
    # (`zeroingLogicaIndexing`), which does not match the function defined above.
    zeroingLogicalIndexing(A, B)
    n1 = 10000
    n2 = 10000
    A = rand(n1, n2)
    B = rand(0:1, n1, n2) * 1.0
    return @time zeroingLogicalIndexing(A, B)
end
# Vectorized
# Broadcast variant: multiply each element of `A` by the Bool "B element is
# nonzero", writing the result back into `A` in a single fused pass. Returns `A`.
function zeroingVectorized(A, B)
    A .= .!iszero.(B) .* A
    return A
end
# Times `zeroingVectorized` on a 10000×10000 problem with `@time`, after one
# warm-up call on a 100×100 problem so the timed call is not the first call.
function testVectorized()
    print("zeroing vectorized :")
    # Warm-up on a tiny problem.
    small = 100
    warmA = rand(small, small)
    warmB = rand(0:1, small, small) * 1.0
    zeroingVectorized(warmA, warmB)
    # Timed run on the full-size problem.
    large = 10000
    bigA = rand(large, large)
    bigB = rand(0:1, large, large) * 1.0
    return @time zeroingVectorized(bigA, bigB)
end
# Loop
# Branching variant: walk every index of `B` and overwrite the matching slot of
# `A` with zero only when the `B` entry is zero. Mutates `A`; returns `nothing`.
function zeroingLoopBranch(A, B)
    for idx in eachindex(B)
        iszero(B[idx]) && (A[idx] = 0)
    end
    return nothing
end
# Branch-free variant: every element of `A` is rewritten as itself times the Bool
# "B element is nonzero", so no `if` appears in the loop body.
# Mutates `A`; returns `nothing`.
function zeroingLoopNoBranch(A, B)
    for idx in eachindex(B)
        keep = !iszero(B[idx])
        A[idx] = keep * A[idx]
    end
    return nothing
end
# Branch-free variant with bounds checks disabled inside the loop: every element
# of `A` is rewritten as itself times the Bool "B element is nonzero".
# Mutates `A`; returns `nothing`. Throws `BoundsError` (before touching `A`) when
# some index of `B` is not a valid index of `A`.
function zeroingLoopNoBranchInbounds(A, B)
    # The loop indices come from `B` only, and `@inbounds` removes the per-element
    # checks, so validate once up front that all of them are valid for `A` too.
    # Without this, a too-small `A` would be written out of bounds unchecked.
    checkbounds(A, eachindex(B))
    for i in eachindex(B)
        @inbounds A[i] = !iszero(B[i]) * A[i]
    end
    return nothing
end
# Times `zeroingLoopBranch` on a 10000×10000 problem with `@time`, after one
# warm-up call on a 100×100 problem so the timed call is not the first call.
function testLoopBranch()
    print("zeroing loop branch :")
    # Warm-up on a tiny problem.
    small = 100
    warmA = rand(small, small)
    warmB = rand(0:1, small, small) * 1.0
    zeroingLoopBranch(warmA, warmB)
    # Timed run on the full-size problem.
    large = 10000
    bigA = rand(large, large)
    bigB = rand(0:1, large, large) * 1.0
    return @time zeroingLoopBranch(bigA, bigB)
end
# https://www.juliabloggers.com/cpu-pipelines-when-more-is-less/
# Times `zeroingLoopNoBranch` on a 10000×10000 problem with `@time`, after one
# warm-up call on a 100×100 problem so the timed call is not the first call.
function testLoopNoBranch()
    print("zeroing loop no branch :")
    # Warm-up on a tiny problem.
    small = 100
    warmA = rand(small, small)
    warmB = rand(0:1, small, small) * 1.0
    zeroingLoopNoBranch(warmA, warmB)
    # Timed run on the full-size problem.
    large = 10000
    bigA = rand(large, large)
    bigB = rand(0:1, large, large) * 1.0
    return @time zeroingLoopNoBranch(bigA, bigB)
end
# Times `zeroingLoopNoBranchInbounds` on a 10000×10000 problem with `@time`, after
# one warm-up call on a 100×100 problem so the timed call is not the first call.
function testLoopNoBranchInbounds()
    print("zeroing loop no branch inbounds :")
    # Warm-up on a tiny problem.
    small = 100
    warmA = rand(small, small)
    warmB = rand(0:1, small, small) * 1.0
    zeroingLoopNoBranchInbounds(warmA, warmB)
    # Timed run on the full-size problem.
    large = 10000
    bigA = rand(large, large)
    bigB = rand(0:1, large, large) * 1.0
    return @time zeroingLoopNoBranchInbounds(bigA, bigB)
end
# Runs all five timing functions in a fixed order: logical indexing, broadcast,
# branching loop, branch-free loop, branch-free loop with `@inbounds`.
# Returns whatever the last one returns.
function test()
    for bench in (testLogicalIndexing, testVectorized, testLoopBranch, testLoopNoBranch)
        bench()
    end
    return testLoopNoBranchInbounds()
end
# Last expression of the file: the symbol that `include("perf1.jl")` evaluates to.
:perf1