Hello,
I am using `LoopVectorization` combined with `StructArrays` to vectorize some code that uses structures. I have created a simple example of what I am trying to achieve using the struct `Point2D`.
using StructArrays, LoopVectorization, BenchmarkTools
"""
    Point2D(x, y)

Immutable 2-D point with `Float64` coordinates `x` and `y`.
"""
struct Point2D
    x::Float64
    y::Float64
end
"""
    FirstWay

Field-less tag type. `get_new_point` and `structarray_turbo_get_all_points` have
methods specialised on `Type{FirstWay}`.
"""
struct FirstWay
end
"""
    SecondWay

Field-less tag type. `get_new_point` and `structarray_turbo_get_all_points` have
methods specialised on `Type{SecondWay}`.
"""
struct SecondWay
end
"""
    get_new_point(::Type{FirstWay}, p1, p2)

Return a `Point2D` whose `x` and `y` are the sums of the corresponding fields of
`p1` and `p2`. Both arguments only need to expose `.x` and `.y`.
"""
@inline function get_new_point(::Type{FirstWay}, p1, p2)
    x_sum = p1.x + p2.x
    y_sum = p1.y + p2.y
    return Point2D(x_sum, y_sum)
end
"""
    get_new_point(::Type{SecondWay}, p1, p2)

Return a `Point2D` whose `x` and `y` are the fields of `p1` minus the corresponding
fields of `p2`. Both arguments only need to expose `.x` and `.y`.
"""
@inline function get_new_point(::Type{SecondWay}, p1, p2)
    x_diff = p1.x - p2.x
    y_diff = p1.y - p2.y
    return Point2D(x_diff, y_diff)
end
# Function working with @turbo but specific to FirstWay
"""
    structarray_turbo_get_all_points(::Type{FirstWay}, p1, p2)

Add `p1` and `p2` component-wise (`x` with `x`, `y` with `y`) inside a `@turbo` loop
and return the result as a new `StructArray{Point2D}`.

The loop reads the `x` and `y` component arrays of `p1` and `p2` directly and writes
into freshly allocated `Vector{Float64}` buffers.

Throws an `ErrorException` when `p1` and `p2` differ in length.
"""
function structarray_turbo_get_all_points(::Type{FirstWay}, p1, p2)
    n = length(p1)
    # Zero-argument `error()` would raise an exception with an empty message.
    n == length(p2) ||
        error("p1 and p2 must have the same length, got $n and $(length(p2))")
    x_out = Vector{Float64}(undef, n)
    y_out = Vector{Float64}(undef, n)
    out = StructArray{Point2D}((x_out, y_out))
    @turbo for i in eachindex(p1)
        out.x[i] = p2.x[i] + p1.x[i]
        out.y[i] = p2.y[i] + p1.y[i]
    end
    return out
end
# Function working with @turbo but specific to SecondWay
"""
    structarray_turbo_get_all_points(::Type{SecondWay}, p1, p2)

Subtract `p2` from `p1` component-wise (`x` with `x`, `y` with `y`) inside a `@turbo`
loop and return the result as a new `StructArray{Point2D}`.

The loop reads the `x` and `y` component arrays of `p1` and `p2` directly and writes
into freshly allocated `Vector{Float64}` buffers.

Throws an `ErrorException` when `p1` and `p2` differ in length.
"""
function structarray_turbo_get_all_points(::Type{SecondWay}, p1, p2)
    n = length(p1)
    # Zero-argument `error()` would raise an exception with an empty message.
    n == length(p2) ||
        error("p1 and p2 must have the same length, got $n and $(length(p2))")
    x_out = Vector{Float64}(undef, n)
    y_out = Vector{Float64}(undef, n)
    out = StructArray{Point2D}((x_out, y_out))
    @turbo for i in eachindex(p1)
        out.x[i] = p1.x[i] - p2.x[i]
        out.y[i] = p1.y[i] - p2.y[i]
    end
    return out
end
# I would like my function to be formatted like this for clarity and to avoid repetition
"""
    aim_function_noturbo(T, p1, p2)

Call `get_new_point(T, p1[i], p2[i])` for every index `i` and store the results in a
new `StructArray{Point2D}`, which is returned.

`T` is forwarded unchanged to `get_new_point`, where it selects the method.

Throws an `ErrorException` when `p1` and `p2` differ in length.
"""
function aim_function_noturbo(T, p1, p2)
    n = length(p1)
    # Zero-argument `error()` would raise an exception with an empty message.
    n == length(p2) ||
        error("p1 and p2 must have the same length, got $n and $(length(p2))")
    x_out = Vector{Float64}(undef, n)
    y_out = Vector{Float64}(undef, n)
    out = StructArray{Point2D}((x_out, y_out))
    # Iterating the index set shared by all three arrays is what justifies `@inbounds`:
    # `eachindex` throws if their indices do not match.
    @inbounds for i in eachindex(p1, p2, out)
        out[i] = get_new_point(T, p1[i], p2[i])
    end
    return out
end
# Function added to compare time without struct arrays
"""
    no_struct(T, p1, p2)

Call `get_new_point(T, p1[i], p2[i])` for every index `i` and store the results in a
new `Vector{Point2D}`, which is returned.

`T` is forwarded unchanged to `get_new_point`, where it selects the method.

Throws an `ErrorException` when `p1` and `p2` differ in length.
"""
function no_struct(T, p1, p2)
    n = length(p1)
    # Zero-argument `error()` would raise an exception with an empty message.
    n == length(p2) ||
        error("p1 and p2 must have the same length, got $n and $(length(p2))")
    out = Vector{Point2D}(undef, n)
    # Iterating the index set shared by all three arrays is what justifies `@inbounds`:
    # `eachindex` throws if their indices do not match.
    @inbounds for i in eachindex(p1, p2, out)
        out[i] = get_new_point(T, p1[i], p2[i])
    end
    return out
end
# Number of points in every benchmark input.
npoints = 100_000

# Struct-of-arrays inputs (separate x and y vectors).
p1 = StructArray{Point2D}((rand(npoints), rand(npoints)))
p2 = StructArray{Point2D}((rand(npoints), rand(npoints)))
# Array-of-structs inputs for the comparison without StructArrays.
p1_nostruct = Point2D.(rand(npoints), rand(npoints))
p2_nostruct = Point2D.(rand(npoints), rand(npoints))

# The inputs are non-constant globals, so they are interpolated with `$`; otherwise
# BenchmarkTools also measures the cost of accessing untyped globals.
# The trailing timings are the figures reported in the original post.
@btime no_struct(FirstWay, $p1_nostruct, $p2_nostruct) # 305.97 µs
@btime structarray_turbo_get_all_points(FirstWay, $p1, $p2) # 130.248 µs
@btime structarray_turbo_get_all_points(SecondWay, $p1, $p2) # 132.144 µs
@btime aim_function_noturbo(SecondWay, $p1, $p2) # 406.816 (unit missing in the original post; presumably µs)
The code aims to compute the `get_new_point` function (which depends on an external `struct`) for a vector of `Point2D`s using `LoopVectorization`. Ideally, I would like my vectorised code to be something like:
"""
    aim_function(T, p1, p2)

Desired `@turbo` form of the element-wise kernel: call
`get_new_point(T, p1[i], p2[i])` for every index of `p1` and store the results in a
new `StructArray{Point2D}`, which is returned.

`T` is forwarded unchanged to `get_new_point`, where it selects the method.

NOTE: the surrounding post reports that `@turbo` cannot vectorise this loop as written.

Throws an `ErrorException` when `p1` and `p2` differ in length.
"""
function aim_function(T, p1, p2)
    n = length(p1)
    # Zero-argument `error()` would raise an exception with an empty message.
    n == length(p2) ||
        error("p1 and p2 must have the same length, got $n and $(length(p2))")
    x_out = Vector{Float64}(undef, n)
    y_out = Vector{Float64}(undef, n)
    out = StructArray{Point2D}((x_out, y_out))
    @turbo for i in eachindex(p1)
        out[i] = get_new_point(T, p1[i], p2[i])
    end
    return out
end
# Time the `@turbo` variant `aim_function`; the original line re-timed
# `aim_function_noturbo`, which looks like a copy-paste slip (TODO confirm with author).
# `$` interpolates the non-constant globals so global access is not measured.
@btime aim_function(SecondWay, $p1, $p2) # 425 µs
However, `@turbo` cannot vectorise this code. My question is: if possible, what would be the best way for `LoopVectorization` to vectorise a function formatted like this?
Thanks