I had a few minutes, so I played around a bit: you can remove some redundant computation with a little linear algebra (see rotate3 below), but overall performance didn't change on my device. You can probably squeeze out more with more efficient memory access (you are currently iterating over the third axis).
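For reference, the identity rotate3 relies on (writing a data row as $x$, the per-axis mean as $p$, and the rotation as $R$) is roughly

$$(x - p)R^\top + p = xR^\top + \bigl(p - pR^\top\bigr),$$

so the per-row centering and un-centering inside the loop can be replaced by a single constant correction term added once after the multiply.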
using Rotations, NaNStatistics, BenchmarkTools, LinearAlgebra, Random
Random.seed!(42)
function rotate(x, θ, axis)
    θ = (θ * π) / 180
    r, c = size(x)
    nm = div(c, 3)
    x3 = reshape(x, r, 3, nm)
    point = nanmean.([x3[:, k, :] for k in 1:3])   # per-axis mean; the slices copy here (rotate2 avoids this with @views)
    R = AngleAxis(θ, axis...)
    r3 = similar(x3)
    point_reshaped = reshape(point, 1, 3)
    tmp = Matrix{Float64}(undef, r, 3)
    @views for j in 1:nm
        tmp = x3[:, :, j] .- point_reshaped   # rebinds tmp, allocating a new matrix every iteration
        mul!(r3[:, :, j], tmp, R')
        r3[:, :, j] .+= point_reshaped
    end
    x2 = reshape(r3, r, c)
end
function rotate2(x, θ, axis)
    θ = (θ * π) / 180
    r, c = size(x)
    nm = div(c, 3)
    x3 = reshape(x, r, 3, nm)
    point = @views nanmean.([x3[:, k, :] for k in 1:3])   # views avoid copying the slices
    R = AngleAxis(θ, axis...)
    r3 = similar(x3)
    point_reshaped = reshape(point, 1, 3)
    tmp = Matrix{Float64}(undef, r, 3)
    @views for j in axes(x3, 3)
        tmp .= x3[:, :, j] .- point_reshaped   # in-place: reuses the preallocated buffer
        mul!(r3[:, :, j], tmp, R')
        r3[:, :, j] .+= point_reshaped
    end
    x2 = reshape(r3, r, c)
end
function rotate3(x, θ, axis)
    θ = (θ * π) / 180
    r, c = size(x)
    nm = div(c, 3)
    x3 = reshape(x, r, 3, nm)
    point = @views nanmean.([x3[:, k, :] for k in 1:3])
    R = AngleAxis(θ, axis...)
    r3 = similar(x3)
    tmp = Matrix{Float64}(undef, r, 3)
    @views for j in axes(x3, 3)
        tmp .= x3[:, :, j]          # rotate without centering inside the loop
        mul!(r3[:, :, j], tmp, R')
    end
    # (x - p)R' + p == xR' + (p - pR'), so the pivot correction is a single constant row
    r3 .+= reshape(point, 1, 3) - reshape(point, 1, 3) * R'
    x2 = reshape(r3, r, c)
end
function run_test()
    x = randn(20700, 63)
    y1 = rotate(x, 90, [1 0 0])   # warmup
    y2 = rotate2(x, 90, [1 0 0])  # warmup
    y3 = rotate3(x, 90, [1 0 0])  # warmup
    @assert all(y1 .≈ y2 .≈ y3)
    display("all versions match")
    display(@benchmark rotate($x, 90, [1 0 0]))
    display(@benchmark rotate2($x, 90, [1 0 0]))
    display(@benchmark rotate3($x, 90, [1 0 0]))
end

run_test()