This is an addendum to report and fix a mistake that slowed the execution speed of the unit test by a factor of about 4.2.
Before the fix:
julia> @btime utest(false)
19.200 μs (479 allocations: 14.75 KiB)
After the fix:
julia> @btime utest(false)
4.571 μs (77 allocations: 8.70 KiB)
My mistake:
res = Vector{Float32}(undef,nField)
I used that line of code in several of the overloaded operator implementations to allocate stack memory for a result (“res”) vector. While it was efficient to use undef to avoid unnecessarily zeroing out memory that would soon be set by calculations, it was inefficient to use nField that is defined outside the function. And that slow outside-the-function reference was totally unnecessary given that the length of the result vector is implicitly defined in the function’s vector argument(s).
The fix:
Using length() to calculate the result vector length within the overloaded operator functions would be a reasonable fix. However, I noticed that each of the overloaded operator functions contain at least one component that can be calculated by a vector broadcast operation. Initializing the result vector with the result of that vector broadcast operation not only allocates the result vector memory on the stack, it also slightly reduces the rather lengthy list of multiplies and accumulates in the calculation of the result vector. For example,
# function Base.:*(a::Vector{Float32},b::Vector{Float32})
# res = Vector{Float32}(undef,nField)
# res[1]=b[1]*a[1]+b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
# res[2]=b[2]*a[1]+b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
# res[3]=b[3]*a[1]+b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
# res[4]=b[4]*a[1]+b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
# res[5]=b[5]*a[1]-b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
# res[6]=b[6]*a[1]+b[3]*a[2]-b[2]*a[3]-b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[9]*a[7]+b[10]*a[8]+b[7]*a[9]-b[8]*a[10]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]+b[15]*a[14]-b[14]*a[15]-b[11]*a[16]
# res[7]=b[7]*a[1]+b[4]*a[2]+b[12]*a[3]-b[2]*a[4]-b[14]*a[5]+b[9]*a[6]+b[1]*a[7]-b[11]*a[8]-b[6]*a[9]-b[16]*a[10]+b[8]*a[11]+b[3]*a[12]+b[15]*a[13]-b[5]*a[14]-b[13]*a[15]-b[10]*a[16]
# res[8]=b[8]*a[1]+b[5]*a[2]-b[13]*a[3]+b[14]*a[4]-b[2]*a[5]-b[10]*a[6]+b[11]*a[7]+b[1]*a[8]-b[16]*a[9]+b[6]*a[10]-b[7]*a[11]+b[15]*a[12]-b[3]*a[13]+b[4]*a[14]-b[12]*a[15]-b[9]*a[16]
# res[9]=b[9]*a[1]+b[4]*a[3]-b[3]*a[4]+b[15]*a[5]+b[1]*a[9]+b[11]*a[10]-b[10]*a[11]+b[5]*a[15]
# res[10]=b[10]*a[1]-b[5]*a[3]+b[15]*a[4]+b[3]*a[5]-b[11]*a[9]+b[1]*a[10]+b[9]*a[11]+b[4]*a[15]
# res[11]=b[11]*a[1]+b[15]*a[3]+b[5]*a[4]-b[4]*a[5]+b[10]*a[9]-b[9]*a[10]+b[1]*a[11]+b[3]*a[15]
# res[12]=b[12]*a[1]-b[9]*a[2]+b[7]*a[3]-b[6]*a[4]+b[16]*a[5]-b[4]*a[6]+b[3]*a[7]-b[15]*a[8]-b[2]*a[9]+b[14]*a[10]-b[13]*a[11]+b[1]*a[12]+b[11]*a[13]-b[10]*a[14]+b[8]*a[15]-b[5]*a[16]
# res[13]=b[13]*a[1]-b[10]*a[2]-b[8]*a[3]+b[16]*a[4]+b[6]*a[5]+b[5]*a[6]-b[15]*a[7]-b[3]*a[8]-b[14]*a[9]-b[2]*a[10]+b[12]*a[11]-b[11]*a[12]+b[1]*a[13]+b[9]*a[14]+b[7]*a[15]-b[4]*a[16]
# res[14]=b[14]*a[1]-b[11]*a[2]+b[16]*a[3]+b[8]*a[4]-b[7]*a[5]-b[15]*a[6]-b[5]*a[7]+b[4]*a[8]+b[13]*a[9]-b[12]*a[10]-b[2]*a[11]+b[10]*a[12]-b[9]*a[13]+b[1]*a[14]+b[6]*a[15]-b[3]*a[16]
# res[15]=b[15]*a[1]+b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
# res[16]=b[16]*a[1]+b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
# return res
# end
is now
function Base.:*(a::Vector{Float32},b::Vector{Float32})
res = b .* a[1]
res[1] += b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
res[2] += b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
res[3] += b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
res[4] += b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
res[5] += -b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
res[6] += b[3]*a[2]-b[2]*a[3]-b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[9]*a[7]+b[10]*a[8]+b[7]*a[9]-b[8]*a[10]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]+b[15]*a[14]-b[14]*a[15]-b[11]*a[16]
res[7] += b[4]*a[2]+b[12]*a[3]-b[2]*a[4]-b[14]*a[5]+b[9]*a[6]+b[1]*a[7]-b[11]*a[8]-b[6]*a[9]-b[16]*a[10]+b[8]*a[11]+b[3]*a[12]+b[15]*a[13]-b[5]*a[14]-b[13]*a[15]-b[10]*a[16]
res[8] += b[5]*a[2]-b[13]*a[3]+b[14]*a[4]-b[2]*a[5]-b[10]*a[6]+b[11]*a[7]+b[1]*a[8]-b[16]*a[9]+b[6]*a[10]-b[7]*a[11]+b[15]*a[12]-b[3]*a[13]+b[4]*a[14]-b[12]*a[15]-b[9]*a[16]
res[9] += b[4]*a[3]-b[3]*a[4]+b[15]*a[5]+b[1]*a[9]+b[11]*a[10]-b[10]*a[11]+b[5]*a[15]
res[10]+= -b[5]*a[3]+b[15]*a[4]+b[3]*a[5]-b[11]*a[9]+b[1]*a[10]+b[9]*a[11]+b[4]*a[15]
res[11]+= b[15]*a[3]+b[5]*a[4]-b[4]*a[5]+b[10]*a[9]-b[9]*a[10]+b[1]*a[11]+b[3]*a[15]
res[12]+= -b[9]*a[2]+b[7]*a[3]-b[6]*a[4]+b[16]*a[5]-b[4]*a[6]+b[3]*a[7]-b[15]*a[8]-b[2]*a[9]+b[14]*a[10]-b[13]*a[11]+b[1]*a[12]+b[11]*a[13]-b[10]*a[14]+b[8]*a[15]-b[5]*a[16]
res[13]+= -b[10]*a[2]-b[8]*a[3]+b[16]*a[4]+b[6]*a[5]+b[5]*a[6]-b[15]*a[7]-b[3]*a[8]-b[14]*a[9]-b[2]*a[10]+b[12]*a[11]-b[11]*a[12]+b[1]*a[13]+b[9]*a[14]+b[7]*a[15]-b[4]*a[16]
res[14]+= -b[11]*a[2]+b[16]*a[3]+b[8]*a[4]-b[7]*a[5]-b[15]*a[6]-b[5]*a[7]+b[4]*a[8]+b[13]*a[9]-b[12]*a[10]-b[2]*a[11]+b[10]*a[12]-b[9]*a[13]+b[1]*a[14]+b[6]*a[15]-b[3]*a[16]
res[15]+= b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
res[16]+= b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
return res
end
Here is the current version of pga.jl including the fix:
# pga.jl : reference implementation of Projective Geometric Algebra
#
# This is a Julia port of pga3d.cpp, bivector.net's C++
# reference implementation of projective geometric algebra
# that is available at https://bivector.net/tools.html
#
# The implementation of pga3d.cpp, and this port of it,
# are currently limited to projective geometric algebra in
# 3D space. However, I'm looking forward to extending this
# implementation to projective geometric algebra in 2D, 4D,
# and possibly 5D spaces. That planned extension is the
# reason this file is named pga.jl instead of pga3d.jl.
#
using Printf
# define number of dimensions in space (e.g., 2D, 3D, 4D)
nD = 3
if (nD == 1)
nField = 0 # TODO: make nonzero after implementation
elseif (nD == 2)
nField = 0 # TODO: make nonzero after implementation
# define multivector field names
basis = [ # iField
"1" # 1
"e0" # 2 basis vectors
"e1" # 3
"e2" # 4
"e01" # 5 basis bivectors
"e02" # 6
"e12" # 7
"e021" # 8 basis trivectors
"e0123" # 9 pseudoscalar
]
elseif (nD == 3)
nField = 16
# define multivector field names
basis = [ # iField
"1" # 1
"e0" # 2 basis vectors
"e1" # 3
"e2" # 4
"e3" # 5
"e01" # 6 basis bivectors
"e02" # 7
"e03" # 8
"e12" # 9
"e31" # 10
"e23" # 11
"e021" # 12 basis trivectors
"e013" # 13
"e032" # 14
"e123" # 15
"e0123" # 16 pseudoscalar
]
# define basis vectors
e0 = zeros(Float32, nField); e0[2] = 1
e1 = zeros(Float32, nField); e1[3] = 1
e2 = zeros(Float32, nField); e2[4] = 1
e3 = zeros(Float32, nField); e3[5] = 1
# operator overloads dependent upon dimension
function Base.:*(a::Vector{Float32},b::Vector{Float32})
res = b .* a[1]
res[1] += b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
res[2] += b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
res[3] += b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
res[4] += b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
res[5] += -b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
res[6] += b[3]*a[2]-b[2]*a[3]-b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[9]*a[7]+b[10]*a[8]+b[7]*a[9]-b[8]*a[10]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]+b[15]*a[14]-b[14]*a[15]-b[11]*a[16]
res[7] += b[4]*a[2]+b[12]*a[3]-b[2]*a[4]-b[14]*a[5]+b[9]*a[6]+b[1]*a[7]-b[11]*a[8]-b[6]*a[9]-b[16]*a[10]+b[8]*a[11]+b[3]*a[12]+b[15]*a[13]-b[5]*a[14]-b[13]*a[15]-b[10]*a[16]
res[8] += b[5]*a[2]-b[13]*a[3]+b[14]*a[4]-b[2]*a[5]-b[10]*a[6]+b[11]*a[7]+b[1]*a[8]-b[16]*a[9]+b[6]*a[10]-b[7]*a[11]+b[15]*a[12]-b[3]*a[13]+b[4]*a[14]-b[12]*a[15]-b[9]*a[16]
res[9] += b[4]*a[3]-b[3]*a[4]+b[15]*a[5]+b[1]*a[9]+b[11]*a[10]-b[10]*a[11]+b[5]*a[15]
res[10]+= -b[5]*a[3]+b[15]*a[4]+b[3]*a[5]-b[11]*a[9]+b[1]*a[10]+b[9]*a[11]+b[4]*a[15]
res[11]+= b[15]*a[3]+b[5]*a[4]-b[4]*a[5]+b[10]*a[9]-b[9]*a[10]+b[1]*a[11]+b[3]*a[15]
res[12]+= -b[9]*a[2]+b[7]*a[3]-b[6]*a[4]+b[16]*a[5]-b[4]*a[6]+b[3]*a[7]-b[15]*a[8]-b[2]*a[9]+b[14]*a[10]-b[13]*a[11]+b[1]*a[12]+b[11]*a[13]-b[10]*a[14]+b[8]*a[15]-b[5]*a[16]
res[13]+= -b[10]*a[2]-b[8]*a[3]+b[16]*a[4]+b[6]*a[5]+b[5]*a[6]-b[15]*a[7]-b[3]*a[8]-b[14]*a[9]-b[2]*a[10]+b[12]*a[11]-b[11]*a[12]+b[1]*a[13]+b[9]*a[14]+b[7]*a[15]-b[4]*a[16]
res[14]+= -b[11]*a[2]+b[16]*a[3]+b[8]*a[4]-b[7]*a[5]-b[15]*a[6]-b[5]*a[7]+b[4]*a[8]+b[13]*a[9]-b[12]*a[10]-b[2]*a[11]+b[10]*a[12]-b[9]*a[13]+b[1]*a[14]+b[6]*a[15]-b[3]*a[16]
res[15]+= b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
res[16]+= b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
return res
end
# function Base.:*(a::Vector{Float32},b::Vector{Float32})
# res = Vector{Float32}(undef,nField)
# res[1]=b[1]*a[1]+b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
# res[2]=b[2]*a[1]+b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
# res[3]=b[3]*a[1]+b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
# res[4]=b[4]*a[1]+b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
# res[5]=b[5]*a[1]-b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
# res[6]=b[6]*a[1]+b[3]*a[2]-b[2]*a[3]-b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[9]*a[7]+b[10]*a[8]+b[7]*a[9]-b[8]*a[10]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]+b[15]*a[14]-b[14]*a[15]-b[11]*a[16]
# res[7]=b[7]*a[1]+b[4]*a[2]+b[12]*a[3]-b[2]*a[4]-b[14]*a[5]+b[9]*a[6]+b[1]*a[7]-b[11]*a[8]-b[6]*a[9]-b[16]*a[10]+b[8]*a[11]+b[3]*a[12]+b[15]*a[13]-b[5]*a[14]-b[13]*a[15]-b[10]*a[16]
# res[8]=b[8]*a[1]+b[5]*a[2]-b[13]*a[3]+b[14]*a[4]-b[2]*a[5]-b[10]*a[6]+b[11]*a[7]+b[1]*a[8]-b[16]*a[9]+b[6]*a[10]-b[7]*a[11]+b[15]*a[12]-b[3]*a[13]+b[4]*a[14]-b[12]*a[15]-b[9]*a[16]
# res[9]=b[9]*a[1]+b[4]*a[3]-b[3]*a[4]+b[15]*a[5]+b[1]*a[9]+b[11]*a[10]-b[10]*a[11]+b[5]*a[15]
# res[10]=b[10]*a[1]-b[5]*a[3]+b[15]*a[4]+b[3]*a[5]-b[11]*a[9]+b[1]*a[10]+b[9]*a[11]+b[4]*a[15]
# res[11]=b[11]*a[1]+b[15]*a[3]+b[5]*a[4]-b[4]*a[5]+b[10]*a[9]-b[9]*a[10]+b[1]*a[11]+b[3]*a[15]
# res[12]=b[12]*a[1]-b[9]*a[2]+b[7]*a[3]-b[6]*a[4]+b[16]*a[5]-b[4]*a[6]+b[3]*a[7]-b[15]*a[8]-b[2]*a[9]+b[14]*a[10]-b[13]*a[11]+b[1]*a[12]+b[11]*a[13]-b[10]*a[14]+b[8]*a[15]-b[5]*a[16]
# res[13]=b[13]*a[1]-b[10]*a[2]-b[8]*a[3]+b[16]*a[4]+b[6]*a[5]+b[5]*a[6]-b[15]*a[7]-b[3]*a[8]-b[14]*a[9]-b[2]*a[10]+b[12]*a[11]-b[11]*a[12]+b[1]*a[13]+b[9]*a[14]+b[7]*a[15]-b[4]*a[16]
# res[14]=b[14]*a[1]-b[11]*a[2]+b[16]*a[3]+b[8]*a[4]-b[7]*a[5]-b[15]*a[6]-b[5]*a[7]+b[4]*a[8]+b[13]*a[9]-b[12]*a[10]-b[2]*a[11]+b[10]*a[12]-b[9]*a[13]+b[1]*a[14]+b[6]*a[15]-b[3]*a[16]
# res[15]=b[15]*a[1]+b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
# res[16]=b[16]*a[1]+b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
# return res
# end
# # NOTE:
# # According to @btime utest(false, this attempt at speeding up this calculation
# # by vectorizing multiplications caused a 4.8 times slower unit test.
# # Therefore, this vectorized approach is commented out. However, some other
# # vectorized multiplication may be faster, particularly if implemented in a GPU.
# function Base.:*(a::Vector{Float32},b::Vector{Float32})
# res = b .* a[1]
#
# tmp = b[[3,1,1,9,10,3,4,5,4,5,15,9,10,11,11,15]] .*
# a[[3,2,3,3, 3,2,2,2,3,3, 3,2, 2, 2, 3, 2]]
# tmp[[5,10,12,13,14]] .*= -1
# res += tmp
#
# tmp = b[[4,6,9,1,11,2,12,13,3,15,5,7,8,16,10,14]] .*
# a[[4,3,4,4, 4,3, 3, 3,4, 4,4,3,3, 3, 4, 3]]
# tmp[[2,3,6,8,9,13]] .*= -1
# res += tmp
#
# tmp = b[[5,7,10,11,1,12,2,14,15,3,4,6,16,8,9,13]] .*
# a[[5,4, 5, 5,5, 4,4, 4, 5,5,5,4, 4,4,5, 4]]
# tmp[[2,4,6,7,11,12]] .*= -1
# res += tmp
#
# tmp = b[[9,8,4,3,15,13,14,2,1,11,10,16,6,7,5,12]] .*
# a[[9,5,9,9, 9, 5, 5,5,9, 9, 9, 5,5,5,9, 5]]
# tmp[[1,2,4,5,7,8,10,14]] .*= -1
# res += tmp
#
# tmp = b[[10,3, 5,15, 3,1,9,10,11, 1, 9,4,5,15, 4,11]] .*
# a[[10,6,10,10,10,6,6, 6,10,10,10,6,6, 6,10, 6]]
# tmp[[1,3,4,8,11,12,14]] .*= -1
# res += tmp
#
# tmp = b[[11,4,15, 5, 4,9,1,11,10, 9, 1,3,15,5, 3,10]] .*
# a[[11,7,11,11,11,7,7, 7,11,11,11,7, 7,7,11, 7]]
# tmp[[1,3,5,6,9,13,14]] .*= -1
# res += tmp
#
# tmp = b[[15,5,11,10, 9,10,11,1, 5, 4, 3,15,3,4, 1,9]] .*
# a[[15,8,15,15,15, 8, 8,8,15,15,15, 8,8,8,15,8]]
# tmp[[1,3,4,5,7,12,13]] .*= -1
# res += tmp
#
# tmp = b[[12,7,6,16,2,14,13,8]] .* a[9]
# tmp[[3,4,5,6]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[13,8,16,6,14,2,12,7]] .* a[10]
# tmp[[2,3,6,7]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[14,16,8,7,13,12,2,6]] .* a[11]
# tmp[[2,4,5,7]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[9,4,3,15,1,11,10,5]] .* a[12]
# tmp[[2,6,8]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[10,5,15,3,11,1,9,4]] .* a[13]
# tmp[[4,7,8]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[11,15,5,4,10,9,1,3]] .* a[14]
# tmp[[3,5,8]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[16,14,13,12,8,7,6,2]] .* a[15]
# tmp[[2,3,4,8]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# tmp = b[[15,11,10,9,5,4,3,1]] .* a[16]
# tmp[[1,2,3,4,5,6,7]] .*= -1
# res[[2,6,7,8,12,13,14,16]] .+= tmp
#
# return res
# end
function Base.:&(a::Vector{Float32},b::Vector{Float32})
res = a .* b[16]
res[1] += a[2]*b[15]*-1+a[3]*b[14]*-1+a[4]*b[13]*-1+a[5]*b[12]*-1+a[6]*b[11]+a[7]*b[10]+a[8]*b[9]+a[9]*b[8]+a[10]*b[7]+a[11]*b[6]-a[12]*-1*b[5]-a[13]*-1*b[4]-a[14]*-1*b[3]-a[15]*-1*b[2]+a[16]*b[1]
res[2] += a[6]*b[14]*-1+a[7]*b[13]*-1+a[8]*b[12]*-1+a[12]*-1*b[8]+a[13]*-1*b[7]+a[14]*-1*b[6]+a[16]*b[2]
res[3] += -a[6]*b[15]*-1+a[9]*b[13]*-1-a[10]*b[12]*-1-a[12]*-1*b[10]+a[13]*-1*b[9]-a[15]*-1*b[6]+a[16]*b[3]
res[4] += -a[7]*b[15]*-1-a[9]*b[14]*-1+a[11]*b[12]*-1+a[12]*-1*b[11]-a[14]*-1*b[9]-a[15]*-1*b[7]+a[16]*b[4]
res[5] += -a[8]*b[15]*-1+a[10]*b[14]*-1-a[11]*b[13]*-1-a[13]*-1*b[11]+a[14]*-1*b[10]-a[15]*-1*b[8]+a[16]*b[5]
res[6] += a[12]*-1*b[13]*-1-a[13]*-1*b[12]*-1+a[16]*b[6]
res[7] += -a[12]*-1*b[14]*-1+a[14]*-1*b[12]*-1+a[16]*b[7]
res[8] += a[13]*-1*b[14]*-1-a[14]*-1*b[13]*-1+a[16]*b[8]
res[9] += a[12]*-1*b[15]*-1-a[15]*-1*b[12]*-1+a[16]*b[9]
res[10]+= a[13]*-1*b[15]*-1-a[15]*-1*b[13]*-1+a[16]*b[10]
res[11]+= a[14]*-1*b[15]*-1-a[15]*-1*b[14]*-1+a[16]*b[11]
res[12]+= a[16]*b[12]
res[13]+= a[16]*b[13]
res[14]+= a[16]*b[14]
res[15]+= a[16]*b[15]
return res
end
# function Base.:&(a::Vector{Float32},b::Vector{Float32})
# res = Vector{Float32}(undef,nField)
# res[16]=1*(a[16]*b[16])
# res[15]=-1*(a[15]*-1*b[16]+a[16]*b[15]*-1)
# res[14]=-1*(a[14]*-1*b[16]+a[16]*b[14]*-1)
# res[13]=-1*(a[13]*-1*b[16]+a[16]*b[13]*-1)
# res[12]=-1*(a[12]*-1*b[16]+a[16]*b[12]*-1)
# res[11]=1*(a[11]*b[16]+a[14]*-1*b[15]*-1-a[15]*-1*b[14]*-1+a[16]*b[11])
# res[10]=1*(a[10]*b[16]+a[13]*-1*b[15]*-1-a[15]*-1*b[13]*-1+a[16]*b[10])
# res[9]=1*(a[9]*b[16]+a[12]*-1*b[15]*-1-a[15]*-1*b[12]*-1+a[16]*b[9])
# res[8]=1*(a[8]*b[16]+a[13]*-1*b[14]*-1-a[14]*-1*b[13]*-1+a[16]*b[8])
# res[7]=1*(a[7]*b[16]-a[12]*-1*b[14]*-1+a[14]*-1*b[12]*-1+a[16]*b[7])
# res[6]=1*(a[6]*b[16]+a[12]*-1*b[13]*-1-a[13]*-1*b[12]*-1+a[16]*b[6])
# res[5]=1*(a[5]*b[16]-a[8]*b[15]*-1+a[10]*b[14]*-1-a[11]*b[13]*-1-a[13]*-1*b[11]+a[14]*-1*b[10]-a[15]*-1*b[8]+a[16]*b[5])
# res[4]=1*(a[4]*b[16]-a[7]*b[15]*-1-a[9]*b[14]*-1+a[11]*b[12]*-1+a[12]*-1*b[11]-a[14]*-1*b[9]-a[15]*-1*b[7]+a[16]*b[4])
# res[3]=1*(a[3]*b[16]-a[6]*b[15]*-1+a[9]*b[13]*-1-a[10]*b[12]*-1-a[12]*-1*b[10]+a[13]*-1*b[9]-a[15]*-1*b[6]+a[16]*b[3])
# res[2]=1*(a[2]*b[16]+a[6]*b[14]*-1+a[7]*b[13]*-1+a[8]*b[12]*-1+a[12]*-1*b[8]+a[13]*-1*b[7]+a[14]*-1*b[6]+a[16]*b[2])
# res[1]=1*(a[1]*b[16]+a[2]*b[15]*-1+a[3]*b[14]*-1+a[4]*b[13]*-1+a[5]*b[12]*-1+a[6]*b[11]+a[7]*b[10]+a[8]*b[9]+a[9]*b[8]+a[10]*b[7]+a[11]*b[6]-a[12]*-1*b[5]-a[13]*-1*b[4]-a[14]*-1*b[3]-a[15]*-1*b[2]+a[16]*b[1])
# return res
# end
function Base.:|(a::Vector{Float32},b::Vector{Float32})
res = b .* a[1]
res[1] += b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
res[2] += b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
res[3] += b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
res[4] += b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
res[5] += -b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
res[6] += -b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]-b[11]*a[16]
res[7] += b[12]*a[3]-b[14]*a[5]+b[1]*a[7]-b[16]*a[10]+b[3]*a[12]-b[5]*a[14]-b[10]*a[16]
res[8] += -b[13]*a[3]+b[14]*a[4]+b[1]*a[8]-b[16]*a[9]-b[3]*a[13]+b[4]*a[14]-b[9]*a[16]
res[9] += b[15]*a[5]+b[1]*a[9]+b[5]*a[15]
res[10]+= b[15]*a[4]+b[1]*a[10]+b[4]*a[15]
res[11]+= b[15]*a[3]+b[1]*a[11]+b[3]*a[15]
res[12]+= b[16]*a[5]+b[1]*a[12]-b[5]*a[16]
res[13]+= b[16]*a[4]+b[1]*a[13]-b[4]*a[16]
res[14]+= b[16]*a[3]+b[1]*a[14]-b[3]*a[16]
res[15]+= b[1]*a[15]
res[16]+= b[1]*a[16]
return res
end
# function Base.:|(a::Vector{Float32},b::Vector{Float32})
# res = Vector{Float32}(undef,nField)
# res[1]=b[1]*a[1]+b[3]*a[3]+b[4]*a[4]+b[5]*a[5]-b[9]*a[9]-b[10]*a[10]-b[11]*a[11]-b[15]*a[15]
# res[2]=b[2]*a[1]+b[1]*a[2]-b[6]*a[3]-b[7]*a[4]-b[8]*a[5]+b[3]*a[6]+b[4]*a[7]+b[5]*a[8]+b[12]*a[9]+b[13]*a[10]+b[14]*a[11]+b[9]*a[12]+b[10]*a[13]+b[11]*a[14]+b[16]*a[15]-b[15]*a[16]
# res[3]=b[3]*a[1]+b[1]*a[3]-b[9]*a[4]+b[10]*a[5]+b[4]*a[9]-b[5]*a[10]-b[15]*a[11]-b[11]*a[15]
# res[4]=b[4]*a[1]+b[9]*a[3]+b[1]*a[4]-b[11]*a[5]-b[3]*a[9]-b[15]*a[10]+b[5]*a[11]-b[10]*a[15]
# res[5]=b[5]*a[1]-b[10]*a[3]+b[11]*a[4]+b[1]*a[5]-b[15]*a[9]+b[3]*a[10]-b[4]*a[11]-b[9]*a[15]
# res[6]=b[6]*a[1]-b[12]*a[4]+b[13]*a[5]+b[1]*a[6]-b[16]*a[11]-b[4]*a[12]+b[5]*a[13]-b[11]*a[16]
# res[7]=b[7]*a[1]+b[12]*a[3]-b[14]*a[5]+b[1]*a[7]-b[16]*a[10]+b[3]*a[12]-b[5]*a[14]-b[10]*a[16]
# res[8]=b[8]*a[1]-b[13]*a[3]+b[14]*a[4]+b[1]*a[8]-b[16]*a[9]-b[3]*a[13]+b[4]*a[14]-b[9]*a[16]
# res[9]=b[9]*a[1]+b[15]*a[5]+b[1]*a[9]+b[5]*a[15]
# res[10]=b[10]*a[1]+b[15]*a[4]+b[1]*a[10]+b[4]*a[15]
# res[11]=b[11]*a[1]+b[15]*a[3]+b[1]*a[11]+b[3]*a[15]
# res[12]=b[12]*a[1]+b[16]*a[5]+b[1]*a[12]-b[5]*a[16]
# res[13]=b[13]*a[1]+b[16]*a[4]+b[1]*a[13]-b[4]*a[16]
# res[14]=b[14]*a[1]+b[16]*a[3]+b[1]*a[14]-b[3]*a[16]
# res[15]=b[15]*a[1]+b[1]*a[15]
# res[16]=b[16]*a[1]+b[1]*a[16]
# return res
# end
function Base.:^(a::Vector{Float32},b::Vector{Float32})
res = b .* a[1]
res[2] += b[1]*a[2]
res[3] += b[1]*a[3]
res[4] += b[1]*a[4]
res[5] += b[1]*a[5]
res[6] += b[3]*a[2]-b[2]*a[3]+b[1]*a[6]
res[7] += b[4]*a[2]-b[2]*a[4]+b[1]*a[7]
res[8] += b[5]*a[2]-b[2]*a[5]+b[1]*a[8]
res[9] += b[4]*a[3]-b[3]*a[4]+b[1]*a[9]
res[10]+= -b[5]*a[3]+b[3]*a[5]+b[1]*a[10]
res[11]+= b[5]*a[4]-b[4]*a[5]+b[1]*a[11]
res[12]+= -b[9]*a[2]+b[7]*a[3]-b[6]*a[4]-b[4]*a[6]+b[3]*a[7]-b[2]*a[9]+b[1]*a[12]
res[13]+= -b[10]*a[2]-b[8]*a[3]+b[6]*a[5]+b[5]*a[6]-b[3]*a[8]-b[2]*a[10]+b[1]*a[13]
res[14]+= -b[11]*a[2]+b[8]*a[4]-b[7]*a[5]-b[5]*a[7]+b[4]*a[8]-b[2]*a[11]+b[1]*a[14]
res[15]+= b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
res[16]+= b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
return res
end
# function Base.:^(a::Vector{Float32},b::Vector{Float32})
# res = Vector{Float32}(undef,nField)
# res[1]=b[1]*a[1]
# res[2]=b[2]*a[1]+b[1]*a[2]
# res[3]=b[3]*a[1]+b[1]*a[3]
# res[4]=b[4]*a[1]+b[1]*a[4]
# res[5]=b[5]*a[1]+b[1]*a[5]
# res[6]=b[6]*a[1]+b[3]*a[2]-b[2]*a[3]+b[1]*a[6]
# res[7]=b[7]*a[1]+b[4]*a[2]-b[2]*a[4]+b[1]*a[7]
# res[8]=b[8]*a[1]+b[5]*a[2]-b[2]*a[5]+b[1]*a[8]
# res[9]=b[9]*a[1]+b[4]*a[3]-b[3]*a[4]+b[1]*a[9]
# res[10]=b[10]*a[1]-b[5]*a[3]+b[3]*a[5]+b[1]*a[10]
# res[11]=b[11]*a[1]+b[5]*a[4]-b[4]*a[5]+b[1]*a[11]
# res[12]=b[12]*a[1]-b[9]*a[2]+b[7]*a[3]-b[6]*a[4]-b[4]*a[6]+b[3]*a[7]-b[2]*a[9]+b[1]*a[12]
# res[13]=b[13]*a[1]-b[10]*a[2]-b[8]*a[3]+b[6]*a[5]+b[5]*a[6]-b[3]*a[8]-b[2]*a[10]+b[1]*a[13]
# res[14]=b[14]*a[1]-b[11]*a[2]+b[8]*a[4]-b[7]*a[5]-b[5]*a[7]+b[4]*a[8]-b[2]*a[11]+b[1]*a[14]
# res[15]=b[15]*a[1]+b[11]*a[3]+b[10]*a[4]+b[9]*a[5]+b[5]*a[9]+b[4]*a[10]+b[3]*a[11]+b[1]*a[15]
# res[16]=b[16]*a[1]+b[15]*a[2]+b[14]*a[3]+b[13]*a[4]+b[12]*a[5]+b[11]*a[6]+b[10]*a[7]+b[9]*a[8]+b[8]*a[9]+b[7]*a[10]+b[6]*a[11]-b[5]*a[12]-b[4]*a[13]-b[3]*a[14]-b[2]*a[15]+b[1]*a[16]
# return res
# end
function Base.:~(a::Vector{Float32}) # reverse operator
res = copy(a)
res[[6 7 8 9 10 11 12 13 14 15]] .*= -1
return res
end
# basis bivectors (planes)
function plane(a::Float32, b::Float32, c::Float32, d::Float32)
return a*e1 + b*e2 + c*e3 + d*e0
end
# basis trivectors (points)
e123 = e1^e2^e3
e032 = e0^e3^e2
e013 = e0^e1^e3
e021 = e0^e2^e1
function point(x::Float32, y::Float32, z::Float32)
return e123 + x*e032 + y*e013 + z*e021
end
# rotate, translate, conjugate
function rotor(angle::Float32, line::Vector{Float32})
return cos(angle/2f0) + sin(angle/2f0)*normalize(line)
end
function translator(dist::Float32, line::Vector{Float32})
return 1f0 + dist/2f0 * line
end
function conjugate(a::Vector{Float32})
res = copy(a)
res[[2 3 4 5 6 8 9 10 11]] .*= -1
return res
end
# circle and torus
function circle(t::Float32, radius::Float32, line::Vector{Float32})
return rotor(t*2f0*pi,line) * translator(radius,e1*e0)
end
function torus(s::Float32, t::Float32,
r1::Float32, l1::Vector{Float32},
r2::Float32, l2::Vector{Float32})
return circle(s,r2,l2) * circle(t,r1,l1)
end
elseif (nD == 4)
nField = 0 # TODO: make nonzero after implementation
elseif (nD == 5)
nField = 0 # TODO: make nonzero after implementation
else
nField = 0
end
if (nD == 3)
end
# overload + operator
function Base.:+(a::Vector{Float32},b::Float32)
res = copy(a)
res[1] += b
return res
end
function Base.:+(a::Float32,b::Vector{Float32})
res = copy(b)
res[1] += a
return res
end
# overload - operator
function Base.:-(a::Vector{Float32},b::Vector{Float32})
return a - b
end
function Base.:-(a::Vector{Float32},b::Float32)
res = copy(a)
res[1] -= b
return res
end
function Base.:-(a::Float32,b::Vector{Float32})
res = copy(.-b)
res[1] += a
return res
end
# normalization functions
function norm(a::Vector{Float32})
return sqrt(abs((a * conjugate(a))[1]))
end
function normalize(a::Vector{Float32})
return a ./ norm(a)
end
# convert multivector fields to string
function toStr(mv::Vector{Float32})
nField = size(mv,1)
nNZField = 0
S = String[]
for iField = 1:nField
if mv[iField] != 0
if nNZField != 0
push!(S, @sprintf(" + %0.7g%s",
mv[iField], basis[iField]))
else
push!(S, @sprintf("%0.7g%s",
mv[iField],
(iField==1) ? "" : basis[iField]))
end
nNZField += 1
end
end
return string(S...)
end
## NOTE:
## According to @btime utest(false), this evaluated version
## of utest() is 98 times slower than the non-evaluated version
## (1.846 ms versus 18.9 us). Therefore, this evaluated version
## of utest() is commented out but not removed ... because it is
## a reminder to avoid calling eval() when execution speed is an
## issue.
##
## unit test
## - flgOutput should be set to false to help with benchmarking
## - nD currently does nothing because nD currently is always 3 (like in pga3d.cpp)
#function utest(flgOutput::Bool=true, nD::Int64=3)
# nError = 0
# axis_z = e1 ^ e2
# origin = axis_z ^ e3
# long_exp = quote
# to = torus(0f0, 0f0, 0.25f0, e1*e2, 0.6f0, e1*e3)
# point_on_torus = to * e123 * ~to
# end
#
# E = [ # Expression array; col 1 is GA equation, col 2 is GA to string conversion
# :(px = point(1f0,0f0,0f0)) :(toStr(px));
# :(line = origin & px) :(toStr(line));
# :(p = plane(2f0,0f0,1f0,-3f0)) :(toStr(p));
# :(rot = rotor(Float32(pi/2),e1*e2)) :(toStr(rot));
# :(rot_point = rot * px * ~rot) :(toStr(rot_point));
# :(rot_line = rot * line * ~rot) :(toStr(rot_line));
# :(rot_plane = rot * p * ~rot) :(toStr(rot_plane));
# :(point_on_plane = (p | px)*p) :(toStr(normalize(point_on_plane)));
# long_exp :(toStr(point_on_torus));
# :(tst = e0 - 1f0) :(toStr(tst));
# :(tst = -1f0 + e0) :(toStr(tst));
# ]
# S = [ # String array; col 1 is test label, col 2 is expected result
# " point : " "1e032 + 1e123";
# " line : " "-1e23";
# " plane : " "-3e0 + 2e1 + 1e3";
# " rotor : " "0.7071068 + 0.7071068e12";
# " rotated point : " "-0.9999999e013 + 0.9999999e123";
# " rotated line : " "0.9999999e31";
# " rotated plane : " "-3e0 + -2e2 + 0.9999999e3";
# " point on plane : " "0.2e021 + 1.4e032 + 1e123";
# " point on torus : " "0.85e032 + 1e123";
# " toStr() test 1 : " "-1 + 1e0";
# " toStr() test 2 : " "1 + -1e0";
# ]
#
# nTest = size(S,1)
# for iTest = 1:nTest
# eval(E[iTest,1])
# str = eval(E[iTest,2])
# if flgOutput
# isError = str != S[iTest,2]
# xChar = isError ? 'x' : ' '
# println(xChar * S[iTest,1] * str)
# if (isError)
# println(' ' * S[iTest,1] * str)
# nError += 1
# end
# end
# end
# return nError # 'x' is first character in lines with errors
#end
# unit test
# - flgOutput should be set to false to help with benchmarking
# - nD currently does nothing because nD currently is always 3 (like in pga3d.cpp)
function utest(flgOutput::Bool=true, nD::Int64=3)
nError = 0
# geometric algebra equations in programming syntax
axis_z = e1 ^ e2
origin = axis_z ^ e3
px = point(1f0, 0f0, 0f0)
line = origin & px
p = plane(2f0,0f0,1f0,-3f0)
rot = rotor(Float32(pi/2), e1*e2)
rot_point = rot * px * ~rot
rot_line = rot * line * ~rot
rot_plane = rot * p * ~rot
point_on_plane = (p | px) * p
to = torus(0f0, 0f0,
0.25f0, e1*e2,
0.6f0, e1*e3)
point_on_torus = to * e123 * ~to
tst1 = e0 - 1f0
tst2 = 1f0 - e0
# geometric algebra equations in math syntax
# axis_z = e1 ∧ e2
# if (slow) output of unit test results wanted
if flgOutput
S = Matrix{String}(undef,11,3) # 3 columns:
S[1,1] = " point : " # 1) label
S[1,2] = toStr(px) # 2) toStr()
S[1,3] = "1e032 + 1e123" # 3) expected string
S[2,1] = " line : "
S[2,2] = toStr(line)
S[2,3] = "-1e23"
S[3,1] = " plane : "
S[3,2] = toStr(p)
S[3,3] = "-3e0 + 2e1 + 1e3"
S[4,1] = " rotor : "
S[4,2] = toStr(rot)
S[4,3] = "0.7071068 + 0.7071068e12"
S[5,1] = " rotated point : "
S[5,2] = toStr(rot_point)
S[5,3] = "-0.9999999e013 + 0.9999999e123"
S[6,1] = " rotated line : "
S[6,2] = toStr(rot_line)
S[6,3] = "0.9999999e31"
S[7,1] = " rotated plane : "
S[7,2] = toStr(rot_plane)
S[7,3] = "-3e0 + -2e2 + 0.9999999e3"
S[8,1] = " point on plane : "
S[8,2] = toStr(normalize(point_on_plane))
S[8,3] = "0.2e021 + 1.4e032 + 1e123"
S[9,1] = " point on torus : "
S[9,2] = toStr(point_on_torus)
S[9,3] = "0.85e032 + 1e123"
S[10,1] = " toStr() test 1 : "
S[10,2] = toStr(tst1)
S[10,3] = "-1 + 1e0"
S[11,1] = " toStr() test 2 : "
S[11,2] = toStr(tst2)
S[11,3] = "1 + -1e0"
# print unit test results;
# 'x' in first column in tests with errors
nTest = size(S,1)
for iTest = 1:nTest
isError = S[iTest,2] != S[iTest,3]
xChar = isError ? 'x' : ' '
println(xChar * S[iTest,1] * S[iTest,2])
if (isError)
println(' ' * S[iTest,1] * S[iTest,3])
nError += 1
end
end
end
return nError # return unit test results
end