Polyester mentions that it
moves arrays to threads by turning them into StrideArraysCore.PtrArrays. This means that under an
@batch
slices will create views by default(!). You may want to start Julia with --check-bounds=yes while debugging.
but it may also cause issues when passing the hermitian conjugate (complex transpose) m'
?
case in point, consider this matrix multiplication function (T in sum_ab is for later use with dual numbers):
using Polyester
function sum_ab(a, b, i, j, _::T) where {T}
ab_sum = zero(T)
for k in axes(a, 2)
ab_sum = muladd(a[i, k], b[k, j], ab_sum)
end
ab_sum
end
function mmul!(c, a, b)
c1 = c[1]
@batch for i in axes(c, 1)
for j in axes(c, 2)
c[i, j] = sum_ab(a, b, i, j, c1)
end
end
c
end
x, y, z = [rand(5, 5) for _ in 1:3];
mmul!(z, x, y')
z ≈ x * y' # true/expected
xc, yc, zc = [rand(ComplexF64, 5, 5) for _ in 1:3];
mmul!(zc, xc, yc')
zc ≈ xc * yc' # false!!
zc ≈ xc * conj.(yc)' # true!!
disable_polyester_threads() do
mmul!(zc, xc, yc')
end
zc ≈ xc * yc' # true/expected
how do one correctly write the functions involving passing m'
for m::Matrix{ComplexF64}
? manually set conjugate for xomplex inputs seems hacky