In-place multiplication by a square matrix

Didn’t know about @simd. Thanks!

A few comments.

  1. I used view(Z,:,k) because it made a huge difference in the number of allocations. Compare the allocs estimate below (≈ 500), where the result is instead copied with Z[:,k] = y, with the one shown above (= 3):
# Variant that writes each product into Z with the slice assignment `Z[:,k] = y`.
# It is benchmarked below to compare its allocation count with the view(Z,:,k) version.
# Returns the 2×n array Z whose k-th column is the product for angle k*∆θ.
function applyAinplace2(A)
    n = 1000
    # Angular step: n equal steps cover one full turn (2π).
    ∆θ = 2π/n
    # 2×n output, one column per angle (pre-1.0 constructor form, no `undef` argument).
    Z = Array{Float64}(2,n)
    # Length-2 work buffers, allocated once and reused in every iteration.
    x = Vector{Float64}(2)
    y = Vector{Float64}(2)
    for k = 1:n  # 1000 angles
        # x is the point on the unit circle at angle k*∆θ.
        x[1] = cos(k*∆θ)
        x[2] = sin(k*∆θ)
        # Intended to store A*x into the preallocated y.
        # NOTE(review): this argument order (y, 'N', A, x) differs from the documented
        # BLAS.gemv!(tA, alpha, A, x, beta, y) — confirm which gemv! method this resolves to.
        BLAS.gemv!(y, 'N', A, x)
        # Copy y into column k of Z; this is the line that differs from the view-based version.
        Z[:,k] = y
    end

    return Z
end

julia> @benchmark applyAinplace2($A)
BenchmarkTools.Trial:
  memory estimate:  23.58 KiB
  allocs estimate:  492
  mean time:        115.919 μs (2.25% GC)

Similar trend in Julia v0.6:

julia> VERSION
v"0.6.0-dev.2673"

julia> @benchmark applyAinplace(A)
BenchmarkTools.Trial:
  memory estimate:  15.94 KiB
  allocs estimate:  3
  median time:      90.076 μs (0.00% GC)

julia> @benchmark applyAinplace2(A)
BenchmarkTools.Trial:
  memory estimate:  39.20 KiB
  allocs estimate:  1492
  mean time:        140.926 μs (1.55% GC)

Should I file an issue on this?

  2. StaticArrays helps a lot, but @simd doesn’t seem to give any extra speedup. Am I doing something wrong? I built Julia with make -j 8, though I’m not sure whether that’s relevant.
julia> function applyAstatic(A)
           # For each of n angles, multiply A by the unit-circle point (cos θ, sin θ)
           # built as an SVector, and store the two result components in column k of Z.
           # assumes A is a 2×2 static matrix (it is benchmarked below with an SArray) — TODO confirm
           n = 1000
           # Angular step: n equal steps cover one full turn (2π).
           ∆θ = 2π/n
           # 2×n output, one column per angle (pre-1.0 constructor form, no `undef` argument).
           Z = Array{Float64}(2,n)
           for k = 1:n
               θ = k*∆θ
               # The input vector is created fresh each iteration; no x/y work buffers are used here.
               y = A * SVector(cos(θ), sin(θ))
               # Write the result element by element rather than by slice assignment.
               Z[1,k] = y[1]
               Z[2,k] = y[2]
           end

           return Z
       end
applyAstatic (generic function with 1 method)

julia> function applyAstaticSIMD(A)
           # Same computation as applyAstatic; the only difference is the @simd annotation
           # on the loop, so the two can be benchmarked against each other.
           # assumes A is a 2×2 static matrix (it is benchmarked below with an SArray) — TODO confirm
           n = 1000
           # Angular step: n equal steps cover one full turn (2π).
           ∆θ = 2π/n
           # 2×n output, one column per angle (pre-1.0 constructor form, no `undef` argument).
           Z = Array{Float64}(2,n)
           # NOTE(review): the benchmarks below show no meaningful gain from @simd; presumably
           # the cos/sin calls in the body keep the loop from vectorizing — verify with @code_llvm.
           @simd for k = 1:n
               θ = k*∆θ
               # The input vector is created fresh each iteration; no x/y work buffers are used here.
               y = A * SVector(cos(θ), sin(θ))
               # Write the result element by element rather than by slice assignment.
               Z[1,k] = y[1]
               Z[2,k] = y[2]
           end

           return Z
       end
applyAstaticSIMD (generic function with 1 method)

julia> A = rand(2,2); SA = SArray{size(A)}(A);

julia> @benchmark applyAstatic($SA)
BenchmarkTools.Trial:
  memory estimate:  15.75 KiB
  allocs estimate:  1
  mean time:        31.877 μs (0.00% GC)

julia> @benchmark applyAstaticSIMD($SA)
BenchmarkTools.Trial:
  memory estimate:  15.75 KiB
  allocs estimate:  1
  mean time:        30.492 μs (0.00% GC)