Allocation-efficient calculation of Hermite (and Laguerre) polynomials

After some tests, we can write code with just loops only

function ComputeBasis_No_Vec!(MX::AbstractMatrix{Float64}, X::AbstractMatrix{Float64}, op::HermiteOperation)
       
       tmp = X[:, op.init_domain]
       @views for (i, x) in enumerate(eachcol(MX))
           for j in eachindex(x)
               if i == 1 
                   x[j] = tmp[j]
                   continue
               end 
               if i == 2
                   x[j] = tmp[j] * tmp[j] - 1
                   continue
               end
               x[j] = tmp[j] * MX[j, op.range[i-1]] - (i-1) * MX[j, op.range[i - 2]]
           end
       end
       MX
       end

which is nearly 2X with my benchmarks .

julia> @btime RunTest()
  2.699 μs (17 allocations: 1.33 KiB)

julia> @btime RunTest_No_Vec()
  1.287 μs (10 allocations: 800 bytes)
2 Likes