I had a similar issue couple of years and looks like its back. See the MWE function below:
function getdW{T<:AbstractFloat}(k::T,work::AbstractVector{T})
local sqrt2l = 1.41421356237309510
local K2c0 = 0.7542472332656508
local K2c1 = -0.2
local K2c2 = 0.08081220356417687
local K2c3 = -0.031746031746031744
local K2c4 = 0.012244273267299524
local K2c5 = -2.0/429.0
local K2c6 = 8.0 * 1.41421356237309510/6435.0
local K2c7 = -8.0/12155.0
local K2c8 = 8.0*1.41421356237309510/46189.0
local k00 = 0.3535533905932738
local k02 = 3.00*k00*0.25
local k03 = -2.00/3.0
local k04 = 15.0*1.41421356237309510/128.0
local k05 = -2.0/5.0
local k06 = 35.0*1.41421356237309510/512.0
local k07 = -8/35.0
local k08 = 315.0*1.41421356237309510/8192.0
local sqrt2by4 = sqrt2l*0.25
local dk01 = 3.00*sqrt2by4/2.00
local dk02 = -2.0
local dk03 = 15.00*sqrt2l/32.0
local dk04 = -2.0
local dK2c0 = -1.0/5.0
local dK2c1 = 4.0*sqrt2l/35.0
local dK2c2 = -2.0/21.0
local dK2c3 = 8.0*sqrt2l/231.0
local dK2c4 = -10.0/429.0
local dK2c5 = 16.0*sqrt2l/2145.0
local dK2c6 = -56.0/12155.0
local dK2c7 = 64.0*sqrt2l/46189.0
W::T = 0.0
dW::T = 0.0
t2::T = k*k
m = (2.0-t2)
bym = 1.0/m
if k<=-1E-3
t3 = acos(t2 - 1.0)
t6 = 2.0*Ď - t3
W = (t6 * sqrt(bym) - k)*bym
dW = (-2.0+3.0*W*k)*bym
elseif k >= 1.4342135623730952
t7 = (k+1.0) * (k-1.00)
t3 = log(t7 + sqrt(t7*t7-1.0))
t4 = t7-1.0
t5 = 1.0/t4
W = (-t3 * sqrt(t5) + k)*t5
dW = (-2.0+3.0*W*k)*bym
elseif k>=1E-3 && k < 1.3942135623730951::T
t3 = acos(t2 - 1.0)
t6 = t3
W = (t6 * sqrt(bym) - k)*bym
dW = (-2.0+3.0*W*k)*bym
elseif k<1E-3 # series for k ~= 0 improve convergence
xt = Ď
t3 = t2*k
t4 = t2*t2
t5 = t4*k
t6 = t3*t3
t7 = t5*t2
t8 = t4*t4
W = xt*k00 - k + (xt*k02)*t2 + k03*t3 + (xt*k04)*t4 + k05*t5 + (xt*k06)*t6 + k07*t7 + (xt*k08)*t8
dW = -1.00 + xt*dk01*k +dk02*t2 + xt*dk03*t3 + dk04*t4
elseif k < 1.4342135623730952
xt ::T = k-1.41421356237309510
t2 = xt*xt
t3 = t2*xt
t4 = t3*xt
t5 = t4*xt
t6 = t5*xt
t7 = t6*xt
t8 = t7*xt
W = K2c0 + K2c1*k + K2c2*t2 + K2c3*t3 + K2c4*t4 + K2c5*t5 + K2c6*t6 + K2c7*t7 + K2c8*t8
dW = dK2c0 + dK2c1*xt+ dK2c2*t2 + dK2c3*t3 + dK2c4*t4 + dK2c5*t5 + dK2c6*t6 + dK2c7*t7
end
work[1] = W::T
work[2] = dW::T
return nothing
end
now look at the following calls:
const work = Vector{Float64}(2)
julia> @btime getdW(-1.2,work)
29.962 ns (0 allocations: 0 bytes)
julia> @btime getdW(1.2,work)
30.806 ns (0 allocations: 0 bytes)
julia> @btime getdW(1.414,work)
86.559 ns (14 allocations: 224 bytes)
julia> @btime getdW(0.0001,work)
47.341 ns (7 allocations: 112 bytes)
The function seems to allocate memory based on which branch it is in !
This makes no sense to me as the @code_warntype shows no type instabilities and the branches are statically types. I opened a similar issue an year or so ago on Julia google forums and it seemed to be fixed but the problem has cropped up again more severely this time.
Does anyone know whats going on ? I plan to call this function millions of times and not sure how to get around this weird performance issue.
I am on Julia 0.6.
Thanks!