Hello everyone,

I have a question regarding the performance of the following MWE (minimal working example):

```
using BenchmarkTools
"""
    integrate!(f!, buf, a, b, N)

Reset `buf` to zero, then call `f!(buf, x, h)` once for each of the `N` nodes
`x = a + i * h`, `i = 1, …, N`, where `h = (b - a) / N`.

Whatever `f!` adds to `buf` on each call is left in `buf`; the function itself
returns `nothing`.
"""
function integrate!(f!, buf, a, b, N)
    width = (b - a) / N
    # Start from a clean buffer so earlier contents do not leak into the result.
    fill!(buf, 0.0)
    for k in 1:N
        node = a + k * width
        f!(buf, node, width)
    end
    return nothing
end
"""
    f!(b, x, dx)

Add the product `x * dx` to every entry of `b` in place. Returns `nothing`.
"""
function f!(b, x, dx)
    # The increment is the same for every entry, so form it once.
    increment = x * dx
    for idx in eachindex(b)
        b[idx] = b[idx] + increment
    end
    return nothing
end
"""
    g!(b, x, dx, a)

Add `a[i] * x * dx` to `b[i]` for every index `i` of `b`, in place.
Returns `nothing`.
"""
function g!(b, x, dx, a)
    for idx in eachindex(b)
        # Same left-to-right product as `a[idx] * x * dx`.
        contribution = a[idx] * x * dx
        b[idx] = b[idx] + contribution
    end
    return nothing
end
# Benchmark driver: times `integrate!` with a callback that needs no extra data
# (`f!`) and with a closure `h!` that forwards the captured vector `a` to `g!`.
let
    buf = zeros(10)
    a = rand(10)
    # `a` is assigned exactly once, so capturing it in the closure should not
    # force it into a `Box`.
    h!(b, x, dx) = g!(b, x, dx, a)
    # Values that are not global constants are interpolated with `$`, as the
    # BenchmarkTools manual recommends; otherwise the benchmark measures lookups
    # and dynamically dispatched calls on untyped values rather than the
    # integration itself. The callbacks are passed directly instead of being
    # wrapped in another anonymous function.
    #
    # Figures reported for the original, non-interpolated calls:
    #   f!: 474.847 ns (0 allocations: 0 bytes)
    #   h!: 2.857 μs (200 allocations: 3.13 KiB)
    # 200 allocations / 3.13 KiB matches two 16-byte boxed Float64 values per
    # callback invocation (100 invocations) — presumably `x` and `dx`.
    @btime integrate!(f!, $buf, 0.0, 1.0, 100)
    @btime integrate!($h!, $buf, 0.0, 1.0, 100)
end
```

Is there a way to get rid of the allocations for the integration of `h!`?

In my application I have to compute several of these integrals with different input arrays (and structs), and the performance loss seems quite substantial …