I just ran a similar benchmark, and it confirms the logic.
julia> df = DataFrame(a = rand(1_000_000), b = rand(1_000_000)); # two columns, 1_000_000 random values each
julia> a = df.a; b = df.b; # the columns bound to standalone names, used for the direct comp(a, b) call
julia> function comp(a, b)
           # Benchmark kernel: combines two equal-length numeric vectors into a new
           # vector using their means, the standard deviation of `a`, and cor(a, b).
           #
           # NOTE(review): `mean_a` is never used, and the division `mean_b ./ sd_a`
           # is evaluated before the subtraction, so `shifted` is not a z-score
           # (that would be `(a .- mean_a) ./ sd_a`). Left exactly as measured so the
           # kernel keeps matching the timings recorded below.
           mean_a = mean(a)
           mean_b = mean(b)
           rho = cor(a, b)
           sd_a = std(a)
           shifted = a .- (mean_b ./ sd_a)
           return (b .+ mean_b) .- (shifted .* rho)
       end;
julia> function make_fun_2(df)
           # Build the `[:a, :b] => fun => :c` transformation passed to `transform!`.
           # Which `fun` is used depends on whether `df` exposes both `:a` and `:b`
           # among its property names.
           pn = propertynames(df)
           if :a in pn && :b in pn
               # Same arithmetic as `comp` (including the unused `ma`), so the two
               # benchmarks do the same work.
               [:a, :b] => function(a, b)
                   ma = mean(a)
                   mb = mean(b)
                   c = cor(a, b)
                   z = (a .- mb ./ std(a))
                   # Return the vector itself: the destination name comes from the
                   # outer `=> :c`. Returning `vector => :c` here would hand
                   # `transform!` a Pair, which it treats as a scalar and repeats for
                   # every row, producing a column of Pairs and roughly doubling the
                   # memory allocated.
                   return b .+ mb .- z .* c
               end => :c
           else
               # Fallback when either column is missing: a constant string column.
               [:a, :b] => ((a, b) -> fill("hello", length(a))) => :c
           end
       end;
julia> @btime comp(a, b);
5.794 ms (4 allocations: 15.26 MiB)
julia> @btime transform!(df, [:a, :b] => comp => :c);
6.067 ms (88 allocations: 15.26 MiB) # within a margin of error from the above
julia> @btime transform!(df, make_fun_2(df));
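8.109 ms (97 allocations: 30.52 MiB) # twice the memory of the runs above: in this run the inner function returned `result => :c` (a Pair), which transform! repeats as a scalar into a 1_000_000-element column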