This is the known issue "performance of captured variables in closures" (Issue #15276, JuliaLang/julia on GitHub). You can work around it by using a `let` block to rebind the captured variable:
using BenchmarkTools
"""
    g(x)

Return the element of `x` stored at index `1`.

Marked `@noinline` so the call is kept as a real function call.
"""
@noinline function g(x)
    return x[1]
end
"""
    f1(M, ::Type{Val{N}}) where N

Return the sum, over `j = 1:M`, of `g(x)`, where `x` is the `N`-tuple
`(1 + j, 2 + j, …, N + j)`.

The tuple length `N` is passed in the type domain as `Val{N}`,
e.g. `f1(10, Val{3})`.
"""
function f1(M, ::Type{Val{N}}) where N
    s = 0
    for j in 1:M
        # Rebind `j` in a fresh `let` scope before the closure captures it; this
        # is the workaround for the captured-variable performance issue
        # (JuliaLang/julia#15276).
        let j = j
            # `ntuple` expects a `Val` *instance* (`Val(N)`); passing the type
            # `Val{N}` has no matching method on Julia 1.x and would throw.
            x = ntuple(i -> i + j, Val(N))
            s += g(x)
        end
    end
    return s
end