The functions shuf1 and shuf2 are two different implementations of a Fisher-Yates shuffle for tuples. Why do both allocate, even in trivial cases? The recursion depth should be small and a compile-time constant and all types should likewise be inferrable, as far as I understand.
err_bounds(t::Tuple, i::Integer) = throw(BoundsError(t, i))
without_elem_at_index(t::Tuple{}, i::Integer) = err_bounds(t, i)
axis(c) = (only ∘ axes)(c)
function without_elem_at_index(t::Tuple{Any,Vararg{Any,n}}, i::Integer) where {n}
  (i ∈ axis(t)) || err_bounds(t, i)
  f = let t = t, i = i
    j -> (j < i) ? t[j] : t[j + 1]
  end
  ntuple(f, Val(n))::NTuple{n,eltype(t)}
end
struct CatL end
struct CatR end
cat(t::Tuple, e, ::CatL = CatL()) = (e, t...)
cat(t::Tuple, e, ::CatR) = (t..., e)
shuf1(::Type, ::Any, r::Tuple, ::Tuple{}) = r
shuf1(::Type, ::Any, r::Tuple, s::Tuple{Any}) = cat(r, only(s))
function shuf1(::Type{T}, prg, r::Tuple, s::Tuple{Any,Any,Vararg{Any,n}}) where {T, n}
  i = rand(prg, Base.OneTo(T(n + 2)::T))::T
  done = cat(r, s[i])
  todo = without_elem_at_index(s, i)
  shuf1(T, prg, done, todo)
end
shuf2(::Type, ::Any, ::Tuple{}) = ()
function shuf2(::Type{T}, prg, t::Tuple{Any,Vararg{Any,n}}) where {T, n}
  i = rand(prg, Base.OneTo(T(n + 1)::T))::T
  todo = without_elem_at_index(t, i)
  cat(shuf2(T, prg, todo), t[i])
end
using Random
shuf1(prg, t::Tuple) = shuf1(UInt8, prg, (), t)
shuf2(prg, t::Tuple) = shuf2(UInt8, prg, t)
let t = ntuple((_ -> rand()), Val(1)), prg = Xoshiro()
  shuf1(prg, t)
  shuf2(prg, t)
  @allocated shuf1(prg, t)
  @allocated shuf2(prg, t)
  println(@allocated shuf1(prg, t))
  println(@allocated shuf2(prg, t))
end
The behavior is the same with v1.9, v1.10, v1.11:
julia> include("/tmp/mre.jl")
16
16
Doing something like this doesn’t even show my code in the stack, and it ends with a Tuple{Float64} GC entry:
let prg = Xoshiro(), t = (0.3,)
  @profview_allocs shuf1(prg, t) sample_rate=1
end