I managed to write a version of my `merge`

function, that has excellent type-inference, by basically manually telling the compiler that the first results of `_mergeo`

are the same type as x:

```
struct B{T}
val::T
end
f(x::B{T}, y::B{T}) where T = B{T}(x.val + y.val) #always returns same type as x and y
__merge(f, unused_ys, x_i) = x_i, unused_ys
__merge(f, unused_ys, x_i::T, y_j::T, y...) where T = f(x_i, y_j), (unused_ys..., y...)
__merge(f, unused_ys, x_i, y_j, y...) = __merge(f, (unused_ys..., y_j), x_i, y...)
@inline _mergeo(f, results, remaining_ys) = (results, remaining_ys)
@inline function _mergeo(f, results, remaining_ys, x_i, x...)
_result, _remaining_ys = __merge(f, (), x_i, remaining_ys...)
return _mergeo(f, (results..., _result), _remaining_ys, x...)
end
@inline function mergeo(f, x::T, y) where T
let (res::T, rem) = _mergeo(f, (), y, x...)
return (res..., rem...)
end
end
```

As one can see, type inference works great:

```
julia> @code_warntype mergeo(f, (B(2.), B(1), B(0x03), B(Float16(4.))), (B(3), B(3f0), B(1.), B(0xf), B(0x0012)))
Body::Tuple{B{Float64},B{Int64},B{UInt8},B{Float16},B{Float32},B{UInt16}}
27 1 β %1 = (getfield)(x, 1)::B{Float64} β
β %2 = (getfield)(x, 2)::B{Int64} β
β %3 = (getfield)(x, 3)::B{UInt8} β
β %4 = (getfield)(x, 4)::B{Float16} β
β %5 = (getfield)(y, 1)::B{Int64} ββ» _mergeo
β %6 = (getfield)(y, 2)::B{Float32} ββ
β %7 = (getfield)(y, 3)::B{Float64} ββ
β %8 = (getfield)(y, 4)::B{UInt8} ββ
β %9 = (getfield)(y, 5)::B{UInt16} ββ
β %10 = (Base.getfield)(%1, :val)::Float64 βββ»β·β·β· __merge
β %11 = (Base.getfield)(%7, :val)::Float64 βββββββ __merge
β %12 = (Base.add_float)(%10, %11)::Float64 βββββ» __merge
β %13 = %new(B{Float64}, %12)::B{Float64} ββββββ» f
β %14 = (Core.tuple)(%5, %6, %8, %9)::Tuple{B{Int64},B{Float32},B{UInt8},B{UInt16}}βββββ
β %15 = (Core.tuple)(%13)::Tuple{B{Float64}} ββ» _mergeo
β %16 = invoke Main._mergeo(_2::Function, %15::Tuple{B{Float64}}, %14::Tuple{B{Int64},B{Float32},B{UInt8},B{UInt16}}, %2::B{Int64}, %3::B{UInt8}, %4::Vararg{Any,N} where N)::Tuple{Tuple,Tuple{B{Float32},B{UInt16}}}
β %17 = (Base.getfield)(%16, 1)::Tuple βββ» indexed_iterate
β %18 = (isa)(%17, Tuple{B{Float64},B{Int64},B{UInt8},B{Float16}})::Bool β
βββ goto #3 if not %18 β
2 β %20 = Ο (%17, Tuple{B{Float64},B{Int64},B{UInt8},B{Float16}}) β
βββ goto #4 β
3 β %22 = (Base.convert)($(Expr(:static_parameter, 1)), %17)::Tuple{B{Float64},B{Int64},B{UInt8},B{Float16}}
βββ goto #4 β
4 β %24 = Ο (#2 => %20, #3 => %22)::Tuple{B{Float64},B{Int64},B{UInt8},B{Float16}} β
β %25 = (Base.getfield)(%16, 2)::Tuple{B{Float32},B{UInt16}} ββ» indexed_iterate
28 β %26 = (getfield)(%24, 1)::B{Float64} β
β %27 = (getfield)(%24, 2)::B{Int64} β
β %28 = (getfield)(%24, 3)::B{UInt8} β
β %29 = (getfield)(%24, 4)::B{Float16} β
β %30 = (getfield)(%25, 1)::B{Float32} β
β %31 = (getfield)(%25, 2)::B{UInt16} β
β %32 = (Core.tuple)(%26, %27, %28, %29, %30, %31)::Tuple{B{Float64},B{Int64},B{UInt8},B{Float16},B{Float32},B{UInt16}}
βββ return %32 β
```

Iβve also written a small function to do some microbenchmarks. You can basically specify the length of `xs`

and `ys`

and how many of the types should be the same. Iβve pasted it here if anyone wants to experiment.

I canβt say how representative the benchmark is for real-world applications, type-inference can probably be better leveraged in the context of a more complex program. The results were also all over the place, often times my first example was about factor 10 times faster than yours, but for other numbers of arguments, your function took in the realms of tens of nanoseconds, while mine took microseconds.

With `@inline`

added, my type-inferable function was often a bit faster than my first attempt, but there were also times where it was a bit slower. The other functions didnβt seem to really benefit from inlining. These benchmarks made me wonder, how important full type inference here really is, or wether were kind of barking up the wrong tree by just looking at `@code_warntype`

.

What was most important to my application, was that there is basically no performance penalty, when all entries are the same type, which all of these functions achieve, but Iβll probably do some benchmarking of these functions once Iβve fully implemented them.