sizeof(::Vector{Union{Missing,T}}), and hypothetical size

This thread was missing some clear closure, and I think all the questions are still relevant, so I guess resurrecting it is OK.

I did some experiments. The results suggest that each value of a Union type carries one selector byte that records the type of the object, and that the same applies to Arrays of Unions. The selector bytes are not accounted for by sizeof, but they are accounted for by Base.summarysize and by @allocated.

Experimental module:

# Copyright © 2021: Neven Sajko
#
# Licensed under the MIT license.

module SingletonUnionArray

export reportSize0, reportSize1, reportSize2

# Predicate that selects which indices receive the "interesting" value:
# true exactly when the cosine of `n` is greater than one half.
function isInteresting(n::Int)::Bool
    return cos(n) > 0.5
end

######## Union of two singleton types

# Field-less marker type; together with `Nothing` it forms the union `U`.
struct Interesting end
const U = Union{Nothing, Interesting}
const VU = Vector{U}

# Overwrite, in place, every element whose index satisfies `isInteresting`
# with `Interesting()`, then hand the same vector back.
function fU(a::VU)::VU
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = Interesting()
        end
    end
    return a
end

# Build a length-`n` vector initialised to `nothing` and pass it through `fU`.
function makeU(n::Int)::VU
    blank = VU(nothing, n)
    return fU(blank)
end

######## Union of two parameterized singleton types

# Field-less marker type parameterised by `B`; the two instantiations used
# here, `Interesty{false}` and `Interesty{true}`, form the union `P`.
struct Interesty{B} end
const P = Union{Interesty{false}, Interesty{true}}
const VP = Vector{P}

# Overwrite, in place, every element whose index satisfies `isInteresting`
# with `Interesty{true}()`, then hand the same vector back.
function fP(a::VP)::VP
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = Interesty{true}()
        end
    end
    return a
end

# Build a length-`n` vector filled with `Interesty{false}()` and pass it
# through `fP`.
function makeP(n::Int)::VP
    blank = VP(undef, n)
    fill!(blank, Interesty{false}())
    return fP(blank)
end

######## A single concrete type, Bool (which is byte-sized)

const VB = Vector{Bool}

# Overwrite, in place, every element whose index satisfies `isInteresting`
# with `true`, then hand the same vector back.
function fB(a::VB)::VB
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = true
        end
    end
    return a
end

# Build a length-`n` vector filled with `false` and pass it through `fB`.
function makeB(n::Int)::VB
    blank = VB(undef, n)
    fill!(blank, false)
    return fB(blank)
end

######## Finally, the measurements

# The three vector constructors that every report iterates over, in order.
const m = (makeU, makeP, makeB)

const R = LinRange{Int}
# Lazily generated vector lengths: 1 shifted left by 16, 20, 24 and 28 bits.
const inGen = ((1 << i) for i in R(16, 28, 4))

# Shared printing loop for the reports whose measurement is an ordinary
# function of the constructed vector. Prints `title` followed by `inGen`, then
# one section per constructor in `m` with one "length: measurement" line per
# length, each section followed by a blank line.
function printTable(title::String, measure)::Nothing
    println(title, inGen)
    println()

    for maker in m
        println(maker)

        for len in inGen
            println(len, ": ", measure(maker(len)))
        end

        println()
    end

    return nothing
end

# Report `sizeof` of the vector built by each constructor at each length.
function reportSize0()::Nothing
    return printTable("sizeof: ", sizeof)
end

# Types passed as the `chargeall` keyword of `Base.summarysize` below.
const Charg = Union{Core.TypeMapEntry, Method, VU, VP, VB, U, P, Bool}

# Report `Base.summarysize` (with `chargeall = Charg`) of the vector built by
# each constructor at each length.
function reportSize1()::Nothing
    return printTable(
        "Base.summarysize: ",
        vec -> Base.summarysize(vec; chargeall = Charg),
    )
end

# Report the `@allocated` result for each constructor call at each length.
# The loops are spelled out here rather than routed through `printTable` so
# that `@allocated` keeps wrapping the direct constructor call expression.
function reportSize2()::Nothing
    println("@allocated: ", inGen)
    println()

    for maker in m
        println(maker)

        for len in inGen
            println(len, ": ", @allocated maker(len))
        end

        println()
    end

    return nothing
end

end  # module SingletonUnionArray

REPL session, numerical results:

$ julia
               _
   _       _ _(_)_     |  Documentation: https://docs.julialang.org
  (_)     | (_) (_)    |
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 1.5.4 (2021-03-11)
 _/ |\__'_|_|_|\__'_|  |
|__/                   |

julia> include("SingletonUnionArray.jl")
Main.SingletonUnionArray

julia> using Main.SingletonUnionArray

julia> reportSize0()
sizeof: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 0
1048576: 0
16777216: 0
268435456: 0

makeP
65536: 0
1048576: 0
16777216: 0
268435456: 0

makeB
65536: 65536
1048576: 1048576
16777216: 16777216
268435456: 268435456


julia> reportSize1()
Base.summarysize: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496

makeP
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496

makeB
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496


julia> reportSize2()
@allocated: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 65632
1048576: 1048672
16777216: 16777312
268435456: 268435552

makeP
65536: 65632
1048576: 1048672
16777216: 16777312
268435456: 268435552

makeB
65536: 65696
1048576: 1048736
16777216: 16777376
268435456: 268435616
1 Like