sizeof(::Vector{Union{Missing,T}}), and hypothetical size

Two (related) questions:

  1.  julia> sizeof(vcat(ones(10), fill(missing, 10)))
     160
    

    but I expected 20*9 because of the type tag, what am I missing? v"1.1.0-DEV.298".

  2. If I have a type T which is a Union, can I calculate the size of a Vector{T} of n elements without actually creating one?

It looks as though Union reserves enough space per element for the largest unioned type

tx = Union{Float16, Int64};
z = Array{tx}(undef, 10);
sizeof(z[1])  -> 2
sizeof(z[1:2])  ->16
sizeof(z)  ->80

I thought it used an Int8 or similar to keep track of element types for each element. So I am wondering if this has changed, or if sizeof is missing it.

Additional weirdness in

julia> Base.summarysize(Union{Int, Missing})
16

julia> Base.summarysize(Union{Int})
172

Base.elsize will give you the size of each element of an array and indeed, isbits Union arrays will only store enough bits to hold the largest Union element.

Thanks. Just to clarify, if I was interested in the size of the total number of bytes for each element, I would have to add sizeof(Int8) for Union, is this correct?

Effectively, I am looking for

size_per_element(Vector{Union{Int, Missing}}) == 9 #hypothetical

if I understand things correctly.

I think this is an array of pointers to “boxes”. This is only reporting the size of the pointer (64 bits) times the number of elements. Type information is in the “boxes”.

julia> a=["abcdefghijk",1.0,BigFloat(π)]
3-element Array{Any,1}:
  "abcdefghijk"                                                                  
 1.0                                                                             
 3.141592653589793238462643383279502884197169399375105820974944592307816406286198

julia> sizeof(a)
24

Base.summarysize adds 4 bytes per element per type, so a two-typed union gets +8 per element.
(Edit: this didn’t hold for a three-typed union.)

40 bytes for the array, and +4 for a unioned element?

I don’t think small unions of concrete types are boxed.

Other strange behavior:

julia> a=[0x1,true]
2-element Array{UInt8,1}:
 0x01
 0x01

julia> a[1]
0x01

julia> a[2]
0x01

julia> typeof(true)
Bool

we are losing the type information.

Perhaps I am missing something, but this is just standard promotion behavior of []. EDIT: defined here.

1 Like

Using [] without types will promote the types in it if possible, e.g.

julia> [1.0, 1]
2-element Array{Float64,1}:
 1.0
 1.0

You can always specify the element type if you want to opt out of that.

This thread was missing some clear closure, and I think all the questions are still relevant, so I guess resurrecting it is OK.

I did some experiments, the results seem to be that each Union type has one selector byte that accounts for the type of the object, the same applies to Arrays of Unions, and the selector bytes are not accounted for by sizeof, but are accounted for by Base.summarysize and by @allocated.

Experimental module:

# Copyright © 2021: Neven Sajko
#
# Licensed under the MIT license.

module SingletonUnionArray

export reportSize0, reportSize1, reportSize2

# Index predicate shared by all the fill routines below:
# true exactly when cos(n) is greater than 0.5.
function isInteresting(n::Int)::Bool
    return cos(n) > 0.5
end

######## Union of two singleton types

struct Interesting end
const U = Union{Nothing, Interesting}
const VU = Vector{U}

# Store `Interesting()` at every index of `a` accepted by `isInteresting`.
# Works in place and returns the same vector.
function fU(a::VU)::VU
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = Interesting()
        end
    end
    return a
end

# Create a length-`n` vector filled with `nothing`, then mark it via `fU`.
function makeU(n::Int)::VU
    blank = VU(nothing, n)
    return fU(blank)
end

######## Union of two parameterized singleton types

struct Interesty{B} end
const P = Union{Interesty{false}, Interesty{true}}
const VP = Vector{P}

# Store `Interesty{true}()` at every index of `a` accepted by `isInteresting`.
# Works in place and returns the same vector.
function fP(a::VP)::VP
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = Interesty{true}()
        end
    end
    return a
end

# Create a length-`n` vector filled with `Interesty{false}()`, then mark it
# via `fP`.
function makeP(n::Int)::VP
    blank = fill!(VP(undef, n), Interesty{false}())
    return fP(blank)
end

######## A single concrete type, Bool (which is byte-sized)

const VB = Vector{Bool}

# Store `true` at every index of `a` accepted by `isInteresting`.
# Works in place and returns the same vector.
function fB(a::VB)::VB
    for idx in eachindex(a)
        if isInteresting(idx)
            a[idx] = true
        end
    end
    return a
end

# Create a length-`n` vector filled with `false`, then mark it via `fB`.
function makeB(n::Int)::VB
    blank = fill!(VB(undef, n), false)
    return fB(blank)
end

######## Finally, the measurements

# The three vector constructors that every report iterates over.
const m = (makeU, makeP, makeB)

# Lengths to measure: 1 << i for each i produced by R(16, 28, 4).
const R = LinRange{Int}
const inGen = ((1 << i) for i in R(16, 28, 4))

# Print `sizeof` of the vector built by each constructor in `m`,
# for each length in `inGen`.
function reportSize0()::Nothing
    println("sizeof: ", inGen)
    println()

    for maker in m
        println(maker)

        for len in inGen
            nbytes = sizeof(maker(len))
            println(len, ": ", nbytes)
        end

        println()
    end

    return nothing
end

# Types passed as the `chargeall` keyword of `Base.summarysize` below.
const Charg = Union{Core.TypeMapEntry, Method, VU, VP, VB, U, P, Bool}

# Print `Base.summarysize` (with `chargeall = Charg`) of the vector built by
# each constructor in `m`, for each length in `inGen`.
function reportSize1()::Nothing
    println("Base.summarysize: ", inGen)
    println()

    for maker in m
        println(maker)

        for len in inGen
            nbytes = Base.summarysize(maker(len); chargeall = Charg)
            println(len, ": ", nbytes)
        end

        println()
    end

    return nothing
end

# Print the `@allocated` figure for building a vector with each constructor
# in `m`, for each length in `inGen`.
function reportSize2()::Nothing
    println("@allocated: ", inGen)
    println()

    for maker in m
        println(maker)

        for len in inGen
            println(len, ": ", @allocated(maker(len)))
        end

        println()
    end

    return nothing
end

end  # module SingletonUnionArray

REPL session, numerical results:

$ julia
               _
   _       _ _(_)_     |  Documentation: https://docs.julialang.org
  (_)     | (_) (_)    |
   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 1.5.4 (2021-03-11)
 _/ |\__'_|_|_|\__'_|  |
|__/                   |

julia> include("SingletonUnionArray.jl")
Main.SingletonUnionArray

julia> using Main.SingletonUnionArray

julia> reportSize0()
sizeof: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 0
1048576: 0
16777216: 0
268435456: 0

makeP
65536: 0
1048576: 0
16777216: 0
268435456: 0

makeB
65536: 65536
1048576: 1048576
16777216: 16777216
268435456: 268435456


julia> reportSize1()
Base.summarysize: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496

makeP
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496

makeB
65536: 65576
1048576: 1048616
16777216: 16777256
268435456: 268435496


julia> reportSize2()
@allocated: Base.Generator{LinRange{Int64},Main.SingletonUnionArray.var"#1#2"}(Main.SingletonUnionArray.var"#1#2"(), range(16, stop=28, length=4))

makeU
65536: 65632
1048576: 1048672
16777216: 16777312
268435456: 268435552

makeP
65536: 65632
1048576: 1048672
16777216: 16777312
268435456: 268435552

makeB
65536: 65696
1048576: 1048736
16777216: 16777376
268435456: 268435616
1 Like