I'm not worried about having to write multiple method definitions, but beyond a certain number of them I feel I risk losing the overview.
Here is the full version with dispatch inside the function:
"""
serialize(io, v) writes a Julia object to `io` as a series of bytes.
v = deserialize(io) recreates the data from that byte stream.
An exercise in dispatch style.
Based on
https://de.mathworks.com/matlabcentral/fileexchange/29457-serialize-deserialize
and "julianized" with the experts on
https://discourse.julialang.org/
"""
# Type encoding table: column 1 is the numeric code stored in the stream,
# column 2 is the Julia type that code stands for.
itype = [
      0  Float64
      1  Float32
      2  Float16
      3  Bool
      4  Int8
      5  UInt8
      6  Int16
      7  UInt16
      8  Int32
      9  UInt32
     10  Int64
     11  UInt64
     12  Char
     13  String
    100  Tuple
    200  Any
]
# Codes whose values are passed straight to `write`
WRITABLE = 0:12
# Code used for structs and arrays of structs
STRUCT = 255
# Lookup tables: code -> type and type -> code
itype2type = let codes = itype[:, 1], kinds = itype[:, 2]
    Dict(Pair.(codes, kinds))
end
type2itype = let codes = itype[:, 1], kinds = itype[:, 2]
    Dict(Pair.(kinds, codes))
end
# Write every extent of `v` as a UInt32; returns the number of bytes written.
# Looping (instead of splatting into `write`) also works for 0-dimensional arrays.
function _writedims(io, v)
    nb = 0
    for d in size(v)
        nb += write(io, UInt32(d))
    end
    return nb
end

# Type and size prefix.
#
# Writes the header that precedes every serialized value:
#   byte 1      type code (see `itype`, or `STRUCT`)
#   byte 2      number of dimensions (0 for scalars and single structs)
#   UInt32 × n  one extent per dimension
# Strings and tuples are stored as one-dimensional; for a String the extent is
# the number of bytes (code units), because the payload is written and read
# back as raw bytes.
function prefix(io, v)
    if v isa Real || v isa Char
        println("type2itype[typeof(v)] in WRITABLE, prefix : $(type2itype[typeof(v)]))")
        write(io, UInt8(type2itype[typeof(v)]))
        write(io, UInt8(0))
    elseif v isa AbstractArray && (eltype(v) <: Real || eltype(v) == Char)
        println("type2itype[eltype(v)] in WRITABLE, prefix : $(type2itype[eltype(v)])")
        write(io, UInt8(type2itype[eltype(v)]))
        write(io, UInt8(ndims(v)))
        _writedims(io, v)
    elseif v isa String
        println("v isa String, prefix : $(type2itype[String]))")
        write(io, UInt8(type2itype[String]))
        write(io, UInt8(1))
        # byte count, not character count: non-ASCII characters take several bytes
        write(io, UInt32(ncodeunits(v)))
    elseif v isa Tuple
        println("v isa Tuple, prefix: $(type2itype[Tuple]))")
        write(io, UInt8(type2itype[Tuple]))
        write(io, UInt8(1))
        write(io, UInt32(length(v)))
    elseif v isa AbstractArray{Any}
        println("v isa AbstractArray{Any}, prefix: $(type2itype[Any]))")
        write(io, UInt8(type2itype[Any]))
        write(io, UInt8(ndims(v)))
        _writedims(io, v)
    elseif v isa AbstractArray{String}
        # `serialize` writes string arrays element by element, exactly like
        # arrays of Any, so they need the same header; the STRUCT header would
        # make the reader expect a field table.
        println("v isa AbstractArray{String}, prefix: $(type2itype[Any]))")
        write(io, UInt8(type2itype[Any]))
        write(io, UInt8(ndims(v)))
        _writedims(io, v)
    elseif v isa AbstractArray
        println("v isa AbstractArray, prefix: $STRUCT")
        write(io, UInt8(STRUCT))
        write(io, UInt8(ndims(v)))
        _writedims(io, v)
    else
        tbyte = UInt8(STRUCT)
        println("else, prefix: $(tbyte)")
        write(io, tbyte)
        write(io, UInt8(0))
    end
end
# Write `v` to `io`: first the header produced by `prefix`, then the payload.
#
# Payload by kind of value:
#   Real / Char                        raw `write`
#   String                             its code units
#   Tuple, Array{Any}, Array{String}   every element, serialized recursively
#   Array of Real / Char               raw `write` of the whole array
#   Array of struct                    field count, then per field: name length,
#                                      name, and the array of that field's values
#   anything else (single struct)      field count, then per field: name length,
#                                      name, and the field value
#
# The array branch is chosen from `eltype(v)` (the same test `prefix` uses), so
# empty arrays are handled and header and payload always agree.
function serialize(io, v)
    println("v=$v")
    prefix(io, v)
    if v isa Real || v isa Char
        println("(v isa Int || v isa Real || v isa Bool || v isa Char)")
        write(io, v)
    elseif v isa String
        println("v isa String")
        writestr(io, v)
    elseif v isa Tuple || v isa AbstractArray{Any} || v isa AbstractArray{String}
        println("v isa Tuple || v isa Tuple || v isa AbstractArray{Any} || v isa AbstractArray{String}"[14:end])
        foreach(x -> serialize(io, x), v)
    elseif v isa AbstractArray
        println("v isa AbstractArray")
        T = eltype(v)
        if T <: Real || T == Char
            println("(first(v) isa Int || first(v) isa Real || first(v) isa Bool || first(v) isa Char)")
            write(io, v)
        else
            fc = fieldcount(T)
            writenum(io, fc, UInt32)
            println("fc=$fc")
            for name in fieldnames(T)
                sname = String(name)
                len = ncodeunits(sname)
                println("len=$len, sname=$sname")
                writenum(io, len, UInt8)
                writestr(io, sname)
                # For an empty array take the element type from the field
                # declaration instead of relying on broadcast inference.
                column = isempty(v) ? similar(v, fieldtype(T, name)) : getfield.(v, name)
                serialize(io, column)
            end
        end
    else
        fc = fieldcount(typeof(v))
        writenum(io, fc, UInt32)
        println("fc=$fc")
        for name in fieldnames(typeof(v))
            sname = String(name)
            len = ncodeunits(sname)
            println("len=$len, sname=$sname")
            writenum(io, len, UInt8)
            writestr(io, sname)
            serialize(io, getfield(v, name))
        end
    end
    return nothing
end
# Read one value from `io` and rebuild it.
#
# The header is read first: type code, number of dimensions, and one UInt32
# extent per dimension. What follows depends on the type code:
#   WRITABLE codes   a scalar (ndims == 0) or an array reshaped to the extents
#   STRUCT           field count, then (name length, name, value) per field;
#                    returned as a NamedTuple, or an array of NamedTuples
#   String           the raw bytes of the string
#   Any / Tuple      the elements, each deserialized recursively
# Throws an error for an unknown type code.
function deserialize(io)
    ity = Int(readnum(io, UInt8))
    ndms = Int(readnum(io, UInt8))
    dms = ndms == 0 ? 1 : Int.(readnum(io, UInt32, ndms))
    println("ity=$ity, ndms=$ndms, dms=$dms")
    if ity in WRITABLE
        println("ity in WRITABLE")
        cls = itype2type[ity]
        return ndms == 0 ? readnum(io, cls) : reshape(readnum(io, cls, prod(dms)), dms...)
    elseif ity == STRUCT
        println("ity == STRUCT")
        fname = Symbol[]
        nfld = readnum(io, UInt32)
        fdata = []
        for _ in 1:nfld
            fn = readstr(io, readnum(io, UInt8))
            println("fn=$fn")
            push!(fname, Symbol(fn))
            push!(fdata, deserialize(io))
        end
        names = Tuple(fname)
        if ndms == 0
            # Single struct: every entry of `fdata` is one complete field value.
            # Pair names and values directly; zipping the values would iterate
            # *inside* String, array or tuple fields.
            return NamedTuple{names}(Tuple(fdata))
        else
            # Array of structs: every entry of `fdata` is an array holding that
            # field for all elements, so zip them element-wise.
            return reshape([NamedTuple{names}(vals) for vals in zip(fdata...)], dms...)
        end
    elseif ity == type2itype[String]
        println("ity == type2itype[String]")
        if ndms == 1
            return String(read(io, dms[1]))
        else
            se = String[]
            for _ in 1:prod(dms)
                push!(se, deserialize(io))
            end
            return reshape(se, dms...)
        end
    elseif ity == type2itype[Any] || ity == type2itype[Tuple]
        println("ity == type2itype[Any] || ity == type2itype[Tuple]")
        istuple = ity == type2itype[Tuple]
        ele = []
        for _ in 1:prod(dms)
            push!(ele, deserialize(io))
        end
        if istuple
            return Tuple(ele)
        else
            return reshape(ele, dms...)
        end
    else
        error("unknown type index $ity")
    end
end
############## IO read/write routines ##############
# Convert `num` to type `T` and write it to `io`.
# Returns whatever `write` returns (the number of bytes written).
function writenum(io, num, T)
    converted = T(num)
    return write(io, converted)
end
# Read `n` values of type `T` from `io`.
# The bytes are read into a zero-filled buffer of `n * sizeof(T)` bytes and
# returned reinterpreted as `T` (no copy of the buffer is made).
function readnum(io, T, n)
    nbytes = sizeof(T) * n
    buffer = fill(0x00, nbytes)
    readbytes!(io, buffer, nbytes)
    return reinterpret(T, buffer)
end
# Read a single value of type `T` from `io`.
#
# Delegates to `read(io, T)`, the exact inverse of `write(io, x::T)`. This
# matters for `Char`: `write` emits its 1-4 byte UTF-8 encoding, not
# `sizeof(Char)` bytes, so reading a fixed-size block would consume the wrong
# number of bytes. Throws `EOFError` if the stream ends early.
function readnum(io, T)
    return read(io, T)
end
# Write the code units (raw bytes) of `str` to `io`, without any length prefix.
function writestr(io, str)
    units = codeunits(str)
    return write(io, units)
end
# Read up to `n` bytes from `io` and return them as a String.
function readstr(io, n)
    raw = read(io, n)
    return String(raw)
end
############## Tests ##############
# Serialize `data` to the file "io.bin", read it back, print and return the
# deserialized value. Both file handles are opened with `do` blocks so they
# are closed even when (de)serialization throws.
function round_trip(data)
    open("io.bin", "w") do io
        serialize(io, data)
    end
    println("..deserialize..")
    data2 = open("io.bin", "r") do io
        deserialize(io)
    end
    println("data2=$data2")
    return data2
end
# Example struct used by the round-trip tests: three Float64 coordinates.
mutable struct Coords
    x::Float64
    y::Float64
    z::Float64
end

# Zero-argument constructor: each coordinate is drawn with `rand()`, in the
# order x, y, z.
function Coords()
    x = rand()
    y = rand()
    z = rand()
    return Coords(x, y, z)
end
# Sample values, one per kind of data the serializer distinguishes.
# Only `Array_of_Struct` is actually round-tripped below; the others are just defined.
Array_of_Int = [1, 2]
Array_of_Tuple = [(1, 2), (2, 3)]
Array_of_Any = ["Ab", (1, 2)]
# NOTE(review): `pi` is an `Irrational`, a type that has no entry in the
# `itype` table above — convert to Float64 before round-tripping it.
Single_Num = pi
Array_of_Num = randn(3,3)
Single_Struct = Coords()
Array_of_Struct = [Coords() for i in 1:5]
Single_Tuple = ("Ab", [pi, 2.0])
Single_String = "toto"
Array_of_String = ["Ab" "toto"; "titi" "ok"]
# Write the array of structs to "io.bin" and read it back.
round_trip(Array_of_Struct)
And here the version with regular/external dispatch:
"""
bytes = serialize(v) converts a Julia object into a series of bytes; serialize(io, v) writes them to `io`.
An exercise in multiple dispatch style.
Based on
https://de.mathworks.com/matlabcentral/fileexchange/29457-serialize-deserialize
and "julianized" with the experts on
https://discourse.julialang.org/
"""
# Type codes
# `Struct` is the placeholder "type" for structs and arrays of structs.
# Note that `Union{Any, AbstractArray}` is the very same type as `Any`.
Struct = Union{Any, AbstractArray}
# Column 1 is the code stored in the stream, column 2 the type it stands for.
it = [
      0  Float64
      1  Float32
      2  Float16
      3  Bool
      4  Char
      5  String
      6  Int8
      7  UInt8
      8  Int16
      9  UInt16
     10  Int32
     11  UInt32
     12  Int64
     13  UInt64
    100  Tuple
    255  Struct
]
# Lookup tables: code byte -> type and type -> code byte
byte2type = let codes = UInt8.(it[:, 1]), kinds = it[:, 2]
    Dict(Pair.(codes, kinds))
end
type2byte = let codes = UInt8.(it[:, 1]), kinds = it[:, 2]
    Dict(Pair.(kinds, codes))
end
# Types whose values (or arrays of values) are passed straight to `write`
Writable = Union{<:Real, Char, AbstractArray{<:Real}, AbstractArray{Char}}
# Type and size prefix for ndims 0, 1, >=1
# Generic (scalar) case: the type code byte followed by a zero dimension count.
# Types without an entry in `type2byte` get the `Struct` code.
function prefix(io, x)
    code = get(type2byte, typeof(x), type2byte[Struct])
    write(io, code)
    return write(io, 0x00)
end
# String header: type code, one dimension, and the size of the string.
# The size is the number of bytes (code units), not the number of characters:
# the payload is written as code units and read back with `read(io, nbytes)`,
# so a character count would truncate any string containing non-ASCII text.
function prefix(io, x::String)
    write(io, type2byte[String])
    write(io, UInt8(1))
    write(io, UInt32(ncodeunits(x)))
end
# Tuple header: type code, one dimension, and the number of elements as UInt32.
function prefix(io, x::Tuple)
    write(io, type2byte[Tuple])
    write(io, 0x01)
    nelem = UInt32(length(x))
    return write(io, nelem)
end
# Array header: element type code, number of dimensions, then one UInt32
# extent per dimension. Returns the number of bytes used by the extents.
#
# Element types without their own code (e.g. user-defined structs) get the
# `Struct` code instead of raising a KeyError, matching the scalar `prefix`.
# The extents are written in a loop so 0-dimensional arrays work as well.
function prefix(io, x::AbstractArray)
    code = get(type2byte, eltype(x), type2byte[Struct])
    write(io, code)
    write(io, UInt8(ndims(x)))
    nb = 0
    for d in size(x)
        nb += write(io, UInt32(d))
    end
    return nb
end
# Entry function
# Serialize `v` into an in-memory buffer and return the bytes written to it.
function serialize(v)
    buf = IOBuffer(UInt8[]; append = true)
    _serialize(buf, v)
    bytes = take!(buf)
    return bytes
end
# Serialize `v` directly into the stream `io`.
serialize(io, v) = _serialize(io, v)
# Writables: numbers, characters and arrays of them.
# The header is followed by the value handed to `write` as is.
function _serialize(io, v::T) where {T <: Writable}
    prefix(io, v)
    nwritten = write(io, v)
    return nwritten
end
# Strings: the header is followed by the string's code units.
function _serialize(io, v::String)
    prefix(io, v)
    units = codeunits(v)
    return write(io, units)
end
# Tuples / Array of Any
# The header is followed by every element, serialized recursively in order.
function _serialize(io, v::T) where {T <: Union{Tuple, AbstractArray{Any}}}
    prefix(io, v)
    sink = Ref(io)  # keep `io` from being broadcast over
    return _serialize.(sink, v)
end
# Array of Struct (stored as Struct of Array)
# After the header comes the field count, then for each field: the byte
# length of its name, the name, and the array of that field's values.
#
# Field names come from `eltype(v)` rather than `first(v)`, so an empty array
# no longer throws.
function _serialize(io, v::AbstractArray)
    prefix(io, v)
    T = eltype(v)
    write(io, UInt32(fieldcount(T)))
    for name in fieldnames(T)
        sname = String(name)
        write(io, UInt8(ncodeunits(sname)))
        write(io, sname)
        # For an empty array take the element type from the field declaration
        # instead of relying on broadcast inference.
        column = isempty(v) ? similar(v, fieldtype(T, name)) : getfield.(v, name)
        _serialize(io, column)
    end
end
# Struct
# After the header comes the field count, then for each field: the byte
# length of its name, the name, and the serialized field value.
#
# The name length is `ncodeunits` (bytes), because the name is written as
# bytes and read back by byte count; `length` counts characters and is too
# small for field names containing non-ASCII characters.
function _serialize(io, v)
    prefix(io, v)
    T = typeof(v)
    write(io, UInt32(fieldcount(T)))
    for name in fieldnames(T)
        sname = String(name)
        write(io, UInt8(ncodeunits(sname)))
        write(io, sname)
        _serialize(io, getfield(v, name))
    end
end
# Entry function
# Read the leading type code byte, map it to a type through `byte2type`, and
# let the matching `_deserialize` method read the rest.
function deserialize(io)
    code = readnum(io, UInt8)
    return _deserialize(io, byte2type[code])
end
# Read dimensions
# Returns `(ndims, dims)`: the dimension-count byte and the UInt32 extents
# converted to Int. With zero dimensions no extents are read and `dims` is 1.
function readdim(io)
    ndms = Int(readnum(io, UInt8))
    if ndms == 0
        return (ndms, 1)
    end
    return (ndms, Int.(readnum(io, UInt32, ndms)))
end
# Writables
# A zero dimension count means a single value; otherwise `prod(dims)` values
# are read and reshaped to the stored extents.
function _deserialize(io, cls::Type{T}) where {T <: Writable}
    ndms, dms = readdim(io)
    if ndms == 0
        return readnum(io, cls)
    end
    flat = readnum(io, cls, prod(dms))
    return reshape(flat, dms...)
end
# Strings: the first stored extent is the number of bytes to read.
function _deserialize(io, ::Type{String})
    _, dms = readdim(io)
    nbytes = dms[1]
    return String(read(io, nbytes))
end
# Tuples
# The first stored extent is the element count; each element starts with its
# own type code byte and is read by the matching `_deserialize` method.
function _deserialize(io, cls::Type{Tuple})
    _, dms = readdim(io)
    items = Any[]
    for _ in 1:dms[1]
        elcls = byte2type[readnum(io, UInt8)]
        push!(items, _deserialize(io, elcls))
    end
    return Tuple(items)
end
# (Array of) struct -> (array of) named tuple
# Reads the dimensions, the field count, and then for each field its name
# (length byte + bytes) and its serialized value.
function _deserialize(io, cls::Type{Struct})
    (ndms, dms) = readdim(io)
    fname = Symbol[]
    fdata = []
    nfld = readnum(io, UInt32)
    for _ in 1:nfld
        fn = readstr(io, readnum(io, UInt8))
        push!(fname, Symbol(fn))
        fcls = byte2type[readnum(io, UInt8)]
        push!(fdata, _deserialize(io, fcls))
    end
    names = Tuple(fname)
    if ndms == 0
        # Single struct: every entry of `fdata` is one complete field value.
        # Pair names and values directly; zipping the values would iterate
        # *inside* String, array or tuple fields.
        return NamedTuple{names}(Tuple(fdata))
    else
        # Array of structs: every entry of `fdata` is an array holding that
        # field for all elements, so zip them element-wise.
        return reshape([NamedTuple{names}(vals) for vals in zip(fdata...)], dms...)
    end
end
############## IO read/write routines ##############
# Convert `num` to type `T` and write it to `io`.
# Returns whatever `write` returns (the number of bytes written).
function writenum(io, num, T)
    converted = T(num)
    return write(io, converted)
end
# Read `n` values of type `T` from `io`.
# The bytes are read into a zero-filled buffer of `n * sizeof(T)` bytes and
# returned reinterpreted as `T` (no copy of the buffer is made).
function readnum(io, T, n)
    nbytes = sizeof(T) * n
    buffer = fill(0x00, nbytes)
    readbytes!(io, buffer, nbytes)
    return reinterpret(T, buffer)
end
# Read a single value of type `T` from `io`.
#
# Delegates to `read(io, T)`, the exact inverse of `write(io, x::T)`. This
# matters for `Char`: `write` emits its 1-4 byte UTF-8 encoding, not
# `sizeof(Char)` bytes, so reading a fixed-size block would consume the wrong
# number of bytes. Throws `EOFError` if the stream ends early.
function readnum(io, T)
    return read(io, T)
end
# Write the code units (raw bytes) of `str` to `io`, without any length prefix.
function writestr(io, str)
    units = codeunits(str)
    return write(io, units)
end
# Read up to `n` bytes from `io` and return them as a String.
function readstr(io, n)
    raw = read(io, n)
    return String(raw)
end
############## Tests ##############
# Sample inputs: a numeric vector, a vector of tuples, and a heterogeneous
# vector (element type Any) holding a string and a tuple.
Array_of_Int = [1, 2]
Array_of_Tuple = [(1, 2), (2, 3)]
Array_of_Any = ["Ab", (1, 2)]
# Example struct used by the round-trip tests: three Float64 coordinates.
mutable struct Coords
    x::Float64
    y::Float64
    z::Float64
end

# Zero-argument constructor: each coordinate is drawn with `rand()`, in the
# order x, y, z.
function Coords()
    x = rand()
    y = rand()
    z = rand()
    return Coords(x, y, z)
end
# Struct samples: five random Coords in a vector, and a single one.
Array_of_Struct = [Coords() for i in 1:5]
Single_Struct = Coords()
# Disabled manual checks of the byte-vector entry point `serialize(v)`.
# They refer to an `io` that is not defined at this point in the script, so a
# stream has to be opened before any of them is re-enabled.
#write(io, serialize(1))
#write(io, serialize([1,2.0]))
#write(io, serialize(1.0))
#write(io, serialize([1.0]))
#write(io, serialize(randn(3,3)))
#write(io, serialize((1.0, 2)))
#write(io, serialize(["a" "b"; "c" "d"]))
#write(io, serialize("a"))
#write(io, serialize(Array_of_Any)) # fixme: hanging
#write(io, serialize(Array_of_Struct))
#write(io, serialize(Single_Struct))
# Serialize `data` to the file "io.bin", then read it back twice and print
# both results. Every file handle is opened with a `do` block so it is closed
# again, also when (de)serialization throws; the first read used to leave its
# handle open.
function round_trip(data)
    open("io.bin", "w") do io
        serialize(io, data)
    end
    open("io.bin", "r") do io
        data2 = deserialize(io)
        println("data2=$data2")
    end
    open("io.bin", "r") do io
        data3 = deserialize(io)
        println("data3=$data3")
    end
    return nothing
end
# Round-trip a tuple holding a string and a numeric vector
# (`[pi, 2.0]` is promoted to a Vector{Float64}).
data = ("Ab", [pi, 2.0])
round_trip(data)