Small update with your recommendations.
"""
serialize(io, v) writes a Julia object to the stream io as a series of bytes.
v = deserialize(io) recreates the data from a byte stream.
As much data as possible is written and read in a single call. Values are grouped as:
- NumChar: numbers and characters, written directly
- Mixed: Any and Tuple element types, serialized element by element
- Struct: struct types, serialized field by field.
An exercise in dispatch style, based on
https://de.mathworks.com/matlabcentral/fileexchange/29457-serialize-deserialize
and "julianized" with the experts on
https://discourse.julialang.org/
"""
# read/write routines
import Base.write

# Write every element of the tuple `x` to `io`, in order, and return the total
# number of bytes written.
# NOTE(review): this extends a Base function on Base-owned types (type piracy);
# kept for compatibility with existing callers.
Base.write(io::IO, x::Tuple) = write(io, x...)

# Splatting an empty tuple would call `write(io)`, which has no method, so the
# empty tuple gets its own method: nothing is written and 0 bytes are reported.
Base.write(io::IO, ::Tuple{}) = 0
import Base.read

# Read one value of the number type `T` from `io` by reinterpreting the next
# `sizeof(T)` bytes.
#
# `io` is restricted to `IO`: with an untyped first argument this method is
# ambiguous with Base's `read(filename::AbstractString, T)`, which breaks
# calls such as `read("file.bin", Float64)`.
#
# Throws `EOFError` when the stream ends before `sizeof(T)` bytes are available.
function Base.read(io::IO, T::Type{<:Number})
    nbytes = sizeof(T)
    bytes = Base.read(io, nbytes)
    # `read(io, nb)` returns fewer bytes at end of stream; report that explicitly
    # instead of letting `reinterpret` fail on a wrongly sized buffer.
    length(bytes) == nbytes || throw(EOFError())
    return only(reinterpret(T, bytes))
end
# Read `n` consecutive values of type `T` from `io` and return them as a
# reinterpreted view of the bytes that were read.
#
# `Char` is accepted in addition to numbers: `write(io, ::Array{Char})` stores
# the raw 4-byte representation of each character, so a Char array is read back
# the same way as a numeric array.
#
# `io` is restricted to `IO` so the method cannot collide with Base's
# `read(filename::AbstractString, args...)`.
#
# Throws `EOFError` when fewer than `n * sizeof(T)` bytes are available.
function Base.read(io::IO, T::Type{<:Union{Number,Char}}, n)
    nbytes = n * sizeof(T)
    bytes = Base.read(io, nbytes)
    length(bytes) == nbytes || throw(EOFError())
    return reinterpret(T, bytes)
end
# Type encoding

# Marker type: tag for struct data, which is stored field by field.
struct Struct end
# Marker type: tag for heterogeneous containers, which are stored element by element.
struct Mixed end

# Table of type tags: column 1 is the numeric tag written to the stream as a
# single byte, column 2 is the type that tag stands for.
const tcode = [
    0   Float64
    1   Float32
    2   Float16
    3   Bool
    4   Int8
    5   UInt8
    6   Int16
    7   UInt16
    8   Int32
    9   UInt32
    10  Int64
    11  UInt64
    12  Char
    13  String
    100 Tuple
    200 Mixed
    255 Struct
]

# Lookup tables in both directions. They are `const` and concretely keyed so
# that the serialize/deserialize methods do not go through untyped globals.
const tcode2type = Dict{Int,Type}(tcode[:, 1] .=> tcode[:, 2])
const type2tcode = Dict{Type,Int}(tcode[:, 2] .=> tcode[:, 1])

# Values that are written to the stream directly with `write`.
const NumChar = Union{Number,Char}
"""
    serialize(io, v::Union{Number,Char})

Write a single number or character to `io`: the one-byte type tag looked up in
`type2tcode`, a dimension-count byte of zero, then the value itself.
Prints a trace line and returns the result of `write`.
"""
function serialize(io, v::T) where {T<:NumChar}
    println("NumChar")
    tag = UInt8(type2tcode[T])
    nodims = UInt8(0)  # zero dimensions marks a scalar
    return write(io, tag, nodims, v)
end
"""
    serialize(io, v::AbstractArray{<:Union{Number,Char}})

Write an array of numbers or characters to `io` in one `write` call: the
element-type tag, the number of dimensions as one byte, each extent as a
`UInt32`, then the array data. Prints a trace line and returns the result of
`write`.
"""
function serialize(io, v::AbstractArray{T}) where {T<:NumChar}
    println("NumChar")
    tag = UInt8(type2tcode[T])
    rank = UInt8(ndims(v))
    extents = map(UInt32, size(v))
    return write(io, tag, rank, extents..., v)
end
"""
    serialize(io, v::Tuple)

Write a tuple to `io`: the `Tuple` tag, a dimension count of one, the tuple
length as a `UInt32`, then every element through its own `serialize` method.
Prints a trace line and returns a tuple holding the per-element results.
"""
function serialize(io, v::T) where {T<:Tuple}
    println("Tuple")
    header = (UInt8(type2tcode[Tuple]), UInt8(1), UInt32(length(v)))
    write(io, header...)
    return map(item -> serialize(io, item), v)
end
"""
    serialize(io, v::String)

Write a string to `io`: the `String` tag, a dimension count of one, the number
of code units as a `UInt32`, then the string data. Prints a trace line and
returns the result of `write`.
"""
function serialize(io, v::String)
    println("String")
    tag = UInt8(type2tcode[String])
    nbytes = UInt32(ncodeunits(v))
    return write(io, tag, UInt8(1), nbytes, v)
end
"""
    serialize(io, v::AbstractArray{String})

Write an array of strings to `io`: a header (tag, number of dimensions, each
extent as a `UInt32`) followed by every element through the `String` method.
Prints a trace line and returns the array of per-element results.

One-dimensional arrays are tagged `Mixed` instead of `String`. A scalar string
is stored as tag `String` with a dimension count of one and its byte length as
the single extent, and the `String` decoder treats every dimension count of one
that way. A `String`-tagged vector header would therefore be decoded as one
string of `length(v)` bytes. The `Mixed` tag is decoded element by element, so
the vector round-trips.
"""
function serialize(io, v::AbstractArray{String})
    println("String")
    tag = ndims(v) == 1 ? Mixed : String
    write(io, UInt8(type2tcode[tag]), UInt8(ndims(v)), UInt32.(size(v))...)
    return serialize.(Ref(io), v)
end
"""
    serialize(io, v::AbstractArray{<:Tuple})

Write an array of tuples to `io`: the `Mixed` tag, the number of dimensions,
each extent as a `UInt32`, then every tuple through its own `serialize` method.
Prints a trace line and returns the array of per-element results.
"""
function serialize(io, v::AbstractArray{T}) where {T<:Tuple}
    println("Tuple")
    tag = UInt8(type2tcode[Mixed])
    rank = UInt8(ndims(v))
    extents = map(UInt32, size(v))
    write(io, tag, rank, extents...)
    return map(item -> serialize(io, item), v)
end
"""
    serialize(io, v::AbstractArray{Any})

Write an array with element type `Any` to `io`: the `Mixed` tag, the number of
dimensions, each extent as a `UInt32`, then every element through the
`serialize` method matching its runtime type. Prints a trace line and returns
the array of per-element results.
"""
function serialize(io, v::AbstractArray{Any})
    println("Any")
    header = (UInt8(type2tcode[Mixed]), UInt8(ndims(v)), map(UInt32, size(v))...)
    write(io, header...)
    return map(item -> serialize(io, item), v)
end
"""
    serialize(io, v::AbstractArray)

Fallback for arrays whose element type `T` has no more specific method; the
elements are treated as structs. Writes the `Struct` tag, the number of
dimensions, each extent as a `UInt32` and the field count of `T`. Then, for
every field, it writes the name (one length byte plus the name itself) followed
by the array of that field's values across all elements, serialized as a whole.
Prints a trace line and returns `nothing`.
"""
function serialize(io, v::AbstractArray{T}) where T
    println("Struct, eltype=$T)")
    header = (UInt8(type2tcode[Struct]), UInt8(ndims(v)), map(UInt32, size(v))...)
    write(io, header...)
    write(io, UInt32(fieldcount(T)))
    foreach(fieldnames(T)) do field
        label = String(field)
        write(io, UInt8(ncodeunits(label)), label)
        # One array per field, so each field is written in a single call.
        serialize(io, getfield.(v, field))
    end
end
"""
    serialize(io, v)

Fallback for values without a more specific method; `v` is treated as a struct.
Writes the `Struct` tag, a dimension count of zero and the field count. Then,
for every field, it writes the name (one length byte plus the name itself)
followed by the serialized field value. Prints a trace line and returns
`nothing`.
"""
function serialize(io, v::T) where T
    println("Struct, type=$T)")
    write(io, UInt8(type2tcode[Struct]), UInt8(0))
    nfields = fieldcount(T)
    write(io, UInt32(nfields))
    for k in 1:nfields
        label = String(fieldname(T, k))
        write(io, UInt8(ncodeunits(label)), label)
        serialize(io, getfield(v, k))
    end
end
"""
    deserialize(io)

Read one serialized value from `io`. Reads the tag byte and maps it to a type
through `tcode2type`, reads the dimension-count byte and, when it is non-zero,
that many `UInt32` extents. The payload is then decoded by the type-specific
`deserialize(io, type, ndms, dms)` method. With a dimension count of zero the
extents argument is the integer `1`.
"""
function deserialize(io)
    tag = Int(read(io, UInt8))
    kind = tcode2type[tag]
    rank = Int(read(io, UInt8))
    if rank == 0
        extents = 1
    else
        extents = Int.(read(io, UInt32, rank))
    end
    return deserialize(io, kind, rank, extents)
end
"""
    deserialize(io, ::Type{T}, ndms, dms) where T<:NumChar

Decode number or character data. With `ndms == 0` a single value of type `T`
is read; otherwise `prod(dms)` values are read and reshaped to the extents
`dms`.
"""
function deserialize(io, ::Type{T}, ndms, dms) where T<:NumChar
    ndms == 0 && return read(io, T)
    flat = read(io, T, prod(dms))
    return reshape(flat, dms...)
end
"""
    deserialize(io, ::Type{Struct}, ndms, dms)

Decode struct data into `NamedTuple`s. Reads the field count, then for every
field its name (one length byte plus that many bytes) and its serialized value.

- `ndms == 0`: returns one `NamedTuple` mapping each field name to its value.
- otherwise: every field value is an array holding that field for all elements;
  the result is an array of `NamedTuple`s reshaped to the extents `dms`.
"""
function deserialize(io, ::Type{Struct}, ndms, dms)
    nfields = Int(read(io, UInt32))
    fname = Vector{Symbol}(undef, nfields)
    fdata = Vector{Any}(undef, nfields)
    for i in 1:nfields
        fname[i] = Symbol(String(read(io, read(io, UInt8))))
        fdata[i] = deserialize(io)
    end
    names = Tuple(fname)
    if ndms == 0
        # Pair each name with its whole field value. Zipping the field values
        # against each other would iterate into strings, arrays and tuples and
        # keep only their first elements.
        return NamedTuple{names}(Tuple(fdata))
    end
    # With no fields there is nothing to zip over; every element is empty.
    nfields == 0 && return fill(NamedTuple(), dms...)
    records = [NamedTuple{names}(vals) for vals in zip(fdata...)]
    return reshape(records, dms...)
end
"""
    deserialize(io, ::Type{String}, ndms, dms)

Decode string data. With `ndms == 1` the single extent is taken as a byte count
and that many bytes are read as one string. Otherwise `prod(dms)` serialized
values are read, converted with `String` and reshaped to the extents `dms`.
"""
function deserialize(io, ::Type{String}, ndms, dms)
    ndms == 1 && return String(read(io, only(dms)))
    items = [deserialize(io) for _ in 1:prod(dms)]
    return reshape(String.(items), dms...)
end
"""
    deserialize(io, ::Type{Mixed}, ndms, dms)

Decode a heterogeneous array: read `prod(dms)` serialized values one after the
other and reshape them to the extents `dms`.
"""
function deserialize(io, ::Type{Mixed}, ndms, dms)
    total = prod(dms)
    items = [deserialize(io) for _ in 1:total]
    return reshape(items, dms...)
end
"""
    deserialize(io, ::Type{Tuple}, ndms, dms)

Decode a tuple: the single extent in `dms` is the element count; that many
serialized values are read in order and returned as a tuple.
"""
function deserialize(io, ::Type{Tuple}, ndms, dms)
    len = only(dms)
    items = [deserialize(io) for _ in 1:len]
    return Tuple(items)
end
############## Tests ##############
"""
    Coords(x, y, z)
    Coords()

Mutable test type with three `Float64` fields `x`, `y` and `z`. The
zero-argument constructor fills each field with a fresh `rand()` value.
"""
mutable struct Coords
    x::Float64
    y::Float64
    z::Float64
end

function Coords()
    x = rand()
    y = rand()
    z = rand()
    return Coords(x, y, z)
end
# Sample values, one per kind of data the serializer distinguishes.
# Vector of integers.
Array_of_Int = [1, 2]
# Vector of tuples.
Array_of_Tuple = [(1, 2), (2, 3)]
# Vector with mixed element kinds (a string and a tuple).
Array_of_Any = ["Ab", (1, 2)]
# NOTE(review): `pi` is an `Irrational`, and the tag table `tcode` has no entry
# for that type — confirm whether a `Float64` value was intended here.
Single_Num = pi
# 3x3 matrix of normally distributed random numbers.
Array_of_Num = randn(3,3)
# One struct with random coordinates.
Single_Struct = Coords()
# Five structs with random coordinates.
Array_of_Struct = [Coords() for i in 1:5]
# Tuple holding a string and a numeric vector.
Single_Tuple = ("Ab", [pi, 2.0])
# A single string.
Single_String = "toto"
# 2x2 matrix of strings.
Array_of_String = ["Ab" "toto"; "titi" "ok"]
# 2x2 matrix of characters.
Array_of_Char = ['a' 'b'; 'c' 'd']
"""
    round_trip(data)

Serialize `data` to the file `io.bin` in the current directory, print a
separator line, then read the file back with `deserialize` and return the
decoded value.
"""
function round_trip(data)
    path = "io.bin"
    open(io -> serialize(io, data), path, "w")
    println("..deserialize..")
    return open(deserialize, path, "r")
end
# Demo: send the array of structs through a serialize/deserialize round trip
# and show the input next to the decoded output.
data = Array_of_Struct
data2 = round_trip(data)
[data data2] # simple side-by-side