I’m writing code to read and write 1D arrays to binary files. I want the read function to infer the element type and read back the data, I had initially a large slowdown that I partially resolved by splitting the read function into two. I am still however getting a difference in the read times that depends on the length of the array read back when compared to a fully typed function.
I would appreciate an explanation and a suggestion of how to fix the issue.
This is the write function:
function write_bin(x::Array{T, 1}, fileName::String)::Int64 where T
# Open the file
io = open(fileName,"w")
# Cast this number to make sure we know its type
write(io, Int64(size(x)[1]))
# Get the type as a string
typ = repr(T)
# Write the length of the type string
write(io, Int64(length(typ)))
# Now write the type string
for i in eachindex(typ)
write(io, Char(typ[i]))
end
# Now write the array
for i in eachindex(x)
write(io, x[i])
end
# Clean up
close(io)
return 0;
end
This is the inferred type read function:
# Sub Function which speeds up the read
function read_bin(io::IO, ::Type{T}, n::Int64) where T
# The array to be returned
x = Array{T, 1}(uninitialized, n)
@time for i in eachindex(x)
x[i] = read(io, T)
end
close(io)
return x
end
# The read function
function read_bin(fileName::String)
# Open the file
io = open(fileName, "r")
# Read the total number of elements in the resulting array
n = read(io, Int64)
# Read the length of the type name
nt = read(io, Int64)
# println("Number of elements: $n")
# Then read the type name
cName = Array{Char}(uninitialized, nt)
for i in eachindex(cName)
cName[i] = read(io, Char)
end
# The return type
T = eval(Symbol(String(cName)))
# The data
x = read_bin(io, T, n)
return x
end
and the explicitly typed read function:
function read_bin(fileName::String, ::Type{T}) where T
# Open the file
io = open(fileName, "r")
# Read the total number of elements in the resulting array
n = read(io, Int64)
# Read the length of the type name
nt = read(io, Int64)
# Then read the type name
cName = Array{Char}(uninitialized, nt)
for i in eachindex(cName)
cName[i] = read(io, Char)
end
# The array to be returned
x = Array{T, 1}(uninitialized, n)
@time for i in eachindex(x)
x[i] = read(io, T)
end
close(io)
return x
end
This is the benchmark (generates 800MB file size):
# Warm up
binFile = "data.bin"
n = 100;
arr1 = rand(Float64, n);
write_bin(arr1, binFile);
arr2 = read_bin(binFile);
arr3 = read_bin(binFile, eltype(arr1));
rm(binFile)
# Timed write read
n = 100_000_000;
arr1 = rand(Float64, n);
write_bin(arr1, binFile);
arr2 = read_bin(binFile);
arr3 = read_bin(binFile, eltype(arr1));
rm(binFile)