I have a simple (but possibly enormous) text file with five columns: the first holds a floating-point value and the other four hold integer values. Each line looks like
F p q r s
Basically, I want to write a tensor to disk that has M[p,q,r,s] = F. Once all the entries of the tensor that can be filled directly from the text file have been filled, the remaining ones are filled using permutational symmetries. Here is how I am doing it now:
begin
    # Scan the file only as far as the first line starting with " &FCI NORB=".
    # The do-block form of `open` closes the file even if an error is thrown,
    # and the `return` inside it leaves the anonymous function, not the script.
    header = open(pathtofcidump, "r") do io
        for line in eachline(io)
            startswith(line, " &FCI NORB=") && return line
        end
        return nothing
    end
    header === nothing &&
        error("no line starting with \" &FCI NORB=\" found in $pathtofcidump")

    # Pull the integers out by key rather than by splitting on spaces, so that
    # both "NORB=  12" and "NORB=12" (no space after `=`) are accepted.
    # NOTE(review): the second comma-separated field is assumed to be NELEC=,
    # as the variable name `nelec` suggests — confirm against your files.
    norbmatch = match(r"NORB\s*=\s*(\d+)", header)
    nelecmatch = match(r"NELEC\s*=\s*(\d+)", header)
    (norbmatch === nothing || nelecmatch === nothing) &&
        error("could not read NORB/NELEC from header line: $header")

    # Assigned here, outside any loop body: a first assignment inside a `for`
    # body creates a loop-local variable that is gone once the loop ends, which
    # would leave `norb` undefined for the code that follows.
    norb = parse(Int, norbmatch[1])
    nelec = parse(Int, nelecmatch[1])
end
# Read the numeric records that follow the header. `linenum` (the number of
# leading lines to skip) is not assigned anywhere in the visible code.
# NOTE(review): confirm it is set before this point.
data = readdlm(pathtofcidump, skipstart=linenum)
# The first column of the last row is kept separately as `hnuc`.
hnuc::Float64 = data[end, 1]
# Indexing with a range already allocates a new array, so no extra `copy`.
data = data[1:end-1, :]
h::Array{Float64,2} = zeros(Float64, norb, norb)
g::Array{Float64,4} = zeros(Float64, norb, norb, norb, norb)
# Row count taken directly instead of copying a column just to measure it.
l::Int64 = size(data, 1)
# Index quadruples that were read from the file, in file order. Stored as
# tuples in a concretely typed vector: they are stored inline and still
# destructure as `(I, J, K, L)` when iterated.
non_redundant_indices = NTuple{4,Int}[]
for i in 1:l
    I = round(Int, data[i, 2])
    J = round(Int, data[i, 3])
    if data[i, 4] == 0 && data[i, 5] == 0
        # Rows whose last two indices are both zero fill the 2-index array.
        h[I, J] = data[i, 1]
    else
        K = round(Int, data[i, 4])
        L = round(Int, data[i, 5])
        push!(non_redundant_indices, (I, J, K, L))
        g[I, J, K, L] = data[i, 1]
    end
end
# Log every index quadruple that came from the file together with its value,
# and copy that value to the seven index permutations listed below.
# The log file is opened once (append mode, as before) instead of being
# reopened for every record.
open("non_redundant_indices.txt", "a") do f
    for (I, J, K, L) in non_redundant_indices
        v = g[I, J, K, L]
        # Indices are separated by spaces; printed back-to-back, "1 11 1 1"
        # and "11 1 1 1" would both come out as "11111".
        println(f, I, " ", J, " ", K, " ", L, " ", v)
        g[K, L, I, J] = v
        g[J, I, L, K] = v
        g[L, K, J, I] = v
        g[J, I, K, L] = v
        g[L, K, I, J] = v
        g[I, J, L, K] = v
        g[K, L, J, I] = v
    end
end
# Write the completed tensor to disk.
serialize("g.jlbin", g)
But this has the obvious disadvantage of holding the entire g::Array{Float64,4} in memory at once. We want to avoid this at all costs because norb can take values upwards of 100 (at norb = 100 the tensor already holds 10^8 Float64 values, i.e. about 800 MB). I want to get to the end of this process (which entails having the entire tensor on disk) without ever holding the full tensor in memory. Any idea how to do that? A sample file that I am trying to read can be found here.