I made great progress in understanding allocation mechanism, I’m still failing occasionally to write unallocating vectorized code, not even sure that in the specific cases belowmentioned would it even be possible
Under the allocating vectorized code there is, commented, working code. My concerns are ONLY for lthe last 3 large allocations which are inside the innerloop.
I like vector notation as it makes the intent much more clear.
- function write_xtc_atoms(file::IOStream, precision::Real, coords::AbstractMatrix{T}) where {T <: Real}
0 pos = position(file)
0 natoms = size(coords)[2]
- prev_atom = 0
176 write(file, hton(Int32(natoms)))
0 if natoms <= 9
0 for j in 1:3
0 for atom in 1:natoms
0 write(file, hton(Float32(coords[i, atom])))
- end
- end
- else
352 minint = MVector{3, Int32}(undef)
0 maxint = MVector{3, Int32}(undef)
352 prevcoord = @MVector Int[0, 0, 0]
352 tmp = MVector{3, Int}(undef)
2816 tmpcoords = MMatrix{3, 10, Int}(undef)
7022928 intcoords = Matrix{Int}(undef, 3, natoms)
4390320 buffer = BitBuffer(natoms * 15)
176 write(file, hton(Float32(precision)))
0 minint .= typemax(Int32)
0 maxint .= typemin(Int32)
- mindiff = typemax(Int32)
- prevrun = -1
0 for atom in 1:natoms
0 fc = @SVector Float32[round(coords[i, atom] * precision) for i in 1:3]
0 if any(fc .> max_absolute_int )
0 seek(file, pos)
0 return false
- # scaling would cause overflow
- end
0 for ii in 1:3
0 intcoords[ii, atom] = convert(Int, fc[ii])
0 minint[ii] = intcoords[ii, atom] < minint[ii] ? intcoords[ii, atom] : minint[ii]
0 maxint[ii] = intcoords[ii, atom] > maxint[ii] ? intcoords[ii, atom] : maxint[ii]
0 tmp[ii] = prevcoord[ii] - intcoords[ii, atom]
- end
0 diff = norm(tmp, 1)
0 if diff < mindiff && atom > 1
- mindiff = diff
- end
0 prevcoord .= view(intcoords, :, atom)
- end
0 for ii in 1:3
528 write(file, hton(minint[ii]))
- end
0 for ii in 1:3
528 write(file, hton(maxint[ii]))
- end
0 if any(convert.(Float32, maxint - minint) .>= max_absolute_int)
0 seek(file, pos)
0 return false
- # turning values to unsigned would cause overflow
- end
0 sizeint = SA[maxint[1] - minint[1] + 1,
- maxint[2] - minint[2] + 1,
- maxint[3] - minint[3] + 1]
0 if any( sizeint .> 2^24-1)
0 bitsizeint = sizeofint.(sizeint)
- bitsize = 0
- else
0 bitsize = sizeofints(sizeint)
- end
0 smallidx = FIRSTINDEX
0 while smallidx < LASTIDX && magicints[smallidx + 1] < mindiff
0 smallidx += 1
- end
176 write(file, hton(Int32(smallidx)))
0 maxidx = min(LASTIDX, smallidx + 8)
0 minidx = maxidx - 8
0 smaller = magicints[max(FIRSTINDEX, smallidx - 1) + 1] ÷ 2
0 smallnum = magicints[smallidx + 1] ÷ 2
0 sizesmall = SA[magicints[smallidx + 1], magicints[smallidx + 1], magicints[smallidx + 1]]
0 larger = magicints[maxidx + 1] ÷ 2
- atom = 1
0 while atom <= natoms
- is_small = 0
-
9274176 if smallidx < maxidx && atom > 1 && all(abs.(view(intcoords, :, atom) .- view(intcoords, :, prev_atom)) .< larger)
- # if smallidx < maxidx && atom > 1 &&
- # abs(intcoords[1, atom] - intcoords[1, prev_atom]) < larger &&
- # abs(intcoords[2, atom] - intcoords[2, prev_atom]) < larger &&
- # abs(intcoords[3, atom] - intcoords[3, prev_atom]) < larger
- is_smaller = 1
0 elseif smallidx > minidx
- is_smaller = -1
- else
- is_smaller = 0
- end
- # can we swap? should we swap?
-
9469920 if atom + 1 <= natoms && all(abs.(view(intcoords, :, atom) .- view(intcoords, :, atom + 1)) .< smallnum)
- # tmp .= abs.(view(intcoords, :, atom) .- view(intcoords, :, atom + 1))
- # if atom + 1 <= natoms && tmp[1] < smallnum && tmp[2] < smallnum && tmp[3] < smallnum
-
- # this was kinda shot in the dark, breaks functionality.
- # intcoords[:, atom], intcoords[:, atom + 1] = view(intcoords, :, atom + 1), view(intcoords, :, atom)
0 tmp .= view(intcoords, :, atom)
0 intcoords[:, atom] .= view(intcoords, :, atom + 1)
0 intcoords[:, atom + 1] .= tmp
- is_small = 1
- end
-
0 tmp .= view(intcoords, :, atom) .- minint
-
0 if bitsize == 0
0 for ii in 1:3
0 sendbits!(buffer, tmp[ii], bitsizeint[1])
- end
- else
0 sendints!(buffer, bitsize, sizeint, tmp)
- end
- prev_atom = atom
0 atom += 1
- run = 1
0 if is_small == 0 && is_smaller == -1
- is_smaller = 0
- end
0 while is_small != 0 && run <= 8
-
0 if is_smaller == -1 && norm(view(intcoords, :, atom) - view(intcoords, :, prev_atom)) >= smaller
- # tmp .= view(intcoords, :, atom) .- view(intcoords, :, prev_atom)
- # if is_smaller == -1 && norm(tmp) >= smaller
- is_smaller = 0
- end
0 tmpcoords[:, run] .= view(intcoords, :, atom) .- view(intcoords, :, prev_atom) .+ smallnum
0 run += 1
- prev_atom = atom
0 atom += 1
- is_small = 0
-
18618624 if atom <= natoms && all(abs.(view(intcoords, :, atom) .- view(intcoords, :, prev_atom)) .< smallnum)
-
- # this doesn't work (breaks functionality, why???)
- # tmp .= abs.(view(intcoords, :, atom) .- view(intcoords, :, prev_atom)) .< smallnum
- # if atom <= natoms && tmp[1] < smallnum && tmp[2] < smallnum && tmp[3] < smallnum
-
- # if atom <= natoms &&
- # abs(intcoords[1, atom] - intcoords[1, prev_atom]) < smallnum &&
- # abs(intcoords[2, atom] - intcoords[2, prev_atom]) < smallnum &&
- # abs(intcoords[3, atom] - intcoords[3, prev_atom]) < smallnum
-
0 is_small = 1
- end
- end
0 if prevrun != run || is_smaller != 0
- prevrun = run;
0 sendbits!(buffer, 1, 1)
0 sendbits!(buffer, (run - 1) * 3 + is_smaller + 1, 5)
- else
0 sendbits!(buffer, 0, 1)
- end
0 for k in 1:run - 1
0 sendints!(buffer, smallidx, sizesmall, view(tmpcoords, :, k))
- end
0 if is_smaller != 0
0 smallidx += is_smaller
0 if is_smaller < 0
- smallnum = smaller
0 smaller = magicints[smallidx] ÷ 2
- else
- smaller = smallnum
0 smallnum = magicints[smallidx + 1] ÷ 2
- end
0 sizesmall = SA[magicints[smallidx + 1], magicints[smallidx + 1], magicints[smallidx + 1]]
- end
- end
176 write(file, hton(Int32(length(buffer))))
0 write(file, buffer)
- # xdr format requires 32 bit alignment
0 while mod(position(file), 4) != 0
0 write(file, zero(UInt8))
- end
- end
0 return true
- end