I am trying to parallelize a part of my code where different values of an array x
are populated asynchronously. The serial function of my full code is as follows
using Distributed
using MKL
using DelimitedFiles
using LinearAlgebra
using LaTeXStrings
using TimerOutputs
using Plots
using Plots.PlotMeasures
using FFTW
using SharedArrays
BLAS.set_num_threads(1)
const to = TimerOutput()
println("Number of processes = 1")
open("ISC-SERIAL.TXT", "a") do f
write(f, "\n\n\n====================================================\n\n\n")
end
function GDSO(
t::Float64,
b::Float64,
Ξ::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Ο::Diagonal{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64}
)
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
BB = (Bf + J' * Bi * J)
AA = (Sf + J' * Si * J)
W = [BB -AA
-AA BB] #We wont calculate the 2Nx2N determinant directly.
# Wapp = BB*(BB- AA*(BB^-1)*AA )
Wapp = BB * (BB - AA * (BB \ AA))
U = Bi - Si
V = [J' * U * D
J' * U * D]
# F1 = sqrt(det(Si*Sf*Ο^2*Wapp^-1))
# F1 = sqrt(det(Si*Sf*Ο^2)/det(Wapp))
F1 = sqrt(det(Si * Sf * (Wapp \ Ο^2)))
F2 = (-(im / 2) * (transpose(V) * (W \ V))) + ((im) * (transpose(D) * U * D))
g = F1 * exp(F2)
Ξ = Ξ * 0.0367493 # in atomic units
g = g * exp(im * Ξ * t)
Ξ· = 2 * 4.55633e-6 # 10 cm^-1 to hatree energy
#g = g * exp(-Ξ· * abs(t)) # Adding the damping factor Lorrentzian
g = g * exp(-Ξ· * t^2) # Adding the damping factor Gaussian
# println("t = $t was evaluated by process $(myid())")
return g[1]
end
@views function GSV(
t::Float64,
b::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64},
X11::Matrix{ComplexF64},
X12::Matrix{ComplexF64},
X21::Matrix{ComplexF64},
X22::Matrix{ComplexF64},
Y1::Matrix{ComplexF64},
Y2::Matrix{ComplexF64},
gdsopart::ComplexF64,
T::Matrix{Float64}, ## Product if NAC and SO
Hso::Float64
)
################################## Preliminary Part , Common for both Frank Condon and Herzberg Teller ##############################
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
W = [(Bf+J'*Bi*J) -(Sf + J' * Si * J)
-(Sf + J' * Si * J) (Bf+J'*Bi*J)]
U = Bi - Si
V = [J' * U * D
J' * U * D]
########## Defining the X and Y Matrices ##################################
################# Final Calculation and returning value ##################
s = zero(ComplexF64)
MJSJ = J' * Si * J
MJBJ = J' * Bi * J
MJUD = J' * U * D
Win = inv(W)
WinV = Win * V
X = zeros(ComplexF64, 2 * sysize, 2 * sysize)
Y = zeros(ComplexF64, 2 * sysize, 1)
XWin = similar(Win)
for m in 1:sysize
for mp in 1:sysize
X11[m, :] .= (MJSJ)[mp, :] .* (-Bf[m, m])
X12[m, :] .= (MJBJ)[mp, :] .* (Bf[m, m])
X21[m, :] .= (MJSJ)[mp, :] .* (Sf[m, m])
X22[m, :] .= (MJBJ)[mp, :] .* (-Sf[m, m])
# X = [X11 X12
# X21 X22]
X[1:sysize, 1:sysize] .= X11
X[1:sysize, sysize+1:2*sysize] .= X12
X[sysize+1:2*sysize, 1:sysize] .= X21
X[sysize+1:2*sysize, sysize+1:2*sysize] .= X22
Y1[m, :] .= Bf[m, m] * (MJUD)[mp, :]
Y2[m, :] .= -Sf[m, m] * (MJUD)[mp, :]
Y[1:sysize, 1] .= Y1
Y[sysize+1:2*sysize, 1] .= Y2
g = im * (tr(mul!(XWin, X, Win)) + (transpose(WinV)*X*WinV)[1] - (transpose(Y)*WinV)[1])
s = s + g * gdsopart * T[m, mp]
end
X11[m, 1:sysize] .= 0
X12[m, 1:sysize] .= 0
X21[m, 1:sysize] .= 0
X22[m, 1:sysize] .= 0
Y1[m, 1] = 0
Y2[m, 1] = 0
end
s = s + abs(Hso)^2 * gdsopart #### LATEST MODIFICATION
return s
end
const hso_s1t3 = 1.49 * 0.0000046 ## SOC between S1 and T3 = 0.48 cm-1, converting to atomic units
const Delta = 0.11 # in eV
const Delta_s1t3 = 0.091 * 0.037 # Difference between S1 and T3 = 0.091 eV in atomic units
const dd = 22 # Change this to modify number of points
const Temp = 300
kT_temp = Temp * 1.380649e-23 # Boltzmann constant times temperature in Joules
const kT = kT_temp * 2.29371227840e17 # in atomic units
const b = 1 / kT
const omegaf = Diagonal(vec(readdlm("wf.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Final state frequencies in atomic units
const omegai = Diagonal(vec(readdlm("wi.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Initial state frequencies in atomic units
const sysize = size(omegai)[1]
const P = @. 2 * sinh(b * omegai / 2)
T_temp = readdlm("nac.txt", '\t', Float64, '\n') # NAC matrix
const T = (T_temp * T_temp')* (hso_s1t3^2/Delta_s1t3^2)
const D = readdlm("d.txt", Float64) # Displacement Matrix
const J = readdlm("j.txt", Float64) # Duchinsky Matrix
const t = collect(range(-5e-12, stop=5e-12, length=dd))
t[div(dd + 1, 2)] = 10e-25
function calc(t, b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P)
# @sync tells Julia to wait for any tasks started inside it
open("ISC-SERIAL.TXT", "a") do f
write(f, "STARTING LOOP WITH NUMBER OF CORES = 1 \n====================================================\n")
end
x = zeros(ComplexF64, dd)
Sf = Diagonal(zeros(ComplexF64, sysize))
Si = Diagonal(zeros(ComplexF64, sysize))
Bf = Diagonal(zeros(ComplexF64, sysize))
Bi = Diagonal(zeros(ComplexF64, sysize))
X11 = zeros(ComplexF64, sysize, sysize)
X12 = zeros(ComplexF64, sysize, sysize)
X21 = zeros(ComplexF64, sysize, sysize)
X22 = zeros(ComplexF64, sysize, sysize)
Y1 = zeros(ComplexF64, sysize, 1)
Y2 = zeros(ComplexF64, sysize, 1)
Hso = 0.48 * 0.0000046 ## SOC between S1 and T2 = 0.48 cm-1, converting to atomic units
@timeit to "GT calculation" for i in 1:dd
gdsopart = GDSO(t[i], b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P, Si, Sf, Bi, Bf)
gtotal = GSV(t[i], b, sysize, Ξ©f, Ξ©i, J, D, Si, Sf, Bi, Bf, X11, X12, X21, X22, Y1, Y2, gdsopart, T,Hso)
x[i] = gtotal
open("ISC-SERIAL.TXT", "a") do f
write(f, "ITERATION $i COMPLETED BY PROCESS 1 \n")
end
end # loop
open("ISC-SERIAL.TXT", "a") do f
write(f, "LOOP COMPLETE \n====================================================")
end
yr = real.(x)
yi = imag.(x)
p = plot(t, yr, linewidth=2, color="blue", title="Ξ = $Ξ", label="Real part", grid=false)
p = plot!(t, yi, linewidth=2, color="red", label="Imaginary part", size=(1000, 800), xlims=(-30e-15, 30e-15))
savefig(p, "gt.svg")
begin # This part does FFTW and finds final rate constant
fs = abs(t[1] - t[2])^-1
signal = yr
p = plan_fft(signal)
F = fftshift(p * signal)
freqs = fftshift(fftfreq(length(t), fs)) # X axis has frequencies in s^-1
#Covert freqs to eV
freqs = freqs * 4.13558e-15
F = F * 6.57e15
time_domain = plot(t, signal, title=L"G(t)", legend=false, color="orange", xlabel=L"\mathrm{Time}\ (s)", ylabel=L"\mathrm{A.u}")
freq_domain = plot(freqs, abs.(F), title=L"\mathrm{Fourier\ transformed\ g(\bar{\nu}))}", legend=false, xlabel=L"\mathrm{Energy\ (eV)}", ylabel=L"\mathrm{k_{IC}\ (s^{-1})}", xlims=(0, 4))
p = plot(time_domain, freq_domain, layout=(2, 1), size=(1000, 800), right_margin=10mm)
savefig(p, "gt-fft.svg")
ee = Ξ# * 8065.73 #Converting eV to cm^-1
index = findmin(abs.(freqs .- ee))[2] # Find the index of freq closest to ee
freqs[index]
println("The rate constant is ", abs.(F)[index])
rc = abs.(F)[index]
end
#################################################This part writes the output to a file#######################################################
begin
open("ISC-SERIAL.TXT", "a") do f
write(f, "-------------------------\n")
write(f, "NUMBER OF CORES = 1\n")
write(f, "The value of Ξ is $Ξ\n")
write(f, "Number of time points = $dd\n")
write(f, "The rate constant calculated from Julia FFTW = $rc\n")
write(f, "-------------------------\n")
write(f, "The values of t,Re(g(t)),Im(g(t)) are \n\n")
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
open("GT-VALUES-SERIAL.txt", "w") do f
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
end
end
calc(t, b, Delta, sysize, omegaf, omegai, J, D, P)
display(to)
Which I have parralelized by declaring all the functions and common variables as @everywhere
and adding @sync @distributed
macro to the intended loop.
using Distributed
@everywhere begin
using MKL
using DelimitedFiles
using LinearAlgebra
using LaTeXStrings
using TimerOutputs
using Plots
using Plots.PlotMeasures
using FFTW
using SharedArrays
BLAS.set_num_threads(1)
end
const to = TimerOutput()
println("Number of processes = $(Distributed.nprocs())")
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "\n\n\n====================================================\n\n\n")
end
@everywhere function GDSO(
t::Float64,
b::Float64,
Ξ::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Ο::Diagonal{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64}
)
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
BB = (Bf + J' * Bi * J)
AA = (Sf + J' * Si * J)
W = [BB -AA
-AA BB] #We wont calculate the 2Nx2N determinant directly.
# Wapp = BB*(BB- AA*(BB^-1)*AA )
Wapp = BB * (BB - AA * (BB \ AA))
U = Bi - Si
V = [J' * U * D
J' * U * D]
# F1 = sqrt(det(Si*Sf*Ο^2*Wapp^-1))
# F1 = sqrt(det(Si*Sf*Ο^2)/det(Wapp))
F1 = sqrt(det(Si * Sf * (Wapp \ Ο^2)))
F2 = (-(im / 2) * (transpose(V) * (W \ V))) + ((im) * (transpose(D) * U * D))
g = F1 * exp(F2)
Ξ = Ξ * 0.0367493 # in atomic units
g = g * exp(im * Ξ * t)
Ξ· = 2 * 4.55633e-6 # 10 cm^-1 to hatree energy
#g = g * exp(-Ξ· * abs(t)) # Adding the damping factor Lorrentzian
g = g * exp(-Ξ· * t^2) # Adding the damping factor Gaussian
# println("t = $t was evaluated by process $(myid())")
return g[1]
end
@everywhere @views function GSV(
t::Float64,
b::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64},
X11::Matrix{ComplexF64},
X12::Matrix{ComplexF64},
X21::Matrix{ComplexF64},
X22::Matrix{ComplexF64},
Y1::Matrix{ComplexF64},
Y2::Matrix{ComplexF64},
gdsopart::ComplexF64,
T::Matrix{Float64}, ## Product if NAC and SO
Hso::Float64
)
################################## Preliminary Part , Common for both Frank Condon and Herzberg Teller ##############################
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
W = [(Bf+J'*Bi*J) -(Sf + J' * Si * J)
-(Sf + J' * Si * J) (Bf+J'*Bi*J)]
U = Bi - Si
V = [J' * U * D
J' * U * D]
########## Defining the X and Y Matrices ##################################
################# Final Calculation and returning value ##################
s = zero(ComplexF64)
MJSJ = J' * Si * J
MJBJ = J' * Bi * J
MJUD = J' * U * D
Win = inv(W)
WinV = Win * V
X = zeros(ComplexF64, 2 * sysize, 2 * sysize)
Y = zeros(ComplexF64, 2 * sysize, 1)
XWin = similar(Win)
for m in 1:sysize
for mp in 1:sysize
X11[m, :] .= (MJSJ)[mp, :] .* (-Bf[m, m])
X12[m, :] .= (MJBJ)[mp, :] .* (Bf[m, m])
X21[m, :] .= (MJSJ)[mp, :] .* (Sf[m, m])
X22[m, :] .= (MJBJ)[mp, :] .* (-Sf[m, m])
# X = [X11 X12
# X21 X22]
X[1:sysize, 1:sysize] .= X11
X[1:sysize, sysize+1:2*sysize] .= X12
X[sysize+1:2*sysize, 1:sysize] .= X21
X[sysize+1:2*sysize, sysize+1:2*sysize] .= X22
Y1[m, :] .= Bf[m, m] * (MJUD)[mp, :]
Y2[m, :] .= -Sf[m, m] * (MJUD)[mp, :]
Y[1:sysize, 1] .= Y1
Y[sysize+1:2*sysize, 1] .= Y2
g = im * (tr(mul!(XWin, X, Win)) + (transpose(WinV)*X*WinV)[1] - (transpose(Y)*WinV)[1])
s = s + g * gdsopart * T[m, mp]
end
X11[m, 1:sysize] .= 0
X12[m, 1:sysize] .= 0
X21[m, 1:sysize] .= 0
X22[m, 1:sysize] .= 0
Y1[m, 1] = 0
Y2[m, 1] = 0
end
s = s + abs(Hso)^2 * gdsopart #### LATEST MODIFICATION
return s
end
@everywhere begin
const hso_s1t3 = 1.49 * 0.0000046 ## SOC between S1 and T3 = 0.48 cm-1, converting to atomic units
const Delta = 0.11 # in eV
const Delta_s1t3 = 0.091 * 0.037 # Difference between S1 and T3 = 0.091 eV in atomic units
const dd = 22 # Change this to modify number of points
const Temp = 300
kT_temp = Temp * 1.380649e-23 # Boltzmann constant times temperature in Joules
const kT = kT_temp * 2.29371227840e17 # in atomic units
const b = 1 / kT
const omegaf = Diagonal(vec(readdlm("wf.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Final state frequencies in atomic units
const omegai = Diagonal(vec(readdlm("wi.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Initial state frequencies in atomic units
const sysize = size(omegai)[1]
const P = @. 2 * sinh(b * omegai / 2)
T_temp = readdlm("nac.txt", '\t', Float64, '\n') # NAC matrix
const T = (T_temp * T_temp')* (hso_s1t3^2/Delta_s1t3^2)
const D = readdlm("d.txt", Float64) # Displacement Matrix
const J = readdlm("j.txt", Float64) # Duchinsky Matrix
const t = collect(range(-5e-12, stop=5e-12, length=dd))
t[div(dd + 1, 2)] = 10e-25
end
function calc(t, b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P)
# @sync tells Julia to wait for any tasks started inside it
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "STARTING LOOP WITH NUMBER OF CORES = $(Distributed.nprocs()) \n====================================================\n")
end
x = zeros(ComplexF64, dd)
Sf = Diagonal(zeros(ComplexF64, sysize))
Si = Diagonal(zeros(ComplexF64, sysize))
Bf = Diagonal(zeros(ComplexF64, sysize))
Bi = Diagonal(zeros(ComplexF64, sysize))
X11 = zeros(ComplexF64, sysize, sysize)
X12 = zeros(ComplexF64, sysize, sysize)
X21 = zeros(ComplexF64, sysize, sysize)
X22 = zeros(ComplexF64, sysize, sysize)
Y1 = zeros(ComplexF64, sysize, 1)
Y2 = zeros(ComplexF64, sysize, 1)
Hso = 0.48 * 0.0000046 ## SOC between S1 and T2 = 0.48 cm-1, converting to atomic units
@timeit to "GT calculation" @sync @distributed for i in 1:dd
gdsopart = GDSO(t[i], b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P, Si, Sf, Bi, Bf)
gtotal = GSV(t[i], b, sysize, Ξ©f, Ξ©i, J, D, Si, Sf, Bi, Bf, X11, X12, X21, X22, Y1, Y2, gdsopart, T,Hso)
x[i] = gtotal
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "ITERATION $i COMPLETED BY PROCESS $(Distributed.myid()) \n")
end
end # loop
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "LOOP COMPLETE \n====================================================")
end
yr = real.(x)
yi = imag.(x)
p = plot(t, yr, linewidth=2, color="blue", title="Ξ = $Ξ", label="Real part", grid=false)
p = plot!(t, yi, linewidth=2, color="red", label="Imaginary part", size=(1000, 800), xlims=(-30e-15, 30e-15))
savefig(p, "gt.svg")
begin # This part does FFTW and finds final rate constant
fs = abs(t[1] - t[2])^-1
signal = yr
p = plan_fft(signal)
F = fftshift(p * signal)
freqs = fftshift(fftfreq(length(t), fs)) # X axis has frequencies in s^-1
#Covert freqs to eV
freqs = freqs * 4.13558e-15
F = F * 6.57e15
time_domain = plot(t, signal, title=L"G(t)", legend=false, color="orange", xlabel=L"\mathrm{Time}\ (s)", ylabel=L"\mathrm{A.u}")
freq_domain = plot(freqs, abs.(F), title=L"\mathrm{Fourier\ transformed\ g(\bar{\nu}))}", legend=false, xlabel=L"\mathrm{Energy\ (eV)}", ylabel=L"\mathrm{k_{IC}\ (s^{-1})}", xlims=(0, 4))
p = plot(time_domain, freq_domain, layout=(2, 1), size=(1000, 800), right_margin=10mm)
savefig(p, "gt-fft.svg")
ee = Ξ# * 8065.73 #Converting eV to cm^-1
index = findmin(abs.(freqs .- ee))[2] # Find the index of freq closest to ee
freqs[index]
println("The rate constant is ", abs.(F)[index])
rc = abs.(F)[index]
end
#################################################This part writes the output to a file#######################################################
begin
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "-------------------------\n")
write(f, "NUMBER OF CORES = 1\n")
write(f, "The value of Ξ is $Ξ\n")
write(f, "Number of time points = $dd\n")
write(f, "The rate constant calculated from Julia FFTW = $rc\n")
write(f, "-------------------------\n")
write(f, "The values of t,Re(g(t)),Im(g(t)) are \n\n")
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
open("GT-VALUES-DISTRIBTUED.txt", "w") do f
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
end
end
calc(t, b, Delta, sysize, omegaf, omegai, J, D, P)
display(to)
However, it seems the array x
is not being populated at all as itβs values are still 0.0 after the parallel loop is ran as indicated by the output GT-VALUES
which reads
-5.0e-12 0.0 0.0
-4.0e-12 0.0 0.0
-2.9999999999999997e-12 0.0 0.0
-2.0e-12 0.0 0.0
-1.0e-12 0.0 0.0
1.0e-24 6.394092795554566e-12 4.2656647577696985e-16
1.0e-12 0.0 0.0
2.0e-12 0.0 0.0
2.9999999999999997e-12 0.0 0.0
4.0e-12 0.0 0.0
5.0e-12 0.0 0.0
There is probably some data race condition going on that I am unable to detect which might be causing the problem. Please help me if possible in identifying and isolating the same.