I am trying to parallelize a part of my code where different values of an array `x`

are populated asynchronously. The serial function of my full code is as follows

```
using Distributed
using MKL
using DelimitedFiles
using LinearAlgebra
using LaTeXStrings
using TimerOutputs
using Plots
using Plots.PlotMeasures
using FFTW
using SharedArrays
BLAS.set_num_threads(1)
const to = TimerOutput()
println("Number of processes = 1")
open("ISC-SERIAL.TXT", "a") do f
write(f, "\n\n\n====================================================\n\n\n")
end
function GDSO(
t::Float64,
b::Float64,
Ξ::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Ο::Diagonal{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64}
)
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
BB = (Bf + J' * Bi * J)
AA = (Sf + J' * Si * J)
W = [BB -AA
-AA BB] #We wont calculate the 2Nx2N determinant directly.
# Wapp = BB*(BB- AA*(BB^-1)*AA )
Wapp = BB * (BB - AA * (BB \ AA))
U = Bi - Si
V = [J' * U * D
J' * U * D]
# F1 = sqrt(det(Si*Sf*Ο^2*Wapp^-1))
# F1 = sqrt(det(Si*Sf*Ο^2)/det(Wapp))
F1 = sqrt(det(Si * Sf * (Wapp \ Ο^2)))
F2 = (-(im / 2) * (transpose(V) * (W \ V))) + ((im) * (transpose(D) * U * D))
g = F1 * exp(F2)
Ξ = Ξ * 0.0367493 # in atomic units
g = g * exp(im * Ξ * t)
Ξ· = 2 * 4.55633e-6 # 10 cm^-1 to hatree energy
#g = g * exp(-Ξ· * abs(t)) # Adding the damping factor Lorrentzian
g = g * exp(-Ξ· * t^2) # Adding the damping factor Gaussian
# println("t = $t was evaluated by process $(myid())")
return g[1]
end
@views function GSV(
t::Float64,
b::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64},
X11::Matrix{ComplexF64},
X12::Matrix{ComplexF64},
X21::Matrix{ComplexF64},
X22::Matrix{ComplexF64},
Y1::Matrix{ComplexF64},
Y2::Matrix{ComplexF64},
gdsopart::ComplexF64,
T::Matrix{Float64}, ## Product if NAC and SO
Hso::Float64
)
################################## Preliminary Part , Common for both Frank Condon and Herzberg Teller ##############################
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
W = [(Bf+J'*Bi*J) -(Sf + J' * Si * J)
-(Sf + J' * Si * J) (Bf+J'*Bi*J)]
U = Bi - Si
V = [J' * U * D
J' * U * D]
########## Defining the X and Y Matrices ##################################
################# Final Calculation and returning value ##################
s = zero(ComplexF64)
MJSJ = J' * Si * J
MJBJ = J' * Bi * J
MJUD = J' * U * D
Win = inv(W)
WinV = Win * V
X = zeros(ComplexF64, 2 * sysize, 2 * sysize)
Y = zeros(ComplexF64, 2 * sysize, 1)
XWin = similar(Win)
for m in 1:sysize
for mp in 1:sysize
X11[m, :] .= (MJSJ)[mp, :] .* (-Bf[m, m])
X12[m, :] .= (MJBJ)[mp, :] .* (Bf[m, m])
X21[m, :] .= (MJSJ)[mp, :] .* (Sf[m, m])
X22[m, :] .= (MJBJ)[mp, :] .* (-Sf[m, m])
# X = [X11 X12
# X21 X22]
X[1:sysize, 1:sysize] .= X11
X[1:sysize, sysize+1:2*sysize] .= X12
X[sysize+1:2*sysize, 1:sysize] .= X21
X[sysize+1:2*sysize, sysize+1:2*sysize] .= X22
Y1[m, :] .= Bf[m, m] * (MJUD)[mp, :]
Y2[m, :] .= -Sf[m, m] * (MJUD)[mp, :]
Y[1:sysize, 1] .= Y1
Y[sysize+1:2*sysize, 1] .= Y2
g = im * (tr(mul!(XWin, X, Win)) + (transpose(WinV)*X*WinV)[1] - (transpose(Y)*WinV)[1])
s = s + g * gdsopart * T[m, mp]
end
X11[m, 1:sysize] .= 0
X12[m, 1:sysize] .= 0
X21[m, 1:sysize] .= 0
X22[m, 1:sysize] .= 0
Y1[m, 1] = 0
Y2[m, 1] = 0
end
s = s + abs(Hso)^2 * gdsopart #### LATEST MODIFICATION
return s
end
const hso_s1t3 = 1.49 * 0.0000046 ## SOC between S1 and T3 = 0.48 cm-1, converting to atomic units
const Delta = 0.11 # in eV
const Delta_s1t3 = 0.091 * 0.037 # Difference between S1 and T3 = 0.091 eV in atomic units
const dd = 22 # Change this to modify number of points
const Temp = 300
kT_temp = Temp * 1.380649e-23 # Boltzmann constant times temperature in Joules
const kT = kT_temp * 2.29371227840e17 # in atomic units
const b = 1 / kT
const omegaf = Diagonal(vec(readdlm("wf.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Final state frequencies in atomic units
const omegai = Diagonal(vec(readdlm("wi.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Initial state frequencies in atomic units
const sysize = size(omegai)[1]
const P = @. 2 * sinh(b * omegai / 2)
T_temp = readdlm("nac.txt", '\t', Float64, '\n') # NAC matrix
const T = (T_temp * T_temp')* (hso_s1t3^2/Delta_s1t3^2)
const D = readdlm("d.txt", Float64) # Displacement Matrix
const J = readdlm("j.txt", Float64) # Duchinsky Matrix
const t = collect(range(-5e-12, stop=5e-12, length=dd))
t[div(dd + 1, 2)] = 10e-25
function calc(t, b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P)
# @sync tells Julia to wait for any tasks started inside it
open("ISC-SERIAL.TXT", "a") do f
write(f, "STARTING LOOP WITH NUMBER OF CORES = 1 \n====================================================\n")
end
x = zeros(ComplexF64, dd)
Sf = Diagonal(zeros(ComplexF64, sysize))
Si = Diagonal(zeros(ComplexF64, sysize))
Bf = Diagonal(zeros(ComplexF64, sysize))
Bi = Diagonal(zeros(ComplexF64, sysize))
X11 = zeros(ComplexF64, sysize, sysize)
X12 = zeros(ComplexF64, sysize, sysize)
X21 = zeros(ComplexF64, sysize, sysize)
X22 = zeros(ComplexF64, sysize, sysize)
Y1 = zeros(ComplexF64, sysize, 1)
Y2 = zeros(ComplexF64, sysize, 1)
Hso = 0.48 * 0.0000046 ## SOC between S1 and T2 = 0.48 cm-1, converting to atomic units
@timeit to "GT calculation" for i in 1:dd
gdsopart = GDSO(t[i], b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P, Si, Sf, Bi, Bf)
gtotal = GSV(t[i], b, sysize, Ξ©f, Ξ©i, J, D, Si, Sf, Bi, Bf, X11, X12, X21, X22, Y1, Y2, gdsopart, T,Hso)
x[i] = gtotal
open("ISC-SERIAL.TXT", "a") do f
write(f, "ITERATION $i COMPLETED BY PROCESS 1 \n")
end
end # loop
open("ISC-SERIAL.TXT", "a") do f
write(f, "LOOP COMPLETE \n====================================================")
end
yr = real.(x)
yi = imag.(x)
p = plot(t, yr, linewidth=2, color="blue", title="Ξ = $Ξ", label="Real part", grid=false)
p = plot!(t, yi, linewidth=2, color="red", label="Imaginary part", size=(1000, 800), xlims=(-30e-15, 30e-15))
savefig(p, "gt.svg")
begin # This part does FFTW and finds final rate constant
fs = abs(t[1] - t[2])^-1
signal = yr
p = plan_fft(signal)
F = fftshift(p * signal)
freqs = fftshift(fftfreq(length(t), fs)) # X axis has frequencies in s^-1
#Covert freqs to eV
freqs = freqs * 4.13558e-15
F = F * 6.57e15
time_domain = plot(t, signal, title=L"G(t)", legend=false, color="orange", xlabel=L"\mathrm{Time}\ (s)", ylabel=L"\mathrm{A.u}")
freq_domain = plot(freqs, abs.(F), title=L"\mathrm{Fourier\ transformed\ g(\bar{\nu}))}", legend=false, xlabel=L"\mathrm{Energy\ (eV)}", ylabel=L"\mathrm{k_{IC}\ (s^{-1})}", xlims=(0, 4))
p = plot(time_domain, freq_domain, layout=(2, 1), size=(1000, 800), right_margin=10mm)
savefig(p, "gt-fft.svg")
ee = Ξ# * 8065.73 #Converting eV to cm^-1
index = findmin(abs.(freqs .- ee))[2] # Find the index of freq closest to ee
freqs[index]
println("The rate constant is ", abs.(F)[index])
rc = abs.(F)[index]
end
#################################################This part writes the output to a file#######################################################
begin
open("ISC-SERIAL.TXT", "a") do f
write(f, "-------------------------\n")
write(f, "NUMBER OF CORES = 1\n")
write(f, "The value of Ξ is $Ξ\n")
write(f, "Number of time points = $dd\n")
write(f, "The rate constant calculated from Julia FFTW = $rc\n")
write(f, "-------------------------\n")
write(f, "The values of t,Re(g(t)),Im(g(t)) are \n\n")
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
open("GT-VALUES-SERIAL.txt", "w") do f
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
end
end
calc(t, b, Delta, sysize, omegaf, omegai, J, D, P)
display(to)
```

Which I have parralelized by declaring all the functions and common variables as `@everywhere`

and adding `@sync @distributed`

macro to the intended loop.

```
using Distributed
@everywhere begin
using MKL
using DelimitedFiles
using LinearAlgebra
using LaTeXStrings
using TimerOutputs
using Plots
using Plots.PlotMeasures
using FFTW
using SharedArrays
BLAS.set_num_threads(1)
end
const to = TimerOutput()
println("Number of processes = $(Distributed.nprocs())")
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "\n\n\n====================================================\n\n\n")
end
@everywhere function GDSO(
t::Float64,
b::Float64,
Ξ::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Ο::Diagonal{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64}
)
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
BB = (Bf + J' * Bi * J)
AA = (Sf + J' * Si * J)
W = [BB -AA
-AA BB] #We wont calculate the 2Nx2N determinant directly.
# Wapp = BB*(BB- AA*(BB^-1)*AA )
Wapp = BB * (BB - AA * (BB \ AA))
U = Bi - Si
V = [J' * U * D
J' * U * D]
# F1 = sqrt(det(Si*Sf*Ο^2*Wapp^-1))
# F1 = sqrt(det(Si*Sf*Ο^2)/det(Wapp))
F1 = sqrt(det(Si * Sf * (Wapp \ Ο^2)))
F2 = (-(im / 2) * (transpose(V) * (W \ V))) + ((im) * (transpose(D) * U * D))
g = F1 * exp(F2)
Ξ = Ξ * 0.0367493 # in atomic units
g = g * exp(im * Ξ * t)
Ξ· = 2 * 4.55633e-6 # 10 cm^-1 to hatree energy
#g = g * exp(-Ξ· * abs(t)) # Adding the damping factor Lorrentzian
g = g * exp(-Ξ· * t^2) # Adding the damping factor Gaussian
# println("t = $t was evaluated by process $(myid())")
return g[1]
end
@everywhere @views function GSV(
t::Float64,
b::Float64,
sysize::Int64,
Ξ©f::Diagonal{Float64},
Ξ©i::Diagonal{Float64},
J::Matrix{Float64},
D::Matrix{Float64},
Si::Diagonal{ComplexF64},
Sf::Diagonal{ComplexF64},
Bi::Diagonal{ComplexF64},
Bf::Diagonal{ComplexF64},
X11::Matrix{ComplexF64},
X12::Matrix{ComplexF64},
X21::Matrix{ComplexF64},
X22::Matrix{ComplexF64},
Y1::Matrix{ComplexF64},
Y2::Matrix{ComplexF64},
gdsopart::ComplexF64,
T::Matrix{Float64}, ## Product if NAC and SO
Hso::Float64
)
################################## Preliminary Part , Common for both Frank Condon and Herzberg Teller ##############################
t = t * 4.1341373335e16 # converting time to atomic units
tp = -im * b - t
for i = 1:sysize
Sf[i, i] = Ξ©f[i, i] / sin(Ξ©f[i, i] * t)
Si[i, i] = Ξ©i[i, i] / sin(Ξ©i[i, i] * tp)
Bf[i, i] = Ξ©f[i, i] / tan(Ξ©f[i, i] * t)
Bi[i, i] = Ξ©i[i, i] / tan(Ξ©i[i, i] * tp)
end
W = [(Bf+J'*Bi*J) -(Sf + J' * Si * J)
-(Sf + J' * Si * J) (Bf+J'*Bi*J)]
U = Bi - Si
V = [J' * U * D
J' * U * D]
########## Defining the X and Y Matrices ##################################
################# Final Calculation and returning value ##################
s = zero(ComplexF64)
MJSJ = J' * Si * J
MJBJ = J' * Bi * J
MJUD = J' * U * D
Win = inv(W)
WinV = Win * V
X = zeros(ComplexF64, 2 * sysize, 2 * sysize)
Y = zeros(ComplexF64, 2 * sysize, 1)
XWin = similar(Win)
for m in 1:sysize
for mp in 1:sysize
X11[m, :] .= (MJSJ)[mp, :] .* (-Bf[m, m])
X12[m, :] .= (MJBJ)[mp, :] .* (Bf[m, m])
X21[m, :] .= (MJSJ)[mp, :] .* (Sf[m, m])
X22[m, :] .= (MJBJ)[mp, :] .* (-Sf[m, m])
# X = [X11 X12
# X21 X22]
X[1:sysize, 1:sysize] .= X11
X[1:sysize, sysize+1:2*sysize] .= X12
X[sysize+1:2*sysize, 1:sysize] .= X21
X[sysize+1:2*sysize, sysize+1:2*sysize] .= X22
Y1[m, :] .= Bf[m, m] * (MJUD)[mp, :]
Y2[m, :] .= -Sf[m, m] * (MJUD)[mp, :]
Y[1:sysize, 1] .= Y1
Y[sysize+1:2*sysize, 1] .= Y2
g = im * (tr(mul!(XWin, X, Win)) + (transpose(WinV)*X*WinV)[1] - (transpose(Y)*WinV)[1])
s = s + g * gdsopart * T[m, mp]
end
X11[m, 1:sysize] .= 0
X12[m, 1:sysize] .= 0
X21[m, 1:sysize] .= 0
X22[m, 1:sysize] .= 0
Y1[m, 1] = 0
Y2[m, 1] = 0
end
s = s + abs(Hso)^2 * gdsopart #### LATEST MODIFICATION
return s
end
@everywhere begin
const hso_s1t3 = 1.49 * 0.0000046 ## SOC between S1 and T3 = 0.48 cm-1, converting to atomic units
const Delta = 0.11 # in eV
const Delta_s1t3 = 0.091 * 0.037 # Difference between S1 and T3 = 0.091 eV in atomic units
const dd = 22 # Change this to modify number of points
const Temp = 300
kT_temp = Temp * 1.380649e-23 # Boltzmann constant times temperature in Joules
const kT = kT_temp * 2.29371227840e17 # in atomic units
const b = 1 / kT
const omegaf = Diagonal(vec(readdlm("wf.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Final state frequencies in atomic units
const omegai = Diagonal(vec(readdlm("wi.txt", '\t', Float64, '\n'))) * 4.55633e-6 # Initial state frequencies in atomic units
const sysize = size(omegai)[1]
const P = @. 2 * sinh(b * omegai / 2)
T_temp = readdlm("nac.txt", '\t', Float64, '\n') # NAC matrix
const T = (T_temp * T_temp')* (hso_s1t3^2/Delta_s1t3^2)
const D = readdlm("d.txt", Float64) # Displacement Matrix
const J = readdlm("j.txt", Float64) # Duchinsky Matrix
const t = collect(range(-5e-12, stop=5e-12, length=dd))
t[div(dd + 1, 2)] = 10e-25
end
function calc(t, b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P)
# @sync tells Julia to wait for any tasks started inside it
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "STARTING LOOP WITH NUMBER OF CORES = $(Distributed.nprocs()) \n====================================================\n")
end
x = zeros(ComplexF64, dd)
Sf = Diagonal(zeros(ComplexF64, sysize))
Si = Diagonal(zeros(ComplexF64, sysize))
Bf = Diagonal(zeros(ComplexF64, sysize))
Bi = Diagonal(zeros(ComplexF64, sysize))
X11 = zeros(ComplexF64, sysize, sysize)
X12 = zeros(ComplexF64, sysize, sysize)
X21 = zeros(ComplexF64, sysize, sysize)
X22 = zeros(ComplexF64, sysize, sysize)
Y1 = zeros(ComplexF64, sysize, 1)
Y2 = zeros(ComplexF64, sysize, 1)
Hso = 0.48 * 0.0000046 ## SOC between S1 and T2 = 0.48 cm-1, converting to atomic units
@timeit to "GT calculation" @sync @distributed for i in 1:dd
gdsopart = GDSO(t[i], b, Ξ, sysize, Ξ©f, Ξ©i, J, D, P, Si, Sf, Bi, Bf)
gtotal = GSV(t[i], b, sysize, Ξ©f, Ξ©i, J, D, Si, Sf, Bi, Bf, X11, X12, X21, X22, Y1, Y2, gdsopart, T,Hso)
x[i] = gtotal
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "ITERATION $i COMPLETED BY PROCESS $(Distributed.myid()) \n")
end
end # loop
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "LOOP COMPLETE \n====================================================")
end
yr = real.(x)
yi = imag.(x)
p = plot(t, yr, linewidth=2, color="blue", title="Ξ = $Ξ", label="Real part", grid=false)
p = plot!(t, yi, linewidth=2, color="red", label="Imaginary part", size=(1000, 800), xlims=(-30e-15, 30e-15))
savefig(p, "gt.svg")
begin # This part does FFTW and finds final rate constant
fs = abs(t[1] - t[2])^-1
signal = yr
p = plan_fft(signal)
F = fftshift(p * signal)
freqs = fftshift(fftfreq(length(t), fs)) # X axis has frequencies in s^-1
#Covert freqs to eV
freqs = freqs * 4.13558e-15
F = F * 6.57e15
time_domain = plot(t, signal, title=L"G(t)", legend=false, color="orange", xlabel=L"\mathrm{Time}\ (s)", ylabel=L"\mathrm{A.u}")
freq_domain = plot(freqs, abs.(F), title=L"\mathrm{Fourier\ transformed\ g(\bar{\nu}))}", legend=false, xlabel=L"\mathrm{Energy\ (eV)}", ylabel=L"\mathrm{k_{IC}\ (s^{-1})}", xlims=(0, 4))
p = plot(time_domain, freq_domain, layout=(2, 1), size=(1000, 800), right_margin=10mm)
savefig(p, "gt-fft.svg")
ee = Ξ# * 8065.73 #Converting eV to cm^-1
index = findmin(abs.(freqs .- ee))[2] # Find the index of freq closest to ee
freqs[index]
println("The rate constant is ", abs.(F)[index])
rc = abs.(F)[index]
end
#################################################This part writes the output to a file#######################################################
begin
open("ISC-DISTRIBTUED.TXT", "a") do f
write(f, "-------------------------\n")
write(f, "NUMBER OF CORES = 1\n")
write(f, "The value of Ξ is $Ξ\n")
write(f, "Number of time points = $dd\n")
write(f, "The rate constant calculated from Julia FFTW = $rc\n")
write(f, "-------------------------\n")
write(f, "The values of t,Re(g(t)),Im(g(t)) are \n\n")
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
open("GT-VALUES-DISTRIBTUED.txt", "w") do f
for i in 1:dd
write(f, "$(t[i]) $(yr[i]) $(yi[i])\n")
end
end
end
end
calc(t, b, Delta, sysize, omegaf, omegai, J, D, P)
display(to)
```

However, it seems the array `x`

is not being populated at all as itβs values are still 0.0 after the parallel loop is ran as indicated by the output `GT-VALUES`

which reads

```
-5.0e-12 0.0 0.0
-4.0e-12 0.0 0.0
-2.9999999999999997e-12 0.0 0.0
-2.0e-12 0.0 0.0
-1.0e-12 0.0 0.0
1.0e-24 6.394092795554566e-12 4.2656647577696985e-16
1.0e-12 0.0 0.0
2.0e-12 0.0 0.0
2.9999999999999997e-12 0.0 0.0
4.0e-12 0.0 0.0
5.0e-12 0.0 0.0
```

There is probably some data race condition going on that I am unable to detect which might be causing the problem. Please help me if possible in identifying and isolating the same.