@GunnarFarneback: Your suggestion significantly lowered the execution time, down to the order of milliseconds.
@Vasily_Pisarev: Your suggestions did lower the allocations.
The revised code:
using Distributions, Distributed, StatsBase, LinearAlgebra, LightGraphs, BenchmarkTools, Random, Plots

mutable struct Arr
    t::Int64                    # current time step
    agents::Array{Int64,1}      # agent indices 1:N
    θ::Array{Float64,1}         # phase of each agent
    C::Array{Float64,1}         # confidence of each agent
    G::SimpleDiGraph{Int64}     # interaction network
    W::Array{Float64,1}         # sampling weights (1 - C, normalized)
end

function main(Cmin::Float64, Cmax::Float64, N::Int64, n::Int64, outdeg::Int64,
              Pᵣ::Float64, Pᵢ::Float64, P::Int64, μ::Float64, σ::Float64)
    A = Arr(0, [1:N;], Array{Float64}(undef, N), Array{Float64}(undef, N),
            watts_strogatz(N, outdeg, Pᵣ, is_directed=true),
            Array{Float64}(undef, N))
    rand!(Normal(μ, σ), A.θ)          # initial phases
    rand!(Uniform(Cmin, Cmax), A.C)   # confidences
    A.W .= 1.0 .- A.C                 # less confident agents update more often
    A.W ./= sum(A.W)
    θₜ = Vector{Float64}[]            # phase history, one snapshot per step
    # preallocated buffers, reused across iterations
    upagents = Array{Int64}(undef, round(Int, 0.5*N))
    cedg = Array{Float64}(undef, N)
    # iterate until all phases are within 0.01 of each other
    while any(abs(x - A.θ[1]) > 0.01 for x in A.θ)
        A.t += 1
        push!(θₜ, copy(A.θ))
        # draw half of the agents (without replacement) to update this step
        sample!(Random.GLOBAL_RNG, A.agents, Weights(A.W), upagents, replace=false)
        @inbounds for i in upagents
            cedg .= 0.0
            # edge weights from the in-neighbors that interact this step
            @inbounds for j in inneighbors(A.G, i)
                if rand() < Pᵢ && i != j
                    cedg[j] = exp(-abs(θₜ[A.t][i] - θₜ[A.t][j])/(1.0 - A.C[i]))
                end
            end
            cedg ./= (sum(cedg)/(1.0 - A.C[i]))  # neighbor weights sum to 1 - C[i]
            cedg[i] = A.C[i]                     # self-weight
            # weighted circular mean of the neighborhood
            s = 0.0
            c = 0.0
            @inbounds for k in 1:N
                if cedg[k] != 0
                    s += cedg[k]*sin(θₜ[A.t][k])
                    c += cedg[k]*cos(θₜ[A.t][k])
                end
            end
            A.θ[i] = atan(s, c)
        end
    end
    #return A.t, A.θ[1], sum(A.C.*θₜ[1])/sum(A.C), mean(A.C)
    return θₜ
end
Cmin=0.5;Cmax=0.9;N=1000;n=5000;outdeg=250;Pᵣ=0.0;Pᵢ=0.5;P=500;μ=pi/2;σ=pi/18;
@btime main(Cmin,Cmax,N,n,outdeg,Pᵣ,Pᵢ,P,μ,σ)
589.232 ms (14481 allocations: 6.67 MiB)
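(Side note: the call above passes non-constant globals, which the BenchmarkTools manual recommends interpolating with $ so that their lookup cost is not timed; for a call this long the difference should be negligible, but for completeness it would look like this:)
@btime main($Cmin,$Cmax,$N,$n,$outdeg,$Pᵣ,$Pᵢ,$P,$μ,$σ)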
To simulate network sizes larger than 10,000, I am planning to tweak this code (the inner for loops) to run on a GPU. Is that the right approach? With the code above, I get the following performance:
Cmin=0.5;Cmax=0.9;N=10000;n=5000;outdeg=2500;Pᵣ=0.0;Pᵢ=0.5;P=5000;μ=pi/2;σ=pi/18;
@btime main(Cmin,Cmax,N,n,outdeg,Pᵣ,Pᵢ,P,μ,σ)
68.707 s (220977 allocations: 663.53 MiB)
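For the GPU idea, my rough plan is to express the per-agent update as array operations so that CUDA.jl's broadcast machinery does the work. Below is a minimal, untested sketch of one agent's update; gpu_update_agent and mask_d are hypothetical names, and the random Pᵢ edge-selection mask is assumed to be precomputed (e.g. with CUDA.rand), with agent i itself excluded:

using CUDA

# Sketch: one agent update as GPU broadcasts. θ_d holds the phase snapshot
# for this step; mask_d marks in-neighbors of agent i that passed the
# rand() < Pᵢ test (precomputed, i excluded); Cᵢ = A.C[i].
function gpu_update_agent(θ_d::CuVector{Float64}, mask_d::CuVector{Bool},
                          Cᵢ::Float64, i::Int)
    θᵢ = CUDA.@allowscalar θ_d[i]
    w = @. mask_d * exp(-abs(θ_d - θᵢ)/(1.0 - Cᵢ))  # edge weights, cf. cedg
    w ./= sum(w)/(1.0 - Cᵢ)                          # neighbors sum to 1 - Cᵢ
    # self-weight added separately, mirroring cedg[i] = A.C[i]
    s = sum(@. w*sin(θ_d)) + Cᵢ*sin(θᵢ)
    c = sum(@. w*cos(θ_d)) + Cᵢ*cos(θᵢ)
    return atan(s, c)
end

That said, I suspect one kernel launch per agent per step would not beat the CPU; the payoff would have to come from batching the updated agents, e.g. building a (possibly sparse) weight matrix per step so that s and c become matrix-vector products against sin.(θ_d) and cos.(θ_d), and perhaps from switching to Float32, which is much faster than Float64 on most consumer GPUs. Whether that beats a multithreaded CPU version is something I would benchmark before committing.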