Hey everyone, I am new to Julia and I want to solve a large system of ODEs as fast as possible. I’ve tried multithreading with both the `Threads.@threads` and the `EnsembleThreads()` approaches, but they both reduce performance. I’ve also tried multiprocessing and a GPU implementation, but neither helped. It is possible that I am not making proper use of these tools, since I am new to Julia. Is it possible to use parallel methods when allocations are large?

Are there any other ways to run this code faster? Any other tips that could improve the performance of this code would be an invaluable help.

Thanks!

Here is the code:

```
using OrdinaryDiffEq, SparseArrays, Random, LinearAlgebra
"""
    Param

Parameters and scratch storage handed to the ODE right-hand side.

The struct is mutable so that `λ` can be updated between solves without
rebuilding the other fields.

# Fields
- `α`: scalar coefficient multiplying the second state column.
- `γ`: scalar coefficient multiplying the third state column.
- `β`: per-node additive term (one entry per row of the state matrix).
- `λ`: scalar factor applied to the coupling term `G * y`.
- `G`: sparse coupling matrix. The field type is fully concrete
  (`SparseMatrixCSC{Float64,Int}`) so that code reading `p.G` is type-stable;
  a bare `SparseMatrixCSC` annotation is a non-concrete `UnionAll`.
- `coup_alloc`: preallocated buffer that receives the product `G * y`.
"""
mutable struct Param
    α::Float64
    γ::Float64
    β::Vector{Float64}
    λ::Float64
    G::SparseMatrixCSC{Float64,Int}
    coup_alloc::Vector{Float64}
end
"""
    rossler!(du, u, p, t)

In-place right-hand side for a network of coupled three-variable oscillators.

`u` and `du` are matrices whose three columns hold the `x`, `y` and `z`
components, one row per node. `p` supplies the fields `α`, `γ`, `β`, `λ`, `G`
and the work buffer `coup_alloc`. `t` is unused.

The coupling term `G * y` is written into `p.coup_alloc` with `mul!`, so no
temporary array is allocated per call.
"""
function rossler!(du, u, p, t)
    x = view(u, :, 1)
    y = view(u, :, 2)
    z = view(u, :, 3)
    α, γ, λ = p.α, p.γ, p.λ
    β = p.β
    coup = p.coup_alloc
    mul!(coup, p.G, y)
    # `@.` turns each assignment into one fused, in-place broadcast.
    @. du[:, 1] = -y - z
    @. du[:, 2] = x + α * y - λ * coup
    @. du[:, 3] = β - γ * z + z * x
    return nothing
end

"""
    mainfun(; N=4941, nλ=100)

Integrate the coupled system on a random graph with `N` nodes, once for each of
`nλ` coupling strengths evenly spaced in `[0, 0.35]`.

Each solve uses `BS3()` over the time span `(0.0, 1500.0)` and saves at
`1400:0.25:1500`. One progress line is printed per coupling strength.

The solutions are not stored, and the function returns `nothing`.
"""
function mainfun(; N::Integer=4941, nλ::Integer=100)
    # The dense random matrix is kept so the random draws are unchanged, but
    # everything derived from it is built sparse instead of as extra dense
    # N×N arrays.
    A = rand(MersenneTwister(0), N, N)
    adjacency = SparseMatrixCSC{Float64,Int}(sparse(A .> 0.9993))
    degree = vec(sum(adjacency; dims=2))
    # Graph Laplacian: degree on the diagonal minus the adjacency matrix.
    laplacian = spdiagm(0 => degree) - adjacency

    α = 0.1
    γ = 18.0
    bf_vec = 0.0995 .+ 0.01 .* rand(MersenneTwister(0), N)
    lambda = range(0, 0.35, length=nλ)
    u0 = [-2.68 -24.05 0.0] .+ 0.01 .* rand(MersenneTwister(0), N, 3)
    coupling = similar(bf_vec)
    par = Param(α, γ, bf_vec, 0.0, laplacian, coupling)

    # Floating-point time span, matching the Float64 state.
    tspan = (0.0, 1500.0)
    for (j, λ) in enumerate(lambda)
        par.λ = λ
        println(j, ".\tλ : ", par.λ)
        prob = ODEProblem(rossler!, u0, tspan, par)
        # NOTE(review): the solution is discarded; collect it here if the
        # trajectories are needed (each one holds 401 saved N×3 states).
        solve(prob, BS3(); saveat=1400:0.25:1500)
    end
    return nothing
end
# Run the full parameter sweep once and print its timing. A first call also
# includes compilation time. `mainfun` ends with a `for` loop, so `Y` is `nothing`.
Y = @time mainfun();
```