Hello everyone,

I am trying to parallelize this montecarlo function that I have created for measuring an option price using the BlackScholes model and I would like to improve its performance by parallelizing the double nested loop. Looking around I found that Dagger.jl may be a good solution, but when I have used @sync the results were only slightly faster. Am I doing something wrong?

```
using Random: randn
using Dagger
############## Financial Parameters ##############
S = 100.00 # initial stock price
K = 100.00 # strike price
T = 1.0 # time to maturity in years
r = 0.3 # risk free annual percentage
sigma = 0.5 # annual standard deviation percentage
t = 0.0 #time to consider the value
############## MonteCarlo Parameters ##############
steps::UInt = 20 # time steps for SDE
trials::UInt = 1000000 # number of trials
function montecarlo(start_price::F, strike_price::F, time_to_maturity::F,
risk_free::F, standard_deviation::F, time_steps::U, trials::U,
time::F) where {F <: Float64, U <: UInt64}
dt = time_to_maturity / time_steps # differential time steps
V::Float64 = 0
dS::Float64 = 0
for _ in 1:trials
dS = 0.0
for _ in 1:time_steps
dS += ((risk_free - standard_deviation^2 / 2) * dt +
standard_deviation * sqrt(dt) * randn()) # log of differential evolution
end
# apply call option formula
V += max(exp(log(start_price) + dS) - strike_price, 0)
end
# calculate the mean and add time evolution
V = V/trials * exp(-risk_free * (time_to_maturity - time))
return V
end
function parallel(start_price::F, strike_price::F, time_to_maturity::F,
risk_free::F, standard_deviation::F, time_steps::U, trials::U,
time::F) where {F <: Float64, U <: UInt64}
dt = time_to_maturity / time_steps # differential time steps
V::Float64 = 0
dS::Float64 = 0
@sync for _ in 1:trials
dS = 0.0
for _ in 1:time_steps
dS = dS + ((risk_free - standard_deviation^2 / 2) * dt +
standard_deviation * sqrt(dt) * randn()) # log of differential evolution
end
# apply call option formula
V = V + max(exp(log(start_price) + fetch(dS)) - strike_price, 0)
end
# calculate the mean and add time evolution
V = V/trials * exp(-risk_free * (time_to_maturity - time))
return V
end
```

Results:

```
@btime montecarlo(S, K, T, r, sigma, steps, trials, t) #62.635 ms (1 allocation: 16 bytes)
@btime parallel(S, K, T, r, sigma, steps, trials, t)#60.742 ms (9 allocations: 368 bytes)
```

I have obviously settled the number of threads to 8 using a quad-core machine, but I obtain a very small performance improvement.