Initial timing:

```
julia> include("trnbias-orig.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784 OOS=-0.0031496723033457423 Bias=0.047775315593823586
6.505962 seconds (40 allocations: 11.656 KiB)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784 OOS=-0.0031496723033457423 Bias=0.047775315593823586
6.507056 seconds (40 allocations: 11.656 KiB)
```

Make which a Val:

```
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784 OOS=-0.0031496723033457423 Bias=0.047775315593823586
5.198919 seconds (95.50 k allocations: 6.434 MiB, 0.46% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784 OOS=-0.0031496723033457423 Bias=0.047775315593823586
5.182652 seconds (441 allocations: 21.328 KiB)
```

Precompute cumsum, sprinkle @simd @fastmath, make it multithreaded (but run only on a single thread):

```
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785 OOS=-0.0031496723033457423 Bias=0.04777531559382359
2.953527 seconds (125.84 k allocations: 12.977 MiB, 1.91% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785 OOS=-0.0031496723033457423 Bias=0.04777531559382359
2.896009 seconds (3.84 k allocations: 4.840 MiB)
```

Run that code with `julia -t auto`

:

```
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785 OOS=-0.0031496723033457423 Bias=0.04777531559382359
0.452778 seconds (247.05 k allocations: 20.896 MiB, 257.25% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785 OOS=-0.0031496723033457423 Bias=0.04777531559382359
0.369494 seconds (3.86 k allocations: 4.841 MiB)
```

I started from https://github.com/jlapeyre/DiscourseQuestion24Nov2023/blob/main/julia_trnbias/trnbias.jl and tried to make only minimal changes.

Note that I still use your weird RNG choice for reproducibility / same seeds. The tiny changes are mostly because floating point is not associative and @fastmath gives the compiler latitude to play with that.

## Code below the fold.

```
using Random
using Printf
struct ParamsResult
short_term::Int
long_term::Int
performance::Float64
end
mutable struct MarsagliaRng
q::Vector{UInt32}
carry::UInt32
mwc256_initialized::Bool
mwc256_seed::Int32
i::UInt8
function MarsagliaRng(seed::Vector{UInt8})
q = zeros(UInt32, 256)
carry = 362436
mwc256_initialized = false
mwc256_seed = reinterpret(Int32, seed)[1]
i = 255
new(q, carry, mwc256_initialized, mwc256_seed, i)
end
end
# Default constructor using system time for seeding
function MarsagliaRng()
ts_nano = UInt8(nanosecond(now()))
seed = [ts_nano, ts_nano >>> 8, ts_nano >>> 16, ts_nano >>> 24]
MarsagliaRng(seed)
end
# Random number generator functions
function next_u32(rng::MarsagliaRng)::UInt32
a = UInt64(809430660)
if !rng.mwc256_initialized
j = UInt32(rng.mwc256_seed)
c1 = UInt32(69069)
c2 = UInt32(12345)
rng.mwc256_initialized = true
for k in 1:256
j = (c1 * j + c2) % UInt32
rng.q[k] = j
end
end
rng.i = (rng.i + 1) % 256
t = a * UInt64(rng.q[rng.i+1]) + UInt64(rng.carry)
rng.carry = (t >> 32)
rng.q[rng.i+1] = t % UInt32
return rng.q[rng.i+1]
end
function next_u64(rng::MarsagliaRng)::UInt64
UInt64(next_u32(rng))
end
# EDIT: extend Base.rand rather than shadowing it.
# Sample function for generating a Float64
function Base.rand(rng::MarsagliaRng)::Float64
mult = 1.0 / typemax(UInt32)
mult * next_u32(rng)
end
function opt_params(w::Val{which}, ncases::Integer, x::Vector{Float64}) where which
FloatT = eltype(x)
xs = cumsum(x)
best_perf = typemin(FloatT)
ibestshort = 0
ibestlong = 0
small_float = 1e-60
@inbounds for ilong in 2:199
for ishort in 1:(ilong-1)
total_return = zero(FloatT)
win_sum = small_float
lose_sum = small_float
sum_squares = small_float
short_sum = zero(FloatT)
long_sum = zero(FloatT)
let i = ilong - 1
short_sum = xs[i+1] - xs[i-ishort + 2 - 1]
long_sum = xs[i+1]
short_mean = short_sum / ishort
long_mean = long_sum / ilong
ret = (short_mean > long_mean) ? x[i+2] - x[i+1] :
(short_mean < long_mean) ? x[i+1] - x[i+2] : zero(FloatT)
total_return += ret
sum_squares += ret^2
if ret > zero(FloatT)
win_sum += ret
else
lose_sum -= ret
end
end
# General case; i != ilong - 1
@fastmath @simd for i in (ilong:ncases-2)
x1 = x[i+1]
#short_sum += x1 - x[i-ishort+1]
#long_sum += x1 - x[i-ilong+1]
short_sum = xs[i+1] - xs[i-ishort + 1]
long_sum = xs[i+1] - xs[i-ilong + 1]
short_mean = short_sum / ishort
long_mean = long_sum / ilong
ret = (short_mean > long_mean) ? x[i+2] - x1 :
(short_mean < long_mean) ? x1 - x[i+2] : zero(FloatT)
total_return += ret
sum_squares += ret^2
if ret > zero(FloatT)
win_sum += ret
else
lose_sum -= ret
end
end
if which == 0
total_return /= (ncases - ilong)
if total_return > best_perf
best_perf = total_return
ibestshort = ishort
ibestlong = ilong
end
elseif which == 1
pf = win_sum / lose_sum
if pf > best_perf
best_perf = pf
ibestshort = ishort
ibestlong = ilong
end
elseif which == 2
total_return /= (ncases - ilong)
sum_squares /= (ncases - ilong)
sum_squares -= total_return^2
sr = total_return / (sqrt(sum_squares) + 1e-8)
if sr > best_perf
best_perf = sr
ibestshort = ishort
ibestlong = ilong
end
end
end
end
ParamsResult(ibestshort, ibestlong, best_perf)
end
function test_system(ncases::Integer, x::Vector{Float64}, short_term::Integer, long_term::Integer)
FloatT = eltype(x)
sum1 = zero(FloatT)
for i in (long_term-1:ncases-2)
short_mean = sum(@view x[i-short_term+2:i+1]) / short_term
long_mean = sum(@view x[i-long_term+2:i+1]) / long_term
if short_mean > long_mean
sum1 += x[i+2] - x[i+1]
elseif short_mean < long_mean
sum1 -= x[i+2] - x[i+1]
end
end
sum1 / (ncases - long_term)
end
function main()
if length(ARGS) < 4
println("Usage: julia script.jl <which> <ncases> <trend> <nreps>")
return
end
which = parse(Int, ARGS[1])
ncases = parse(Int, ARGS[2])
save_trend = parse(Float64, ARGS[3])
nreps = parse(Int, ARGS[4])
optimize(which, ncases, save_trend, nreps)
end
function optimize(which::Integer, ncases::Integer, save_trend::Float64, nreps::Integer, rng=MarsagliaRng(UInt8.([33, 0, 0, 0])))
@show typeof(rng)
_optimize(rng, which, ncases, save_trend, nreps)
end
function _optimize(rng, which::Integer, ncases::Integer, save_trend::Float64, nreps::Integer)
print_results = false
FloatT = typeof(save_trend)
x = zeros(FloatT, ncases)
is_mean = zero(FloatT)
oos_mean = zero(FloatT)
futures = Task[]
@inbounds for irep in 1:nreps
# Generate in-sample
trend = save_trend
x[1] = zero(FloatT)
for i in 2:ncases
if (i - 1) % 50 == 0
trend = -trend
end
x[i] = x[i-1] + trend + rand(rng) + rand(rng) - rand(rng) - rand(rng)
end
x_optim = copy(x)
# Generate out-of-sample
trend = save_trend
x[1] = zero(FloatT)
for i in 2:ncases
if (i - 1) % 50 == 0
trend = -trend
end
x[i] = x[i-1] + trend + rand(rng) + rand(rng) - rand(rng) - rand(rng)
end
x_oos = copy(x)
future = Threads.@spawn begin
params_result = opt_params(Val(which), ncases, x_optim)
oos_perf = test_system(ncases, x_oos, params_result.short_term, params_result.long_term)
(params_result, oos_perf)
end
push!(futures, future)
end
for (irep, future) in enumerate(futures)
params_result::ParamsResult, oos_perf::Float64 = fetch(future)
is_mean += params_result.performance
oos_mean += oos_perf
if print_results
@printf("%3d: %3d %3d %8.4f %8.4f (%8.4f)\n",
irep, params_result.short_term, params_result.long_term, params_result.performance, oos_perf, params_result.performance - oos_perf)
end
end
is_mean /= nreps
oos_mean /= nreps
println("Mean IS=$is_mean OOS=$oos_mean Bias=$(is_mean - oos_mean)")
end
main()
```