Initial timing:
julia> include("trnbias-orig.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784  OOS=-0.0031496723033457423  Bias=0.047775315593823586
  6.505962 seconds (40 allocations: 11.656 KiB)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784  OOS=-0.0031496723033457423  Bias=0.047775315593823586
  6.507056 seconds (40 allocations: 11.656 KiB)
Make which a Val:
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784  OOS=-0.0031496723033457423  Bias=0.047775315593823586
  5.198919 seconds (95.50 k allocations: 6.434 MiB, 0.46% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047784  OOS=-0.0031496723033457423  Bias=0.047775315593823586
  5.182652 seconds (441 allocations: 21.328 KiB)
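In case it helps anyone skimming: the Val change just moves `which` into the type domain, so the `which == 0` / `which == 1` / `which == 2` branch inside `opt_params` is resolved when the specialized method is compiled rather than tested on every inner iteration. A toy sketch of the idea (my own example, not code from the script below):

f(::Val{which}, x) where {which} = which == 0 ? sum(x) : maximum(x)
f(which::Int, x) = f(Val(which), x)   # one runtime dispatch here; inside the Val method the branch is a compile-time constant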
Precompute the cumulative sum, sprinkle in @simd and @fastmath, and make it multithreaded (but still run on a single thread):
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785  OOS=-0.0031496723033457423  Bias=0.04777531559382359
  2.953527 seconds (125.84 k allocations: 12.977 MiB, 1.91% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785  OOS=-0.0031496723033457423  Bias=0.04777531559382359
  2.896009 seconds (3.84 k allocations: 4.840 MiB)
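The cumsum precompute turns each windowed sum in the inner loop into a difference of two prefix sums: with xs = cumsum(x), the sum of x over the window i-k+1:i is xs[i] - xs[i-k]. A toy example with my own indices, not the ones used in the script:

x = rand(10)
xs = cumsum(x)
k, i = 3, 7
xs[i] - xs[i-k]   # ≈ sum(x[i-k+1:i]); the window sum from two lookups

The difference of prefix sums is not bit-identical to summing the window directly, which is one source of the last-digit changes in the results above.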
Run that code with julia -t auto:
julia> include("trnbias.jl"); @time optimize(0, 1000, 0.01, 200); @time optimize(0, 1000, 0.01, 200);
Usage: julia script.jl <which> <ncases> <trend> <nreps>
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785  OOS=-0.0031496723033457423  Bias=0.04777531559382359
  0.452778 seconds (247.05 k allocations: 20.896 MiB, 257.25% compilation time)
typeof(rng) = MarsagliaRng
Mean IS=0.04462564329047785  OOS=-0.0031496723033457423  Bias=0.04777531559382359
  0.369494 seconds (3.86 k allocations: 4.841 MiB)
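Nothing in the code changes between the last two runs; -t auto just gives the already-spawned tasks threads to run on. The pattern used in _optimize below, reduced to a toy sketch with a placeholder workload (not the script's functions):

using Base.Threads: @spawn

tasks = Task[]
for _ in 1:8                                        # one task per repetition
    push!(tasks, @spawn sum(abs2, rand(10_000)))    # placeholder workload
end
results = fetch.(tasks)                             # fetched in submission order, so the reduction is deterministic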
I started from https://github.com/jlapeyre/DiscourseQuestion24Nov2023/blob/main/julia_trnbias/trnbias.jl and tried to make only minimal changes.
Note that I still use your weird RNG choice for reproducibility, so the same seeds give the same data. The tiny differences in the printed results are mostly because floating-point addition is not associative and @fastmath gives the compiler latitude to reorder it.
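A one-liner showing the non-associativity in question:

julia> (0.1 + 0.2) + 0.3 == 0.1 + (0.2 + 0.3)
false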
  Code below the fold.
using Random
using Printf
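# Result of one parameter search: the best short/long lookback lengths and the performance value achieved.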
struct ParamsResult
    short_term::Int
    long_term::Int
    performance::Float64
end
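# Marsaglia MWC256 multiply-with-carry generator, kept (rather than Julia's default RNG)
# so the streams match the original program for the same seed.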
mutable struct MarsagliaRng
    q::Vector{UInt32}
    carry::UInt32
    mwc256_initialized::Bool
    mwc256_seed::Int32
    i::UInt8
    function MarsagliaRng(seed::Vector{UInt8})
        q = zeros(UInt32, 256)
        carry = 362436
        mwc256_initialized = false
        mwc256_seed = reinterpret(Int32, seed)[1]
        i = 255
        new(q, carry, mwc256_initialized, mwc256_seed, i)
    end
end
# Default constructor using system time for seeding
function MarsagliaRng()
    ts_nano = time_ns()                # low bits of the system clock; avoids a Dates dependency
    seed = UInt8[ts_nano % UInt8, (ts_nano >>> 8) % UInt8, (ts_nano >>> 16) % UInt8, (ts_nano >>> 24) % UInt8]
    MarsagliaRng(seed)
end
# Random number generator functions
function next_u32(rng::MarsagliaRng)::UInt32
    a = UInt64(809430660)
    if !rng.mwc256_initialized
        j = UInt32(rng.mwc256_seed)
        c1 = UInt32(69069)
        c2 = UInt32(12345)
        rng.mwc256_initialized = true
        for k in 1:256
            j = (c1 * j + c2) % UInt32
            rng.q[k] = j
        end
    end
    rng.i = (rng.i + 1) % 256
    t = a * UInt64(rng.q[rng.i+1]) + UInt64(rng.carry)
    rng.carry = (t >> 32)
    rng.q[rng.i+1] = t % UInt32
    return rng.q[rng.i+1]
end
function next_u64(rng::MarsagliaRng)::UInt64
    UInt64(next_u32(rng))
end
# EDIT: extend Base.rand rather than shadowing it.
# Sample function for generating a Float64
function Base.rand(rng::MarsagliaRng)::Float64
    mult = 1.0 / typemax(UInt32)
    mult * next_u32(rng)
end
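# Exhaustive search over (ishort, ilong) moving-average lengths on the in-sample series.
# `which` selects the criterion: 0 = mean return, 1 = profit factor, 2 = Sharpe-like ratio.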
function opt_params(w::Val{which}, ncases::Integer, x::Vector{Float64}) where which
    FloatT = eltype(x)
    xs = cumsum(x)
    best_perf = typemin(FloatT)
    ibestshort = 0
    ibestlong = 0
    small_float = 1e-60
    @inbounds for ilong in 2:199
        for ishort in 1:(ilong-1)
            total_return = zero(FloatT)
            win_sum = small_float
            lose_sum = small_float
            sum_squares = small_float
            short_sum = zero(FloatT)
            long_sum = zero(FloatT)
            let i = ilong - 1
                short_sum = xs[i+1] - xs[i-ishort+1]
                long_sum = xs[i+1]
                short_mean = short_sum / ishort
                long_mean = long_sum / ilong
                ret = (short_mean > long_mean) ? x[i+2] - x[i+1] :
                      (short_mean < long_mean) ? x[i+1] - x[i+2] : zero(FloatT)
                total_return += ret
                sum_squares += ret^2
                if ret > zero(FloatT)
                    win_sum += ret
                else
                    lose_sum -= ret
                end
            end
            # General case; i != ilong - 1
            @fastmath @simd for i in (ilong:ncases-2)
                x1 = x[i+1]
                # Rolling-update alternative, kept for reference:
                # short_sum += x1 - x[i-ishort+1]
                # long_sum += x1 - x[i-ilong+1]
                short_sum = xs[i+1] - xs[i-ishort+1]
                long_sum = xs[i+1] - xs[i-ilong+1]
                short_mean = short_sum / ishort
                long_mean = long_sum / ilong
                ret = (short_mean > long_mean) ? x[i+2] - x1 :
                      (short_mean < long_mean) ? x1 - x[i+2] : zero(FloatT)
                total_return += ret
                sum_squares += ret^2
                if ret > zero(FloatT)
                    win_sum += ret
                else
                    lose_sum -= ret
                end
            end
            if which == 0
                total_return /= (ncases - ilong)
                if total_return > best_perf
                    best_perf = total_return
                    ibestshort = ishort
                    ibestlong = ilong
                end
            elseif which == 1
                pf = win_sum / lose_sum
                if pf > best_perf
                    best_perf = pf
                    ibestshort = ishort
                    ibestlong = ilong
                end
            elseif which == 2
                total_return /= (ncases - ilong)
                sum_squares /= (ncases - ilong)
                sum_squares -= total_return^2
                sr = total_return / (sqrt(sum_squares) + 1e-8)
                if sr > best_perf
                    best_perf = sr
                    ibestshort = ishort
                    ibestlong = ilong
                end
            end
        end
    end 
    ParamsResult(ibestshort, ibestlong, best_perf)
end
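# Evaluate a chosen (short_term, long_term) pair on a fresh series and return its mean return.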
function test_system(ncases::Integer, x::Vector{Float64}, short_term::Integer, long_term::Integer)
    FloatT = eltype(x)
    sum1 = zero(FloatT)
    for i in (long_term-1:ncases-2)
        short_mean = sum(@view x[i-short_term+2:i+1]) / short_term
        long_mean = sum(@view x[i-long_term+2:i+1]) / long_term
        if short_mean > long_mean
            sum1 += x[i+2] - x[i+1]
        elseif short_mean < long_mean
            sum1 -= x[i+2] - x[i+1]
        end
    end
    sum1 / (ncases - long_term)
end
function main()
    if length(ARGS) < 4
        println("Usage: julia script.jl <which> <ncases> <trend> <nreps>")
        return
    end
    which = parse(Int, ARGS[1])
    ncases = parse(Int, ARGS[2])
    save_trend = parse(Float64, ARGS[3])
    nreps = parse(Int, ARGS[4])
    optimize(which, ncases, save_trend, nreps)
end
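# Run nreps repetitions: optimize the lookbacks on an in-sample series, test them on an
# out-of-sample series, and report the mean IS and OOS performance and the bias between them.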
function optimize(which::Integer, ncases::Integer, save_trend::Float64, nreps::Integer, rng=MarsagliaRng(UInt8.([33, 0, 0, 0])))
    @show typeof(rng)
    _optimize(rng, which, ncases, save_trend, nreps)
end
function _optimize(rng, which::Integer, ncases::Integer, save_trend::Float64, nreps::Integer)
    print_results = false
    FloatT = typeof(save_trend)
    x = zeros(FloatT, ncases)
    is_mean = zero(FloatT)
    oos_mean = zero(FloatT)
    futures = Task[]
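    # Generate the in-sample and out-of-sample series sequentially on the main task so the
    # RNG stream (and hence the data) stays reproducible; spawn the expensive search per repetition.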
    @inbounds for irep in 1:nreps
        # Generate in-sample
        trend = save_trend
        x[1] = zero(FloatT)
        for i in 2:ncases
            if (i - 1) % 50 == 0
                trend = -trend
            end
            x[i] = x[i-1] + trend + rand(rng) + rand(rng) - rand(rng) - rand(rng)
        end
        x_optim = copy(x)
        # Generate out-of-sample
        trend = save_trend
        x[1] = zero(FloatT)
        for i in 2:ncases
            if (i - 1) % 50 == 0
                trend = -trend
            end
            x[i] = x[i-1] + trend + rand(rng) + rand(rng) - rand(rng) - rand(rng)
        end
        x_oos = copy(x)
        future = Threads.@spawn begin
            params_result = opt_params(Val(which), ncases, x_optim)
            oos_perf = test_system(ncases, x_oos, params_result.short_term, params_result.long_term)
            (params_result, oos_perf)
        end
        push!(futures, future)
    end
    for (irep, future) in enumerate(futures)
        params_result::ParamsResult, oos_perf::Float64 = fetch(future)
        is_mean += params_result.performance
        oos_mean += oos_perf
        if print_results
            @printf("%3d: %3d %3d  %8.4f %8.4f (%8.4f)\n",
                    irep, params_result.short_term, params_result.long_term, params_result.performance, oos_perf, params_result.performance - oos_perf)
        end
    end
    is_mean /= nreps
    oos_mean /= nreps
    println("Mean IS=$is_mean  OOS=$oos_mean  Bias=$(is_mean - oos_mean)")
end
main()