Simulation of regression coefficients

This is the fastest I can come up with on the CPU (before I go to sleep). You might be able to squeeze more out of it on a GPU.

On my computer, my version is 4x faster than the fastest version in your blog yet. :slight_smile:

But the idea is that you can do multi-threading. And you avoid memory allocating in gen_data by preallocating matrices and just re-using them.

But for multi-threading, you are better off creating multiple copies of the data so your threads can work concurrently and don’t have to share data (leading to false-sharing).

Also, you can save time by not going through with DataFrames and just create the matrix directly.

Also, if you use GPU you might want to turn off multi-threading.

function gen_data2!(X, y, x1_dmean, x2_dmean)
  ## total time periods in the the panel = 500
  tt = 500

  # x1 and x2 covariates
  x1_A = 1 .+ rand(std_normal, tt)
  x1_B = 1/4 .+ rand(std_normal, tt)
  x2_A = 1 .+ x1_A .+ rand(std_normal, tt)
  x2_B = 1 .+ x1_B .+ rand(std_normal, tt)

  # outcomes (notice different slope coefs for x2_A and x2_B)
  y_A = x1_A .+ 1*x2_A + rand(std_normal, tt)
  y_B = x1_B .+ 2*x2_B + rand(std_normal, tt)
    x1 = vcat(x1_A, x1_B)
    x2 = vcat(x2_A, x2_B)

    X[:, 2] .= vcat(fill(0, length(x1_A)), fill(1, length(x1_B)))
    X[:, 3] .= x1
    X[:, 4] .= x2
    X[:, 5] .= (x1 .* x2)

    y .= vcat(y_A, y_B)

    x1_dmean .= vcat(x1_A .- mean(x1_A), x1_B .- mean(x1_B))
    x2_dmean .= vcat(x2_A .- mean(x2_A), x2_B .- mean(x2_B))
end

function coefs_lm_formula4(X, y, x1_dmean, x2_dmean)
  mod_level = fastfit(X, y)
  @inbounds X[:, 5] .= x1_dmean .* x2_dmean
  mod_dmean = fastfit(X, y)
  (mod_level[end], mod_dmean[end])
end

function run_simulations5(nsim)
  sims = zeros(nsim, 2);
  X = [zeros(Float64, 1000, 5) for _ in 1:nthreads()]
  y = [zeros(Float64, 1000) for _ in 1:nthreads()]
  x1_dmean = [zeros(Float64, 1000) for _ in 1:nthreads()]
  x2_dmean = [zeros(Float64, 1000) for _ in 1:nthreads()]

  # set constant
  for i in 1:nthreads()
    X[i][:, 1] .= 1
  end

  @threads for i in 1:nsim
    gen_data2!(X[threadid()], y[threadid()], x1_dmean[threadid()], x2_dmean[threadid()])
    @inbounds sims[i, 1], sims[i, 2] = coefs_lm_formula4(X[threadid()], y[threadid()], x1_dmean[threadid()], x2_dmean[threadid()])
  end
  sims
end
2 Likes