The code, with some comments of my part:

```
using DelimitedFiles, Statistics
# Load grid for log(y) and transition matrix
logy_grid = readdlm("logy_grid.txt")[:]
Py = readdlm("P.txt")
#those 2 variables should be const
function main(nB=351, repeats=500)
β = .953
γ = 2.
r = 0.017
θ = 0.282
ny = size(logy_grid, 1)
Bgrid = LinRange(-.45, .45, nB)
ygrid = exp.(logy_grid)
ymean = mean(ygrid) .+ 0 * ygrid
def_y = min(0.969 * ymean, ygrid)
Vd = zeros(ny, 1)
Vc = zeros(ny, nB)
V = zeros(ny, nB)
Q = ones(ny, nB) * .95
y = reshape(ygrid, (ny, 1, 1))
B = reshape(Bgrid, (1, nB, 1))
Bnext = reshape(Bgrid, (1, 1, nB))
zero_ind = Int(ceil(nB / 2))
function u(c, γ) #add the broadcast in the call?
return c.^(1 - γ) / (1 - γ)
end
t0 = time()
function iterate(V, Vc, Vd, Q)
EV = Py * V #array created, it should be preallocated
EVd = Py * Vd#array created
EVc = Py * Vc#array created
Vd_target = u(def_y, γ) + β * (θ * EVc[:, zero_ind] + (1 - θ) * EVd[:])
#calling a[:] allocates a new array
Vd_target = reshape(Vd_target, (ny, 1))
Qnext = reshape(Q, (ny, 1, nB))
c = @. y .- Qnext .* Bnext .+ B #array created
c[c .<= 0] = 1e-14 .+ 0 * c[c .<= 0]
EV = reshape(EV, (ny, 1, nB))
m = @. u(c, γ) .+ β * EV #array created
Vc_target = reshape(maximum(m, dims=3), (ny, nB))
Vd_compat = Vd * ones(1, nB) #array created
default_states = float(Vd_compat .> Vc)
default_prob = Py * default_states #array created
Q_target = (1. .- default_prob) ./ (1 + r)
V_target = max.(Vc, Vd_compat) #array created
return V_target, Vc_target, Vd_target, Q_target
end
iterate(V, Vc, Vd, Q) # warmup
t0 = time()
for iteration in 1:repeats
V, Vc, Vd, Q = iterate(V, Vc, Vd, Q)
end
t1 = time()
out = (t1 - t0) / repeats
end
print(1000 * main(1351, 10))
```

In short, a great speedup can be achieved with just changing some calls with implace versions and preallocating the arrays before the iterate definition