My julia code is somehow much slower than the matlab code

OK, now you can’t beat the following version in MATLAB. The times compare as follows on my machine:

Julia: 0.719097 seconds (134 allocations: 47.751 MiB)
MATLAB: Elapsed time is 1.264601 seconds.

All you have to do is replace the above while with the one below. I mainly compute every thing in a single triple-loop. Note that Julia is still single-threaded till the moment, whereas MATLAB functions are multi-threaded by default. So, multithreading can further enhance the speed of Julia code.

The whole function now reads:

function main()
    ## (a) ##
    # First, set up the given data#
    α = 0.261
    β = 0.99
    δ = 0.0176
    zGrid = [0.9526, 0.9760, 1, 1.0246, 1.0497]
    PiZ = [0.9149 0.0823 0.0028 0      0
           0.0206 0.9163 0.0618 0.0014 0
           0.0005 0.0412 0.9167 0.0412 0.0005
           0      0.0014 0.0618 0.9163 0.0206
           0      0      0.0028 0.0823 0.9149]
    # Next, get the kStar and kL and kH #
    kStar = (1 / α * (1 / β - 1 + δ))^(1 / (α - 1))
    kL = kStar * 0.85
    kH = 1.15 * kStar
    println(kStar)
    println(kL)
    println(kH)

    # Setting up an 499 points of kgrid #
    kGrid = range(kL, kH, 499)
    println(median(kGrid))

    ## (b) ##
    # First, set up the length variables #
    nk = length(kGrid)
    nz = length(zGrid)

    # Make settings for the iteration #
    # First, make an array for each z_i #
    k′Grid = repeat(kGrid, 1, nz, nk)
    kGridMat = permutedims(k′Grid, (3, 2, 1))
    k′GridMat = k′Grid .* (kGridMat .^ α .* zGrid' .+ kGridMat .* (1 - δ) .- k′Grid .> 0)
    replace!(k′GridMat, 0 => NaN) 
    logC = log.(kGridMat .^ α .* zGrid' .+ kGridMat .* (1 - δ) .- k′GridMat)
    replace!(logC, NaN => -Inf)

    # precision and initial distance #
    precision = 0.01
    distance = 100    

    # start iteration #
    i = 1
    V = zeros(nk, nz)
    VPiZ = similar(V)
    logCC = similar(logC)    
    while distance > precision
        mul!(VPiZ, V, PiZ')
        d = -Inf
        for k = 1:nk, j = 1:nz
            m = -Inf
            for i = 1:nk
                logCC[i,j,k] = logC[i,j,k] + VPiZ[i,j] * β
                m = ifelse(m > logCC[i,j,k], m, logCC[i,j,k])
            end 
            distance = max(d, m-V[k,j])
            d = m - V[k,j]
            V[k,j] = m
        end
        i += 1
    end
    # Add one more iteration to find Tk_index
    _, k_index = findmax(logCC, dims=1)
    mul!(VPiZ, V, PiZ')
    logCC .= logC .+ VPiZ .* β
    _, Tk_index = findmax(logCC, dims=1)

    # Reviving k-rules from the k_index array #
    k′sol = transpose(reshape(k′Grid[Tk_index], 5, 499))
    k′sol_prev = transpose(reshape(k′Grid[k_index], 5, 499)) # matrix of previous capital to compare with solutions #
    dist_k′sol = maximum(abs(i-j) for (i,j) in zip(k′sol,k′sol_prev))

    # answers #
    println(i)
    println(distance)
    println(dist_k′sol)
end