This is my whole code but you cannot run it because it’s a part of a software, but I am providing this so that you can have a better understanding of what I want to do. And in this code, I want to implement multithreading in the function build_temp.
using Distributed
using Distributed
using TensorOperations,SharedArrays
using Base.Threads
using PyCall
using IterTools
using Einsum,LinearAlgebra,LoopVectorization
using DependencyWalker, LibSSH2_jll
using HDF5
using ChunkSplitters
using Atomix: @atomic, @atomicswap, @atomicreplace
println("number of threads ", nthreads())
function int_copy_store(t1::AbstractArray{T,2}, eris::PyObject) where T<: AbstractFloat
    nocc, nvir = size(t1)
    ovvv = Array{Float64,4}(undef,(nocc,nvir,nvir,nvir))
    ovov = similar(eris.ovov, Float64, nocc,nvir,nocc,nvir)
    oovv = similar(eris.oovv, Float64, nocc,nocc,nvir,nvir)
    ovvv = similar(eris.ovvv, Float64, nocc,nvir,nvir,nvir)
    ovoo = similar(eris.ovoo, Float64, nocc,nvir,nocc,nocc)
    oooo = similar(eris.oooo, Float64, nocc,nocc,nocc,nocc)
    ovov .= eris.ovov
    oovv .= eris.oovv
    ovvv .= eris.ovvv
    ovoo .= eris.ovoo
    oooo .= eris.oooo
    return ovov, oovv, ovvv, ovoo, oooo
end
function integral_df(t1::AbstractArray{T,2}, eris::PyObject)
    nocc, nvir = size(t1)
    naux = eris.naux
    Loo_df = similar(eris.Loo)
    Lov_df = similar(eris.Lov)
    Lvv_df = similar(eris.Lvv)
    Loo_df .= eris.Loo
    Lov_df .= eris.Lov
    Lvv_df .= eris.Lvv
    return Loo_df, Lov_df, Lvv_df
end
function fock_slice(nocc::Int64,nvir::Int64, eris::PyObject) #where T<: AbstractFloat
    nbasis = nocc + nvir
    fock = eris.fock[:,:]
    foo = fock[1:nocc, 1:nocc]
    fov = fock[1:nocc, nocc+1:nbasis]
    fvo = fock[nocc+1:nbasis, 1:nocc]
    fvv = fock[nocc+1:nbasis, nocc+1:nbasis]
    return fock, foo, fov, fvo, fvv
end
function e_pairs(nocc::Int64)
    pair_ls = Tuple{Int,Int}[]
    @fastmath @inbounds @simd for i in 1:nocc
        @fastmath @inbounds @simd for j in 1:i
            @fastmath @inbounds push!(pair_ls, (i, j))
        end
    end
    @fastmath @inbounds return pair_ls
end
function mul_e_pairs(nocc::Int64)
    pair_ls1 = Tuple{Int,Int}[]
    pair_ls2 = Tuple{Int,Int}[]
    for i in range(1, nocc)
        for j in range(1, i)
            if i == j
                push!(pair_ls1, (i, j))
            else
                push!(pair_ls2, (i, j))
            end
        end
    end
    return pair_ls1, pair_ls2
end
#=
function build_temp(t1,nocc,nvir,Lov_df,Lvv_df,tau)
    tempf = zeros(nvir,nvir)
    Lov_T120 = permutedims(Lov_df,(2,3,1))
    temps = [zeros(nvir,nvir) for i = 1:Threads.nthreads()]
    BLAS_THREADS = BLAS.get_num_threads()
    BLAS.set_num_threads(1)
     Threads.@threads for i in 1:nocc
        Threads.@spawn begin
        @inbounds begin
            id = Threads.threadid()
            temp = temps[id]
            for j in 1:i
                @views tau_ij_ = tau[i,j,:,:]
                @tensor begin
                    temp[a,b] += (-t1[k,b]*Lov_T120[k,d,t]*tau_ij_[c,d]*Lvv_df[t,a,c])
                    temp[a,b] -= t1[k,a]*Lov_T120[k,c,t]*tau_ij_[c,d]*Lvv_df[t,b,d]
                    temp[a,b] += Lvv_df[t,a,c]*tau_ij_[c,d]*Lvv_df[t,b,d]
                end
            end
            #println("Thread ", id, ": temps[", id, "] = ", temp)
        end
     println("Thread ", id, ": temps[", id, "] = ", temp)
    end #sync
    end #spawn
    temp_t = reduce(+, temps)
    BLAS.set_num_threads(BLAS_THREADS)
    display(temp_t)
    return
end
=#
function build_temp(t1,nocc,nvir,Lov_df,Lvv_df,tau)
    Lov_T120 = permutedims(Lov_df,(2,3,1))
    nchunks = Threads.nthreads()
    temps = [zeros(nvir,nvir ) for i = 1:nchunks]
    Threads.@threads for (i_range, ichunk) in chunks(1:nocc, nchunks)
        @inbounds begin
            temp = temps[ichunk]
            for i in i_range
                for j in 1:i
                @views tau_ij_ = tau[i,j,:,:]
                @tensor begin
                    temp[a,b] += (-t1[k,b]*Lov_T120[k,d,t]*tau_ij_[c,d]*Lvv_df[t,a,c])
                    temp[a,b] -= t1[k,a]*Lov_T120[k,c,t]*tau_ij_[c,d]*Lvv_df[t,b,d]
                    temp[a,b] += Lvv_df[t,a,c]*tau_ij_[c,d]*Lvv_df[t,b,d]
                end
                end
            end
        end
    end
    final_result = zeros(nvir, nvir)
    temp_list = [(i, temps[i]) for i in 1:Threads.nthreads()]
    temp_list = sort(temp_list, by=x->x[1])
    final_result = reduce(+, [x[2] for x in temp_list])
    display(final_result)
end
function pair_check(nocc::Int64, Lov_df::Array{Float64,3}, Lvv_df::Array{Float64,3})#,t1::Array{Float64,2},eris::PyObject)
    for i in 1:nocc
        for j in 1:i
            @fastmath @inbounds Lov_T120 = permutedims(Lov_df,(2,3,1))
            @fastmath tau_ij_ = @views tau[i,j,:,:]
            @tensor ttmp1[k,t,c] = Lov_T120[k,d,t]*tau_ij_[c,d]
            @tensor ttmp2[k,a] = ttmp1[k,t,c]*Lvv_df[t,a,c]
            @tensor temp[a,b] = (-t1[k,b]*ttmp2[k,a])
            @tensor ttmp3[k,t,d] = Lov_T120[k,c,t]*tau_ij_[c,d]
            @tensor ttmp4[k,b] = ttmp3[k,t,d]*Lvv_df[t,b,d]
            @tensor temp[a,b] -= t1[k,a]*ttmp4[k,b]
            @tensor ttmp5[t,a,d] = Lvv_df[t,a,c]*tau_ij_[c,d]
            @tensor temp[a,b] += ttmp5[t,a,d]*Lvv_df[t,b,d]
        end
    end
    return temp
end
function cc_energy(t1,t2,ovov,fov)
    nocc, nvir = size(t1)
    nbasis = nocc+nvir
    e = 0.0
    tau = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tau[i,j,a,b] = (t1[i,a]*t1[j,b])  #qudratic term
    tau += t2
    @tensor e = 2*(fov[i,a]*t1[i,a])
    @tensor e += 2*(tau[i,j,a,b]*ovov[i,a,j,b])
    @tensor e -= tau[i,j,a,b]*ovov[i,b,j,a]
    return real(e)
end
function cc_iter(t1,t2,eris, cc_input)
    nocc, nvir = size(t1)
    ECCSD = 0.0
    ovov, oovv,ovvv,ovoo, oooo= int_copy_store(t1,eris)
    Loo_df, Lov_df, Lvv_df = integral_df(t1,eris)
    fock,foo,fov,fvo,fvv = fock_slice(nocc,nvir, eris)
    @inbounds for j in 1:66
        println("Iteration ", j)
        OLDCC = ECCSD
        t1, t2 = update_cc_amps(t1,t2,eris,ovov,oovv,ovvv,ovoo,oooo,fock,foo, fov, fvo, fvv,Lov_df,Lvv_df,Loo_df)
        ECCSD = cc_energy(t1,t2,ovov,fov)
        DECC = abs(ECCSD - OLDCC)
        println(" DECC  = ", DECC)
        println(" ECCSD = ", ECCSD)
        println("   ")
        println("   ")
        convergence = 1.0e-12
        if DECC < convergence
            println("TOTAL ITERATIONS: ", j)
            break
        end
    end
    ECCSD = cc_energy(t1,t2,ovov,fov)
    println("Final CCSD correlation energy  ", ECCSD)
    return t1, t2
end
function update_cc_amps(t1::AbstractArray{T,2},t2::AbstractArray{T,4},eris,ovov::AbstractArray{T,4},
    oovv::AbstractArray{T,4},ovvv::AbstractArray{T,4},ovoo::AbstractArray{T,4},
    oooo::AbstractArray{T,4},fock::AbstractArray{T,2},foo::AbstractArray{T,2},
    fov::AbstractArray{T,2}, fvo::AbstractArray{T,2},fvv::AbstractArray{T,2},
    Lov_df::AbstractArray{T,3},Lvv_df::AbstractArray{T,3},Loo_df::AbstractArray{T,3}) where T<:AbstractFloat
    level_shift = 0.00
    @fastmath @inbounds nocc, nvir = size(t1)
    @fastmath @inbounds nbasis = nocc + nvir
    @fastmath @inbounds naux = eris.naux
    Loo_df, Lov_df, Lvv_df = integral_df(t1,eris)
    @fastmath @inbounds mo_e_o = eris.mo_energy[1:nocc]
    @fastmath @inbounds mo_e_v = eris.mo_energy[nocc+1:nbasis] .+ level_shift
    Foo = cc_Foo(t1, t2,ovov,foo)
    Fvv = cc_Fvv(t1,t2,ovov,fvv)
    Fov = cc_Fov(t1,t2,ovov,fov)
    @fastmath @inbounds Foo[diagind(Foo)] -= mo_e_o
    @fastmath @inbounds Fvv[diagind(Fvv)] -= mo_e_v
    #------------- T1 equation -----------------#
    ksht = Array{Float64,2}(undef, nvir, nvir)
    t1new = Array{Float64,2}(undef, nocc, nvir)
    ttnew = Array{Float64,3}(undef, naux, nocc, nvir)
    tautemp = Array{Float64,4}(undef, nocc, nocc, nvir, nvir)
    fov_T = permutedims(fov)
    println(" Time for 1st block of T1 and T2 using @time macro ")
    @time begin
        @tensor begin
            ksht[c,a] = fov_T[c,k]*t1[k,a]
            t1new[i,a] = -2*(t1[i,c]*ksht[c,a])
            t1new[i,a] += Fvv[a,c]*t1[i,c]
            t1new[i,a] -= Foo[k,i]*t1[k,a]
        end
        t1new += conj(fov)
        @tensoropt (i=>x, j=>x, k=>x, l=>x, a=>100x, b=>100x, c=>100x, d=>100x) begin
            t1new[i,a] += 2*(Fov[k,c]*t2[k,i,c,a])
            t1new[i,a] -= Fov[k,c]*t2[i,k,c,a]
            t1new[i,a] += 2*(ovov[k,c,i,a]*t1[k,c])  #keep it on .............
            t1new[i,a] -= oovv[k,i,a,c]*t1[k,c]
            t1new[i,a] += Fov[k,c]*t1[i,c]*t1[k,a]    #qudratic term
        end
        if eris.incore < 4 && eris.df
            @tensoropt (i=>x, k=>x, c=>100x, d=>100x) begin
                tautemp[i,k,c,d]=t1[k,d]*t1[i,c]
            end
            tautemp += t2
            @tensoropt (i=>x, k=>x, m=>100x, a=>100x, c=>100x, d=>100x) begin
                ttnew[m,i,c] = Lov_df[m,k,d]*tautemp[i,k,c,d]
                t1new[i,a] += ttnew[m,i,c]*Lvv_df[m,a,c]
            end
            delete!(ttnew)
            @tensoropt (i=>x, k=>x, m=>100x, a=>100x, c=>100x, d=>100x) begin
                ttnew[m,i,d] = Lov_df[m,k,c]*tautemp[i,k,c,d]
                t1new[i,a] -= ttnew[m,i,d]*Lvv_df[m,a,d]
            end
            delete!(ttnew)
            delete!(tautemp)
        else
            @tensoropt (i=>x, j=>x, k=>x, l=>x, a=>100x, b=>100x, c=>100x, d=>100x) begin
                t1new[i,a] += 2*(ovvv[k,d,a,c]*t2[i,k,c,d])
                t1new[i,a] -= ovvv[k,c,a,d]*t2[i,k,c,d]
                t1new[i,a] += 2*(ovvv[k,d,a,c]*t1[k,d]*t1[i,c])  #qudratic term
                t1new[i,a] -= ovvv[k,c,a,d]*t1[k,d]*t1[i,c]      #qudratic term
            end
        end
        @tensoropt (i=>x, j=>x, k=>x, l=>x, a=>100x, b=>100x, c=>100x, d=>100x) begin
            t1new[i,a] -= 2*(ovoo[l,c,k,i]*t2[k,l,a,c])
            t1new[i,a] += ovoo[k,c,l,i]*t2[k,l,a,c]
        end
        kstz = zeros(Float64,nocc,nocc)
        @tensoropt (i=>x, j=>x, k=>x, l=>x, a=>10x, b=>10x, c=>10x, d=>10x) begin
            kstz[i,k] = ovoo[l,c,k,i]*t1[l,c]
            t1new[i,a] -= 2*(kstz[i,k]*t1[k,a])
            kstz[i,k] = ovoo[k,c,l,i]*t1[l,c]
            t1new[i,a] += kstz[i,k]*t1[k,a]
        end
        # ----------T2 Equation ------------ #
        tmp2_prime = Array{Float64,3}(undef, naux, nocc, nvir)
        tmp2 = zeros(Float64,nvir,nvir,nocc,nvir)
        tmp = zeros(Float64,nocc,nocc,nvir,nvir)
        ovvv_T = zeros(Float64,nvir,nvir,nocc,nvir)
        t1_minus = -t1
        if eris.incore < 4 && eris.df
            tmp4[k,i,j,b] = oovv[k,i,b,c]*(-t1[j,c])
            tmp[i,j,a,b]= tmp4[k,i,j,b]*t1[k,a]
            delete!(tmp4)
            tmp2_prime[m,j,b] = Lvv_df[m,c,b]*t1[m,j,b]
            tmp += transpose(Lov_df)[a,i,m]*tmp2_prime[m,j,b]
            delete!(tmp2_prime)
        else
            @tensor tmp2[a,b,i,c] = oovv[k,i,b,c]*(-t1[k,a])
            tmp2 += permutedims(ovvv,(2,4,1,3))
            @tensor tmp[i,j,a,b] = tmp2[a,b,i,c]*t1[j,c]
        end
        t2new = tmp + permutedims(tmp,(2,1,4,3))
        tmp2 = zeros(Float64,nvir,nocc,nocc,nocc)
        @tensor tmp2[a,k,i,j] = ovov[k,c,i,a]*t1[j,c]
        tmp2 += permutedims(ovoo,(2,4,1,3))
        tmp = zeros(nocc,nocc,nvir,nvir)
        @tensor tmp[i,j,a,b] = tmp2[a,k,i,j]*t1[k,b]
        t2new -= tmp + permutedims(tmp,(2,1,4,3))
        t2new += permutedims(ovov,(1,3,2,4))
    end
    Loo = Loioi(t1,t2,ovoo,ovov,fov,foo)
    Lvv = Lvirvir(t1,t2,ovvv,ovov,fvv,fov)
    Loo[diagind(Foo)] -= mo_e_o
    Lvv[diagind(Fvv)] -= mo_e_v
    Woooo = cc_Woooo(t1,t2,ovoo,ovov,oooo)
    Wvoov = cc_Wvoov(t1,t2,ovvv,ovov,ovoo)
    Wvovo = cc_Wvovo(t1,t2,ovvv,ovoo,oovv,ovov)
    tau = zeros(Float64,nocc,nocc,nvir,nvir)
    tmp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tau[i,j,a,b] = t1[i,a]*t1[j,b]
    tau += t2
    @tensor t2new[i,j,a,b] += Woooo[k,l,i,j]*tau[k,l,a,b]
    if eris.incore < 5 && eris.df
        temp_final = build_temp(t1,nocc,nvir,Lov_df,Lvv_df,tau)
        exit()
    else
        println(" time taken for cc_Wvvvv function @time macro")
        @time Wvvvv = cc_Wvvvv(t1,t2,ovvv,vvvv)
        @tensor t2new[i,j,a,b] += Wvvvv[a,b,c,d]*tau[i,j,c,d]
    end
    @tensor tmp[i,j,a,b] = Lvv[a,c]*t2[i,j,c,b]
    t2new += (tmp + permutedims(tmp,(2,1,4,3)))
    tmp = nothing
    tmp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tmp[i,j,a,b] = Loo[k,i]*t2[k,j,a,b]
    t2new -= (tmp +permutedims(tmp,(2,1,4,3)))
    tmp = nothing
    tmp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tmp[i,j,a,b] = 2*(Wvoov[a,k,i,c]*t2[k,j,c,b])
    @tensor tmp[i,j,a,b] -= Wvovo[a,k,c,i]*t2[k,j,c,b]
    t2new += (tmp + permutedims(tmp,(2,1,4,3)))
    tmp = nothing
    tmp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tmp[i,j,a,b] = (Wvoov[a,k,i,c]*t2[k,j,b,c])
    t2new -= (tmp + permutedims(tmp,(2,1,4,3)))
    tmp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tmp[i,j,a,b] = Wvovo[b,k,c,i]*t2[k,j,a,c]
    t2new -= (tmp + permutedims(tmp,(2,1,4,3)))
    tmp = nothing
    py"""
    import numpy as np
    def denomi(a,b,t1new,t2new):
        eia = a[:, None] - b
        eijab = eia[:, None, :, None] + eia[None, :, None, :]
        t1new1 = t1new/eia
        t2new1 = t2new/eijab
        return t1new1, t2new1
    """
    t1new1,t2new1 = py"denomi"(mo_e_o,mo_e_v,t1new,t2new)
    return t1new1, t2new1
end
# ----- rintermediates ------------------- #
function cc_Foo(t1, t2,ovov,foo)
    nocc, nvir = size(t1)
    Fki = zeros(Float64,nocc,nocc)
    tautemp = zeros(Float64,nocc,nocc,nvir,nvir)
    @tensor tautemp[i,l,c,d] = t1[i,c]*t1[l,d]    #qudratic term
    tautemp += t2
    @tensor Fki[k,i] = 2*(ovov[k,c,l,d]*tautemp[i,l,c,d])
    @tensor Fki[k,i] -= ovov[k,d,l,c]*tautemp[i,l,c,d]
    Fki += foo
    return Fki
end
function cc_Fvv(t1,t2,ovov,fvv)
    nocc, nvir = size(t1)
    nbasis = nocc + nvir
    tautemp = zeros(Float64,nocc,nocc,nvir,nvir)
    Fac = zeros(Float64,nvir,nvir)
    @tensor tautemp[k,l,a,d] = t1[k,a]*t1[l,d]    #qudratic term
    tautemp += t2
    @tensor Fac[a,c] = (ovov[k,d,l,c]*tautemp[k,l,a,d])
    @tensor Fac[a,c] -= 2*(ovov[k,c,l,d]*tautemp[k,l,a,d])
    Fac += copy(fvv)
    return Fac
end
function cc_Fov(t1,t2,ovov,fov)
    nocc, nvir = size(t1)
    nbasis = nocc + nvir
    Fkc = zeros(Float64,nocc,nvir)
    @tensor Fkc[k,c] = 2*(ovov[k,c,l,d]*t1[l,d])
    @tensor Fkc[k,c] -= ovov[k,d,l,c]*t1[l,d]
    Fkc += fov
    return Fkc
end
function Loioi(t1,t2,ovoo,ovov,fov,foo)
    nocc, nvir = size(t1)
    nbasis = nocc + nvir
    Lki = cc_Foo(t1, t2,ovov,foo)
    @tensor Lki[k,i] += fov[k,c]*t1[i,c]
    @tensor Lki[k,i] += 2*(ovoo[l,c,k,i]*t1[l,c])
    @tensor Lki[k,i] -= ovoo[k,c,l,i]*t1[l,c]
    return Lki
end
function Lvirvir(t1,t2,ovvv,ovov,fvv,fov)
    nocc, nvir = size(t1)
    nbasis = nocc + nvir
    Lac = cc_Fvv(t1,t2,ovov,fvv)
    @tensor Lac[a,c] -= fov[k,c]*t1[k,a]
    @tensor Lac[a,c] += 2*(ovvv[k,d,a,c]*t1[k,d])
    @tensor Lac[a,c] -= ovvv[k,c,a,d]*t1[k,d]
    return Lac
end
function cc_Woooo(t1,t2,ovoo,ovov,oooo)
    nocc,nvir = size(t1)
    Wklij = zeros(Float64,nocc,nocc,nocc,nocc)
    oooo_T = zeros(Float64,nocc,nocc,nocc,nocc)
    @tensor Wklij[k,l,i,j] = ovoo[l,c,k,i]*t1[j,c]
    @tensor Wklij[k,l,i,j] += ovoo[k,c,l,j]*t1[i,c]
    @tensor Wklij[k,l,i,j] += ovov[k,c,l,d]*t2[i,j,c,d]
    @tensor Wklij[k,l,i,j] += ovov[k,c,l,d]*t1[i,c]*t1[j,d]  #quadratic
    Wklij += permutedims(oooo,(1,3,2,4))
    return Wklij
end
function cc_Wvvvv(t1::AbstractArray{T,2}, t2::AbstractArray{T,4}, ovvv::AbstractArray{T,4}, vvvv::AbstractArray{T,4}) where T<:AbstractFloat
    nocc, nvir = size(t1)
    Wabcd = zeros(T, nvir, nvir, nvir, nvir)
    @tensoropt (a=>x, b=>y, c=>z, d=>w, k=>u) begin
        Wabcd[a,b,c,d] = ovvv[k,d,a,c]*(-t1[k,b])
        Wabcd[a,b,c,d] -= ovvv[k,c,b,d]*t1[k,a]
    end
    Wabcd += permutedims(vvvv,(1,3,2,4))
    return Wabcd
end
function cc_Wvoov(t1,t2,ovvv,ovov,ovoo)
    nocc, nvir = size(t1)
    Wakic = zeros(Float64,nvir,nocc,nocc,nvir)
    @tensor Wakic[a,k,i,c]= ovvv[k,c,a,d]*t1[i,d]
    ovov_T = zeros(Float64,nvir,nocc,nocc,nvir)
    Wakic += permutedims(ovov,(4,1,3,2))
    @tensor Wakic[a,k,i,c] -= ovoo[k,c,l,i]*t1[l,a]
    @tensor Wakic[a,k,i,c] -= 0.5*(ovov[l,d,k,c]*t2[i,l,d,a])
    @tensor Wakic[a,k,i,c] -= 0.5*(ovov[l,c,k,d]*t2[i,l,a,d])
    @tensor Wakic[a,k,i,c] -= ovov[l,d,k,c]*t1[i,d]*t1[l,a] #quadratic
    @tensor Wakic[a,k,i,c] += ovov[l,d,k,c]*t2[i,l,a,d]
    return Wakic
end
function cc_Wvovo(t1,t2,ovvv,ovoo,oovv,ovov)
    nocc, nvir = size(t1)
    Wakci = zeros(Float64,nvir,nocc,nvir,nocc)
    oovv_T = zeros(Float64,nvir,nocc,nvir,nocc)
    @tensor Wakci[a,k,c,i] = ovvv[k,d,a,c]*t1[i,d]
    @tensor Wakci[a,k,c,i]  -= ovoo[l,c,k,i]*t1[l,a]
    Wakci += permutedims(oovv,(3,1,4,2))
    @tensor Wakci[a,k,c,i] -= 0.5*(ovov[l,c,k,d]*t2[i,l,d,a])
    @tensor Wakci[a,k,c,i] -= ovov[l,c,k,d]*t1[i,d]*t1[l,a] #qudratic term
    return Wakci
end