I am new to learning GPU, I do n’t know how to put it in GPU for the 4 loops here, I hope I can get help

'''

using CuArrays,CUDAnative,CUDAdrv,BenchmarkTools

N = 3
M = 100

k = ones(N,N)*5
the =range(0,pi/2,length=M)
ph = range(0,2*pi,length=M)
theta = CuArrays.repeat(the,1,M)'
phi = CuArrays.repeat(ph,1,M)

aa = range(-5,5,length=N)
A = repeat(-aa',N,1)
B = repeat(aa,1,N)
C = ones(N,N)*-10

f = zeros(N,N)
Ave_Energy = ones(N,N)*pi/N^2
Rho = zeros(M,M)
rho = zeros(N,N)
K = zeros(N,N)
goal_E = ones(N,N)/N^2
E = zeros(N,N)
number_E = zeros(N,N)

function fun(A,B,C,Rho)
for iii = 1:M
    for jjj =1:M
        for ii =1:N
            for jj =1:N
                f[ii,jj] = (A[ii,jj] .^2 + B[ii,jj] .^2 .+ C[ii,jj] .^2) .^ 0.5
                K[ii,jj] =2 .* k[ii,jj] .+ f[ii,jj]
                rho[ii,jj] = (K[ii,jj] .^2 .- f[ii,jj] .^ 2) ./ (2 .* (K[ii,jj] .- A[ii,jj] .* sin(theta[iii,jjj]) .* cos(phi[iii,jjj]) 
                            .- B[ii,jj] .* sin(theta[iii,jjj]) .* sin(phi[iii,jjj]) .- C[ii,jj] .* cos(theta[iii,jjj])))
            end
        end

        Rho[iii,jjj] = maximum(maximum(rho[:]))
        FF = findall(x->x.== maximum(maximum(rho)),rho)
        number_E[FF] = number_E[FF] .+ 1/length(FF)
    end
end
E = number_E/(M*M)*pi
Var = Ave_Energy - E
return 
end
fun(A,B,C,Rho)


'''