GPU Julia vs GPU Matlab

I ran roflmaostc's code:

using CUDA, BenchmarkTools
  """
      math1!(C, A, B)

  Overwrite `C` in place with an elementwise function of `A` and `B`, and return `C`.

  For each pair of elements `(a, b)` the stored value is
  `a^2 + b^2` followed by two rounds of `+ a*b + a/b - a*b - a/b`.
  Algebraically the extra terms cancel; they are kept term-for-term because in
  floating point the evaluation order affects the result (and, presumably, because
  they serve as arithmetic load for the benchmark).
  """
  function math1!(C, A, B)
      # Scalar kernel: lowercase names are single elements, not the arrays themselves.
      kernel(a, b) =
          a^2 + b^2 + a * b + a / b - a * b - a / b + a * b + a / b - a * b - a / b
      # `broadcast!` writes into `C` and returns it.
      return broadcast!(kernel, C, A, B)
  end
  """
      math2!(D, C)

  Overwrite `D` in place with an elementwise function of `C`, and return `D`.

  For each element `c` the stored value is `c^2 + c^2` followed by two rounds of
  `+ c*c + c/c - c*c - c/c`. The expression is evaluated exactly as written; the
  cancelling terms are not simplified away.
  """
  function math2!(D, C)
      # The do-block is the per-element kernel; `c` is one element of `C`.
      broadcast!(D, C) do c
          c^2 + c^2 + c * c + c / c - c * c - c / c + c * c + c / c - c * c - c / c
      end
      return D
  end
  """
      math3!(E, D)

  Overwrite `E` in place with an elementwise function of `D`, and return `E`.

  Each element `d` is mapped to `d^2 + d^2` plus two rounds of
  `+ d*d + d/d - d*d - d/d`, evaluated left to right exactly as written.
  """
  function math3!(E, D)
      # Per-element kernel; `d` is a single element of `D`.
      kernel = d -> d^2 + d^2 + d * d + d / d - d * d - d / d + d * d + d / d - d * d - d / d
      return broadcast!(kernel, E, D)
  end
 
 """
     f()

 Benchmark 1000 chained passes of `math1!`, `math2!` and `math3!` on
 151x151x151 GPU arrays and return the value of the timed expression, `sum(E)`.

 The timing is printed by `@btime`. The timed region is wrapped in `CUDA.@sync`
 and ends with a reduction over `E`.
 """
 function f()
     n = 151  # edge length of the cubic test arrays

     # Inputs are shifted by 1, presumably to keep the divisors in the kernels
     # away from zero.
     A = CUDA.rand(n, n, n) .+ 1
     B = CUDA.rand(n, n, n) .+ 1

     # Work buffers. `C` is fully overwritten by `math1!` before it is read, so a
     # single `CUDA.ones` allocation replaces the former `CUDA.zeros(...) .+ 1`.
     C = CUDA.ones(n, n, n)
     D = similar(C)
     E = similar(C)

     # `$` interpolates the local arrays into the benchmark expression.
     return @btime CUDA.@sync begin
         for iter in 1:1000
             math1!($C, $A, $B)
             math2!($D, $C)
             math3!($E, $D)
         end
         sum($E)
     end
 end
  # Run the benchmark once; `@btime` inside `f` prints the timing.
  f()