I’m getting into GPU programming and am trying to write a simple kernel that calculates the correlation function of an image. I’ve run into a problem: inside a kernel, I cannot convert the result of the `round` or `floor` functions to an `Int`.
If I try the following very simple prototype:
# Output array: an MtlArray built from 10 Int32 zeros; the kernel writes into it.
result = MtlArray(zeros(Int32, 10))
# Minimal reproducer kernel: each thread rounds a float sum and stores it as Int32.
function test(result)
    # 1-D position of this thread in the launch grid, used as the output index.
    i = thread_position_in_grid_1d()
    i1 = 1.f0
    i2 = 3.3f0
  
    # This is the line the GPU compiler rejects. The reported stack trace runs
    # test -> Int32 (float.jl) -> box_float32 -> malloc, ending in the unsupported
    # call to gpu_malloc.
    # NOTE(review): presumably the boxing comes from the error (throw) path of the
    # checked Int32(::Float32) conversion; `unsafe_trunc(Int32, round(i1 + i2))`
    # may avoid it — confirm against the Base and Metal.jl documentation.
    result[i] = Int32(round(i1+i2))
    return
end
# Launch the kernel with threads = 10 and groups = 1 (one thread per element of `result`).
@metal threads = 10 groups = 1 test(result)
and run it, I get the following error. Why is this not possible?
ERROR: InvalidIRError: compiling MethodInstance for test(::MtlDeviceVector{Int32, 1}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to gpu_malloc)
Stacktrace:
 [1] malloc
   @ ~/.julia/packages/GPUCompiler/BxfIW/src/runtime.jl:89
 [2] macro expansion
   @ ~/.julia/packages/GPUCompiler/BxfIW/src/runtime.jl:184
 [3] macro expansion
   @ ./none:0
 [4] box
   @ ./none:0
 [5] box_float32
   @ ~/.julia/packages/GPUCompiler/BxfIW/src/runtime.jl:213
 [6] Int32
   @ ./float.jl:900
 [7] test
   @ ~/Library/Mobile Documents/com~apple~CloudDocs/Documents/PhD/JuliaProjects/InteractiveIsing.jl/Tests/MetalSampling.jl:53
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/validation.jl:145
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:407 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:406 [inlined]
  [5] emit_llvm(job::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, validate::Bool, ctx::LLVM.ThreadSafeContext)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/utils.jl:89
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/BxfIW/src/utils.jl:83 [inlined]
  [7] codegen(output::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing, ctx::LLVM.ThreadSafeContext)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:120
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:94 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, strip::Bool, validate::Bool, only_entry::Bool, ctx::LLVM.ThreadSafeContext)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:90
 [10] compile
    @ ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:81 [inlined]
 [11] compile(job::GPUCompiler.CompilerJob, ctx::LLVM.ThreadSafeContext)
    @ Metal ~/.julia/packages/Metal/TtPHW/src/compiler/compilation.jl:59
 [12] #59
    @ ~/.julia/packages/Metal/TtPHW/src/compiler/compilation.jl:55 [inlined]
 [13] LLVM.ThreadSafeContext(f::Metal.var"#59#60"{GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}})
    @ LLVM ~/.julia/packages/LLVM/TLGyi/src/executionengine/ts_module.jl:14
 [14] JuliaContext(f::Metal.var"#59#60"{GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/driver.jl:35
 [15] compile
    @ ~/.julia/packages/Metal/TtPHW/src/compiler/compilation.jl:54 [inlined]
 [16] actual_compilation(cache::Dict{UInt64, Any}, key::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, ft::Type, tt::Type, compiler::typeof(Metal.compile), linker::typeof(Metal.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/cache.jl:53
 [17] cached_compilation(cache::Dict{UInt64, Any}, cfg::GPUCompiler.CompilerConfig{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}, ft::Type, tt::Type, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/BxfIW/src/cache.jl:37
 [18] macro expansion
    @ ~/.julia/packages/Metal/TtPHW/src/compiler/execution.jl:161 [inlined]
 [19] macro expansion
    @ ./lock.jl:267 [inlined]
 [20] mtlfunction(f::typeof(test), tt::Type{Tuple{MtlDeviceVector{Int32, 1}}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Metal ~/.julia/packages/Metal/TtPHW/src/compiler/execution.jl:157
 [21] mtlfunction(f::typeof(test), tt::Type{Tuple{MtlDeviceVector{Int32, 1}}})
    @ Metal ~/.julia/packages/Metal/TtPHW/src/compiler/execution.jl:155
 [22] top-level scope
    @ ~/.julia/packages/Metal/TtPHW/src/compiler/execution.jl:77
