Here is an MWE:
julia> using CUDA
julia> x = widen.(CuArray(rand(Int64, 10)))
10-element CuArray{Int128, 1}:
  7760362279116003288
  1887985077088633303
 -4343813946050421943
 -3266615630613073703
  2140177075751050950
 -8479154113856376281
 -2068873100572326709
 -8353273203968219407
  6541739557653674531
  8433965827099195653
julia> .÷(x, x)
ERROR: LLVM error: Undefined external symbol "__divti3"
Stacktrace:
  [1] handle_error(reason::Cstring)
    @ LLVM ~/.julia/packages/LLVM/1GCWB/src/core/context.jl:105
  [2] macro expansion
    @ ~/.julia/packages/LLVM/1GCWB/src/util.jl:85 [inlined]
  [3] LLVMTargetMachineEmitToMemoryBuffer
    @ ~/.julia/packages/LLVM/1GCWB/lib/libLLVM_h.jl:705 [inlined]
  [4] emit(tm::LLVM.TargetMachine, mod::LLVM.Module, filetype::LLVM.API.LLVMCodeGenFileType)
    @ LLVM ~/.julia/packages/LLVM/1GCWB/src/targetmachine.jl:44
  [5] mcgen(job::GPUCompiler.CompilerJob, mod::LLVM.Module, format::LLVM.API.LLVMCodeGenFileType)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/DwnNd/src/mcgen.jl:74
  [6] macro expansion
    @ ~/.julia/packages/TimerOutputs/PZq45/src/TimerOutput.jl:226 [inlined]
  [7] macro expansion
    @ ~/.julia/packages/GPUCompiler/DwnNd/src/driver.jl:331 [inlined]
  [8] macro expansion
    @ ~/.julia/packages/TimerOutputs/PZq45/src/TimerOutput.jl:226 [inlined]
  [9] macro expansion
    @ ~/.julia/packages/GPUCompiler/DwnNd/src/driver.jl:328 [inlined]
 [10] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/DwnNd/src/utils.jl:62
 [11] cufunction_compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/mVgLI/src/compiler/execution.jl:313
 [12] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/DwnNd/src/cache.jl:87
 [13] cufunction(f::GPUArrays.var"#broadcast_kernel#16", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Int128, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}}, typeof(div), Tuple{Base.Broadcast.Extruded{CuDeviceVector{Int128, 1}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{CuDeviceVector{Int128, 1}, Tuple{Bool}, Tuple{Int64}}}}, Int64}}; name::Nothing, kwargs::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CUDA ~/.julia/packages/CUDA/mVgLI/src/compiler/execution.jl:288
 [14] cufunction
    @ ~/.julia/packages/CUDA/mVgLI/src/compiler/execution.jl:282 [inlined]
 [15] macro expansion
    @ ~/.julia/packages/CUDA/mVgLI/src/compiler/execution.jl:102 [inlined]
 [16] #launch_heuristic#261
    @ ~/.julia/packages/CUDA/mVgLI/src/gpuarrays.jl:17 [inlined]
 [17] launch_heuristic
    @ ~/.julia/packages/CUDA/mVgLI/src/gpuarrays.jl:17 [inlined]
 [18] copyto!
    @ ~/.julia/packages/GPUArrays/ymNij/src/host/broadcast.jl:63 [inlined]
 [19] copyto!
    @ ./broadcast.jl:936 [inlined]
 [20] copy
    @ ~/.julia/packages/GPUArrays/ymNij/src/host/broadcast.jl:47 [inlined]
 [21] materialize(bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Nothing, typeof(div), Tuple{CuArray{Int128, 1}, CuArray{Int128, 1}}})
    @ Base.Broadcast ./broadcast.jl:883
 [22] top-level scope
    @ REPL[12]:1
 [23] top-level scope
    @ ~/.julia/packages/CUDA/mVgLI/src/initialization.jl:52
Is this error related to the architecture of my GPU, or is the `__divti3` symbol just not bound correctly?
My GPU is an NVIDIA V100, the CUDA.jl version is 3.3, and the Julia version is 1.6.0.