julia> a = CuArray(randn(32,32))
32×32 CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}:
-1.18274 0.255296 -0.638661 -0.360422 1.36684 1.33303 -0.0344325 -2.70717 0.781341 0.288144 0.0181823 … -0.0866141 -1.32198 -1.01299 -1.12803 0.75662 -0.485362 1.45173 0.315557 -0.422909 1.20458 -0.358723
-0.361403 1.30599 -2.22817 -1.37253 -0.941448 -0.946444 0.0212881 1.35463 -1.02864 -0.376794 1.36345 -0.103276 -0.488809 0.844027 -1.33044 -0.0239039 1.23767 0.516442 -0.712109 0.564529 -0.332186 -0.115035
-0.411949 3.12781 1.43 0.640773 -1.4274 0.170879 -0.153286 -0.0742285 0.920621 -1.59156 -0.624655 2.02479 -0.195249 0.972169 1.31103 0.559333 -0.285879 1.01966 1.46324 -2.13461 0.633388 -1.21563
-0.616604 1.30324 -1.54833 1.79702 -0.901697 1.2463 0.501738 1.88585 -0.532719 -0.29198 0.513761 -1.40476 0.415974 1.13903 0.984196 -0.220871 -1.66043 -0.704748 -0.606225 -0.981755 -0.162105 0.480065
0.862917 -0.0172713 0.562449 0.876436 -1.87044 0.202472 -0.00530627 0.123258 -1.63519 -0.407302 1.61808 1.28413 -0.526744 0.881331 1.38876 0.196024 0.17778 -0.507405 0.766865 0.417664 1.18937 0.203387
-2.14176 -0.24405 0.256934 -0.530263 -0.327436 -1.65971 -0.191257 -1.09481 -0.0299011 -0.742534 -1.42675 … 0.30039 0.111672 -0.838927 -0.462576 -0.0141869 -0.0762451 0.824289 1.30072 -0.234806 -0.423197 -1.92402
0.341661 0.373806 -1.27727 -1.3742 0.374996 0.0757437 -1.50276 -0.670836 -0.825431 -0.981054 0.243204 -1.27842 0.459343 0.533607 -1.93112 1.16603 -0.337453 -0.784879 0.0441276 -1.06148 0.949602 1.48952
-2.11397 0.0997932 -0.0934857 -0.332775 0.130322 -0.597269 1.0848 0.332133 -0.655547 -1.37971 -0.528319 -1.34418 0.218686 0.157925 0.787695 0.395078 -0.578935 0.828537 -0.044233 0.298056 -0.269946 0.822486
0.0193224 -0.180821 1.58763 -1.28979 0.541442 0.131374 -0.360402 -1.87178 0.365341 0.416429 -0.351132 0.397844 3.03769 0.587423 -0.256372 0.38455 0.561813 0.909853 0.852864 -0.69184 0.15078 0.271741
0.1463 2.35532 -1.12054 -0.170312 0.863957 0.205467 -1.67478 -1.19014 -1.71513 0.187881 -0.220182 0.050262 0.367135 -0.0453455 0.187454 0.74917 0.362049 1.81817 -1.11305 -1.07484 -1.74605 0.272091
-0.659404 -0.526084 -2.44706 -0.0376731 -0.293227 -0.344341 -1.23343 0.743462 1.87359 1.58985 0.266064 … 1.05003 -1.07732 0.190889 -1.92554 -0.827723 0.0722569 -1.6155 -0.531444 1.80599 -0.253826 1.09472
-0.583372 0.133521 0.454498 0.838771 0.576951 0.898052 0.276171 -0.126369 0.553402 0.732669 -0.327384 1.21917 -0.843279 -0.294921 -1.02184 0.925251 2.52576 -0.674227 0.491505 -0.0802199 -0.0913761 -0.814355
0.546402 -0.341532 1.58592 0.160828 -0.747661 0.270103 0.235058 0.102891 -1.23202 -1.09137 -0.353166 0.328799 0.406076 0.8905 0.689982 0.911042 0.889867 1.74923 1.69605 1.40757 0.77199 -0.917334
0.221091 -0.0844647 0.0597149 -0.506709 0.242511 -0.846431 -1.09838 1.18918 -1.66564 0.154088 1.69218 0.0415313 -0.080414 0.45262 0.672675 -0.380834 -0.722085 -0.511691 1.43115 -0.559729 0.230325 -1.35224
-0.8461 -0.947918 -0.362946 1.03332 -0.788973 0.966298 -0.044924 0.381322 0.853947 -0.736554 0.992695 1.01898 1.06452 0.0753523 0.381648 -0.981534 -0.183223 0.223377 -0.192385 -0.751022 0.693223 0.598055
1.57128 0.170002 0.420706 -0.358259 -0.763666 0.357856 -2.45755 0.876643 -0.201308 -1.09225 0.945946 … 0.844154 0.373243 -2.47715 -0.0154152 -0.200707 1.01858 -0.557736 -1.33257 -0.166715 -1.0523 1.08021
1.21051 -0.269519 0.541626 1.08781 -1.06128 -1.07469 -1.33684 -0.00322005 -0.873419 0.0495171 -0.265029 0.769827 -1.8249 -0.907858 -0.936338 0.122975 -0.707488 0.538972 0.793598 -0.898551 0.164789 -0.745389
-0.957271 -0.962949 0.304162 0.0631639 1.01021 -0.28617 -0.50154 -0.172942 0.635797 -0.000177218 0.508057 -1.27861 -0.362468 -0.392428 -0.412562 -0.334188 2.87287 -1.96455 -0.93422 0.116965 -0.600265 -1.70215
0.732213 0.122093 -1.35155 -0.174538 1.71937 -0.161921 -0.0947036 0.433514 -1.35414 0.785818 0.661375 1.04844 1.68358 -0.827405 -1.40593 1.03806 -1.31843 -0.248867 -0.317892 -1.05609 1.55634 0.958025
0.670764 -0.644736 -0.0380392 2.72004 1.35169 0.334075 0.0904835 1.73068 -0.540047 0.0671237 1.27533 0.0430329 -0.750078 0.094525 0.714126 -0.220413 0.694525 0.415242 1.30163 -0.682064 -0.652395 -1.03143
-1.91993 -1.33178 -1.9447 -0.15907 0.0914087 -0.3197 1.20673 0.864202 0.337897 -0.668695 -0.0516383 … 0.7436 0.306433 0.281742 0.0162177 0.285509 -0.290092 -0.161945 -0.976933 -0.362395 -0.451318 -0.837866
-0.440173 -1.23622 0.788577 0.470275 0.0742382 -0.359712 -1.32695 -1.265 0.164096 0.58305 -0.596248 -0.627529 0.982443 1.12335 0.748165 2.17027 -1.29246 -0.668905 -0.046717 -1.01411 -0.415643 -0.168539
0.110476 -0.22638 -1.45318 0.420269 -0.954141 -0.136122 -0.0460511 -0.273752 -0.403632 0.497042 1.15852 0.713211 -0.313969 -0.259574 -0.36592 1.52601 0.0458395 -1.3532 -0.755926 -0.673646 -0.0236632 0.852015
-1.39394 -1.69305 -0.605935 0.931355 1.88012 -0.975347 0.66593 -1.58926 0.86781 -0.204645 -0.704571 0.692413 0.544426 -1.79498 0.374285 0.446569 -0.712523 -0.688889 -0.700023 -0.770922 -0.407802 -0.0558989
-0.0907505 -2.22754 -1.3098 0.38434 2.35649 0.0808456 -2.77276 1.19143 -2.50402 -1.08366 0.384542 -1.06234 0.501594 1.17564 -0.0787722 1.23777 2.39944 0.710511 -0.810753 1.56165 1.4436 0.603893
0.166695 -0.920047 1.57311 -0.181067 -0.844018 0.56591 -0.452114 -1.81834 -0.341545 -1.10804 -0.66568 … -0.251763 -0.108015 -0.561843 -1.7024 0.893132 1.30714 -1.7737 -0.410661 1.05472 -0.5251 -0.536291
1.91467 -1.15632 -0.645947 -0.169997 1.61206 -0.451421 -1.13116 -1.12144 -0.0847346 0.286761 -1.23037 -0.064542 0.213405 1.30446 0.582412 1.98502 -0.569744 -1.01293 -0.20979 -0.178771 -0.116163 2.73663
0.47191 -0.401974 -0.370496 0.702847 -0.111029 -0.600142 0.837781 1.69012 -0.352159 -0.181135 -0.365207 -0.861706 1.50875 0.604447 0.112031 0.621397 -2.06233 2.67289 0.753432 0.218036 -0.36618 0.177427
0.569968 -1.45769 1.60171 -0.508461 -0.94282 -0.629496 -1.0438 0.659822 1.57365 0.690085 1.14958 -0.339958 2.14356 0.944514 0.102015 -0.124177 -0.446006 -0.403078 -0.942279 -0.195355 0.0820187 0.00668417
0.383264 2.16623 -0.410611 1.36818 0.596794 0.833985 -0.692672 -0.12185 0.502075 -0.113643 -0.934123 -0.247053 0.40849 -1.21 1.49231 1.61527 -0.0549187 -0.17075 -1.45476 -0.532932 -0.420932 0.14506
-0.458177 -0.496646 1.09117 -0.319151 -0.892142 -1.09472 -0.270736 0.196537 -0.268524 -0.53424 1.20232 … 0.466566 -1.83836 -0.662859 -1.67958 0.156623 0.706121 0.149542 -0.0310817 -1.01306 2.63536 -0.974717
-2.70159 -3.21527 -1.8141 0.581841 0.465126 0.713843 -0.880311 -1.48438 -1.59518 -0.839279 -0.0526363 1.47398 0.438093 0.343739 1.15257 0.932864 0.480822 -0.93771 1.76454 -1.22583 0.33857 -1.1845
julia> sin.(a)
ERROR: Cannot rewrite unknown use of function: @julia_unbox_uint8_4352_slot = linkonce_odr global i8 ({}*)* @jl_unbox_uint8.stateless
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] (::GPUCompiler.var"#68#71"{LLVM.Function, LLVM.Function, LLVM.Function})(builder::LLVM.Builder)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/irgen.jl:678
[3] LLVM.Builder(f::GPUCompiler.var"#68#71"{LLVM.Function, LLVM.Function, LLVM.Function}, args::LLVM.Context; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ LLVM ~/.julia/packages/LLVM/shuW4/src/irbuilder.jl:21
[4] Builder
@ ~/.julia/packages/LLVM/shuW4/src/irbuilder.jl:19 [inlined]
[5] (::GPUCompiler.var"#rewrite_uses!#70"{LLVM.Function, LLVM.Context})(f::LLVM.Function, new_f::LLVM.Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/irgen.jl:642
[6] add_kernel_state!(job::GPUCompiler.CompilerJob, mod::LLVM.Module, entry::LLVM.Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/irgen.jl:684
[7] finish_module!(job::GPUCompiler.CompilerJob, mod::LLVM.Module, entry::LLVM.Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/interface.jl:211
[8] finish_module!(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget}, mod::LLVM.Module, entry::LLVM.Function)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/ptx.jl:180
[9] macro expansion
@ ~/.julia/packages/GPUCompiler/HeCT6/src/driver.jl:192 [inlined]
[10] emit_llvm(job::GPUCompiler.CompilerJob, method_instance::Any; libraries::Bool, deferred_codegen::Bool, optimize::Bool, only_entry::Bool)
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/utils.jl:64
[11] emit_llvm
@ ~/.julia/packages/GPUCompiler/HeCT6/src/utils.jl:62 [inlined]
[12] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/YpW0k/src/compiler/execution.jl:325
[13] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/HeCT6/src/cache.jl:90
[14] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Float64, 1}, Base.Broadcast.Broadcasted{Nothing, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(sin), Tuple{Base.Broadcast.Extruded{CuDeviceMatrix{Float64, 1}, Tuple{Bool, Bool}, Tuple{Int64, Int64}}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/YpW0k/src/compiler/execution.jl:297
[15] cufunction
@ ~/.julia/packages/CUDA/YpW0k/src/compiler/execution.jl:291 [inlined]
[16] macro expansion
@ ~/.julia/packages/CUDA/YpW0k/src/compiler/execution.jl:102 [inlined]
[17] #launch_heuristic#236
@ ~/.julia/packages/CUDA/YpW0k/src/gpuarrays.jl:17 [inlined]
[18] copyto!
@ ~/.julia/packages/GPUArrays/3sW6s/src/host/broadcast.jl:65 [inlined]
[19] copyto!
@ ./broadcast.jl:913 [inlined]
[20] copy
@ ~/.julia/packages/GPUArrays/3sW6s/src/host/broadcast.jl:47 [inlined]
[21] materialize(bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Nothing, typeof(sin), Tuple{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}})
@ Base.Broadcast ./broadcast.jl:860
[22] top-level scope
@ REPL[5]:1
[23] top-level scope
@ ~/.julia/packages/CUDA/YpW0k/src/initialization.jl:52
➜ CUDA_code nvidia-smi
Wed Dec 15 19:41:40 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05 Driver Version: 495.29.05 CUDA Version: 11.5 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... On | 00000000:01:00.0 On | N/A |
| 23% 43C P3 27W / 120W | 2496MiB / 5910MiB | 20% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 2432 G /usr/lib/xorg/Xorg 1632MiB |
| 0 N/A N/A 2579 G /usr/bin/gnome-shell 150MiB |
| 0 N/A N/A 2683 G ...mviewer/tv_bin/TeamViewer 1MiB |
| 0 N/A N/A 3267 G ...AAAAAAAAA= --shared-files 338MiB |
| 0 N/A N/A 4372 G ...AAAAAAAAA= --shared-files 192MiB |
| 0 N/A N/A 6392 G ...AAAAAAAAA= --shared-files 46MiB |
| 0 N/A N/A 172635 G ...AAAAAAAAA= --shared-files 127MiB |
| 0 N/A N/A 175613 G gnome-control-center 1MiB |
+-----------------------------------------------------------------------------+