I tried to pass the `nontemporal` flag to load and store instructions through `llvmcall`, using the code from SIMD.jl as a reference. The function works, but I never get a non-temporal instruction in the machine code, and the flag has already vanished in the LLVM IR. What could be the reason for that?
"""
    vstorent(x::Vec{4,Float64}, aa::Ptr{Float64})

Store the four `Float64` lanes of `x` to the address `aa` using inline LLVM IR,
requesting a non-temporal store through `!nontemporal` metadata on the `store`.

The `llvmcall` is declared with return type `Cvoid`, so the function returns `nothing`.
The IR declares only `align 8` for the store.

NOTE(review): the `@code_llvm` output recorded for this method shows the store
without the `!nontemporal` flag; whether inline metadata written this way survives
`llvmcall` needs to be confirmed.
"""
function vstorent(x::Vec{4,Float64}, aa::Ptr{Float64})
# In the IR string, `%0` is the vector passed as `x.elts` and `%1` is `aa` received
# as an `i64`, which is converted to a `<4 x double>*` before the store.
Base.llvmcall("
%ptr = inttoptr i64 %1 to <4 x double>*
store <4 x double> %0, <4 x double>* %ptr, align 8, !nontemporal !{ i32 1 }
ret void"
, Cvoid, Tuple{NTuple{4,VecElement{Float64}}, Ptr{Float64}}, x.elts, aa)
end
# Scratch buffer whose first element provides a valid Float64 destination address.
qq = randn(100);
# Print the LLVM IR generated for storing a 4-lane vector of 100.0 at the start of `qq`.
@code_llvm vstorent(Vec(ntuple(_ -> 100.0, 4)), pointer(qq))
; @ REPL[17]:2 within `vstorent'
define void @julia_vstorent_12380({ <4 x double> } addrspace(11)* nocapture nonnull readonly dereferenceable(32), i64) {
top:
; ┌ @ sysimg.jl:18 within `getproperty'
%2 = getelementptr inbounds { <4 x double> }, { <4 x double> } addrspace(11)* %0, i64 0, i32 0
; └
%3 = load <4 x double>, <4 x double> addrspace(11)* %2, align 16
%ptr.i = inttoptr i64 %1 to <4 x double>*
store <4 x double> %3, <4 x double>* %ptr.i, align 8
ret void
}