I’m noticing a huge expansion of the `Float64(Int64)` conversion in the `@inbounds` version. I don’t know x86 assembly well, but it looks like it’s doing a vector convert from Int to Float, and then extracting the Int back from the SIMD register for the indexing?
It goes from this:
; ┌ @ REPL[34]:3 within `test1'
; │┌ @ float.jl:60 within `Type'
leaq 1(%rax), %r8 # r8 = rax + 1 (address arithmetic only; no memory access)
vcvtsi2sdq %r8, %xmm1, %xmm0 # xmm0[0] = (double)(signed 64-bit r8); xmm0[1] = xmm1[1]
to this:
; │ @ REPL[35]:3 within `test2'
; │┌ @ float.jl:60 within `Type'
vextracti128 $1, %ymm0, %xmm6
vpextrq $1, %xmm6, %rax
vcvtsi2sdq %rax, %xmm12, %xmm1
vmovq %xmm6, %rax
vcvtsi2sdq %rax, %xmm12, %xmm6
vmovlhps %xmm1, %xmm6, %xmm1 # xmm1 = xmm6[0],xmm1[0]
vpextrq $1, %xmm0, %rax
vcvtsi2sdq %rax, %xmm12, %xmm6
vmovq %xmm0, %rax
vcvtsi2sdq %rax, %xmm12, %xmm2
vmovlhps %xmm6, %xmm2, %xmm2 # xmm2 = xmm2[0],xmm6[0]
vinsertf128 $1, %xmm1, %ymm2, %ymm6
vextracti128 $1, %ymm8, %xmm1
vpextrq $1, %xmm1, %rax
vcvtsi2sdq %rax, %xmm12, %xmm2
vmovq %xmm1, %rax
vcvtsi2sdq %rax, %xmm12, %xmm1
vmovlhps %xmm2, %xmm1, %xmm1 # xmm1 = xmm1[0],xmm2[0]
vpextrq $1, %xmm8, %rax
vcvtsi2sdq %rax, %xmm12, %xmm2
vmovq %xmm8, %rax
vcvtsi2sdq %rax, %xmm12, %xmm3
vmovlhps %xmm2, %xmm3, %xmm2 # xmm2 = xmm3[0],xmm2[0]
vinsertf128 $1, %xmm1, %ymm2, %ymm1
vextracti128 $1, %ymm7, %xmm2
vpextrq $1, %xmm2, %rax
vcvtsi2sdq %rax, %xmm12, %xmm3
vmovq %xmm2, %rax
vcvtsi2sdq %rax, %xmm12, %xmm2
vmovlhps %xmm3, %xmm2, %xmm2 # xmm2 = xmm2[0],xmm3[0]
vpextrq $1, %xmm7, %rax
vcvtsi2sdq %rax, %xmm12, %xmm3
vmovq %xmm7, %rax
vcvtsi2sdq %rax, %xmm12, %xmm7
vmovlhps %xmm3, %xmm7, %xmm3 # xmm3 = xmm7[0],xmm3[0]
vinsertf128 $1, %xmm2, %ymm3, %ymm2
vextracti128 $1, %ymm5, %xmm3
vpextrq $1, %xmm3, %rax
vcvtsi2sdq %rax, %xmm12, %xmm7
vmovq %xmm3, %rax
vcvtsi2sdq %rax, %xmm12, %xmm3
vmovlhps %xmm7, %xmm3, %xmm3 # xmm3 = xmm3[0],xmm7[0]
vpextrq $1, %xmm5, %rax
vcvtsi2sdq %rax, %xmm12, %xmm7
vmovq %xmm5, %rax
vcvtsi2sdq %rax, %xmm12, %xmm5
vmovlhps %xmm7, %xmm5, %xmm5 # xmm5 = xmm5[0],xmm7[0]