Sorry about that.
julia> versioninfo(); @code_native debuginfo=:none mysum(x)
Julia Version 1.4.2
Commit 44fa15b150* (2020-05-23 18:35 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-8.0.1 (ORCJIT, goldmont)
## mysum(x) as compiled by Julia 1.4.2 (LLVM 8): sums 8-byte doubles using
## 256-bit (ymm) adds, 16 elements per main-loop iteration, then a scalar tail.
## In:    %rdi = pointer to a record holding the data pointer at offset 0 and
##        the element count at offset 8 (presumably Julia's array header -- confirm).
## Out:   %xmm0 (low double) = sum; 0.0 when the count is <= 0.
## Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %ymm0-%ymm3, flags.
.section __TEXT,__text,regular,pure_instructions
movq 8(%rdi), %rax ## rax = n (element count)
testq %rax, %rax
jle L26 ## n <= 0: return 0.0
movq (%rdi), %rcx ## rcx = base address of the data
cmpq $16, %rax
jae L31 ## n >= 16: take the vectorized path
## Fewer than 16 elements: start the scalar loop with sum = 0 and offset 0.
vxorpd %xmm0, %xmm0, %xmm0
xorl %edx, %edx ## rdx = 0 elements already consumed
jmp L130
L26:
## Empty-input exit: result is 0.0.
vxorps %xmm0, %xmm0, %xmm0
retq
L31:
## Vector setup. rdx = n rounded down to a multiple of 16 (elements handled by
## the main loop); rsi points 96 bytes past the base so the four loads below
## use displacements -96..0; rdi counts the remaining vector elements down to 0.
movq %rax, %rdx
leaq 96(%rcx), %rsi
## Zero four accumulators; the VEX-encoded xmm write also clears the upper ymm bits.
vxorpd %xmm0, %xmm0, %xmm0
vxorpd %xmm1, %xmm1, %xmm1
vxorpd %xmm2, %xmm2, %xmm2
vxorpd %xmm3, %xmm3, %xmm3
andq $-16, %rdx
movq %rdx, %rdi
nopl (%rax) ## padding before the loop head (presumably for alignment)
L64:
## Main loop: 4 accumulators x 4 doubles = 16 doubles (128 bytes) per iteration.
vaddpd -96(%rsi), %ymm0, %ymm0
vaddpd -64(%rsi), %ymm1, %ymm1
vaddpd -32(%rsi), %ymm2, %ymm2
vaddpd (%rsi), %ymm3, %ymm3
subq $-128, %rsi ## rsi += 128 (-128 fits an 8-bit immediate, +128 does not)
addq $-16, %rdi ## 16 fewer elements left; ZF set when the vector part is done
jne L64
## Reduction: fold the four accumulators into ymm0, then fold lanes down to one double.
vaddpd %ymm0, %ymm1, %ymm0
cmpq %rdx, %rax ## flags for the je below; the vector ops in between leave flags untouched
vaddpd %ymm0, %ymm2, %ymm0
vaddpd %ymm0, %ymm3, %ymm0
vextractf128 $1, %ymm0, %xmm1 ## xmm1 = upper 128 bits of ymm0
vaddpd %ymm1, %ymm0, %ymm0 ## only the low 128 bits of this result are used afterwards
vpermilpd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0]
vaddpd %xmm1, %xmm0, %xmm0 ## low double of xmm0 = total of the vectorized part
je L158 ## n was a multiple of 16: no tail to process
L130:
## Scalar tail setup: rax = elements still to add, rcx = address of the first of them.
subq %rdx, %rax
leaq (%rcx,%rdx,8), %rcx
nopl (%rax) ## padding before the loop head (presumably for alignment)
L144:
## Scalar loop: one double per iteration.
vaddsd (%rcx), %xmm0, %xmm0
addq $8, %rcx
addq $-1, %rax
jne L144
L158:
vzeroupper ## clear upper ymm state before returning to the caller
retq
## Padding after the function body; not reachable (follows retq, no label).
nopw %cs:(%rax,%rax)
nopl (%rax)
julia> versioninfo(); @code_native debuginfo=:none mysum(x)
Julia Version 1.6.0-DEV.306
Commit 59b8dde7c1 (2020-06-26 09:21 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-9.0.1 (ORCJIT, icelake-client)
## mysum(x) as compiled by Julia 1.6.0-DEV (LLVM 9): sums 8-byte doubles using
## 512-bit (zmm) adds, 32 elements per main-loop iteration, then a scalar tail.
## In:    %rdi = pointer to a record holding the data pointer at offset 0 and
##        the element count at offset 8 (presumably Julia's array header -- confirm).
## Out:   %xmm0 (low double) = sum; 0.0 when the count is <= 0.
## Clobb: %rax, %rcx, %rdx, %rsi, %zmm0-%zmm3, flags.
.section __TEXT,__text,regular,pure_instructions
movq 8(%rdi), %rax ## rax = n (element count)
testq %rax, %rax
jle L29 ## n <= 0: return 0.0
movq (%rdi), %rcx ## rcx = base address of the data
cmpq $32, %rax
jae L34 ## n >= 32: take the vectorized path
## Fewer than 32 elements: start the scalar loop with sum = 0 and index 0.
vxorpd %xmm0, %xmm0, %xmm0
xorl %edx, %edx ## rdx = 0 (index of the next element to add)
jmp L160
L29:
## Empty-input exit: result is 0.0.
vxorps %xmm0, %xmm0, %xmm0
retq
L34:
## Vector setup. rdx = n - (n mod 32) = number of elements the main loop handles.
movl %eax, %esi
andl $31, %esi ## esi = n mod 32 (the 32-bit write zero-extends into rsi)
movq %rax, %rdx
subq %rsi, %rdx
vxorpd %xmm0, %xmm0, %xmm0
xorl %esi, %esi ## rsi is reused as the element index, starting at 0
## Zero the accumulators; the VEX-encoded xmm write also clears the upper zmm bits.
vxorpd %xmm1, %xmm1, %xmm1
vxorpd %xmm2, %xmm2, %xmm2
vxorpd %xmm3, %xmm3, %xmm3
nop ## padding before the loop head (presumably for alignment)
L64:
## Main loop: 4 accumulators x 8 doubles = 32 doubles (256 bytes) per iteration,
## addressed as base + index*8 rather than through a moving pointer.
vaddpd (%rcx,%rsi,8), %zmm0, %zmm0
vaddpd 64(%rcx,%rsi,8), %zmm1, %zmm1
vaddpd 128(%rcx,%rsi,8), %zmm2, %zmm2
vaddpd 192(%rcx,%rsi,8), %zmm3, %zmm3
addq $32, %rsi
cmpq %rsi, %rdx
jne L64 ## continue until index == vectorized element count
## Reduction: fold the four accumulators into zmm0, then halve the width
## (512 -> 256 -> 128 -> 64 bits) until one double remains.
vaddpd %zmm0, %zmm1, %zmm0
vaddpd %zmm0, %zmm2, %zmm0
vaddpd %zmm0, %zmm3, %zmm0
vextractf64x4 $1, %zmm0, %ymm1 ## ymm1 = upper 256 bits of zmm0
vaddpd %zmm1, %zmm0, %zmm0 ## only the low 256 bits of this result are used afterwards
vextractf128 $1, %ymm0, %xmm1 ## xmm1 = upper 128 bits of ymm0
vaddpd %xmm1, %xmm0, %xmm0
vpermilpd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0]
vaddsd %xmm1, %xmm0, %xmm0 ## low double of xmm0 = total of the vectorized part
cmpq %rdx, %rax
je L173 ## n was a multiple of 32: no tail to process
L160:
## Scalar loop: one double per iteration, index rdx running up to n.
vaddsd (%rcx,%rdx,8), %xmm0, %xmm0
incq %rdx
cmpq %rdx, %rax
jne L160
L173:
vzeroupper ## clear upper vector-register state before returning to the caller
retq
## Padding after the function body; not reachable (follows retq, no label).
nopw %cs:(%rax,%rax)
nopl (%rax,%rax)