Julia 1.8.3 with -Cskylake:
julia> let A1 = rand(10, 1000), B1 = copy(A1), C1 = zero(A1), D1 = zero(A1), E1 = zero(A1)
@code_native syntax=:intel debuginfo=:none map!(+, C1, A1, B1)
end
# Machine code emitted for the map!(+, C1, A1, B1) call shown in the REPL input above.
#
# What the instructions below do:
#   * rdx points at a table of pointers. The entries at +8, +16 and +24 are each
#     dereferenced as a record with a base address at offset 0 and an element
#     count at offset 8.
#     NOTE(review): presumably these are the C1, A1 and B1 arrays, with the entry
#     at +0 being the `+` function -- confirm against Julia's japi1 calling convention.
#   * If any of the three counts is zero, control jumps straight to the epilogue.
#   * Otherwise a scalar loop computes, for 8-byte doubles,
#         [rdx+8].base[i] = [rdx+16].base[i] + [rdx+24].base[i]
#     one element per iteration, and stops after the element whose index equals
#     (count - 1) of any of the three records, i.e. after min(count) elements.
#   * Only scalar vmovsd/vaddsd on xmm0 are used; there are no packed instructions
#     in this listing.
#   * rax is not written after its first load, so it still holds [rdx + 8] at ret.
.text
.file "map!"
.globl "japi1_map!_1084" # -- Begin function japi1_map!_1084
.p2align 4, 0x90
.type "japi1_map!_1084",@function
"japi1_map!_1084": # @"japi1_map!_1084"
.cfi_startproc
# %bb.0: # %top
# Prologue: set up the rbp frame and save rsi and rdi, both of which are overwritten below.
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rsi
push rdi
# This push only reserves the 8-byte slot at [rbp - 24]; the value stored there is
# overwritten by the next mov.
push rax
.cfi_offset rdi, -32
.cfi_offset rsi, -24
# Spill the incoming rdx into the reserved slot. The slot is never read back in this listing.
# NOTE(review): presumably this keeps the argument table reachable from the stack -- confirm.
mov qword ptr [rbp - 24], rdx
# First record: rax = table entry at +8, rcx = its count. Exit if the count is zero.
mov rax, qword ptr [rdx + 8]
mov rcx, qword ptr [rax + 8]
test rcx, rcx
je .LBB0_7
# %bb.1: # %L24
# Second record: rsi = table entry at +16, r8 = its count. Exit if the count is zero.
mov rsi, qword ptr [rdx + 16]
mov r8, qword ptr [rsi + 8]
test r8, r8
je .LBB0_7
# %bb.2: # %L24
# Third record: r10 = table entry at +24; rdx is reused to hold its count. Exit if zero.
mov r10, qword ptr [rdx + 24]
mov rdx, qword ptr [r10 + 8]
test rdx, rdx
je .LBB0_7
# %bb.3: # %L84.preheader
# Load the three base addresses: r9 and r10 are read from, r11 is written to.
mov r9, qword ptr [rsi]
mov r10, qword ptr [r10]
mov r11, qword ptr [rax]
# Turn each count into (count - 1), the index of its last element, for the exit tests.
dec rdx
dec r8
dec rcx
# rsi = 0: the element index i.
xor esi, esi
.p2align 4, 0x90
.LBB0_4: # %L84
# =>This Inner Loop Header: Depth=1
# Loop body: [r11 + 8*i] = [r9 + 8*i] + [r10 + 8*i], one double per iteration.
vmovsd xmm0, qword ptr [r9 + 8*rsi] # xmm0 = mem[0],zero
vaddsd xmm0, xmm0, qword ptr [r10 + 8*rsi]
vmovsd qword ptr [r11 + 8*rsi], xmm0
# Stop if i was the last index of the record written through r11.
cmp rcx, rsi
je .LBB0_7
# %bb.5: # %L147
# in Loop: Header=BB0_4 Depth=1
# Stop if i was the last index of the record read through r9.
cmp r8, rsi
je .LBB0_7
# %bb.6: # %L147
# in Loop: Header=BB0_4 Depth=1
# rdi = i + 1 is computed first; the compare still uses the old i, and the mov that
# commits the increment does not modify flags, so jne tests the cmp against rdx
# (last index of the record read through r10).
lea rdi, [rsi + 1]
cmp rdx, rsi
mov rsi, rdi
jne .LBB0_4
.LBB0_7: # %L172
# Epilogue: drop the spill slot, restore rdi, rsi and rbp, and return.
add rsp, 8
pop rdi
pop rsi
pop rbp
ret
.Lfunc_end0:
.size "japi1_map!_1084", .Lfunc_end0-"japi1_map!_1084"
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
Julia 1.8.3 without -Cskylake:
julia> let A1 = rand(10, 1000), B1 = copy(A1), C1 = zero(A1), D1 = zero(A1), E1 = zero(A1)
@code_native syntax=:intel debuginfo=:none map!(+, C1, A1, B1)
end
# Machine code emitted for the map!(+, C1, A1, B1) call shown in the REPL input above.
#
# What the instructions below do:
#   * rdx points at a table of pointers. The entries at +8, +16 and +24 are each
#     dereferenced as a record with a base address at offset 0 and an element
#     count at offset 8.
#     NOTE(review): presumably these are the C1, A1 and B1 arrays, with the entry
#     at +0 being the `+` function -- confirm against Julia's japi1 calling convention.
#   * If any of the three counts is zero, control jumps straight to the epilogue.
#   * Otherwise a scalar loop computes, for 8-byte doubles,
#         [rdx+8].base[i] = [rdx+16].base[i] + [rdx+24].base[i]
#     one element per iteration, and stops after the element whose index equals
#     (count - 1) of any of the three records, i.e. after min(count) elements.
#   * Only scalar vmovsd/vaddsd on xmm0 are used; there are no packed instructions
#     in this listing.
#   * rax is not written after its first load, so it still holds [rdx + 8] at ret.
.text
.file "map!"
.globl "japi1_map!_88" # -- Begin function japi1_map!_88
.p2align 4, 0x90
.type "japi1_map!_88",@function
"japi1_map!_88": # @"japi1_map!_88"
.cfi_startproc
# %bb.0: # %top
# Prologue: set up the rbp frame and save rsi, which is overwritten below.
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rsi
# This push only reserves the 8-byte slot at [rbp - 16]; the value stored there is
# overwritten by the next mov.
push rax
.cfi_offset rsi, -24
# Spill the incoming rdx into the reserved slot. The slot is never read back in this listing.
# NOTE(review): presumably this keeps the argument table reachable from the stack -- confirm.
mov qword ptr [rbp - 16], rdx
# First record: rax = table entry at +8, rcx = its count. Exit if the count is zero.
mov rax, qword ptr [rdx + 8]
mov rcx, qword ptr [rax + 8]
test rcx, rcx
je .LBB0_7
# %bb.1: # %L24
# Second record: rsi = table entry at +16, r8 = its count. Exit if the count is zero.
mov rsi, qword ptr [rdx + 16]
mov r8, qword ptr [rsi + 8]
test r8, r8
je .LBB0_7
# %bb.2: # %L24
# Third record: r10 = table entry at +24; rdx is reused to hold its count. Exit if zero.
mov r10, qword ptr [rdx + 24]
mov rdx, qword ptr [r10 + 8]
test rdx, rdx
je .LBB0_7
# %bb.3: # %L84.preheader
# Load the three base addresses: r9 and r10 are read from, r11 is written to.
mov r9, qword ptr [rsi]
mov r10, qword ptr [r10]
mov r11, qword ptr [rax]
# Turn each count into (count - 1), the index of its last element, for the exit tests.
add rdx, -1
add r8, -1
add rcx, -1
# rsi = 0: the element index i.
xor esi, esi
.p2align 4, 0x90
.LBB0_4: # %L84
# =>This Inner Loop Header: Depth=1
# Loop body: [r11 + 8*i] = [r9 + 8*i] + [r10 + 8*i], one double per iteration.
vmovsd xmm0, qword ptr [r9 + 8*rsi] # xmm0 = mem[0],zero
vaddsd xmm0, xmm0, qword ptr [r10 + 8*rsi]
# The compare against the last index of the record written through r11 is issued
# before the store; vmovsd does not modify flags, so the je below still tests this cmp.
cmp rcx, rsi
vmovsd qword ptr [r11 + 8*rsi], xmm0
je .LBB0_7
# %bb.5: # %L147
# in Loop: Header=BB0_4 Depth=1
# Stop if i was the last index of the record read through r9.
cmp r8, rsi
je .LBB0_7
# %bb.6: # %L147
# in Loop: Header=BB0_4 Depth=1
# Compare the old i against the last index of the record read through r10, then
# increment i with lea, which does not modify flags, so jne tests the cmp.
cmp rdx, rsi
lea rsi, [rsi + 1]
jne .LBB0_4
.LBB0_7: # %L172
# Epilogue: drop the spill slot, restore rsi and rbp, and return.
add rsp, 8
pop rsi
pop rbp
ret
.Lfunc_end0:
.size "japi1_map!_88", .Lfunc_end0-"japi1_map!_88"
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
Julia 1.8.0 with -Cskylake:
julia> let A1 = rand(10, 1000), B1 = copy(A1), C1 = zero(A1), D1 = zero(A1), E1 = zero(A1)
@code_native syntax=:intel debuginfo=:none map!(+, C1, A1, B1)
end
# Machine code emitted for the map!(+, C1, A1, B1) call shown in the REPL input above.
#
# What the instructions below do:
#   * rdx points at a table of pointers. The entries at +8, +16 and +24 are each
#     dereferenced as a record with a base address at offset 0 and an element
#     count at offset 8.
#     NOTE(review): presumably these are the C1, A1 and B1 arrays, with the entry
#     at +0 being the `+` function -- confirm against Julia's japi1 calling convention.
#   * If any of the three counts is zero, control jumps straight to the epilogue.
#   * Otherwise a scalar loop computes, for 8-byte doubles,
#         [rdx+8].base[i] = [rdx+16].base[i] + [rdx+24].base[i]
#     one element per iteration, and stops after the element whose index equals
#     (count - 1) of any of the three records, i.e. after min(count) elements.
#   * Only scalar vmovsd/vaddsd on xmm0 are used; there are no packed instructions
#     in this listing.
#   * rax is not written after its first load, so it still holds [rdx + 8] at ret.
.text
.file "map!"
.globl "japi1_map!_84" # -- Begin function japi1_map!_84
.p2align 4, 0x90
.type "japi1_map!_84",@function
"japi1_map!_84": # @"japi1_map!_84"
.cfi_startproc
# %bb.0: # %top
# Prologue: set up the rbp frame and save rsi and rdi, both of which are overwritten below.
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rsi
push rdi
# This push only reserves the 8-byte slot at [rbp - 24]; the value stored there is
# overwritten by the next mov.
push rax
.cfi_offset rdi, -32
.cfi_offset rsi, -24
# Spill the incoming rdx into the reserved slot. The slot is never read back in this listing.
# NOTE(review): presumably this keeps the argument table reachable from the stack -- confirm.
mov qword ptr [rbp - 24], rdx
# First record: rax = table entry at +8, rcx = its count. Exit if the count is zero.
mov rax, qword ptr [rdx + 8]
mov rcx, qword ptr [rax + 8]
test rcx, rcx
je .LBB0_7
# %bb.1: # %L24
# Second record: rsi = table entry at +16, r8 = its count. Exit if the count is zero.
mov rsi, qword ptr [rdx + 16]
mov r8, qword ptr [rsi + 8]
test r8, r8
je .LBB0_7
# %bb.2: # %L24
# Third record: r10 = table entry at +24; rdx is reused to hold its count. Exit if zero.
mov r10, qword ptr [rdx + 24]
mov rdx, qword ptr [r10 + 8]
test rdx, rdx
je .LBB0_7
# %bb.3: # %L84.preheader
# Load the three base addresses: r9 and r10 are read from, r11 is written to.
mov r9, qword ptr [rsi]
mov r10, qword ptr [r10]
mov r11, qword ptr [rax]
# Turn each count into (count - 1), the index of its last element, for the exit tests.
dec rdx
dec r8
dec rcx
# rsi = 0: the element index i.
xor esi, esi
.p2align 4, 0x90
.LBB0_4: # %L84
# =>This Inner Loop Header: Depth=1
# Loop body: [r11 + 8*i] = [r9 + 8*i] + [r10 + 8*i], one double per iteration.
vmovsd xmm0, qword ptr [r9 + 8*rsi] # xmm0 = mem[0],zero
vaddsd xmm0, xmm0, qword ptr [r10 + 8*rsi]
vmovsd qword ptr [r11 + 8*rsi], xmm0
# Stop if i was the last index of the record written through r11.
cmp rcx, rsi
je .LBB0_7
# %bb.5: # %L147
# in Loop: Header=BB0_4 Depth=1
# Stop if i was the last index of the record read through r9.
cmp r8, rsi
je .LBB0_7
# %bb.6: # %L147
# in Loop: Header=BB0_4 Depth=1
# rdi = i + 1 is computed first; the compare still uses the old i, and the mov that
# commits the increment does not modify flags, so jne tests the cmp against rdx
# (last index of the record read through r10).
lea rdi, [rsi + 1]
cmp rdx, rsi
mov rsi, rdi
jne .LBB0_4
.LBB0_7: # %L172
# Epilogue: drop the spill slot, restore rdi, rsi and rbp, and return.
add rsp, 8
pop rdi
pop rsi
pop rbp
ret
.Lfunc_end0:
.size "japi1_map!_84", .Lfunc_end0-"japi1_map!_84"
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
Julia 1.8.0 without -Cskylake:
julia> let A1 = rand(10, 1000), B1 = copy(A1), C1 = zero(A1), D1 = zero(A1), E1 = zero(A1)
@code_native syntax=:intel debuginfo=:none map!(+, C1, A1, B1)
end
# Machine code emitted for the map!(+, C1, A1, B1) call shown in the REPL input above.
#
# What the instructions below do:
#   * rdx points at a table of pointers. The entries at +8, +16 and +24 are each
#     dereferenced as a record with a base address at offset 0 and an element
#     count at offset 8.
#     NOTE(review): presumably these are the C1, A1 and B1 arrays, with the entry
#     at +0 being the `+` function -- confirm against Julia's japi1 calling convention.
#   * If any of the three counts is zero, control jumps straight to the epilogue.
#   * Otherwise a scalar loop computes, for 8-byte doubles,
#         [rdx+8].base[i] = [rdx+16].base[i] + [rdx+24].base[i]
#     one element per iteration, and stops after the element whose index equals
#     (count - 1) of any of the three records, i.e. after min(count) elements.
#   * Only scalar vmovsd/vaddsd on xmm0 are used; there are no packed instructions
#     in this listing.
#   * rax is not written after its first load, so it still holds [rdx + 8] at ret.
.text
.file "map!"
.globl "japi1_map!_84" # -- Begin function japi1_map!_84
.p2align 4, 0x90
.type "japi1_map!_84",@function
"japi1_map!_84": # @"japi1_map!_84"
.cfi_startproc
# %bb.0: # %top
# Prologue: set up the rbp frame and save rsi, which is overwritten below.
push rbp
.cfi_def_cfa_offset 16
.cfi_offset rbp, -16
mov rbp, rsp
.cfi_def_cfa_register rbp
push rsi
# This push only reserves the 8-byte slot at [rbp - 16]; the value stored there is
# overwritten by the next mov.
push rax
.cfi_offset rsi, -24
# Spill the incoming rdx into the reserved slot. The slot is never read back in this listing.
# NOTE(review): presumably this keeps the argument table reachable from the stack -- confirm.
mov qword ptr [rbp - 16], rdx
# First record: rax = table entry at +8, rcx = its count. Exit if the count is zero.
mov rax, qword ptr [rdx + 8]
mov rcx, qword ptr [rax + 8]
test rcx, rcx
je .LBB0_7
# %bb.1: # %L24
# Second record: rsi = table entry at +16, r8 = its count. Exit if the count is zero.
mov rsi, qword ptr [rdx + 16]
mov r8, qword ptr [rsi + 8]
test r8, r8
je .LBB0_7
# %bb.2: # %L24
# Third record: r10 = table entry at +24; rdx is reused to hold its count. Exit if zero.
mov r10, qword ptr [rdx + 24]
mov rdx, qword ptr [r10 + 8]
test rdx, rdx
je .LBB0_7
# %bb.3: # %L84.preheader
# Load the three base addresses: r9 and r10 are read from, r11 is written to.
mov r9, qword ptr [rsi]
mov r10, qword ptr [r10]
mov r11, qword ptr [rax]
# Turn each count into (count - 1), the index of its last element, for the exit tests.
add rdx, -1
add r8, -1
add rcx, -1
# rsi = 0: the element index i.
xor esi, esi
.p2align 4, 0x90
.LBB0_4: # %L84
# =>This Inner Loop Header: Depth=1
# Loop body: [r11 + 8*i] = [r9 + 8*i] + [r10 + 8*i], one double per iteration.
vmovsd xmm0, qword ptr [r9 + 8*rsi] # xmm0 = mem[0],zero
vaddsd xmm0, xmm0, qword ptr [r10 + 8*rsi]
# The compare against the last index of the record written through r11 is issued
# before the store; vmovsd does not modify flags, so the je below still tests this cmp.
cmp rcx, rsi
vmovsd qword ptr [r11 + 8*rsi], xmm0
je .LBB0_7
# %bb.5: # %L147
# in Loop: Header=BB0_4 Depth=1
# Stop if i was the last index of the record read through r9.
cmp r8, rsi
je .LBB0_7
# %bb.6: # %L147
# in Loop: Header=BB0_4 Depth=1
# Compare the old i against the last index of the record read through r10, then
# increment i with lea, which does not modify flags, so jne tests the cmp.
cmp rdx, rsi
lea rsi, [rsi + 1]
jne .LBB0_4
.LBB0_7: # %L172
# Epilogue: drop the spill slot, restore rsi and rbp, and return.
add rsp, 8
pop rsi
pop rbp
ret
.Lfunc_end0:
.size "japi1_map!_84", .Lfunc_end0-"japi1_map!_84"
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits