Why in this example, the @SVector
and @MVector
macros both make performance worse?
Code: (you can copy and paste into REPL)
using BenchmarkTools
using StaticArrays
using Random
function g1(T)
levels = @SVector T[.7, .8, .9, 1.]
stacks = @MVector zeros(T, 4)
for i in 1:4
@inbounds stacks[i] = rand(levels)
end
stacks
end
function g2(T)
levels = @SVector T[.7, .8, .9, 1.]
stacks = zeros(T, 4)
for i in 1:4
@inbounds stacks[i] = rand(levels)
end
stacks
end
function g3(T)
levels = T[.7, .8, .9, 1.]
stacks = @MVector zeros(T, 4)
for i in 1:4
@inbounds stacks[i] = rand(levels)
end
stacks
end
function g4(T)
levels = T[.7, .8, .9, 1.]
stacks = zeros(T, 4)
for i in 1:4
@inbounds stacks[i] = rand(levels)
end
stacks
end
function g5(T)
levels = range(T(.7), T(1), 4)
stacks = zeros(T, 4)
for i in 1:4
@inbounds stacks[i] = rand(levels)
end
stacks
end
T = Float32
@benchmark g1(T)
@benchmark g2(T)
@benchmark g3(T)
@benchmark g4(T)
@benchmark g5(T)
Results:
julia> @benchmark g1(T)
BenchmarkTools.Trial: 10000 samples with 9 evaluations.
Range (min β¦ max): 2.378 ΞΌs β¦ 359.733 ΞΌs β GC (min β¦ max): 0.00% β¦ 98.46%
Time (median): 2.444 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 2.652 ΞΌs Β± 3.597 ΞΌs β GC (mean Β± Ο): 1.34% Β± 0.98%
βββββββββ
β
βββββββββββββ β
βββββββββββββββββββββββββββββββββ
β
βββ
ββ
β
β
β
β
β
ββ
β
β
ββββ
βββββ
β
β
β
2.38 ΞΌs Histogram: log(frequency) by time 4.51 ΞΌs <
Memory estimate: 1.12 KiB, allocs estimate: 24.
julia> @benchmark g2(T)
BenchmarkTools.Trial: 10000 samples with 10 evaluations.
Range (min β¦ max): 1.500 ΞΌs β¦ 330.450 ΞΌs β GC (min β¦ max): 0.00% β¦ 99.33%
Time (median): 1.550 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 1.712 ΞΌs Β± 3.392 ΞΌs β GC (mean Β± Ο): 1.92% Β± 0.99%
βββββββββ βββ
ββββββ β
ββββββββββββββββββββββββββββββ
βββ
β
βββββββββββββββββββββ
ββββ
β
1.5 ΞΌs Histogram: log(frequency) by time 2.97 ΞΌs <
Memory estimate: 688 bytes, allocs estimate: 15.
julia> @benchmark g3(T)
BenchmarkTools.Trial: 10000 samples with 10 evaluations.
Range (min β¦ max): 1.490 ΞΌs β¦ 347.160 ΞΌs β GC (min β¦ max): 0.00% β¦ 99.19%
Time (median): 1.540 ΞΌs β GC (median): 0.00%
Time (mean Β± Ο): 1.723 ΞΌs Β± 3.479 ΞΌs β GC (mean Β± Ο): 2.00% Β± 0.99%
ββββββββββββββ ββ β
ββββββββββββββββββββββββ
ββ
ββββββββββββββ
βββββββββ
βββββββββ
β
β
1.49 ΞΌs Histogram: log(frequency) by time 3.42 ΞΌs <
Memory estimate: 688 bytes, allocs estimate: 15.
julia> @benchmark g4(T)
BenchmarkTools.Trial: 10000 samples with 184 evaluations.
Range (min β¦ max): 555.978 ns β¦ 10.843 ΞΌs β GC (min β¦ max): 0.00% β¦ 94.30%
Time (median): 569.565 ns β GC (median): 0.00%
Time (mean Β± Ο): 596.218 ns Β± 313.435 ns β GC (mean Β± Ο): 1.51% Β± 2.83%
ββββββββββ βββββ β
ββββββββββββββββββββββββββββββ
β
βββ
ββ
β
ββ
ββββββββ
βββββββ
βββββββ
β
556 ns Histogram: log(frequency) by time 898 ns <
Memory estimate: 224 bytes, allocs estimate: 6.
julia> @benchmark g5(T)
BenchmarkTools.Trial: 10000 samples with 278 evaluations.
Range (min β¦ max): 284.532 ns β¦ 8.292 ΞΌs β GC (min β¦ max): 0.00% β¦ 89.91%
Time (median): 287.770 ns β GC (median): 0.00%
Time (mean Β± Ο): 306.729 ns Β± 142.426 ns β GC (mean Β± Ο): 0.83% Β± 1.87%
βββ
β ββββββ β
ββββββββββββββββββ
ββββββββββββββββββββββ
βββ
β
ββ
β
β
β
β
ββββββ
β
ββββ β
285 ns Histogram: log(frequency) by time 578 ns <
Memory estimate: 80 bytes, allocs estimate: 1.
As can be seen, both @SVector
and @MVector
contribute to around 1ms run time and 9 allocations, while Array
is fast and allocates little, contrary to usual experience. g5
isnβt fair to compare as it exploits a pattern in the input, but it shows how much better g(4)
can still improve.
versioninfo()
julia> versioninfo()
Julia Version 1.8.5
Commit 17cfb8e65e (2023-01-08 06:45 UTC)
Platform Info:
OS: Windows (x86_64-w64-mingw32)
CPU: 8 Γ Intel(R) Core(TM) i5-9300H CPU @ 2.40GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-13.0.1 (ORCJIT, skylake)
Threads: 1 on 8 virtual cores
Environment:
JULIA_PKG_SERVER = https://mirrors.bfsu.edu.cn/julia
Project.toml
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Manifest.toml
# This file is machine-generated - editing it directly is not advised
julia_version = "1.8.5"
manifest_format = "2.0"
project_hash = "af012333bc8fedec0067ac10f97df20a71800fd7"
[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[deps.BenchmarkTools]]
deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"]
git-tree-sha1 = "d9a9701b899b30332bbcb3e1679c41cce81fb0e8"
uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
version = "1.3.2"
[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "1.0.1+0"
[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[deps.JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.3"
[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[deps.LinearAlgebra]]
deps = ["Libdl", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[deps.Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.20+0"
[[deps.Parsers]]
deps = ["Dates", "SnoopPrecompile"]
git-tree-sha1 = "8175fc2b118a3755113c8e68084dc1a9e63c61ee"
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
version = "2.5.3"
[[deps.Preferences]]
deps = ["TOML"]
git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.3.0"
[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[deps.Profile]]
deps = ["Printf"]
uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"
[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[deps.SnoopPrecompile]]
deps = ["Preferences"]
git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c"
uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c"
version = "1.0.3"
[[deps.SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[deps.StaticArrays]]
deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"]
git-tree-sha1 = "6954a456979f23d05085727adb17c4551c19ecd1"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.5.12"
[[deps.StaticArraysCore]]
git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a"
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
version = "1.4.0"
[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[deps.TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
version = "1.0.0"
[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.1.1+0"