I have an image say of size (1000,1000)
and I want to reduce it to size (200,200)
by tiling it with (5,5)
batches and replacing each batch by its median. (The purpose of median is to remove some salt pepper like artifacts. I am okay to replace it with something similar robust e.g. median of medians, to speed things up.)
I wrote some optimized code, which follows the performance tips. However I could not bribe the compiler into using @simd
. Is this possible to do this? Any other ideas how to speed this up.
@inline function median5_swap(a,b,c,d,e)
# https://github.com/JeffreySarnoff/SortingNetworks.jl/blob/master/src/swapsort.jl
a,b = minmax(a,b)
c,d = minmax(c,d)
a,c = minmax(a,c)
b,d = minmax(b,d)
c,e = minmax(e,c)
max(c, min(e,b))
end
@inline median5(args...) = median5_swap(args...)
function medmedpool55!(out::AbstractMatrix, img::AbstractMatrix)
@assert size(out, 1) >= size(img, 1) ÷ 5
@assert size(out, 2) >= size(img, 2) ÷ 5
@inbounds for j ∈ indices(out)[2]
@simd for i ∈ indices(out)[1]
x11 = img[5i-4, 5j-4]
x21 = img[5i-3, 5j-4]
x31 = img[5i-2, 5j-4]
x41 = img[5i-1, 5j-4]
x51 = img[5i-0, 5j-4]
x12 = img[5i-4, 5j-3]
x22 = img[5i-3, 5j-3]
x32 = img[5i-2, 5j-3]
x42 = img[5i-1, 5j-3]
x52 = img[5i-0, 5j-3]
x13 = img[5i-4, 5j-2]
x23 = img[5i-3, 5j-2]
x33 = img[5i-2, 5j-2]
x43 = img[5i-1, 5j-2]
x53 = img[5i-0, 5j-2]
x14 = img[5i-4, 5j-1]
x24 = img[5i-3, 5j-1]
x34 = img[5i-2, 5j-1]
x44 = img[5i-1, 5j-1]
x54 = img[5i-0, 5j-1]
x15 = img[5i-4, 5j-0]
x25 = img[5i-3, 5j-0]
x35 = img[5i-2, 5j-0]
x45 = img[5i-1, 5j-0]
x55 = img[5i-0, 5j-0]
y1 = median5(x11,x12,x13,x14,x15)
y2 = median5(x21,x22,x23,x24,x25)
y3 = median5(x31,x32,x33,x34,x35)
y4 = median5(x41,x42,x43,x44,x45)
y5 = median5(x51,x52,x53,x54,x55)
z = median5(y1,y2,y3,y4,y5)
out[i,j] = z
end
end
out
end
using BenchmarkTools
imgs = randn(Float32, 1024,1024, 10)
img = view(imgs, :,:,1)
out = similar(img, size(img) .÷ 5)
@benchmark medmedpool55!(out, img)