Dear All, I have tried the modified version of Julia 0.6 using @rdeits’s suggestion. Here are two version of codes, Julia 0.6 and Julia 1.0, respectively.

```
using Images
using Compat
# using Statistics
# using LinearAlgebra
function crop(img, l)
(m,n)=size(img)
im_crop = img[l+1:m-l,l+1:n-l]
return im_crop
end
function extend(img, L)
(M,N)=size(img)
im_Extend=zeros(M+2*L,N+2*L)
im_Extend[L+1:M+L,L+1:N+L]=img[1:M,1:N]
im_Extend[1:M+2*L, 1:L ]=flipdim(im_Extend[1:M+2*L,L+1:2*L], 2)
im_Extend[1:M+2*L,N+L+1:N+L*2]=flipdim(im_Extend[1:M+2*L,N+1:N+L], 2)
im_Extend[ 1:L ,1:N+2*L]=flipdim(im_Extend[L+1:2*L,1:N+2*L], 1)
im_Extend[M+L+1:M+L*2,1:N+2*L]=flipdim(im_Extend[M+1:M+L,1:N+2*L], 1)
return im_Extend
end
function im2col(img::AbstractMatrix{T}, m, n) where {T<:Real}
M, N = size(img)
patch = Matrix{Vector{T}}(undef, M-m+1, N-n+1)
patchNormalized = Matrix{Vector{T}}(undef, M-m+1, N-n+1)
@inbounds for x = 1:M-m+1, y = 1:N-n+1
p = vec(img[x:x+m-1,y:y+n-1])
patch[x,y] = p
patchNormalized[x,y] = p./norm(p)
end
return patch,patchNormalized
end
function denoise(yNoisy, n, W, K, λ)
yNoisy = extend(yNoisy, n)
MAX_PATCH_NUM = (2W + 1)^2
M, N = size(yNoisy)
patchM, patchN = M-n+1, N-n+1
noisyPatch,noisyPatchNormalized = im2col(yNoisy, n, n)
yHat = zeros(M, N)
count = zeros(M, N)
maxKVal = zeros(K)
maxKidx = zeros(Int, 2, K)
X = Matrix{Float64}(undef, n*n, K)
# loop over each patch(there're totally patchM*patchN patches)
for y = 1:patchM, x = 1:patchN
# window bounds sanity check
xMin = x - W ≤ 0 ? 1 : (x + W > patchM ? patchM - 2W : x - W)
xMax = x - W ≤ 0 ? 1 + 2W : (x + W > patchM ? patchM : x + W)
yMin = y - W ≤ 0 ? 1 : (y + W > patchN ? patchN - 2W : y - W)
yMax = y - W ≤ 0 ? 1 + 2W : (y + W > patchN ? patchN : y + W)
# loop over every coordinate(patch) in the window
# this could find K most similar patches in one pass
b = noisyPatch[x,y]
# println(typeof(b));
# println(typeof(noisyPatchNormalized[1,1]));
minVal = 0.0
minIdx = 1
k = 0
@inbounds for py = yMin:yMax, px = xMin:xMax
temp=noisyPatchNormalized[px,py];
corr = BLAS.dot(n*n,temp,1,b,1)
if k < K
k += 1
maxKVal[k] = corr
maxKidx[1,minIdx] = px
maxKidx[2,minIdx] = py
minVal, minIdx = findmin(maxKVal)
end
corr < minVal && continue
maxKVal[minIdx] = corr
maxKidx[1,minIdx] = px
maxKidx[2,minIdx] = py
minVal, minIdx = findmin(maxKVal)
end
# find the weights
for i = 1:K
px = maxKidx[1,i]
py = maxKidx[2,i]
X[:,i] = noisyPatch[px,py]
end
core = transpose(X) * X + λ * I
weights = core \ (transpose(X) * b)
# synthesize
patch = X * weights |> x->reshape(x, n, n)
yHat[x:x+n-1,y:y+n-1] .+= patch
count[x:x+n-1,y:y+n-1] .+= 1
end
y_DeNoise = crop(yHat ./ count, n)
end
PSNR(x, y) = 20 * log10(255/sqrt(mean((x-y).^2)))
# lena = load(joinpath(@__DIR__, "lena.bmp")) |> channelview
# img = lena[1,:,:].*255.0 # select first channel
# println(norm(img))
file = matopen("y1.mat")
img=read(file, "y1")
close(file)
n = 8
W = 20
K = 12
λ = 300
imgNoisy = img + 3randn(512,512)
PSNR(imgNoisy, img)
imgdenoised = @time denoise(imgNoisy, n, W, K, λ);
println(PSNR(imgNoisy, img))
println(PSNR(imgdenoised, img))
```

```
using Images
using Statistics
using LinearAlgebra
using QuartzImageIO
using ImageMagick
crop(img, l) = crop(img, size(img), l)
crop(img, (m, n), l) = img[l+1:m-l,l+1:n-l]
function extend(img, L)
M, N = size(img)
extended = zeros(M+2*L,N+2*L)
extended[L+1:M+L,L+1:N+L] = img
extended[1:M+2*L, 1:L ] = reverse(extended[1:M+2*L,L+1:2*L], dims=2)
extended[1:M+2*L,N+L+1:N+L*2] = reverse(extended[1:M+2*L,N+1:N+L], dims=2)
extended[ 1:L ,1:N+2*L] = reverse(extended[L+1:2*L,1:N+2*L], dims=1)
extended[M+L+1:M+L*2,1:N+2*L] = reverse(extended[M+1:M+L,1:N+2*L], dims=1)
extended
end
function im2col(img::AbstractMatrix{T}, m, n) where {T<:Real}
M, N = size(img)
patch = Matrix{Vector{T}}(undef, M-m+1, N-n+1)
patchNormalized = Matrix{Vector{T}}(undef, M-m+1, N-n+1)
@inbounds for x = 1:M-m+1, y = 1:N-n+1
p = vec(img[x:x+m-1,y:y+n-1])
patch[x,y] = p
patchNormalized[x,y] = p./norm(p)
end
return patch,patchNormalized
end
function denoise(yNoisy, n, W, K, λ)
yNoisy = extend(yNoisy, n)
MAX_PATCH_NUM = (2W + 1)^2
M, N = size(yNoisy)
patchM, patchN = M-n+1, N-n+1
noisyPatch,noisyPatchNormalized = im2col(yNoisy, n, n)
yHat = zeros(M, N)
count = zeros(M, N)
maxKVal = zeros(K)
maxKidx = zeros(Int, 2, K)
X = Matrix{Float64}(undef, n*n, K)
# loop over each patch(there're totally patchM*patchN patches)
for ii in CartesianIndices((patchM,patchN))
x, y = Tuple(ii)
# window bounds sanity check
xMin = x - W ≤ 0 ? 1 : (x + W > patchM ? patchM - 2W : x - W)
xMax = x - W ≤ 0 ? 1 + 2W : (x + W > patchM ? patchM : x + W)
yMin = y - W ≤ 0 ? 1 : (y + W > patchN ? patchN - 2W : y - W)
yMax = y - W ≤ 0 ? 1 + 2W : (y + W > patchN ? patchN : y + W)
# loop over every coordinate(patch) in the window
# this could find K most similar patches in one pass
b = noisyPatch[x,y]
# println(typeof(b));
# println(typeof(noisyPatchNormalized[1,1]));
minVal = 0.0
minIdx = 1
k = 0
@inbounds for py = yMin:yMax, px = xMin:xMax
temp=noisyPatchNormalized[px,py];
corr = BLAS.dot(n*n,temp,1,b,1)
if k < K
k += 1
maxKVal[k] = corr
maxKidx[1,minIdx] = px
maxKidx[2,minIdx] = py
minVal, minIdx = findmin(maxKVal)
end
corr < minVal && continue
maxKVal[minIdx] = corr
maxKidx[1,minIdx] = px
maxKidx[2,minIdx] = py
minVal, minIdx = findmin(maxKVal)
end
# find the weights
for i = 1:K
px = maxKidx[1,i]
py = maxKidx[2,i]
X[:,i] = noisyPatch[px,py]
end
core = transpose(X) * X + λ * I
weights = core \ (transpose(X) * b)
# synthesize
patch = X * weights |> x->reshape(x, n, n)
yHat[x:x+n-1,y:y+n-1] .+= patch
count[x:x+n-1,y:y+n-1] .+= 1
end
y_DeNoise = crop(yHat ./ count, n)
end
PSNR(x, y) = 20 * log10(255/sqrt(mean((x-y).^2)))
lena = load(joinpath(@__DIR__, "lena.bmp")) |> channelview
img = lena[1,:,:].*255.0 # select first channel
# println(norm(img))
n = 8
W = 20
K = 12
λ = 300
imgNoisy = img + 3randn(512,512)
PSNR(imgNoisy, img)
imgdenoised = @time denoise(imgNoisy, n, W, K, λ);
@info "PSNR Before Denoising:" PSNR(imgNoisy, img)
@info "PSNR After Denoising:" PSNR(imgdenoised, img)
```

It is worth noticing that Julia 0.6 is costing the same amount of time as Julia 1.0 even though it comes with the help of MKL. Does anyone have an idea why?

Julia 1.0: 21.252800 seconds (4.74 M allocations: 1.953 GiB, 5.83% gc time)

Julia 0.6: 20.306164 seconds (10.76 M allocations: 5.229 GiB, 9.76% gc time)