As far as I know, the main idea of this is that you can transform your series into some feature series/sequence and then optimize distance in feature space, just like multi-variate DTW.

One simple feature series is to pick not just one i-th sample, but some short motif of neighbouring samples (a region [i-d : i+d]). More than that, this region can be further decimated just to 3-dot motif: x[i-d], x[i], x[i+d] (if it is smooth enough). See my implementation of this, maybe it will help optimizing it:

##
Source code

```
@inline eucdist(x, y) = abs2(x-y) #(x-y)*(x-y)
@inline absdist(x, y) = abs(x-y)
# multi-variate version
function _dtw!(D::Matrix{T}, x1::AbstractArray{T}, x2::AbstractArray{T}, w::Int, dist = eucdist) where T<:Real
len1, len2 = size(x1, 1), size(x2, 1)
d1 = ndims(x1) == 1 ? 1 : size(x1, 2)
d = ndims(x2) == 1 ? 1 : size(x2, 2)
d == d1 || error("DTW: Dimensions mismatch")
size(D,1) >= len1 && size(D,2) >= len2 || error("DTW: Maximum length exceeded") # ŠæŃŠ¾Š²ŠµŃŠŗŠ° ŠæŃŠµŠ²ŃŃŠµŠ½ŠøŃ Š“Š»ŠøŠ½Ń
# dist = eucdist
# Initialize first column and first row
D[1,1] = dist(x1[1,1], x2[1,1])
@inbounds for k=2:d
D[1,1] += dist(x1[1,k], x2[1,k])
end
prevD = D[1,1]
@inbounds for r=2:len1
c = max(r-w, 1); # c = 1
D[r,c] = prevD
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) #(x1[r] - x2[1])*(x1[r] - x2[1])
end
prevD = D[r,c]
end
prevD = D[1,1]
@inbounds for c=2:len2
r = max(c-w, 1); # c = 1
D[1,c] = prevD
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) #(x1[1] - x2[c])*(x1[1] - x2[c])
end
prevD = D[r,c]
end
# Complete the cost matrix
@inbounds for c=2:len2, r = max(c-w+1, 2) : min(c+w-1, len1) # r=2:len1
D[r,c] = min(D[r-1,c], D[r-1,c-1], D[r,c-1])
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) # (x1[r] - x2[c])*(x1[r] - x2[c])
end
end
D[len1, len2]
end
# 3-dot version
function _dtw!(D::Matrix{T}, x1::AbstractArray{T}, x2::AbstractArray{T}, w::Int, dt::Int, dist = eucdist) where T<:Real
len1, len2 = size(x1, 1), size(x2, 1)
d1 = ndims(x1) == 1 ? 1 : size(x1, 2)
d = ndims(x2) == 1 ? 1 : size(x2, 2)
d == d1 || error("DTW: Dimensions mismatch")
size(D,1) >= len1 && size(D,2) >= len2 || error("DTW: Maximum length exceeded") # ŠæŃŠ¾Š²ŠµŃŠŗŠ° ŠæŃŠµŠ²ŃŃŠµŠ½ŠøŃ Š“Š»ŠøŠ½Ń
dt2 = 2*dt
len1 -= dt2
len2 -= dt2
# Initialize first column and first row
D[1,1] = dist(x1[1,1], x2[1,1]) + dist(x1[1+dt,1], x2[1+dt,1]) + dist(x1[1+dt2,1], x2[1+dt2,1])
@inbounds for k=2:d
D[1,1] += dist(x1[1,k], x2[1,k]) + dist(x1[1+dt,k], x2[1+dt,k]) + dist(x1[1+dt2,k], x2[1+dt2,k])
end
prevD = D[1,1]
@inbounds for r=2:len1
c = max(r-w, 1); # c = 1
D[r,c] = prevD
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) + dist(x1[r+dt,k], x2[c+dt,k]) + dist(x1[r+dt2,k], x2[c+dt2,k])
end
prevD = D[r,c]
end
prevD = D[1,1]
@inbounds for c=2:len2
r = max(c-w, 1); # r = 1
D[r,c] = prevD
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) + dist(x1[r+dt,k], x2[c+dt,k]) + dist(x1[r+dt2,k], x2[c+dt2,k])
end
prevD = D[r,c]
end
# Complete the cost matrix
@inbounds for c=2:len2
for r = max(c-w+1, 2) : min(c+w-1, len1) # r=2:len1
D[r,c] = min(D[r-1,c], D[r-1,c-1], D[r,c-1])
for k=1:d
D[r,c] += dist(x1[r,k], x2[c,k]) + dist(x1[r+dt,k], x2[c+dt,k]) + dist(x1[r+dt2,k], x2[c+dt2,k])
end
end
end
D[len1, len2]
end
```

What about Matrix Profile, there are many domain-specific performance gains not covered in it (as far as I know):

Such optimizations can dramatically improve performance for many concrete applications - in a sense that your number of actual DTW checks tend to `num_of_templates`

x `num_of_matches`

, or even lessā¦