Compare patterns in byte array

How can I find the position in array “a”, where is pattern match from array “b”? Does standard library have implemented this functionality?

a = [0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61]

b = [0x73, 0x20, 0x69]

Expected result is 4, or 4:6 or similar.

I don’t know of a built-in tool to do exactly this, but it can be built out of existing tools:

julia> function find_sub_array(needle, haystack)
         findfirst(firstindex(haystack):(lastindex(haystack) - length(needle))) do i
           @view(haystack[i:(i + length(needle) - 1)]) == needle
         end
       end
find_sub_array (generic function with 1 method)

julia> find_sub_array(b, a)
4

This just uses findfirst to find the first index i such that a[i:(i + 2)] == b. If b is not present, then findfirst will return nothing.

2 Likes

Thank you. It is working. I modified your solution to get first indexes of all patterns in array:

a = [0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x70, 0x20, 0x61, 0x70, 0x20, 0x61, 0x73, 0x20, 0x69, 0x20]

b = [0x73, 0x20, 0x69]

function find_sub_array_index(pattern, arr)
    findfirst(firstindex(arr):(lastindex(arr) - length(pattern))) do i
      @view(arr[i:(i + length(pattern) - 1)]) == pattern
    end
  end

function find_sub_array_indexes(pattern, arr)
    arrResult = []
    result = 0
    resultLast = 1-size(pattern,1)
    patternSize = size(pattern,1)
    while result !== nothing
        nextIndex = (resultLast + patternSize)
        if nextIndex > size(arr, 1)
            return arrResult
        end
        result = resultLast + patternSize + find_sub_array_index(pattern, arr[nextIndex:end])

        if result === nothing
            return arrResult
        end

        push!(arrResult, result-1)
        resultLast = result
    end
end

println(find_sub_array_indexes(b, a))

This is not optimized solution, just working example.

In Julia 1.6 you can use findfirst and findnext for this:

julia> findfirst(b, a)
4:6

See also this discussion: Finding position of a sequence in a UInt8 array? which resulted in this PR: https://github.com/JuliaLang/julia/pull/37283

3 Likes

Ok, I’ll check it. Thank you for advice.