I focus on the following code’s efficient, and I use the TimerOutputs to measure the performance.
using TimerOutputs
const to = TimerOutput()
abstract type AbstractSN end
struct spi <: AbstractSN
M::Int8
Ks::Int8
D::Int16
Ds::Int16
pq_codebook::Array{<:AbstractFloat, 3}
pq_codes::Array{<:Integer, 2}
k_v::Int64
vq_codebook::Array{<:AbstractFloat, 2}
vq_codes::Array{<:Integer, 1}
metric::String
id_index_table::Dict{Int64,Vector{Int64}}
end
function search_neighbors(spi::AbstractSN, q::Vector{<:AbstractFloat}, num_centroid_tosearch::Int, topk::Int)
@timeit to "coarse_search" index,scores_v = coarse_search(spi, q, num_centroid_tosearch)
@timeit to "id_indexed" ids,scores_vq = id_indexed(spi, index, scores_v)
@timeit to "compute_table" lookuptable = compute_table(spi.pq_codebook, q) # (M,Ks)
@timeit to "ex1" pq_codes_ids = spi.pq_codes[ids,:]
@timeit to "compute_scores_" scores_pq = compute_scores_(lookuptable, pq_codes_ids)
@timeit to "ex2" scores = scores_vq + scores_pq;
@timeit to "ex3" len_s = length(scores);
@timeit to "ex4" if topk > len_s
i_ = maxk!(scores, len_s)
ids_ = ids[i_]
ids_ = [ids_;repeat([0],topk - len_s)]
else
i_ = maxk!(scores, topk)
ids_ = ids[i_]
end
return ids_
end
show(to)
You may don’t know the implementation of each methods above. To ease your reading burden, let’s look at the time evaluation first, I will fill in the details later.
I notice I should have used @view in the line “ex1”.
After I use the @view, there was an unexpected result.
I don’t know why the time of "compute_scores_ " increases so much.
The following is the codes of compute_scores_ and id_indexed
function compute_scores_(lookuptable::Array{Float64,2}, pq_codes::AbstractArray{Int,2})
n, M = size(pq_codes);
scores = zeros(n);
for i = 1:n
s_ = 0;
for j = 1:M
@inbounds s_ = s_ + lookuptable[j, pq_codes[i,j]];
end
scores[i] = s_;
end
return scores
end
function id_indexed(spi::AbstractSN, index::SubArray{Int64, 1}, scores_v::Vector{<:AbstractFloat})
ids = []
scores_vq = []
for (i,s) in zip(index,scores_v)
id_i = spi.id_index_table[i] # spi.id_index_table::Dict{Int64,Vector{Int64}}
ids = [ids;id_i]
len = length(id_i)
scores_vq = [scores_vq;repeat([s],len)]
end
ids = convert(Array{Int64,1}, ids)
scores_vq = convert(Array{Float64,1}, scores_vq)
return ids, scores_vq
end
If you have any other suggestions for improving performance, please let me know.
Thanks in advance for any help/suggestions.