Hi,

MWE below

I have a use case for intersection of lists of sorted integer vectors with a vector of sorted integers i.e.

```
intersect!(vec :: Vector{Int},vectorOfVectors::Vector{Vector{Int}})
```

because the vectorOfVectors can be quite large in and of itself, and the fact that I cant always “edit” my input vector, it tends to be better to iterate over vectorOfVectors and intersect and flag an intermediary BitVector/Vector that spans the original vec length and stores if an intersections was found:

```
local commonPopulated = 0
local commonOutput = falses(length(intersectionList))
for list in vectorOfVectors
if ismissing(list) continue end
commonPopulated += sorted_intersect(vec ,list,commonOutput)
end
```

after this, I end up with “commonPopulated” that identify all intersections of vec with all vectors in vectorOfVectors

because I care about the intersection of vec only, I wanted to reuse it and resize based on the values found; below are my own attempts vs the inbuilt “(vector)[bitVector”] value and my question is: why is mine so much slower, even though I reuse the memory vs 1.9MB or so in the inbuilt version?

```
using BenchmarkTools
@inline function shiftBitFlags!(inputIndices , bitVector)
local swapIndex = 0 #length(inputIndices)
local swapped = 1
local comparisonValue = one(eltype(inputIndices))
@inbounds @simd for ind in eachindex(inputIndices)
if bitVector[ind] == comparisonValue
swapIndex += 1
if swapIndex != ind
inputIndices[ind], inputIndices[swapIndex] = inputIndices[swapIndex], inputIndices[ind]
swapped += 1
end
end
end
swapIndex
end
@inline function shiftBitFlagsAndResize!(inputIndices , bitVector)
swapped = shiftBitFlags!(inputIndices, bitVector)
resize!(inputIndices, swapped)
swapped
end
bFalses = falses(1000000)
zInts = zeros(Int32,1000000)
for i in eachindex(bFalses)
if rand(0:1) == 1
bFalses[i] = 1
zInts[i] = 1
end
end
a = collect(Int32[i for i in 1: 1000000])
f = (a)[bFalses]
@btime ($a)[$bFalses] #563.100 μs (2 allocations: 1.91 MiB)
@btime shiftBitFlagsAndResize!(c,$zInts) setup=(c =collect(Int32[i for i in 1: 1000000])) # 3.051 ms (0 allocations: 0 bytes)
@btime shiftBitFlagsAndResize!(c,$bFalses) setup=(c =collect(Int32[i for i in 1: 1000000])) #3.046 ms (0 allocations: 0 bytes)
cShiftInts =collect(Int32[i for i in 1: 1000000])
shiftInts = shiftBitFlagsAndResize!(cShiftInts,zInts)
cShiftBits =collect(Int32[i for i in 1: 1000000])
shiftBits = shiftBitFlagsAndResize!(cShiftBits,bFalses)
@show shiftInts ,shiftBits , length(f), count(bFalses)
@show f == cShiftBits == cShiftInts
@show shiftInts == shiftBits == length(f)
```

Regards,