Faster way to find all bit arrays of weight n

The obvious way to generate these bit arrays is something like:

function findbcn2s(b,n)
    bitstrs = digits.(2^n-1:2^b-1, base=2, pad=b) .|> BitVector
    filter(x->sum(x)==n, bitstrs)
end

I feel like this is horribly inefficient and there must be a faster way, but my combinatorics-fu isn´t good enough to figure it out.

If your n and b are small enough to fit into an Int (or UInt or similar), then why form a BitVector at all? Much more efficient to just query the bits of the integer directly.

Do you mean by using Bits.jl?
So something like:

function findbcn2sv2(b,n)
    filter(x->sum(x)==n, bits.(2^n-1:2^b-1)) |> x->map(z->z[1:b], x)
end

This is significantly faster, thank you.

Sure, that’d be one way. You can also count_ones directly

3 Likes

The other major improvement is that rather than generating all 2^b-1 and filtering, it is possible to generate them 1 at a time. (see Combinatorics.jl/src/permutations.jl at master · JuliaMath/Combinatorics.jl · GitHub for how to do this in the general case, you can do it better in the case of bitarrays)

1 Like

vv=[trues(6) for _ in 1:binomial(6,4)]

foreach(((i,idx),)->vv[i][idx].=false, enumerate(combinations(1:6,2)))

setindex!.(vv, Ref(repeat([false],2)),combinations(1:6,2))

digits.(findall(==(4),count_ones.(1:2^6)),base=2,pad=6)


function d2b(d)
    b=falses(6)
    i=6
    while d>=1
        #d,r=divrem(d,2)
        b[i]=d&1
        d>>=1
        i-=1
    end
    b
end

julia> @btime d2b.(findall(==(4),count_ones.(1:2^6)))
  735.135 ns (35 allocations: 2.41 KiB)
15-element Vector{BitVector}:
 [0, 0, 1, 1, 1, 1]
 [0, 1, 0, 1, 1, 1]
 [0, 1, 1, 0, 1, 1]
 [0, 1, 1, 1, 0, 1]
 [0, 1, 1, 1, 1, 0]
 [1, 0, 0, 1, 1, 1]
 [1, 0, 1, 0, 1, 1]
 [1, 0, 1, 1, 0, 1]
 [1, 0, 1, 1, 1, 0]
 [1, 1, 0, 0, 1, 1]
 [1, 1, 0, 1, 0, 1]
 [1, 1, 0, 1, 1, 0]
 [1, 1, 1, 0, 0, 1]
 [1, 1, 1, 0, 1, 0]
 [1, 1, 1, 1, 0, 0]
1 Like

There’s also this thread on computing the next integer value with the same number of ones set: Computing the preimage of count_ones.

2 Likes

Does it use the popcount CPU instruction which is exactly for counting the number of nonzero bits?

It should, yep. You can check yourself with a @code_native count_ones(123).

1 Like

There is a package SmallCollections which is very useful for this, for example, to get UInts for 3-bit subsets of 6-bits:

julia> using SmallCollections

julia> map(x->x.mask, Subsets(6,3))
20-element Vector{UInt64}:
 0x0000000000000038
 0x0000000000000034
 0x0000000000000032
 0x0000000000000031
 0x000000000000002c
 0x000000000000002a
 0x0000000000000029
 0x0000000000000026
 0x0000000000000025
 0x0000000000000023
 0x000000000000001c
 0x000000000000001a
 0x0000000000000019
 0x0000000000000016
 0x0000000000000015
 0x0000000000000013
 0x000000000000000e
 0x000000000000000d
 0x000000000000000b
 0x0000000000000007

But keeping the subsets in their package representation is probably better:

julia> Subsets(4,2) |> collect
6-element Vector{SmallBitSet{UInt64}}:
 SmallBitSet([3,4])
 SmallBitSet([2,4])
 SmallBitSet([1,4])
 SmallBitSet([2,3])
 SmallBitSet([1,3])
 SmallBitSet([1,2])

Thanks to careful implementation by matthias314, this should be faster than alternatives. Link: SmallCollections.jl .

6 Likes

Oh, that’s a really cool package—very Julian!

I made this iterator some time ago for this purpose:

struct BitCombinations{T, T1, T2}
    weight::T1
    width::T2
    thelast::T
    function BitCombinations(weight, width, ::Type{T}) where {T}
        isbitstype(T) || throw(ArgumentError("$T is not a bitstype"))
        T1 = typeof(weight)
        T2 = typeof(width)
        new{T,T1,T2}(weight, width, ((-1 % T) >>> (8*sizeof(T) - weight)) << (width-weight))
    end
end
Base.length(fw::BitCombinations) = binomial(fw.width, fw.weight)
Base.IteratorEltype(::Type{<:BitCombinations}) = Base.HasEltype()
Base.eltype(::Type{<:BitCombinations{T}}) where T = T


# from https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
@inline function next_combination(x::T) where T
    t = x | (x-one(T))
    return (t+one(T)) | (((~t & -~t) - one(T)) >>> (trailing_zeros(x) + one(T)))
end


@inline function Base.iterate(fw::BitCombinations{T}) where T
    val = (-1 % T) >>> (8*sizeof(T) - fw.weight)
    return (val,val)
end


@inline function Base.iterate(fw::BitCombinations, state)
    state == fw.thelast && return nothing
    val = next_combination(state)
    return (val,val)
end


"""
bitcombinations(weight, width, ::Type{T}=UInt64)

Generate all bit patterns with `weight` bits set in a bit field of width `width`.
An iterator which returns integers of type `T` is created. The type `T` must be a bits type.
The patterns are returned in lexicographical order, that is, in arithmetic order.

Such combinations can also be generated by [`combinations`](@ref), but in case the
actual bit patterns are needed, this function is faster and non-allocating.

"""
function bitcombinations(weight::Integer, width::Integer, ::Type{T}=UInt64) where {T<:Integer}
    nbits = 8*sizeof(T)
    (0 ≤ weight ≤ width ≤ nbits) || 
        throw(ArgumentError(lazy"0 ≤ weight($weight) ≤ width($width) ≤ 8*sizeof($T)($nbits) failed"))
    BitCombinations(weight, width, T)
end

Usage:

julia> collect(bitcombinations(3,6,UInt8))
20-element Vector{UInt8}:
 0x07
 0x0b
 0x0d
 0x0e
 0x13
 0x15
 0x16
 0x19
 0x1a
 0x1c
 0x23
 0x25
 0x26
 0x29
 0x2a
 0x2c
 0x31
 0x32
 0x34
 0x38

Using such bitmasks e.g. for indexing into arrays is somewhat involved, but not very difficult, and can be made quite efficient. Like this function summing the elements of an array which are marked in a bitmask:

@inline blsr(x) = x & (x-true)  # clear lowest set bit, compiles to single blsr instruction

function summask(P::Vector, bitmask)
    p = zero(eltype(P))
    while !iszero(bitmask)
        tz = trailing_zeros(bitmask)
        bitmask = blsr(bitmask)
        p += @inbounds P[tz+1]
    end
    return p
end
4 Likes

@sgaure’s code is faster than what I had so far in SmallCollections.jl. I’ve added improved iterate methods for SmallBitSet and Subsets to master; they will be included in the next release. Then @Dan’s solution based on my package will be as fast as the one using bitcombinations.

4 Likes

Came to think of it. I also made another thing to use with such indices. It works like

v = rand(10)
sum(v[BitIndices(0b01101)])

I.e. a vector indexed by a BitIndices creates an iterator over the marked indices. So one can create non allocating loops like,

for subset in flatten(bitcombinations(w, K) for w in 4:K)
    a += sum(v[BitIndices(subset)])
end

And BitIndices is an iterator as well collect(BitIndices(0b01101)) == [1,3,4]).

I’m not sure it’s error free with various generic inputs, but it works for my purposes. For all I know there’s something like this in SmallCollections, I haven’t had the time to look into it.

struct BitIndices{T,IT} <: AbstractIteratorIndices
    bits::T
    firstindex::IT
    BitIndices(v::T) where T = new{T,typeof(firstindex(v))}(v, firstindex(v)) 
    BitIndices(v::T, fi::IT) where {T,IT} = new{T,IT}(v,fi)
end
BitIndices(a::BitIndices) = a
Base.IteratorEltype(::Type{<:BitIndices}) = Base.HasEltype()
Base.eltype(::Type{<:BitIndices}) = Int
Base.length(ii::BitIndices) = count_ones(ii.bits)

function Base.iterate(bi::BitIndices, state=bi.bits)
    iszero(state) && return nothing
    trailing_zeros(state)+bi.firstindex, state & (state - true)
end
for op in (:&, :|, :⊻)
    @eval Base.$op(a::BitIndices, b::BitIndices) = BitIndices($op(promote(a.bits,b.bits)...))
end
Base.:(~)(a::BitIndices) = BitIndices(~a.bits)

struct IndexIterator{VT, IT}
    v::VT
    idx::IT
end

Base.length(ii::IndexIterator) = length(ii.idx)
Base.IteratorEltype(::Type{T}) where {VT, T<:IndexIterator{VT}} = Base.IteratorEltype(VT)
Base.eltype(::Type{<:IndexIterator{VT}}) where {VT} = eltype(VT)

Base.@propagate_inbounds @inline function Base.iterate(
    ii::IndexIterator, state=iterate(ii.idx)
    ) 
    isnothing(state) && return nothing
    next, istate = state
    return ii.v[next], iterate(ii.idx, istate)
end

Base.getindex(v::AbstractVector, idx::AbstractIteratorIndices) = IndexIterator(v, idx)

I’ve taken the function

function f(v, K)
    a = 0.0
    for subset in Iterators.flatten(bitcombinations(w, K) for w in 4:K)
        a += sum(v[BitIndices(subset)])
    end
    a
end

Note that I had to use Iterators.flatten and to add the line

abstract type AbstractIteratorIndices end

at the very beginning.

With SmallCollections.jl one would write

function ff(v, K)
    a = 0.0
    for subset in Iterators.flatten(Subsets(K, w) for w in 4:K)
        a += sum(@inbounds(v[i]) for i in subset)
    end
    a
end

Then for K = 10 and v = rand(K) the SmallCollections code is about 20% faster than BitIndices on my machine. Without @inbounds, it’s about 15% slower. (I tried to add @inbounds to BitIndices, but it didn’t change anything. Also, I’m using the master version of SmallCollections, which has improved iterators taken from @sgaure’s code.)

2 Likes