Convert integer to bits array

I’m trying to find a way to convert an unsigned integer value into a bitmask.

Does anyone know of a way to do this efficiently using the standard library?

For example, I want something to do the following:

input = UInt16(5)
println(bitmask(input))

and get [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1] back.

bitstring(UInt16(5)), will return a string like “000000…101”, which you can then split and convert to a bitvector or array of ints.

Not very efficient of course. I’m sure someone can offer a better option.

2 Likes

You can & with 1 and repeatedly right shift.

2 Likes

digits(x, base=2, pad=16) |> reverse

18 Likes

I need this occasionally and always come up with an ad-hoc solution, I wonder if a function in Base would make sense.

2 Likes

Thanks everyone! The digits call is the fastest by far btw.


z = UInt(5)
@time digits(z, base=2, pad=16)
  0.000030 seconds (11 allocations: 640 bytes)
@time bitstring(UInt16(5))
  0.035250 seconds (47.15 k allocations: 2.489 MiB)

This is after running each function one time to trigger jit compilation btw.

Edit:

joemiller is correct! I pasted the values with compilation for bitstring

@time bitstring(z)
  0.000007 seconds (6 allocations: 320 bytes)
"0000000000000000000000000000000000000000000000000000000000000101"

@time digits(z, base=2, pad=16)
  0.000023 seconds (11 allocations: 640 bytes)

The bitstring method is faster now that I’m doing it correctly.

3 Likes

The proper way is

julia> function bm(u)
       res =BitVector(undef, sizeof(u)*8)
       res.chunks[1] = u%UInt64
       res
       end
julia> bm(0x05)
8-element BitArray{1}:
  true
 false
  true
 false
 false
 false
 false
 false

This is not exactly what you asked for: The bit pattern is reversed (little endian bitorder). You can use this code to convert to big endian (better than the llvm bitreverse intrinsic):

julia> function revbits(z::UInt8)
                  z = (((z & 0xaa) >>  1) | ((z & 0x55) <<  1))
                  z = (((z & 0xcc) >>  2) | ((z & 0x33) <<  2))
                  z = (((z & 0xf0) >>  4) | ((z & 0x0f) <<  4))
                  return z
              end

julia> function revbits(z::UInt16)
                  z = (((z & 0xaaaa) >>  1) | ((z & 0x5555) <<  1))
                  z = (((z & 0xcccc) >>  2) | ((z & 0x3333) <<  2))
                  z = (((z & 0xf0f0) >>  4) | ((z & 0x0f0f) <<  4))
                  return ntoh(z)
              end

julia> function revbits(z::UInt32)
                  z = (((z & 0xaaaaaaaa) >>  1) | ((z & 0x55555555) <<  1))
                  z = (((z & 0xcccccccc) >>  2) | ((z & 0x33333333) <<  2))
                  z = (((z & 0xf0f0f0f0) >>  4) | ((z & 0x0f0f0f0f) <<  4))
                  return ntoh(z)
              end

julia> function revbits(z::UInt64)
                  z = (((z & 0xaaaaaaaaaaaaaaaa) >>  1) | ((z & 0x5555555555555555) <<  1))
                  z = (((z & 0xcccccccccccccccc) >>  2) | ((z & 0x3333333333333333) <<  2))
                  z = (((z & 0xf0f0f0f0f0f0f0f0) >>  4) | ((z & 0x0f0f0f0f0f0f0f0f) <<  4))
                  return ntoh(z)
              end

and use

julia> function revbm(u)
       res =BitVector(undef, sizeof(u)*8)
       res.chunks[1] = revbits(unsigned(u))%UInt64
       res
       end
julia> revbm(Int8(5))
8-element BitArray{1}:
 false
 false
 false
 false
 false
  true
 false
  true
6 Likes

The allocations and time on @time bitstring(UInt16(5)) seem wrong, it looks like it’s compiling.

Sorry about that! You are correct. I updated my timings.

1 Like

I posted this a month ago but found an embarrassing bug in it. So here’s an update in case anyone is interested:

I agree, working in telecom this is something I do a lot. Not exactly what the OP asked for but here’s a variant I use a lot:

"""
    readbitvector(io, n; ltor=true)

Read bytes from `io` into a BitArray with `n` bits, by default the MSB
of the first byte will end up as index 1 (left-to-right).  Set
`ltor=false` to get the LSB of the first byte at index 1.
"""
function readbitvector(io, n; ltor=true)
    # fail if BitArray definition change
    @assert hasfield(BitArray, :chunks)
    r = BitArray(undef, n)
    # chunk type
    CT = eltype(r.chunks)
    nbytes = (n + 7) ÷ 8
    # BitArray store the first bit in LSB of chunks[1]
    rev = ltor ? bitreverse ∘ ntoh : identity
    for i in 1:nbytes ÷ sizeof(CT)
        r.chunks[i] = rev(read(io, CT))
    end
    # Handle any leftovers
    btail = nbytes & (sizeof(CT) - 1)
    if btail != 0
        w = reduce(|, [CT(b) << (8 * (i - 1)) for (i, b) in enumerate(read(io, btail))])
        r.chunks[end] = rev(w)
    end
    return r
end

It times ~1000 times faster than any variant I tried using the “external interface” of BitArray. Having something like this in Base would be great!

1 Like

Is it possible to modify this code (or perhaps there is some other code) which allows using an io as a bit stream?
Because with this function, the stream always gets rounded off into bytes, and reading a tail of a few bits, loses the rest of the bits in the byte.

A concrete example:

buf = IOBuffer("hello world")
println(Char(read(buf,1)[1]))
println(Char(read(buf,1)[1]))
println(readbitvector(buf,4))
println(readbitvector(buf,4))
println(Char(read(buf,1)[1]))

outputs:

h
e
Bool[0, 1, 1, 0]
Bool[0, 1, 1, 0]
o

with the higher bits of l characters lost.
How would this be fixed? or is there another library solving this? (sounds useful for decompressors etc.)

Hi Dan. Interesting thought! I don’t know of any other package solving this.

Spontaneously, this is how I would implement it:

A Julia IOStream only keeps track of bytes, so we need something else.

E.g.

struct BitIOStream <: IO
    io::IOStream
    last::UInt8
    offset
end
BitIOStream(io::IO) = BitIOStream(io, 0)

function Base.read(bio::BitIOStream, nbits)
    @assert bio.offset == 0
    nbytes = (nbits + 7) ÷ 8
    r = read(bio.io, nbytes)
    bio.last = last(r)
    bio.offset = nbits & 7
    return r
end

function aligntobyte(bio::BitIOStream)
    bio.offset == 0 && return 0, 0x00
    offset = bio.offset
    bio.offset = 0
    return offset, bio.last
end

Reading from BitIOStream must then be done in two phases; first
aligntobyte to get the residual bits from the previous read and then
a read to get a number of bytes in which the last may have some
superflous bits.

offs, prev = aligntobyte(bio)
# handle the 8-offs odd bits in `prev` 
v = read(bio, nbits - (8 - offs))
# handle the rest

None of this is particularly thought through. And not even tried in Julia.

If performance is not an issue, then you can use the proper BitVector methods and things will be a lot easier. I still think it’s best to keep the state (the number of bits consumed) in the IO object.

There were several problems with my previous post.

Here’s one I actually tested:

mutable struct BitIOStream <: IO
    io::IO
    last
    offset
end
BitIOStream(io::IO) = BitIOStream(io, 0, 0)

bit(byte, ix, ::Val{true}) = byte & (0x80 >> ix) != 0
bit(byte, ix, ::Val{false}) = byte & (1 << ix) != 0

function readbitvector(bio::BitIOStream, nbits; ltor=true)
    i = bio.offset # bit-index in `byte`
    byte = bio.offset == 0 ? read(bio.io, UInt8) : bio.last
    j = 1 # index in `r`
    r = BitVector(undef, nbits)
    while true
        r[j] = bit(byte, i, Val(ltor))
        i += 1
        j == nbits && break
        j += 1
        if i == 8
            byte = read(bio.io, UInt8)
            i = 0
        end
    end
    bio.offset = i & 7
    bio.last = byte
    return r
end

Your example, somewhat modified:

bio = BitIOStream(IOBuffer("hello world"))
println(Char(read(bio.io,1)[1]))
println(Char(read(bio.io,1)[1]))
println(readbitvector(bio,4))
println(readbitvector(bio,4))
println(readbitvector(bio,4))
println(readbitvector(bio,4))
println(Char(read(bio.io,1)[1]))

now outputs

h
e
Bool[0, 1, 1, 0]
Bool[1, 1, 0, 0]
Bool[0, 1, 1, 0]
Bool[1, 1, 0, 0]
o
1 Like