I was looking into compression algorithms and storage schemes (like Simple-8b and xor-based compression ) and one thing they do is determine the highest set bit for a given number.
I wrote some code, but I feel this can be done better or in a more efficient way.
function find_highest_bit(n::Number)
ui8_number = reinterpret(UInt8,[n])
totalshift = 0
for (i,byte) in enumerate(ui8_number)
shift = 0
while( (byte >> shift) > 0)
shift = shift+1
end
if shift != 0
totalshift = shift+((i-1)*8)
end
end
return totalshift
end
This can then be used like this
# generate some sorted data
julia> a = cumsum(rand(UInt8,10)).+100000000000;
10-element Vector{UInt64}:
0x000000174876e83b
0x000000174876e8c5
0x000000174876e97e
0x000000174876e9ab
0x000000174876ea68
0x000000174876eb2a
0x000000174876eb4c
0x000000174876ec28
0x000000174876ec4b
0x000000174876ed45
# calculate bits needed to store each number
julia> sum(find_highest_bit.(a))
10-element Vector{Int64}:
37
37
37
37
37
37
37
37
37
37
# calculate bits when only the difference is stored
julia> sum(find_highest_bit.(diff(a)))
9-element Vector{Int64}:
8
8
6
8
8
6
8
6
8
#same for float
julia> a = 2000.0 .+ rand(10)
10-element Vector{Float64}:
2000.4285571858823
2000.6576141052876
2000.8451949182838
2000.810710858975
2000.2450630027245
2000.4849832339958
2000.7565738749688
2000.3952829518228
2000.6558837662135
2000.2400002168467
julia> find_highest_bit.(a)
10-element Vector{Int64}:
63
63
63
63
63
63
63
63
63
63
# some ugly xor of neighbors
t = vcat(reinterpret.(eltype(a),[xor.(reinterpret(UInt8,a[i:i]),reinterpret(UInt8,a[i+1:i+1])) for i in 1:length(a)-1])...)
# this also needs less bits
julia> find_highest_bit.(t)
9-element Vector{Int64}:
42
41
39
42
41
42
42
42
42
Any hints are welcome.
Thank you,