This is biased if `max_int`

is not a power of 2.

A pretty fast way is described in https://arxiv.org/pdf/1805.10941.pdf. @rfourquet made a PR to that effect in https://github.com/JuliaLang/julia/pull/29240, but that is somehow still in limbo.

Sample implementation:

```
julia> function nearlydivisionless(s::Union{Int64, UInt64})
x=rand(UInt64)
m = widemul(x, s)
ell = m % UInt64
if ell<s
t= -s % s
while ell < t
x=rand(UInt64)
m = widemul(x, s)
ell = m % UInt64
end
end
return (m>>64)%typeof(s)
end
julia> @btime rand($(1:10^7));
23.193 ns (0 allocations: 0 bytes)
julia> @btime nearlydivisionless($(10^7));
9.198 ns (0 allocations: 0 bytes)
```

For fun side projects you can just monkey-patch his PR:

```
julia> @eval Random begin
#### Nearly Division Less
# cf. https://arxiv.org/abs/1805.10941
struct SamplerRangeNDL{U<:Unsigned,T} <: Sampler{T}
a::T # first element of the range
s::U # range length or zero for full range
end
function SamplerRangeNDL(r::AbstractUnitRange{T}) where {T}
isempty(r) && throw(ArgumentError("range must be non-empty"))
a = first(r)
U = uint_sup(T)
s = (last(r) - first(r)) % unsigned(T) % U + one(U) # overflow ok
# mod(-s, s) could be put in the Sampler object for repeated calls, but
# this would be an advantage only for very big s and number of calls
SamplerRangeNDL(a, s)
end
function rand(rng::AbstractRNG, sp::SamplerRangeNDL{U,T}) where {U,T}
s = sp.s
x = widen(rand(rng, U))
m = x * s
l = m % U
if l < s
t = mod(-s, s)
while l < t
x = widen(rand(rng, U))
m = x * s
l = m % U
end
end
(s == 0 ? x : m >> (8*sizeof(U))) % T + sp.a
end
# API until this is made the default
fast(r::AbstractUnitRange{<:Base.BitInteger64}) = SamplerRangeNDL(r)
fast(r::AbstractArray) = Random.SamplerSimple(r, fast(firstindex(r):lastindex(r)))
end
```

giving

```
julia> s=Random.fast(1:10^7);
julia> @btime rand($s)
9.193 ns (0 allocations: 0 bytes)
1779832
julia> s=Random.fast(Int32(1):Int32(10^7));
julia> @btime rand($s)
9.656 ns (0 allocations: 0 bytes)
2823504
```