Threads are not speeding up computation a lot

Here is the best I could get, I hope I didn’t change the logic
module

module Secp256k1
using BitIntegers
const A = uint256"0"
const B = uint256"7"
const P = uint256"115792089237316195423570985008687907853269984665640564039457584007908834671663"
const N = uint256"115792089237316195423570985008687907852837564279074904382605163141518161494337"
const Gx = uint256"55066263022277343669578718895168534326250603453777594175500187360389116729240"
const Gy = uint256"32670510020758816978083085130507043184471273380659243275938904335757337482424"

const G = (Gx, Gy)

function jacobian_double(p)
    # """
    # Double a point in elliptic curves
    # :param p: Point you want to double
    # :return: Point that represents the sum of First and Second Point
    # """
    
    #:param P: Prime number in the module of the equation Y^2 = X^3 + A*X + B (mod )
    #:param A: Coefficient of the first-order term of the equation Y^2 = X^3 + A*X + B (mod Secp256k1.p)
    p_x, p_y, p_z = p


    if p_y == 0
        return uint256"0", uint256"0", uint256"1"
    end

    ysq = mod((p_y*p_y), Secp256k1.P)

    S = mod((4 * p_x * ysq), Secp256k1.P)
    # M = (3 * p_x ** 2 + A * p_z ** 4) % cls.P
    # A is zero
    M = mod((3 * p_x*p_x), Secp256k1.P)
    nx = mod((M*M - 2 * S), Secp256k1.P)
    ny = mod((M * (S - nx) - 8 * ysq*ysq), Secp256k1.P)
    nz = mod((2 * p_y * p_z), Secp256k1.P)

    return nx, ny, nz
end

function jacobian_add(p, q)
    # """
    # Add two points in elliptic curves

    # :param p: First Point you want to add
    # :param q: Second Point you want to add
    # :return: Point that represents the sum of First and Second Point
    # """
    p_x, p_y, p_z = p
    q_x, q_y, q_z = q

    if p_y == 0
        return q_x, q_y, q_z
    end
    if q_y == 0
        return p_x, p_y, p_z
    end

    U1 = mod((p_x * q_z*q_z), Secp256k1.P)
    U2 = mod((q_x * p_z*p_z), Secp256k1.P)
    S1 = mod((p_y * q_z*q_z*q_z), Secp256k1.P)
    S2 = mod((q_y * p_z*p_z*p_z), Secp256k1.P)

    if U1 == U2
        if S1 != S2
            return uint256"0", uint256"0", uint256"1"
        end
        return jacobian_double(p)
    end

    H = U2 - U1
    R = S2 - S1
    H2 = mod((H * H), Secp256k1.P)
    H3 = mod((H * H2), Secp256k1.P)
    U1H2 = mod((U1 * H2), Secp256k1.P)
    nx = mod((R * R - H3 - 2 * U1H2), Secp256k1.P)
    ny = mod((R * (U1H2 - nx) - S1 * H3), Secp256k1.P)
    nz = mod((H * p_z * q_z), Secp256k1.P)

    return nx, ny, nz
end

function jacobian_multiply(p, n)
    # """
    # Multily point and scalar in elliptic curves
    # :param p: First Point to mutiply
    # :param n: Scalar to mutiply
    # :return: Point that represents the sum of First and Second Point
    # """
    p_x, p_y, p_z = p

    if p_y == 0 || n == 0
        return uint256"0", uint256"0", uint256"1"
    end

    if n == 1
        return p
    end

    if n < 0 || n >= Secp256k1.N
        return jacobian_multiply(p, mod(n, Secp256k1.N))
    end

    if mod(n, 2) == 0
        return jacobian_double(jacobian_multiply(p, div(n, 2)))
    end

    pd = jacobian_double(jacobian_multiply(p, div(n, 2)))
    return jacobian_add(pd, p)
end

function to_jacobian(p)
    p_x, p_y = p
    return p_x, p_y, uint256"1"
end

function _inv(x, n)
    # """
    # Extended Euclidean Algorithm. It's the 'division' in elliptic curves

    # :param x: Divisor
    # :param n: Mod for division
    # :return: Value representing the division
    # """
    if x == 0
        return uint256"0"
    end

    lm  = uint256"1"
    hm  = uint256"0"
    low  = mod(x, n)
    high  = n
    r  = uint256"0"
    while low > 1
        r = div(high, low)
        nm = hm - lm * r
        nw = high - low * r
        high = low
        hm = lm
        low = nw
        lm = nm
    end

    return mod(lm, n)
end

function from_jacobian(p)
    # """
    # Convert point back from Jacobian coordinates

    # :param p: First Point you want to add
    # :param P: Prime number in the module of the equation Y^2 = X^3 + A*X + B (mod Secp256k1.p)
    # :return: Point in default coordinates
    # """

    p_x, p_y, p_z = p

    z = _inv(p_z, Secp256k1.P)
    x = mod((p_x * z^2), Secp256k1.P)
    y = mod((p_y * z^3), Secp256k1.P)

    return x, y
end


function der_keys(n)
    pubj = jacobian_multiply(to_jacobian(Secp256k1.G), n)
    return from_jacobian(pubj)
end

end

code


# using Base.Threads
using Base.Iterators
using BenchmarkTools
using BitIntegers

include("./secp256k1.jl")

function gen_keys(k_start, k_num)
    k_end = k_start + k_num - 1
    k_range = k_start:k_end
    data_size = length(k_range)
    keys = zeros(UInt256, data_size, 2)
    Threads.@threads for i in eachindex(k_range)
        k = k_range[i]
        x,y = Secp256k1.der_keys(k)
        keys[i,1] = x
        keys[i,2] = y
    end
    return keys
end

function test_gen_keys_range()
    gen_keys(uint256"123",uint256"100000")
end

test_gen_keys_range()
@time test_gen_keys_range();

benchmark

> julia -t 1 --project main.jl
  0.304315 seconds (10 allocations: 6.104 MiB, 11.04% gc time)
> julia -t 20 --project main.jl
  0.050477 seconds (105 allocations: 6.117 MiB)

speedup are insane Bit Integer are wow.
And for the 10000 case (which was 2.096 s and 705.314 ms) now we have

> julia -t 1 --project main.jl
  0.026495 seconds (10 allocations: 625.828 KiB)
> julia -t 20 --project main.jl
  0.006071 seconds (105 allocations: 639.047 KiB)

just tested I get the same exact time with fetching, not linear on my machine but its really memory related (logrange plot)

7 Likes