Assistance with optimization

Hi friends,

I made a post about the problem of generating pairs of objects while also filtering them, and it seemed that parallel computing was the main suggestion, so I’m trying to learn CUDA for that, but I’ve run into a wall with the performance of the underlying functions themselves. They’re run 100k’s times each but they are just multiplication or some conditional statement, so I’m not sure how to make them faster when they are doing things so simple, if there is anything.

using Graphs
using Random
using LinearAlgebra
using GeometryBasics
using BenchmarkTools
using StatProfilerHTML


const k = 4
const tol::Float32 = 10.0^(-k)
const rval::Float32 = 10^k
const msgval::Float32 = 0.0452
const msgtuple::GeometryBasics.Point{2,Float32} = GeometryBasics.Point(msgval,msgval)



function computecross(graph::SimpleGraph, pos::Vector{GeometryBasics.Point{2,Float32}}, f=8, b=25)
  
    edgs = collect(edges(graph))
    eview = view(edgs, eachindex(edgs))  
    ecount = ne(graph)
    vcount = nv(graph)
    allocatedlength = unsafe_trunc(Int32, (ecount*(ecount-1)/f-b*ecount))

    segments = map(x -> get_segment(x, pos), edgs)
    
    crossings = Vector{Tuple{Tuple{   Graphs.SimpleGraphs.SimpleEdge{Int64}, Graphs.SimpleGraphs.SimpleEdge{Int64}}, Point{2, Float32} } }(undef, allocatedlength)
    sizehint!(crossings, allocatedlength)
    
    index=1
    
    for i in 1:ecount
        @inbounds seg_i = segments[i]
        for j in 1:i-1
            
             @views @inbounds x1, y1, x2, y2, x3, y3, x4, y4, t, u = line_data(seg_i, segments[j])

                if  0 < t  && t < 1 && 0 < u && u < 1 
                    point = lineintersection(x1, y1, x1-x2, y1-y2, x3, y3, x3-x4, y3-y4, t, u)

                    @views @inbounds if point !== msgtuple && not_on_endpoint(seg_i, segments[j], point) 

                        @inbounds crossings[index] = ((eview[i], eview[j]), point)
                        index+=1

                    end
                end
        end 
    end 

    resize!(crossings, index-1)
    
    return crossings
end 


function get_segment(e, positions::Vector{GeometryBasics.Point{2,Float32}})
            
    return GeometryBasics.Line(positions[src(e)], positions[dst(e)])
end 



function line_data(seg1, seg2)
    
    x1 = seg1[1][1]
    y1 = seg1[1][2]
    x2 = seg1[2][1]
    y2 = seg1[2][2]
    
    x3 = seg2[1][1]
    y3 = seg2[1][2]
    x4 = seg2[2][1]
    y4 = seg2[2][2]
    
    @fastmath t_n = (x1-x3)*(y3-y4)-(y1-y3)*(x3-x4)
    @fastmath t_d = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4)
    
    @fastmath u_n = -1*((x1-x2)*(y1-y3)-(y1-y2)*(x1-x3))
    @fastmath u_d = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4)
    
  
    t = t_n/t_d
    
    u = u_n/u_d
    
    return x1, y1, x2, y2, x3, y3, x4, y4, t, u
end 


function lineintersection(x1, y1, x12, y12, x3, y3, x34, y34, t, u)


    @fastmath p1x = x1-(t)*(x12)
    @fastmath p1y = y1-(t)*(y12)
    
    @fastmath p2x = x3+(u)*(x34)
    @fastmath p2y = y3+(u)*(y34)

    
    if isapprox_on_lines(-x12, -y12, x1, y1, p1x, p1y)
        
        return GeometryBasics.Point(fast_k_round(p1x), fast_k_round(p1y))

            
    elseif isapprox_on_lines(-x34, -y34, x3, y3, p2x, p2y)
        return GeometryBasics.Point(fast_k_round(p2x), fast_k_round(p2y))

    else
        return msgtuple
                   
    end

 
end

function not_on_endpoint(seg1, seg2, pt)

    return (distance2(seg1[1],pt)<tol || distance2(seg1[2],pt)<tol || distance2(seg2[1],pt)<tol || distance2(seg2[2],pt)<tol) ? (return false) : (return true)
    
end 

function distance2(x, y)
    sum = float(abs2(zero(eltype(x)) - zero(eltype(y))))
    @simd for i in eachindex(x, y)
        @inbounds sum += abs2(x[i] - y[i])
    end
    return sum
end




function is_intersecting(t, u)
       
    return (0 <= t ) && (t <= 1) && (0 <= u) && (u <= 1)
    
end 


function fast_k_round(x::Float32)
    @fastmath @inline return Float32(unsafe_trunc(Int32, rval*x)/rval)
end 


function isapprox_on_lines(seg1xdiff::Float32, seg1ydiff::Float32, choosenseg1x::Float32, choosenseg1y::Float32, px::Float32, py::Float32)
       
    # Compute cross products
    cross1 = @fastmath seg1xdiff*(py-choosenseg1y)-seg1ydiff*(px-choosenseg1x)
    

    # Compute squares of magnitudes
    norm_sq1::Float32 = @fastmath abs2(cross1)

    # Compare squares of magnitudes to square of tolerance
    
    norm_sq1 < tol ? (return true) : (return false)
    
    
end 




function testing_generate_layout(graph::AbstractGraph)
    n = nv(graph)
    positions::Vector{GeometryBasics.Point{2,Float32}} = [GeometryBasics.Point(round(rand(), digits = k), round(rand(), digits = k)) for _ in 1:n]
    
    return positions
end 

  
function testing_generate_graph()
    n = rand(50:50)
    max_edges = n * (n - 1) ÷ 2
    e = min(max_edges)
    graph = SimpleGraph(n, e)
    #graph = SimpleDiGraph(n, e)
    return graph
end



G = testing_generate_graph()
positions1 = testing_generate_layout(G)

Around 50% of the program when sampled are the functions below.

not_on_endpoint is ~18% which checks if a calculated crossing point is an endpoint of one of the line segments. This is mostly from calculating distances, For this, I was using a function for Euclidean distance that had been posted by a steward I think on the forums, so I don’t think I can improve it at all.

lineintersection is ~17%. Most of this is a conditional for checking which of two possible points is an intersection by checking if it is approximately on the lines with a cross-product calculation.

line_data is ~14% and this retrieves coordinates from the line segments and calculates two numbers for an inequality which determines if there is a crossing.

I’ve tried to read every post I can find on best practices and performance for these, and the function is type-stable. Is there something I’m not doing, or doing wrong, that should be obvious?

Benchmark for 50 vertex complete graph with random drawing is ~6.5 to 6.8ms and ~6MiB