Calling c function from julia - segmentation fault

Hi all! I’ve been stuck on a problem for a while now and would appreciate any help as I’m not really sure how to debug this further.

I’m trying to write a Julia interface for a C function. The function appears to run successfully a few times before Julia exits with a segmentation fault.

The C function has the following interface:

void convex_cluster_admm(double *X, double *Lambda, double *U, double *V,
                         int *p, int *n, int *nK, int *ix, double *w, double *gamma, double *nu, int *type,
                         int *s1, int *s2, int *M1, int *M2, int *mix1, int *mix2,
                         double *primal, double *dual, double *tols_primal, double *tols_dual, int *max_iter, int *iter,
                         double *eps_abs, double *eps_rel)

and stores the results in the matrices U, V, and Lambda.

I have written the following function to call the C function:


function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4)

    # Cast variables to C standard types
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Cdouble.([gamma])
    nu_ = Cdouble.([nu])
    type_ = Cint.([type])

    p_ = Cint.([size(X, 1)])
    n_ = Cint.([size(X, 2)])
    k_ = Cint.([size(w, 1)])

    primal_ = Cdouble.([max_iter])
    dual_ = Cdouble.([max_iter])
    max_iter_ = Cint.([max_iter])
    iter_ = Cint.([1])

    abs_ = Cdouble.([tol_abs])
    rel_ = Cdouble.([tol_rel])

    tols_primal_ = Cdouble.([max_iter])
    tols_dual_ = Cdouble.([tol_rel])

    
    # Allocate memory for results
    U = Cdouble.(zeros(p_[1], n_[1]))
    V  = Cdouble.(zeros(p_[1], k_[1]))

    # Calculate edgeset
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1
    
    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Cint.([size(M1, 1)])
    mix2_ = Cint.([size(M2, 1)])

    # Call C function
    GC.@preserve begin
            ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                    (Ref{Cdouble}, Ref{Cdouble} , Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint},
                    Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint},
                    Ref{Cdouble}, Ref{Cdouble}) ,
                    X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                    M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    end
end

After calling the function a few times, the code will sometimes crash with the following error:

signal (11): Segmentation fault
in expression starting at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:328
gc_mark_loop at /buildworker/worker/package_linux64/build/src/gc.c:2522
_jl_gc_collect at /buildworker/worker/package_linux64/build/src/gc.c:3034
jl_gc_collect at /buildworker/worker/package_linux64/build/src/gc.c:3241
maybe_collect at /buildworker/worker/package_linux64/build/src/gc.c:880 [inlined]
jl_gc_pool_alloc at /buildworker/worker/package_linux64/build/src/gc.c:1204
FlatteningRF at ./reduce.jl:119 [inlined]
MappingRF at ./reduce.jl:93 [inlined]
_foldl_impl at ./reduce.jl:62
foldl_impl at ./reduce.jl:48 [inlined]
mapfoldl_impl at ./reduce.jl:44 [inlined]
#mapfoldl#214 at ./reduce.jl:160 [inlined]
mapfoldl at ./reduce.jl:160 [inlined]
#mapreduce#218 at ./reduce.jl:287 [inlined]
mapreduce at ./reduce.jl:287 [inlined]
#reduce#220 at ./reduce.jl:456 [inlined]
reduce at ./reduce.jl:456 [inlined]
compactify_edges at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:73
#convex_cluster_admm#5 at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:163
convex_cluster_admm##kw at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:128 [inlined]
test at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:119
unknown function (ip: 0x7fd4e345eebc)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:115
eval_value at /buildworker/worker/package_linux64/build/src/interpreter.c:204
eval_stmt_value at /buildworker/worker/package_linux64/build/src/interpreter.c:155 [inlined]
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:561
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:669
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:877
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:825
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:929
eval at ./boot.jl:360 [inlined]
include_string at ./loading.jl:1094
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
_include at ./loading.jl:1148
include at ./Base.jl:386
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
exec_options at ./client.jl:285
_start at ./client.jl:485
jfptr__start_41020.clone_1 at /home/max/julia-1.6.0/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
true_main at /buildworker/worker/package_linux64/build/src/jlapi.c:560
repl_entrypoint at /buildworker/worker/package_linux64/build/src/jlapi.c:702
main at /buildworker/worker/package_linux64/build/cli/loader_exe.c:51
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
_start at /home/max/julia-1.6.0/bin/julia (unknown line)
Allocations: 6014438 (Pool: 6011017; Big: 3421); GC: 7
Segmentation fault (core dumped)

While the error seems to occur inside the function compactify_edges, my guess is that the problem is to do with ccall messing with the garbage collector.

Any help would be greatly appreciated!

For values, please use, for example, Ref{Cdouble}(gamma) instead.

and you didn’t preserve anything here(in this case, you don’t need to preserve anything though).

Thanks for the quick reply @Gnimuc

For values, please use, for example, Ref{Cdouble}(gamma) instead.

I assume that this is just referring to the scalar variables. I’ve updated the code accordingly:


function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4,accelerate=true)

    # Variable allocation
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Ref{Cdouble}(gamma)
    nu_ = Ref{Cdouble}(nu)
    type_ = Ref{Cint}(type)

    p_ = Ref{Cint}(size(X, 1))
    n_ = Ref{Cint}(size(X, 2))
    k_ = Ref{Cint}(size(w, 1))

    primal_ = Ref{Cdouble}(max_iter)
    dual_ = Ref{Cdouble}(max_iter)
    max_iter_ = Ref{Cint}(max_iter)
    iter_ = Ref{Cint}(1)

    abs_ = Ref{Cdouble}(tol_abs)
    rel_ = Ref{Cdouble}(tol_rel)

    tols_primal_ = Ref{Cdouble}(max_iter)
    tols_dual_ = Ref{Cdouble}(tol_rel)

    # Allocate memory for results
    U = Cdouble.(zeros(size(X)))
    V  = Cdouble.(zeros(size(X,1), size(w,1)))

    # Edge information
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1

    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Ref{Cint}(size(M1, 1))
    mix2_ = Ref{Cint}(size(M2, 1))

    # Call C function
    ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ref{Cdouble}, Ref{Cdouble} , Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint},
                Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint},
                Ref{Cdouble}, Ref{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
end

However, the problem seems to still be occurring.
Interestingly enough, for large problems, the segfault seems to stop occurring (or at least it occurs less frequently).

what is compactify_edges?

It contains some preprocessing required by the C function.
Since the function is written in pure Julia I figured it is unlikely to be the source of the problem.

# Setup edgelists for cvxclustr c functions
function compactify_edges(w, n)

    sizes1 = zeros(Int64, n)
    sizes2 = zeros(Int64, n)
    
    # ix is a the list (i j) where w[i, j] != 0
    P = reduce(vcat, [i j] for i ∈ 1:n for j ∈ i+1:n)
    P = P[w .!=0, :] # remove elements where W is zero


    M1 = zeros(Int64, size(w, 1), n)
    M2 = zeros(Int64, size(w, 1), n)

    for i in 1:n
        group1 = findall(P[:,1] .== i)
        sizes1[i] = length(group1)
        if (sizes1[i] > 0) 
          M1[1:sizes1[i], i] = group1
        end

        group2 = findall(P[:,2] .== i)
        sizes2[i] = length(group2)
        if (sizes2[i] > 0) 
          M2[1:sizes2[i], i] = group2
        end
    end

    M1 = M1[1:maximum(sizes1), :]
    M2 = M2[1:maximum(sizes2), :]

    return P, M1, M2, sizes1, sizes2
end

Could you change all of the Ref types in the ccall to Ptr types, for example, Ref{Cdouble}Ptr{Cdouble} and give it a test?

Just checked it and the error still occurs.
I don’t think that the error is in the C function as it is taken from an R library that works fine (although I haven’t stepped though the code in great detail).

function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4,accelerate=true)

    # Variable allocation
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Ref{Cdouble}(gamma)
    nu_ = Ref{Cdouble}(nu)
    type_ = Ref{Cint}(type)

    p_ = Ref{Cint}(size(X, 1))
    n_ = Ref{Cint}(size(X, 2))
    k_ = Ref{Cint}(size(w, 1))


    primal_ = Ref{Cdouble}(max_iter)
    dual_ = Ref{Cdouble}(max_iter)
    max_iter_ = Ref{Cint}(max_iter)
    iter_ = Ref{Cint}(1)

    abs_ = Ref{Cdouble}(tol_abs)
    rel_ = Ref{Cdouble}(tol_rel)

    tols_primal_ = Ref{Cdouble}(max_iter)
    tols_dual_ = Ref{Cdouble}(tol_rel)


    # Allocate memory for results
    U = Cdouble.(zeros(size(X)))
    V  = Cdouble.(zeros(size(X,1), size(w,1)))

    # Edge information
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1


    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Ref{Cint}(size(M1, 1))
    mix2_ = Ref{Cint}(size(M2, 1))

    # Call C function
    if accelerate
        ccall((:convex_cluster_admm_acc, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ptr{Cdouble}, Ptr{Cdouble} , Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint},
                Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint},
                Ptr{Cdouble}, Ptr{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    else
        ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ptr{Cdouble}, Ptr{Cdouble} , Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint},
                Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint},
                Ptr{Cdouble}, Ptr{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    end

    return U
end

You aren’t providing a minimal working example, so it’s a bit hard to guess what’s the issue

Can you please show what’s exactly at line 328?

The wrapper code looks good to me. The segment fault might be due to other reasons. If this is a gc bug, then you might need to narrow down the problem to an MWE and submit a bug report with rr trace: https://github.com/JuliaLang/BugReporting.jl.

It’s good to know that the wrapper was not wildly off.
I’ll see If I can either build a MWE or think of another potential workaround.

Thanks for your help =)