Calling c function from julia - segmentation fault

Hi all! I’ve been stuck on a problem for a while now and would appreciate any help as I’m not really sure how to debug this further.

I’m trying to write a Julia interface for a C function. The function appears to run successfully a few times before Julia exits with a segmentation fault.

The C function has the following interface:

void convex_cluster_admm(double *X, double *Lambda, double *U, double *V,
                         int *p, int *n, int *nK, int *ix, double *w, double *gamma, double *nu, int *type,
                         int *s1, int *s2, int *M1, int *M2, int *mix1, int *mix2,
                         double *primal, double *dual, double *tols_primal, double *tols_dual, int *max_iter, int *iter,
                         double *eps_abs, double *eps_rel)

and stores the results in the matrices U, V, and Lambda.

I have written the following function to call the C function:


function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4)

    # Cast variables to C standard types
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Cdouble.([gamma])
    nu_ = Cdouble.([nu])
    type_ = Cint.([type])

    p_ = Cint.([size(X, 1)])
    n_ = Cint.([size(X, 2)])
    k_ = Cint.([size(w, 1)])

    primal_ = Cdouble.([max_iter])
    dual_ = Cdouble.([max_iter])
    max_iter_ = Cint.([max_iter])
    iter_ = Cint.([1])

    abs_ = Cdouble.([tol_abs])
    rel_ = Cdouble.([tol_rel])

    tols_primal_ = Cdouble.([max_iter])
    tols_dual_ = Cdouble.([tol_rel])

    
    # Allocate memory for results
    U = Cdouble.(zeros(p_[1], n_[1]))
    V  = Cdouble.(zeros(p_[1], k_[1]))

    # Calculate edgeset
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1
    
    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Cint.([size(M1, 1)])
    mix2_ = Cint.([size(M2, 1)])

    # Call C function
    GC.@preserve begin
            ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                    (Ref{Cdouble}, Ref{Cdouble} , Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint},
                    Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint},
                    Ref{Cdouble}, Ref{Cdouble}) ,
                    X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                    M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    end
end

After calling the function a few times, the code will sometimes crash with the following error:

signal (11): Segmentation fault
in expression starting at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:328
gc_mark_loop at /buildworker/worker/package_linux64/build/src/gc.c:2522
_jl_gc_collect at /buildworker/worker/package_linux64/build/src/gc.c:3034
jl_gc_collect at /buildworker/worker/package_linux64/build/src/gc.c:3241
maybe_collect at /buildworker/worker/package_linux64/build/src/gc.c:880 [inlined]
jl_gc_pool_alloc at /buildworker/worker/package_linux64/build/src/gc.c:1204
FlatteningRF at ./reduce.jl:119 [inlined]
MappingRF at ./reduce.jl:93 [inlined]
_foldl_impl at ./reduce.jl:62
foldl_impl at ./reduce.jl:48 [inlined]
mapfoldl_impl at ./reduce.jl:44 [inlined]
#mapfoldl#214 at ./reduce.jl:160 [inlined]
mapfoldl at ./reduce.jl:160 [inlined]
#mapreduce#218 at ./reduce.jl:287 [inlined]
mapreduce at ./reduce.jl:287 [inlined]
#reduce#220 at ./reduce.jl:456 [inlined]
reduce at ./reduce.jl:456 [inlined]
compactify_edges at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:73
#convex_cluster_admm#5 at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:163
convex_cluster_admm##kw at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:128 [inlined]
test at /home/max/Desktop/heirachical_clustering/hierarchical-convex-clustering/cvxclustr/cvxclustr.jl:119
unknown function (ip: 0x7fd4e345eebc)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:115
eval_value at /buildworker/worker/package_linux64/build/src/interpreter.c:204
eval_stmt_value at /buildworker/worker/package_linux64/build/src/interpreter.c:155 [inlined]
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:561
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:669
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:877
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:825
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:929
eval at ./boot.jl:360 [inlined]
include_string at ./loading.jl:1094
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
_include at ./loading.jl:1148
include at ./Base.jl:386
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
exec_options at ./client.jl:285
_start at ./client.jl:485
jfptr__start_41020.clone_1 at /home/max/julia-1.6.0/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
true_main at /buildworker/worker/package_linux64/build/src/jlapi.c:560
repl_entrypoint at /buildworker/worker/package_linux64/build/src/jlapi.c:702
main at /buildworker/worker/package_linux64/build/cli/loader_exe.c:51
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
_start at /home/max/julia-1.6.0/bin/julia (unknown line)
Allocations: 6014438 (Pool: 6011017; Big: 3421); GC: 7
Segmentation fault (core dumped)

While the error seems to occur inside the function compactify_edges, my guess is that the problem is to do with ccall messing with the garbage collector.

Any help would be greatly appreciated!

For values, please use, for example, Ref{Cdouble}(gamma) instead.

and you didn’t preserve anything here(in this case, you don’t need to preserve anything though).

Thanks for the quick reply @Gnimuc

For values, please use, for example, Ref{Cdouble}(gamma) instead.

I assume that this is just referring to the scalar variables. I’ve updated the code accordingly:


function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4,accelerate=true)

    # Variable allocation
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Ref{Cdouble}(gamma)
    nu_ = Ref{Cdouble}(nu)
    type_ = Ref{Cint}(type)

    p_ = Ref{Cint}(size(X, 1))
    n_ = Ref{Cint}(size(X, 2))
    k_ = Ref{Cint}(size(w, 1))

    primal_ = Ref{Cdouble}(max_iter)
    dual_ = Ref{Cdouble}(max_iter)
    max_iter_ = Ref{Cint}(max_iter)
    iter_ = Ref{Cint}(1)

    abs_ = Ref{Cdouble}(tol_abs)
    rel_ = Ref{Cdouble}(tol_rel)

    tols_primal_ = Ref{Cdouble}(max_iter)
    tols_dual_ = Ref{Cdouble}(tol_rel)

    # Allocate memory for results
    U = Cdouble.(zeros(size(X)))
    V  = Cdouble.(zeros(size(X,1), size(w,1)))

    # Edge information
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1

    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Ref{Cint}(size(M1, 1))
    mix2_ = Ref{Cint}(size(M2, 1))

    # Call C function
    ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ref{Cdouble}, Ref{Cdouble} , Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint},
                Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cint}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}, Ref{Cint}, Ref{Cint},
                Ref{Cdouble}, Ref{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
end

However, the problem seems to still be occurring.
Interestingly enough, for large problems, the segfault seems to stop occurring (or at least it occurs less frequently).

what is compactify_edges?

It contains some preprocessing required by the C function.
Since the function is written in pure Julia I figured it is unlikely to be the source of the problem.

# Setup edgelists for cvxclustr c functions
function compactify_edges(w, n)

    sizes1 = zeros(Int64, n)
    sizes2 = zeros(Int64, n)
    
    # ix is a the list (i j) where w[i, j] != 0
    P = reduce(vcat, [i j] for i ∈ 1:n for j ∈ i+1:n)
    P = P[w .!=0, :] # remove elements where W is zero


    M1 = zeros(Int64, size(w, 1), n)
    M2 = zeros(Int64, size(w, 1), n)

    for i in 1:n
        group1 = findall(P[:,1] .== i)
        sizes1[i] = length(group1)
        if (sizes1[i] > 0) 
          M1[1:sizes1[i], i] = group1
        end

        group2 = findall(P[:,2] .== i)
        sizes2[i] = length(group2)
        if (sizes2[i] > 0) 
          M2[1:sizes2[i], i] = group2
        end
    end

    M1 = M1[1:maximum(sizes1), :]
    M2 = M2[1:maximum(sizes2), :]

    return P, M1, M2, sizes1, sizes2
end

Could you change all of the Ref types in the ccall to Ptr types, for example, Ref{Cdouble}Ptr{Cdouble} and give it a test?

Just checked it and the error still occurs.
I don’t think that the error is in the C function as it is taken from an R library that works fine (although I haven’t stepped though the code in great detail).

function convex_cluster_admm(X::Matrix, Lambda::Matrix, w::Vector, gamma, nu; type=2, max_iter=100, tol_abs=1e-5, tol_rel=1e-4,accelerate=true)

    # Variable allocation
    Lambda_ = Cdouble.(Lambda)
    X_ = Cdouble.(X)
    w_ = Cdouble.(w)
    
    gamma_ = Ref{Cdouble}(gamma)
    nu_ = Ref{Cdouble}(nu)
    type_ = Ref{Cint}(type)

    p_ = Ref{Cint}(size(X, 1))
    n_ = Ref{Cint}(size(X, 2))
    k_ = Ref{Cint}(size(w, 1))


    primal_ = Ref{Cdouble}(max_iter)
    dual_ = Ref{Cdouble}(max_iter)
    max_iter_ = Ref{Cint}(max_iter)
    iter_ = Ref{Cint}(1)

    abs_ = Ref{Cdouble}(tol_abs)
    rel_ = Ref{Cdouble}(tol_rel)

    tols_primal_ = Ref{Cdouble}(max_iter)
    tols_dual_ = Ref{Cdouble}(tol_rel)


    # Allocate memory for results
    U = Cdouble.(zeros(size(X)))
    V  = Cdouble.(zeros(size(X,1), size(w,1)))

    # Edge information
    ix, M1, M2, s1, s2 = compactify_edges(w, size(X, 2))
    ix .-= 1
    M1 .-= 1
    M2 .-=1


    ix_ = Cint.(ix)
    M1_ = Cint.(M1)
    M2_ = Cint.(M2)
    s1_ = Cint.(s1)
    s2_ = Cint.(s2)

    mix1_ = Ref{Cint}(size(M1, 1))
    mix2_ = Ref{Cint}(size(M2, 1))

    # Call C function
    if accelerate
        ccall((:convex_cluster_admm_acc, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ptr{Cdouble}, Ptr{Cdouble} , Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint},
                Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint},
                Ptr{Cdouble}, Ptr{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    else
        ccall((:convex_cluster_admm, "./cvxclustr/cvxclustr"), Cvoid, 
                (Ptr{Cdouble}, Ptr{Cdouble} , Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint},
                Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cdouble}, Ptr{Cint}, Ptr{Cint},
                Ptr{Cdouble}, Ptr{Cdouble}) ,
                X_, Lambda_, U, V, p_, n_, k_, ix_, w_, gamma_, nu_, type_, s1_, s2_,
                M1_, M2_, mix1_, mix2_, primal_, dual_, tols_primal_, tols_dual_, max_iter_, iter_, abs_, rel_)
    end

    return U
end

You aren’t providing a minimal working example, so it’s a bit hard to guess what’s the issue

Can you please show what’s exactly at line 328?

1 Like

The wrapper code looks good to me. The segment fault might be due to other reasons. If this is a gc bug, then you might need to narrow down the problem to an MWE and submit a bug report with rr trace: https://github.com/JuliaLang/BugReporting.jl.

It’s good to know that the wrapper was not wildly off.
I’ll see If I can either build a MWE or think of another potential workaround.

Thanks for your help =)