JIT issues relevant to realtime applications

Thank you ! I’m not too concerned about the GC because I can preallocate things quite aggressively if needed.
Compiling in another process and shipping the bytecode is a very interesting idea, worth exploring I think.
However I’m not sure it would be enough, because it seems like even very benign compilations can have high latency. So even if some new functions were precompiled, as things stand there would still be interruptions I think.
This seems to be the case with what I’m testing at least.

Can someone explain why? For me (not from the field) it is hard to imagine audio processing needing to deal with unpredictable types such that JIT is inherently required.

I think it’s not new types but new functions, to write new audio manipulations on the fly. Correct me if I’m wrong.

2 Likes

What version of Julia are you using? Recently (1.10ish) there has been quite a bit of work to make the locks involved in compilation smaller.

If you run into issues MWEs and profiles are appreciated. That would help us figure out which parts of the code are currently bottlenecks.

Thank you. I can reproduce this with 1.9.2, 1.10-alpha, and the nightly build as of today.
MWE below, I will try and post a profile later:

module test

using SimpleDirectMediaLayer
using SimpleDirectMediaLayer.LibSDL2
import Base.sin
using Base.Threads
using Setfield

"""
    SineWave(frequency, volume, panning=0.0, phase=0.0)

Immutable state of a single sine oscillator.

`frequency` may be given as an `Int` or a `Float64` and is always stored as
`Float64`. `panning` ranges from -1 (left) to 1 (right). `phase` is the
oscillator's current phase, the argument passed to `sin` by the renderer.
"""
struct SineWave
    frequency::Float64
    volume::Float64
    panning::Float64 # range -1 (left) to 1 (right)
    phase::Float64

    function SineWave(
        frequency::Union{Float64,Int},
        volume::Float64,
        panning::Float64=0.0,
        phase::Float64=0.0,
    )
        # Normalise an integer frequency so the stored field is always Float64.
        return new(Float64(frequency), volume, panning, phase)
    end
end

# Module-level list of the oscillators currently being rendered. It is appended
# to by `add_sine` and read/rewritten element-by-element by `push_audio`.
sines::Vector{SineWave} = SineWave[]

# Create a lock to protect the sines array
# (taken by `add_sine` and by `audio_thread` around each `push_audio` call).
sines_lock::ReentrantLock = ReentrantLock()

"""
    add_sine(frequency; volume_db=-30.0, panning=0.0)

Append a new `SineWave` to the shared `sines` list while holding `sines_lock`.

`volume_db` is converted to a linear gain with `10^(volume_db / 20)`.
Always returns `nothing`.
"""
function add_sine(frequency::Union{Float64,Int}; volume_db::Float64=-30.0, panning::Float64=0.0)::Nothing
    gain = 10^(volume_db / 20)
    lock(sines_lock)
    try
        push!(sines, SineWave(frequency, gain, panning))
    finally
        # Release the lock even if construction or push! throws.
        unlock(sines_lock)
    end
    return nothing
end

"""
    push_audio(audio_device, buffer_size, sample_rate, output_buffer)

Render `buffer_size` stereo frames from every oscillator in `sines` into
`output_buffer` (interleaved: left at odd indices, right at even indices) and
hand the buffer to `SDL_QueueAudio`.

Each oscillator's phase is advanced by one sample per frame and written back
into `sines`. The function does no locking itself. Returns `nothing`.
"""
function push_audio(audio_device::Cint, buffer_size::UInt32, sample_rate::Cint, output_buffer::Vector{Float32})::Nothing
    for frame in 1:buffer_size
        left = zero(Float32)
        right = zero(Float32)

        # Mix all oscillators into this frame.
        for k in eachindex(sines)
            osc = sines[k]
            theta = osc.phase
            step = 2 * π * osc.frequency / sample_rate
            value = Float32(osc.volume * sin(theta))

            # Square-root pan gains: the squared left/right gains sum to 1.
            left = Float32(left + sqrt((1 - osc.panning) / 2) * value)
            right = Float32(right + sqrt((1 + osc.panning) / 2) * value)

            # Store the oscillator back with its phase advanced by one sample.
            sines[k] = @set osc.phase = mod2pi(theta + step)
        end

        output_buffer[2 * frame - 1] = left
        output_buffer[2 * frame] = right
    end
    # Byte count: Float32 samples, two channels per frame.
    SDL_QueueAudio(audio_device, output_buffer, sizeof(Float32) * buffer_size * 2)
    return nothing
end

# Thread for continuously producing and playing audio
"""
    audio_thread(audio_device, sample_rate, audio_spec)

Run forever, keeping the SDL audio queue of `audio_device` topped up.

Each iteration waits until the queued byte count drops to at most two buffers'
worth, then renders one buffer via `push_audio` while holding `sines_lock`.
The buffer length in frames is taken from `audio_spec.samples`.

Any exception is logged together with its backtrace and the whole process is
terminated with exit code 1.
"""
function audio_thread(audio_device::Cint, sample_rate::Cint, audio_spec::SDL_AudioSpec)
    set_zero_subnormals(true) # Denormals are slow and pointless for audio
    sample_size::Int = sizeof(Float32) * 2 # 2 for stereo
    buffer_size::UInt32 = audio_spec.samples
    # Allocate the Float32 buffer directly rather than building a Float64
    # vector and converting it.
    output_buffer::Vector{Float32} = zeros(Float32, buffer_size * 2)
    try
        while true
            # Wait if the queue is full
            # Note this in practice means that the total latency can be up to 2x buffer size
            while SDL_GetQueuedAudioSize(audio_device) > buffer_size * sample_size * 2
                # NOTE: the wake-up from `sleep` can be delayed while the primary
                # thread is busy (e.g. compiling); see JuliaLang/julia#50643.
                sleep(buffer_size / sample_rate / 2)
            end
            lock(sines_lock) do
                push_audio(audio_device, buffer_size, sample_rate, output_buffer)
            end
        end
    catch e
        # Attach the exception and backtrace through the logging `exception`
        # keyword instead of logging a tuple built with the deprecated
        # `Base.catch_stack()`.
        @error "Error encountered in audio_thread:" exception=(e, catch_backtrace())
        @error "Terminating the program."
        exit(1)
    end
end

# Minimal reproduction driver: starts a 440 Hz tone on a spawned audio task and
# then triggers two identical failing `eval` calls, two seconds apart, so the
# effect of each on the audio stream can be heard. The audio device is closed
# and SDL is shut down on exit, whether or not an error occurred.
function main()
    SDL_Init(SDL_INIT_AUDIO)

    sample_rate::Cint = 44_100
    buffer_size::UInt32 = 512

    # No callback and no userdata are supplied (both C_NULL): audio is pushed
    # with SDL_QueueAudio from `audio_thread` instead.
    audio_spec = SDL_AudioSpec(sample_rate, AUDIO_F32SYS, 2, 0, buffer_size, 0, 0, C_NULL, C_NULL)
    audio_device::Cint = SDL_OpenAudioDevice(C_NULL, 0, Ref(audio_spec), C_NULL, 0)
    try
        # Unpausing the audio device (starts playing)
        SDL_PauseAudioDevice(audio_device, 0)

        add_sine(440)

        println("Starting audio")
        # The producer loop never returns; it runs as a separate task.
        @spawn audio_thread(audio_device, sample_rate, audio_spec)

        sleep(2)
        println("First exception breaks up the audio:")
        # `missing_func` is not defined, so this eval is expected to throw.
        try
            eval(Meta.parse("missing_func()"))
        catch err
            println("Invalid command.", err)
        end
        sleep(2)
        println("Second exception doesn't:")
        # Same failing call again, for comparison with the first one.
        try
            eval(Meta.parse("missing_func()"))
        catch err
            println("Invalid command.", err)
        end
        sleep(2)
        println("Done")

    finally
        SDL_CloseAudioDevice(audio_device)
        SDL_Quit()
    end
end

# Run `main` only when this file is the program being executed as a script.
(abspath(PROGRAM_FILE) == @__FILE__) && main()

end # module test

dependencies:

[deps]
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SimpleDirectMediaLayer = "98e33af6-2ee5-5afd-9e75-cbc738b767c4"

One would need to precompile the custom manipulation before putting it into the pipeline. That’s the point?

The best way to explain may be with an example:
https://gibber.cc/playground/index.html
The custom manipulations ( nodes in the graph ) are mostly pre written ( though they don’t have to be ) but the idea is you construct a compute graph for DSP processing as well as timing events on the fly and run it in real time.
Because the graph is run on a per sample basis this can only work with some JIT compilation.
The above example is all Javascript which is less performant than Julia but the engines seem to have JITs that are very well optimised for low latency.

Your link seemed to just point to the main page - could you edit it?

the link seems correct here, one has to manually select one of the examples such as “intro” in the drop down and follow the on screen instructions to run it.
image

1 Like

@vchuravy I have a .pb file from the profiler but it seems I can’t post it here, not sure what’s the best way around that or if it’s easier to re run locally ?

https://pprof.me/ is one way.

Thank you !
Here:
https://pprof.me/97069c8/

This could actually be related to Unexpected `sleep` function behavior in relation to main thread activity in multithreading context · Issue #50643 · JuliaLang/julia · GitHub

Looking at the profile, you use sleep and if the primary thread is busy doing compilation it will not poll libuv and thus your sleep will be longer than expected

1 Like

Ah that’s very interesting, I saw that thread the other day and it crossed my mind.
I just tried removing the sleep in the audio thread and it does seem to stop the exception from interrupting the audio, so it does seem like it’s that !
However removing the sleep makes the program very unpredictable, sometimes it gets stuck busy waiting.
Is there any temporary workaround ?

I’m far outside my domain here, so please forgive me if this is dumb, but could you check the global clock and wait for it to increment how ever far you need?

Unless there was a way for the clock to send an event when it reached a certain time, you’d still be continuously calling the clock’s time to check it, so it would still be a busy wait and probably the same problem as above.
What I do is in fact equivalent: I check how full the audio queue is, and the audio queue empties at a fixed rate based on the wall clock.
Another approach would be for the low level audio library to call us back when it’s emptied the buffer and it needs more data. That would be a “pull” model instead of the current “push” model.
Unfortunately the low level audio driver is in C and it seems that passing Julia functions to C callbacks has issues of its own which is why I went with the push approach.

Just an idea, again sorry if this is silly :sweat_smile:

  1. Spawn a separate Julia process that won’t get interrupted by compilation in the main process, à la Distributed.jl
  2. pass a Channel to both the main and separate processes
  3. Have the main loop do a take!(ch) where you would insert a sleep
  4. Have the separate process sleep, and when it wakes up, pass a “ready” signal through the Channel
  5. that would unblock the main loop and let it proceed

You’d also probably need an additional Channel{Int64} for passing how long you want to sleep to the Sleeper process.

On Julia 1.10 callbacks from C to Julia should be much more robust. What kinds of issues are you running into?

I previously had some runtime errors and someone else said they couldn’t get it to work so I didn’t pursue further.
I tried again and could get it to run with a callback but there is no sound, I’m not sure what I did wrong ( code below in case there’s anything obvious ).
Another issue is that being in a C callback means that the errors become very hard to understand or trace which will be an issue given I will have a lot of stuff in that loop.

module InteractiveSynthPOC

using SimpleDirectMediaLayer
using SimpleDirectMediaLayer.LibSDL2
import Base.sin
import LinearAlgebra.norm
using Random
using Base.Threads
using Setfield

# Structure for sine wave with frequency, phase, volume and panning
"""
    SineWave(frequency, volume, panning=0.0, phase=0.0)

Immutable state of a single sine oscillator.

`frequency` may be given as an `Int` or a `Float64` and is always stored as
`Float64`. `panning` ranges from -1 (left) to 1 (right). `phase` is the
oscillator's current phase, the argument passed to `sin` by the renderer.
"""
struct SineWave
    frequency::Float64
    volume::Float64
    panning::Float64 # range -1 (left) to 1 (right)
    phase::Float64

    function SineWave(
        frequency::Union{Float64,Int},
        volume::Float64,
        panning::Float64=0.0,
        phase::Float64=0.0,
    )
        # Normalise an integer frequency so the stored field is always Float64.
        return new(Float64(frequency), volume, panning, phase)
    end
end

# Create an array to hold the sine waves
# (appended to by `add_sine`; read and rewritten per sample by `generate_audio`).
sines::Vector{SineWave} = SineWave[]

# Create a lock to protect the sines array
# (taken by `add_sine` and by `audio_callback` around `generate_audio`).
sines_lock::ReentrantLock = ReentrantLock()


"""
    add_sine(frequency; volume_db=-30.0, panning=0.0)

Append a new `SineWave` to the shared `sines` list while holding `sines_lock`.

`volume_db` is converted to a linear gain with `10^(volume_db/20)`.
Always returns `nothing`.
"""
function add_sine(frequency::Union{Float64, Int}; volume_db::Float64 = -30.0, panning::Float64 = 0.0)::Nothing
    gain = 10^(volume_db/20)
    # The lock is released on exit from the block, including when it throws.
    @lock sines_lock begin
        push!(sines, SineWave(frequency, gain, panning))
    end
    return nothing
end

# Settings handed to the audio callback through SDL's opaque `userdata` pointer;
# `audio_callback` loads this struct back from that pointer with `unsafe_load`.
struct AudioSetup
    sample_rate::Cint # forwarded to `generate_audio` as its `sample_rate` argument
end

"""
    audio_callback(userdata, stream, len)

Callback with the `(userdata, stream, len)` signature registered through
`@cfunction` in `main`.

`userdata` is reinterpreted as a pointer to an `AudioSetup`, which is loaded to
obtain the sample rate. `len` bytes of audio are then written to `stream` by
`generate_audio` while `sines_lock` is held.
"""
function audio_callback(userdata::Ptr{Cvoid}, stream::Ptr{UInt8}, len::Cint)
    # Recover the AudioSetup that was passed as the opaque userdata pointer.
    setup = unsafe_load(convert(Ptr{AudioSetup}, userdata))
    return @lock sines_lock generate_audio(stream, len, setup.sample_rate)
end

"""
    generate_audio(buffer, len, sample_rate)

Fill the raw output `buffer` (`len` bytes) with interleaved stereo `Float32`
samples mixed from every oscillator in `sines`, advancing each oscillator's
phase by one sample per frame.

Only whole stereo frames are written: a trailing partial frame is left
untouched, and a non-positive `len` writes nothing. The function does no
locking itself. Returns `nothing`.
"""
function generate_audio(buffer::Ptr{UInt8}, len::Cint, sample_rate::Cint)::Nothing
    # Integer division: a float division converted to an unsigned integer would
    # throw InexactError for a length that is not a whole number of frames (or
    # is negative), and an exception inside a C callback is hard to diagnose.
    frame_bytes = 2 * sizeof(Float32) # 2 for stereo
    n_frames = Int(len) ÷ frame_bytes
    float_buffer = reinterpret(Ptr{Float32}, buffer)
    for i in 1:n_frames
        sample_left::Float32 = Float32(0)
        sample_right::Float32 = Float32(0)

        # Calculate the output for each sine wave
        for j in eachindex(sines)
            wave = sines[j]
            c = 2 * π * wave.frequency / sample_rate
            phase = wave.phase
            output::Float32 = wave.volume * sin(phase)

            # Add the output to the left and right channels, taking panning into account
            sample_left  += sqrt((1 - wave.panning) / 2) * output
            sample_right += sqrt((1 + wave.panning) / 2) * output

            # Update the phase for the next cycle
            sines[j] = @set wave.phase = mod2pi(phase + c)
        end

        # Interleaved layout: left sample first, then right.
        unsafe_store!(float_buffer, sample_left, 2 * i - 1)
        unsafe_store!(float_buffer, sample_right, 2 * i)
    end
    return nothing
end


"""
    main()

Callback-driven ("pull" model) variant of the demo: opens an SDL audio device
whose callback is `audio_callback`, plays a 440 Hz sine, and triggers two
failing `eval` calls two seconds apart. The device is closed and SDL is shut
down on exit, whether or not an error occurred.
"""
function main()

    SDL_Init(SDL_INIT_AUDIO)

    # NOTE ! Not sure what is going on in SDL but using something other than 44_100 seems to lead to audible distortion
    sample_rate::Cint = 44_100
    buffer_size::UInt32 = 512

    callback_ptr = @cfunction(audio_callback, Cvoid, (Ptr{Cvoid}, Ptr{UInt8}, Cint))

    # Box the settings in a Ref so they have a stable address to hand to C.
    # (`Ref{audio_setup}` -- curly braces -- builds a *type*, and taking its
    # address made the callback read a garbage sample rate.)
    audio_setup_ref = Ref(AudioSetup(sample_rate))

    # Keep the boxed settings rooted for as long as the callback may
    # dereference the raw pointer.
    GC.@preserve audio_setup_ref begin
        # Create a pointer to the AudioSetup structure
        audio_setup_ptr = pointer_from_objref(audio_setup_ref)
        audio_spec = SDL_AudioSpec(sample_rate, AUDIO_F32SYS, 2, 0, buffer_size, 0, 0, callback_ptr, audio_setup_ptr)
        audio_device = Cint(SDL_OpenAudioDevice(C_NULL, 0, Ref(audio_spec), C_NULL, 0))
        try
            # Unpausing the audio device (starts playing)
            SDL_PauseAudioDevice(audio_device, 0)

            add_sine(440)

            println("Starting audio")

            sleep(2)
            println("First exception breaks up the audio:")
            try
                eval(Meta.parse("missing_func()"))
            catch err
                println("Invalid command.", err)
            end
            sleep(2)
            println("Second exception doesn't:")
            try
                eval(Meta.parse("missing_func()"))
            catch err
                println("Invalid command.", err)
            end
            sleep(2)
            println("Done")

        finally
            SDL_CloseAudioDevice(audio_device)
            SDL_Quit()
        end
    end
end

# Run `main` only when this file is the program being executed as a script.
(abspath(PROGRAM_FILE) == @__FILE__) && main()

end # module InteractiveSynthPOC


It seems that the best solution for now is to run the user interaction part in a separate thread; this seems to work well. This is consistent with the report in the other thread about the bug in sleep().

1 Like