Should Julia model audio data like it models image data: with an easy conversion between a matrix and its corresponding media file?

I am using Julia to create a video entry into the Summer of Math Exposition II competition and I wanted to see how difficult it would be to automate all the video editing tasks with Julia and ffmpeg instead of spending hours fiddling with a video editor application. Given the ocean of ffmpeg command line options, I was expecting a lot of difficulties but it turned out to be surprisingly simple. For all the content creators currently spending so much time fiddling with video editor applications, the automation of video editing may be a widely appealing use case for Julia.

I think content creators would be helped by adding to Julia (or perhaps to Images.jl?) something like the following aload() and asave() functions. (I prepended an ‘a’ for audio to distinguish them from the load() and save() functions for images.)

"""
    asave(fn, AS, asfreq=44100)

Save 16-bit PCM audio samples to the .WAV file `fn`.

# Arguments
- `fn`: name of the file to write; it is opened in `"w"` mode, so an existing
  file is overwritten.
- `AS`: audio samples, one row per channel (1 row = mono, 2 rows = stereo) and
  one column per sample frame.
- `asfreq`: audio sampling frequency in Hz.

# Returns
`nothing`.

# Throws
- `InexactError` if the channel count, sampling frequency or data size does not
  fit the corresponding header field. This is raised before the file is opened.
"""
function asave(fn::AbstractString, AS::AbstractMatrix{Int16}, asfreq::Integer=44100)
    bytes_per_sample = 2                      # 16-bit samples
    nchan = size(AS, 1)
    blockalign = bytes_per_sample * nchan     # bytes per sample frame (all channels)
    byterate = Int64(asfreq) * blockalign     # computed in Int64 so it cannot wrap
    ndata = bytes_per_sample * length(AS)

    # Convert every header field up front: a value that does not fit throws
    # here, before the output file has been created or truncated.
    # WAV is a little-endian format, hence htol (a no-op on little-endian hosts).
    h_riffsize = htol(UInt32(ndata + 36))     # file size minus the 8-byte RIFF preamble
    h_nchan = htol(UInt16(nchan))
    h_rate = htol(UInt32(asfreq))
    h_byterate = htol(UInt32(byterate))
    h_blockalign = htol(UInt16(blockalign))
    h_bits = htol(UInt16(8 * bytes_per_sample))
    h_ndata = htol(UInt32(ndata))

    # The do-block form closes the file even if a write fails.
    open(fn, "w") do io
        # 44-byte .WAV header
        write(io, "RIFF", h_riffsize, "WAVE")           # RIFF tag, size, WAVE tag
        write(io, "fmt ", htol(UInt32(16)))             # format chunk tag and size
        write(io, htol(UInt16(1)), h_nchan)             # PCM encoding, # channels
        write(io, h_rate, h_byterate)                   # sampling rate, bytes/second
        write(io, h_blockalign, h_bits)                 # block alignment, bits/sample
        write(io, "data", h_ndata)                      # data chunk tag and size

        # Column-major order with one row per channel yields the
        # channel-interleaved frame layout that WAV expects.
        write(io, htol.(AS))
    end
    return nothing
end

"""
    aload(fn)

Load 16-bit PCM audio samples from the .WAV file `fn`.

The RIFF chunks are walked in order, so files that carry extra chunks (for
example a `LIST` metadata chunk) before the `data` chunk are read correctly.

# Arguments
- `fn`: name of the file to read.

# Returns
A tuple `(AS, asfreq)`: `AS` is a `Matrix{Int16}` with one row per channel and
one column per sample frame, and `asfreq` is the sampling frequency (`UInt32`)
taken from the file header.

# Throws
- `ArgumentError` if the file is not a RIFF/WAVE file, is not 16-bit PCM, or
  has no `fmt `/`data` chunk.
- `EOFError` if the file ends before the announced amount of data was read.
"""
function aload(fn::AbstractString)
    # Passing the parser to `open` guarantees the file is closed on error too.
    return open(_aload_stream, fn, "r")
end

# Parse a RIFF/WAVE stream positioned at its first byte; returns (samples, rate).
function _aload_stream(io::IO)
    preamble = read(io, 12)   # "RIFF", overall size (unused), "WAVE"
    if length(preamble) != 12 || String(preamble[1:4]) != "RIFF" ||
       String(preamble[9:12]) != "WAVE"
        throw(ArgumentError("not a RIFF/WAVE file"))
    end

    nchan = UInt16(0)
    asfreq = UInt32(0)
    havefmt = false
    while !eof(io)
        id = String(read(io, 4))
        len = ltoh(read(io, UInt32))    # WAV fields are little-endian
        pad = Int(isodd(len))           # chunks are padded to an even size
        if id == "fmt "
            len >= 16 || throw(ArgumentError("fmt chunk is too short"))
            fmttag = ltoh(read(io, UInt16))
            nchan = ltoh(read(io, UInt16))
            asfreq = ltoh(read(io, UInt32))
            skip(io, 6)                 # bytes/second and block alignment
            bits = ltoh(read(io, UInt16))
            # 0x0001 = PCM, 0xfffe = WAVE_FORMAT_EXTENSIBLE wrapper
            (fmttag == 0x0001 || fmttag == 0xfffe) ||
                throw(ArgumentError("unsupported WAV encoding (format tag $fmttag)"))
            bits == 16 ||
                throw(ArgumentError("only 16-bit samples are supported, got $bits"))
            nchan >= 1 || throw(ArgumentError("WAV header declares zero channels"))
            skip(io, Int(len) - 16 + pad)   # ignore any format extension bytes
            havefmt = true
        elseif id == "data"
            havefmt || throw(ArgumentError("data chunk appears before fmt chunk"))
            samples = Vector{Int16}(undef, Int(len ÷ 2))
            read!(io, samples)
            samples .= ltoh.(samples)
            # Interleaved frames become columns: one row per channel.
            return reshape(samples, Int(nchan), :), asfreq
        else
            skip(io, Int(len) + pad)    # chunk we do not need (LIST, fact, ...)
        end
    end
    throw(ArgumentError("no data chunk found"))
end
julia> asfreq = 44100
44100

julia> T = 0:1/asfreq:3;

julia> AS = [ # Audio Samples
          Int16.(round.(32767*sin.(2 * pi * 440 .* T)'));
          Int16.(round.(32767*cos.(2 * pi * 440 * 3/2 .* T)'))
       ];

julia> asave("tone.wav", AS) # save stereo data

julia> AS1 = reshape(AS[1,:], 1, :);

julia> asave("tone1.wav", AS1) # save mono data

julia> AS2, asfreq2 = aload("tone.wav");

julia> AS[:,1:7]
2×7 Matrix{Int16}:
     0   2053   4098   6126   8131  10103  12036
 32767  32622  32189  31472  30476  29211  27689

julia> AS2[:,1:7]
2×7 Matrix{Int16}:
     0   2053   4098   6126   8131  10103  12036
 32767  32622  32189  31472  30476  29211  27689

When it is finished (in the next couple of weeks), I’ll post a link here to the Julia code that generates the video and synchronizes it with the audio.

4 Likes