I’m trying to demonstrate the effects of precompilation techniques in Julia.
I have written three modules and one `main.jl` file which uses them.
Here is my code:
$ cat MyModule1.jl
module MyModule1

"""
    mysum(v::AbstractVector{T}) where {T}

Return the sum of the elements of `v`, accumulated left to right starting
from `zero(T)`. An empty `v` yields `zero(T)`.
"""
function mysum(v::AbstractVector{T}) where {T}
    acc = zero(T)
    for elem in v
        acc = acc + elem
    end
    return acc
end

end # module MyModule1
$ cat MyModule2.jl
# Declare this file as precompilable (called with its default argument).
__precompile__()
module MyModule2

"""
    mysum(v::AbstractVector{T}) where {T}

Return the sum of the elements of `v`, accumulated left to right starting
from `zero(T)`. An empty `v` yields `zero(T)`.
"""
function mysum(v::AbstractVector{T}) where {T}
    acc = zero(T)
    for elem in v
        acc = acc + elem
    end
    return acc
end

end # module MyModule2
$ cat MyModule3.jl
# Declare this file as precompilable (called with its default argument).
__precompile__()
module MyModule3

"""
    mysum(v::AbstractVector{T}) where {T}

Return the sum of the elements of `v`, accumulated left to right starting
from `zero(T)`. An empty `v` yields `zero(T)`.
"""
function mysum(v::AbstractVector{T}) where {T}
    acc = zero(T)
    for elem in v
        acc = acc + elem
    end
    return acc
end

# Run `mysum` once on a `Vector{Float64}` while the module body is evaluated,
# so that this specialization is compiled at that point. The intent is for the
# compiled result to end up in the precompile cache.
mysum([0.0])

end # module MyModule3
$ cat main.jl
# Timing script: measures data generation, the loading of each module, and the
# first and second call of each module's `mysum`, printing every duration in
# seconds. The order of the steps is significant, because the first call of a
# function is timed separately from the second one.

# Convert two `Base.time_ns()` readings into the elapsed time in seconds.
# Only ever called after a timed region has ended.
seconds_between(started, stopped) = 1.0e-9 * (stopped - started)

# --- Generate the input data ---
t_begin = Base.time_ns()
v = rand(1000000)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("Data generation time: $(time_duration) s")

# --- Show the container type, the element type and a sample element ---
println(typeof(v))
println(typeof(v[1]))
println(v[1])

# --- Load MyModule1 ---
t_begin = Base.time_ns()
using MyModule1
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("using MyModule1 time: $(time_duration) s")

# --- Load MyModule2 ---
t_begin = Base.time_ns()
using MyModule2
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("using MyModule2 time: $(time_duration) s")

# --- Load MyModule3 ---
t_begin = Base.time_ns()
using MyModule3
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("using MyModule3 time: $(time_duration) s")

# --- MyModule1.mysum, first call ---
t_begin = Base.time_ns()
total = MyModule1.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule1.mysum time: $(time_duration) s")

# --- MyModule1.mysum, second call ---
t_begin = Base.time_ns()
total = MyModule1.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule1.mysum time (2nd call): $(time_duration) s")

# --- MyModule2.mysum, first call ---
t_begin = Base.time_ns()
total = MyModule2.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule2.mysum time: $(time_duration) s")

# --- MyModule2.mysum, second call ---
t_begin = Base.time_ns()
total = MyModule2.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule2.mysum time (2nd call): $(time_duration) s")

# --- MyModule3.mysum, first call ---
t_begin = Base.time_ns()
total = MyModule3.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule3.mysum time: $(time_duration) s")

# --- MyModule3.mysum, second call ---
t_begin = Base.time_ns()
total = MyModule3.mysum(v)
t_finish = Base.time_ns()
time_duration = seconds_between(t_begin, t_finish)
println("function MyModule3.mysum time (2nd call): $(time_duration) s")
I get quite strange results from this. If I edit one of these files (`MyModuleX.jl`) and then run `julia main.jl`, the modules whose files have been edited seem to take about 1 second to run.
However, if I run a second time, the time is much shorter. Typically a few ms.
This seems to be consistently repeatable.
This makes me think the contents of each of these files are being cached somewhere, or the results of some compilation steps are being cached. I’m not sure what is being cached, exactly. The files themselves? The result of some precompilation steps?
I am trying to create a repeatable experiment which demonstrates the effects of:
- no precompilation
- precompilation using `__precompile__`
- precompilation using `__precompile__` in combination with compilation all the way to machine code with an explicit function call