Curious about the internals of dynamic dispatch

I ran some benchmarks comparing dynamic dispatch cost in Julia vs Rust.

In Julia I got the following results:

Dynamic Dispatch:
  4.677 ms (0 allocations: 0 bytes)
Static Dispatch
  159.291 μs (0 allocations: 0 bytes)

And in Rust I got the following:

Dynamic dispatch runtime: 1.62775ms
Static dispatch runtime: 156.541µs

Below is the code I ran:

Julia code:

using BenchmarkTools

# Abstract supertype shared by `B` and `C`, so a `Vector{A}` can hold a mix of both.
abstract type A end

# Concrete subtype of `A` wrapping a single `Int64` field named `b`.
struct B <: A 
    b::Int64
end

# Concrete subtype of `A` wrapping a single `Int64` field named `c`.
struct C <: A 
    c::Int64
end

"""
    get_val(b::B)

Return the `Int64` stored in field `b` of `b`.
"""
get_val(b::B) = b.b

"""
    get_val(c::C)

Return the `Int64` stored in field `c` of `c`.
"""
get_val(c::C) = c.c

"""
    dynamic_dispatch(v::Vector{A})

Add up `get_val` over every element of `v` and return the total.

The element type of `v` is the abstract type `A`, so which `get_val` method applies
depends on the concrete type of each element.
"""
function dynamic_dispatch(v::Vector{A})
    total = 0
    for item in v
        # The assertion pins the result type of the call to `Int64`.
        total = total + get_val(item)::Int64
    end
    return total
end

"""
    static_dispatch(v::Vector{B})

Add up `get_val` over every element of `v` and return the total.

The element type of `v` is the concrete type `B`, so every call targets `get_val(::B)`.
"""
function static_dispatch(v::Vector{B})
    total = 0
    for item in v
        # The assertion pins the result type of the call to `Int64`.
        total = total + get_val(item)::Int64
    end
    return total
end

"""
    main()

Fill a `Vector{A}` and a `Vector{B}` with one million elements each, then benchmark
`dynamic_dispatch` and `static_dispatch` on them with `@btime`.

Both vectors are filled in lockstep from the same coin flip, so they hold the same
sequence of values: `v1` stores a `B(1)` or a `C(2)`, while `v2` stores the
corresponding `B(1)` or `B(2)`.
"""
function main()
    n = 1_000_000
    v1 = Vector{A}()
    v2 = Vector{B}()
    # `1:n` works on every Julia version; two-positional `range(1, n)` needs Julia 1.7+.
    for _ in 1:n
        # `rand(Bool)` is a fair coin flip that does not allocate a temporary
        # array on every iteration the way `rand([1, 2])` does.
        if rand(Bool)
            push!(v1, B(1))
            push!(v2, B(1))
        else
            push!(v1, C(2))
            push!(v2, B(2))
        end
    end
    println("Dynamic Dispatch:")
    @btime dynamic_dispatch($v1)
    println("Static Dispatch")
    @btime static_dispatch($v2)
end

main()

Rust code:

/// Common interface of the benchmarked types; `dynamic_dispatch` calls it through
/// `Box<dyn A>` and `static_dispatch` calls it on the concrete type `B`.
trait A {
    /// Returns the `i64` value held by the implementor.
    fn get_val(&self) -> i64;
}

/// Benchmark payload wrapping a single `i64` in field `b`.
///
/// The derives make the type printable with `{:?}`, cheaply copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct B {
    b: i64,
}

/// Benchmark payload wrapping a single `i64` in field `c`.
///
/// The derives make the type printable with `{:?}`, cheaply copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct C {
    c: i64,
}

impl A for B {
    /// Yields the value stored in field `b`.
    fn get_val(&self) -> i64 {
        let Self { b } = *self;
        b
    }
}

impl A for C {
    /// Yields the value stored in field `c`.
    fn get_val(&self) -> i64 {
        let Self { c } = *self;
        c
    }
}

/// Sums `get_val()` over every element, calling it through the `dyn A` trait object.
///
/// Takes a slice rather than `&Vec<_>`: existing callers passing `&Vec<Box<dyn A>>`
/// keep working through deref coercion, and arrays or sub-slices are accepted too.
/// Returns `0` for an empty slice.
fn dynamic_dispatch(v: &[Box<dyn A>]) -> i64 {
    v.iter().map(|ele| ele.get_val()).sum()
}

/// Sums `get_val()` over every element of a slice of the concrete type `B`.
///
/// Takes a slice rather than `&Vec<_>`: existing callers passing `&Vec<B>` keep
/// working through deref coercion, and arrays or sub-slices are accepted too.
/// Returns `0` for an empty slice.
fn static_dispatch(v: &[B]) -> i64 {
    v.iter().map(|ele| ele.get_val()).sum()
}

/// Fills both vectors with one million elements, times a single call to each summing
/// function, and prints the two durations followed by the two sums.
fn main() {
    let mut v1: Vec<Box<dyn A>> = Vec::new();
    let mut v2: Vec<B> = Vec::new();

    // Both vectors are filled in lockstep from the same coin flip, so they hold the
    // same sequence of values: `v1` as boxed `B`/`C` trait objects, `v2` as plain `B`s.
    for _ in 0..1_000_000 {
        let heads: bool = rand::random();
        let boxed: Box<dyn A> = if heads {
            Box::new(B { b: 1 })
        } else {
            Box::new(C { c: 2 })
        };
        v1.push(boxed);
        v2.push(B {
            b: if heads { 1 } else { 2 },
        });
    }

    let dynamic_start = std::time::Instant::now();
    let dynamic_sum = dynamic_dispatch(&v1);
    let dynamic_elapsed: std::time::Duration = dynamic_start.elapsed();
    println!("Dynamic dispatch runtime: {:?}", dynamic_elapsed);

    let static_start = std::time::Instant::now();
    let static_sum = static_dispatch(&v2);
    let static_elapsed: std::time::Duration = static_start.elapsed();
    println!("Static dispatch runtime: {:?}", static_elapsed);

    // Print both sums last so the computed results are observably used.
    println!("{}, {}", dynamic_sum, static_sum);
}

I ran the Rust code several times (release build) to collect multiple samples. Static dispatch performs essentially the same in both languages, but Rust’s dynamic dispatch is significantly faster — roughly 3× in the numbers above. This matches my general impression of Julia’s dynamic dispatch: it feels a little slower than I would expect relative to method lookups in other “fast” languages like C++ and Rust. Is there a good reason for this? Is there some dynamic behavior that Julia allows and the other languages do not, which makes dynamic dispatch in Julia this much slower?