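Here is a comparison of timings between LoopVectorization (LV) and a plain-loop implementation: the `@turbo` version was about 30x faster on my system, and the threaded `@tturbo` version was about 100x faster, though these ratios depend on the machine and the number of threads.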
My `versioninfo()` output:
julia> versioninfo()
Julia Version 1.6.2
Commit 1b93d53fc4 (2021-07-14 15:36 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-11.0.1 (ORCJIT, skylake)
Environment:
  JULIA_NUM_THREADS = 12
julia> m = rand(100,1000);  # 100×1000 matrix of random Float64 values
julia> mt = rand(1000,100);  # 1000×100 matrix of independent random values (not the transpose of m)
julia> result = zeros(100, 100);  # zero-filled 100×100 output array
julia> using BenchmarkTools, LoopVectorization  # @benchmark comes from BenchmarkTools; @turbo/@tturbo from LoopVectorization
    
julia> function min_loops_lv!(m, mt, result)
           # Adds sum over k of min(m[i, k], mt[k, j]) to result[i, j] for
           # i in 1:size(m, 1), j in 1:size(mt, 2), k in 1:size(mt, 1), using
           # LoopVectorization's @turbo. `result` is accumulated into, not
           # overwritten, so zero it first when a fresh product is wanted.
           # Throws DimensionMismatch if the loops would index past `m` or `result`.
           nrows = size(m, 1)
           inner, ncols = size(mt, 1), size(mt, 2)
           # @turbo performs no bounds checking, so reject any shape the loops
           # would overrun instead of reading or writing out of bounds.
           size(m, 2) >= inner || throw(DimensionMismatch(
               "m has $(size(m, 2)) columns but the k loop needs $inner (rows of mt)"))
           (size(result, 1) >= nrows && size(result, 2) >= ncols) || throw(DimensionMismatch(
               "result has size $(size(result)) but must be at least ($nrows, $ncols)"))
           # LoopVectorization documents empty loops under @turbo as undefined
           # behavior; with an empty range there is nothing to accumulate anyway.
           (nrows == 0 || inner == 0 || ncols == 0) && return nothing
           @turbo for i in 1:nrows
               for j in 1:ncols
                   for k in 1:inner
                       result[i, j] += min(m[i, k], mt[k, j])
                   end
               end
           end
           return nothing
       end
min_loops_lv! (generic function with 1 method)
julia> function min_loops_tlv!(m, mt, result)
           # Threaded variant: adds sum over k of min(m[i, k], mt[k, j]) to
           # result[i, j] for i in 1:size(m, 1), j in 1:size(mt, 2),
           # k in 1:size(mt, 1), using LoopVectorization's @tturbo. `result` is
           # accumulated into, not overwritten, so zero it first when a fresh
           # product is wanted.
           # Throws DimensionMismatch if the loops would index past `m` or `result`.
           nrows = size(m, 1)
           inner, ncols = size(mt, 1), size(mt, 2)
           # @tturbo performs no bounds checking, so reject any shape the loops
           # would overrun instead of reading or writing out of bounds.
           size(m, 2) >= inner || throw(DimensionMismatch(
               "m has $(size(m, 2)) columns but the k loop needs $inner (rows of mt)"))
           (size(result, 1) >= nrows && size(result, 2) >= ncols) || throw(DimensionMismatch(
               "result has size $(size(result)) but must be at least ($nrows, $ncols)"))
           # LoopVectorization documents empty loops as undefined behavior;
           # with an empty range there is nothing to accumulate anyway.
           (nrows == 0 || inner == 0 || ncols == 0) && return nothing
           @tturbo for i in 1:nrows
               for j in 1:ncols
                   for k in 1:inner
                       result[i, j] += min(m[i, k], mt[k, j])
                   end
               end
           end
           return nothing
       end
min_loops_tlv! (generic function with 1 method)
julia> function min_loops_normal!(m, mt, result)
           # Plain-Julia baseline: adds sum over k of min(m[row, k], mt[k, col])
           # to result[row, col]. `result` is accumulated into, not overwritten.
           # Iteration order is row, then col, then k (k varies fastest).
           nrows = size(m, 1)
           inner, ncols = size(mt, 1), size(mt, 2)
           for row in 1:nrows, col in 1:ncols, k in 1:inner
               result[row, col] += min(m[row, k], mt[k, col])
           end
           return nothing
       end
min_loops_normal! (generic function with 1 method)
julia> @benchmark min_loops_normal!($m, $mt, $result)  # `$` interpolates the globals; the kernel uses `+=`, so result keeps growing across samples
BenchmarkTools.Trial: 230 samples with 1 evaluation.
 Range (min … max):  21.075 ms …  23.152 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     21.735 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):   21.784 ms ± 331.447 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%
              ▃ ▄ ▇▄ ▆▄▆▃█▄▄▂▄▅                                 
  ▃▁▁▁▃▁▆▃▅▅▇▇█▆█▅█████████████▅▆▃▆▄▄▅▅▆▄▃▃▃▁▄▁▁▁▃▃▄▃▃▁▁▁▁▃▃▃▄ ▄
  21.1 ms         Histogram: frequency by time         22.9 ms <
 Memory estimate: 0 bytes, allocs estimate: 0.
julia> result_lv = zeros(100, 100);  # separate zero-filled output for the @turbo kernel
julia> @benchmark min_loops_lv!($m, $mt, $result_lv)  # the kernel uses `+=`, so result_lv keeps growing across samples
BenchmarkTools.Trial: 6836 samples with 1 evaluation.
 Range (min … max):  664.470 μs …  1.721 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     711.900 μs              ┊ GC (median):    0.00%
 Time  (mean ± σ):   728.216 μs ± 59.243 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%
       ▅  █   ▂                                                 
  ▃▂▁▁▃█▂▂█▆▂▅█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▂
  664 μs          Histogram: frequency by time          970 μs <
 Memory estimate: 0 bytes, allocs estimate: 0.
julia> result_tlv = zeros(100, 100);  # separate zero-filled output for the @tturbo kernel
julia> @benchmark min_loops_tlv!($m, $mt, $result_tlv)  # the kernel uses `+=`, so result_tlv keeps growing across samples
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range (min … max):  165.561 μs … 404.591 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     194.182 μs               ┊ GC (median):    0.00%
 Time  (mean ± σ):   201.954 μs ±  29.925 μs  ┊ GC (mean ± σ):  0.00% ± 0.00%
    ▆▆  █                                                        
  ▂▁██▂▆█▂▂▃▂▂▂▂▃▄▄▂▂▂▂▂▂▂▂▂▂▄▇▇█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁ ▂
  166 μs           Histogram: frequency by time          278 μs <
 Memory estimate: 0 bytes, allocs estimate: 0.