If you want it to be super fast, write a single nested loop that performs both operations, like this:
"""
    subMins!(mat)

Subtract from every entry of `mat`, in place, the sum of the minimum of its row and
the minimum of its column.

Both sets of minima are computed from `mat` as it is on entry, before any entry is
modified. Arithmetic follows the element type's rules, so integer element types wrap
around on overflow/underflow rather than throwing.

Returns `nothing`; the result is left in `mat`.
"""
function subMins!(mat)
    # Snapshot the minima up front so the updates below cannot influence them.
    colmin = minimum(mat, dims=1)
    rowmin = minimum(mat, dims=2)
    nrows = size(mat, 1)
    ncols = size(mat, 2)
    # Columns outermost, rows innermost: matches Julia's column-major memory layout.
    for j in 1:ncols
        cj = colmin[j]
        for i in 1:nrows
            mat[i, j] = mat[i, j] - (rowmin[i] + cj)
        end
    end
    return nothing
end
using BenchmarkTools
# Benchmark input: a 16000×16000 matrix of random UInt16 values.
mat = rand(UInt16, 16000, 16000)
# `$mat` interpolates the global into the benchmark expression so the timing is not
# distorted by global-variable access. Note that subMins! mutates `mat`, so repeated
# benchmark evaluations operate on the already-modified matrix.
@btime subMins!($mat)
290.291 ms (32 allocations: 63.41 KiB)