Using an AbstractUnitRange inside a struct

I want to use this struct:

struct Instance_RH{P <: AbstractFloat, Q <: AbstractUnitRange}
    cost::Array{P, 2}
    time_by_quantity::Array{P, 2}
    penalty_by_multitask::Array{P, 1}
    maximum_budget::P
    I::Q
    J::Q
end 

where I and J depend on the dimensions of cost and are declared like this:

dimensions_of_instance = size(cost);
I = Base.OneTo(dimensions_of_instance[1]);
J = Base.OneTo(dimensions_of_instance[2]);

The problem is that when I use them in this loop, its performance decreases a little:

for j in instance.J
    quantity_in_project[j] = 0;
    for i in instance.I
        quantity_in_project[j] += x[i, j];
    end
end

When I write the UnitRange directly in the loop, like 1:5, it goes faster, so what am I doing wrong?

How are you benchmarking? (Including the code might help here.) I suspect it could be a benchmarking issue: if you don’t interpolate the arguments to the function in, e.g., BenchmarkTools.@benchmark, they are treated as globals, which can give different numbers depending on whether the range is hardcoded or not. If it is not a benchmarking issue, it could be constant folding on the hardcoded range, unrolling the loop, or something like that.
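For example (a sketch; f and r are just placeholder names, not your actual code):

using BenchmarkTools

f(r) = sum(r)
r = Base.OneTo(1000)

@benchmark f(r)    # r is a non-constant global here; timings can look different
@benchmark f($r)   # $ interpolates r, so it is benchmarked like a normal argument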

I’m benchmarking with BenchmarkTools like this:

@benchmark calculate_quantity!($z, $solution_1, $instance_1)

The output for using the range inside the struct is:

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     23.470 ns (0.00% GC)
  median time:      27.081 ns (0.00% GC)
  mean time:        27.169 ns (0.00% GC)
  maximum time:     92.678 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     997

If I declare the ranges inside the function, I get this:

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     11.411 ns (0.00% GC)
  median time:      13.614 ns (0.00% GC)
  mean time:        13.444 ns (0.00% GC)
  maximum time:     58.659 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     999

If it’s the second situation, what can I do?

Please post the full code so that it can be copied and pasted.

Ok.

struct Instance_RH{P <: AbstractFloat, Q <: AbstractUnitRange}
    cost::Array{P, 2}
    time_by_quantity::Array{P, 2}
    penalty_by_multitask::Array{P, 1}
    penalty_by_know_how::Array{P, 2}
    maximum_budget::P
    I::Q
    J::Q
end

function generate_instance(cost, time_by_quantity, 
    penalty_by_multitask, penalty_by_know_how, maximum_budget)
    
    dimensions_of_instance = size(cost);
    I = Base.OneTo(dimensions_of_instance[1]);
    J = Base.OneTo(dimensions_of_instance[2]);
    return Instance_RH(cost, time_by_quantity, 
        penalty_by_multitask, penalty_by_know_how, maximum_budget, I, J);
end


cost = [404.16130257432536 342.6121400760149 334.65400017129275; 609.9826980212956 451.2171757784507 483.83148674153233; 362.93218686863423 414.41083606045623 675.6532058287046; 498.73524270040207 379.8833522296185 372.12075326006203; 507.1394146512986 343.40004673720284 344.87937542652276; 614.5935044087983 343.38652475868105 666.6455577870877; 654.5084251471844 460.7325334872372 343.51651476383165; 514.5598529059743 445.47832539448785 667.5994573488664; 352.0625777996967 357.63523731788064 426.5800063667885; 633.8737992123354 575.4684437771787 526.8544116378721]
time_by_quantity = [551.149318836251 353.5576871795756 544.7850084991596; 540.8786124298001 347.11253457830435 534.2360337745861; 530.9324781581835 340.806606793647 524.2600861079036; 521.2428271420944 334.758141874596 514.891286519538; 512.0603850942899 329.686814227325 506.0319653251741; 503.2315108697694 324.6932839541254 497.68125911747074; 495.06705853446783 320.35799835061465 489.867352509886; 487.4786234404495 316.2110977723632 483.3234557345035; 481.19585646796173 312.4731479370266 477.0680377885712; 476.0273928951479 309.1389856638628 472.01244831930615]
penalty_by_multitask = [3.932768925120902, 2.1629197821510475, 3.747201375134096, 1.2027401788064473, 3.762680990567876, 3.601216190081945, 3.9412748264981996, 1.004961550315905, 3.963908498229274, 1.5845709053894907]
penalty_by_know_how = [0.811984335664391 0.2852506669586609 0.4118216702970811; 0.10635146869948212 0.3497031430472351 0.6630721652898788; 0.13880963807944877 0.5621105391563797 0.22663788295134837; 0.44358375556142055 0.22274423735250545 0.5015070457420686; 0.14826541117764483 0.7966779051319146 0.6769308830979445; 0.07009524283219473 0.08771913744412921 0.5220504363220271; 0.8324525095520683 0.8185583447816812 0.4939598817833735; 0.597792860354574 0.4440247339167606 0.8233927217289345; 0.014589514521600778 0.29122819336562134 0.6274289868926287; 0.21229629326964108 0.2910770390826134 0.8959322834368744]
maximum_budget = 14109.108387884087
instance_0 = generate_instance(cost, time_by_quantity, penalty_by_multitask, penalty_by_know_how, maximum_budget)

function calculate!(y, x, instance) # Number of people i involved in project j
    @inbounds begin
        for j in instance.J
            y[j] = 0;
            for i in instance.I
                y[j] += x[i, j];
            end
        end
    end
    return minimum(y) == 0 ? count(i->(i == 0), y) : 0;
end
x = [1 1 0; 0 0 1; 1 0 1; 0 0 0; 0 0 0; 1 1 1; 0 0 0; 0 0 0; 0 0 0; 0 1 1]
y = zeros(Int, size(x, 2))
@benchmark calculate!($y, $x, $instance_0)
BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     16.617 ns (0.00% GC)
  median time:      17.117 ns (0.00% GC)
  mean time:        18.704 ns (0.00% GC)
  maximum time:     71.672 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     999

function calculate_2!(y, x, instance) # Number of people i involved in project j
    @inbounds begin
        for j in 1:3
            y[j] = 0;
            for i in 1:10
                y[j] += x[i, j];
            end
        end
    end
    return minimum(y) == 0 ? count(i->(i == 0), y) : 0;
end

@benchmark calculate_2!($y, $x, $instance_0)
BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     11.111 ns (0.00% GC)
  median time:      11.512 ns (0.00% GC)
  mean time:        12.849 ns (0.00% GC)
  maximum time:     74.274 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     999

I cannot test now, but in the second case the ranges of the loops are known at compile time, and that may allow some additional compiler optimizations. I have seen that happening in some cases.

If that is the case and the performance difference is relevant to you, what can be done is to put the lengths of the loops in the type parameters, for example using something like this (a sketch; the type parameters NI and NJ are just illustrative names):
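
struct Instance_RH{P <: AbstractFloat, NI, NJ}
    cost::Array{P, 2}
    time_by_quantity::Array{P, 2}
    penalty_by_multitask::Array{P, 1}
    penalty_by_know_how::Array{P, 2}
    maximum_budget::P
end

function generate_instance(cost, time_by_quantity,
    penalty_by_multitask, penalty_by_know_how, maximum_budget)

    NI, NJ = size(cost)   # the lengths become type parameters instead of fields
    return Instance_RH{eltype(cost), NI, NJ}(cost, time_by_quantity,
        penalty_by_multitask, penalty_by_know_how, maximum_budget)
end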

Then run the loops from 1 to those lengths inside the function. With that, the compiler will specialize on these lengths (and compile a new method for every instance with a different I or J, so be careful: if these change a lot, you may run into problems related to runtime dispatch).
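
Something along these lines (again just a sketch, reusing calculate! from above with the ranges replaced by the type parameters):

function calculate!(y, x, instance::Instance_RH{P, NI, NJ}) where {P, NI, NJ}
    @inbounds begin
        for j in 1:NJ          # NJ is a compile-time constant here
            y[j] = 0
            for i in 1:NI      # so is NI, which lets the compiler unroll these loops
                y[j] += x[i, j]
            end
        end
    end
    return minimum(y) == 0 ? count(i -> (i == 0), y) : 0
end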

To generalize @lmiq’s approach, you could pass I and J into calculate! using Val.

struct Instance_RH{P <: AbstractFloat, Q <: AbstractUnitRange}
    cost::Array{P, 2}
    time_by_quantity::Array{P, 2}
    penalty_by_multitask::Array{P, 1}
    penalty_by_know_how::Array{P, 2}
    maximum_budget::P
    I::Q
    J::Q
end

calculate!(y, x, instance) = calculate!(y, x, instance, Val(instance.I), Val(instance.J))

function calculate!(y, x, instance, ::Val{I}, ::Val{J}) where {I, J}
    @inbounds begin
        for j in J
            y[j] = 0;
            for i in I
                y[j] += x[i, j];
            end
        end
    end
    return minimum(y) == 0 ? count(i->(i == 0), y) : 0;
end

This will compile a specific version of calculate! for specific values of I and J. This can be counter-productive if you need to use many different ranges in a single program because Julia will spend a lot of time compiling different versions of calculate!. However, if I and J do not vary much in a Julia session, this could be useful.
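For example, with the x, y, and instance_0 defined earlier (a 10×3 cost matrix), the first call compiles a method specialized on Val(Base.OneTo(10)) and Val(Base.OneTo(3)), and later calls with the same ranges reuse it:

calculate!(y, x, instance_0)                  # first call triggers the specialized compilation
@benchmark calculate!($y, $x, $instance_0)    # subsequent calls reuse the specialized method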

I’ll use different ranges, for between 10 and 20 instances, and I think many functions will use this idea. Is that too much?

I think the problem is not so much compilation time, unless the whole program runs very fast. The greatest problem is if you are calling this function from within a loop where you change the ranges all the time. In that case, the dispatch to one method or another will occur at runtime, and that is bad for performance and allocations:

julia> struct Ranges{N,M} end

julia> function loop()
         s = 0.
         for i in 1:1000
           n = rand(1:10)
           m = rand(1:10)
           r = Ranges{n,m}()
           s += loop_over_ranges(r)
         end
         s
       end
loop (generic function with 1 method)

julia> function loop_over_ranges(r::Ranges{N,M}) where {N,M}
         s = 0.
         for i in 1:N
           for j in 1:M
             s += sin(i*j)
           end
         end
         return s
       end
loop_over_ranges (generic function with 1 method)

julia> @btime loop()
  718.319 μs (2000 allocations: 31.25 KiB) # allocations come from run-time dispatch
-490.3299078031772

# doing things the simple way here is better:
julia> function loop2()
         s = 0.
         for i in 1:1000
           n = rand(1:10)
           m = rand(1:10)
           s += loop_over_ranges2(n,m)
         end
         s
       end
loop2 (generic function with 1 method)

julia> function loop_over_ranges2(n,m)
         s = 0.
         for i in 1:n
           for j in 1:m
             s += sin(i*j)
           end
         end
         return s
       end
loop_over_ranges2 (generic function with 1 method)

julia> @btime loop2()
  316.391 μs (0 allocations: 0 bytes)
-260.6418221413946



Note that for a single run, the loop that gets the information of the ranges from the type is faster:

julia> r = Ranges{5,10}()
Ranges{5, 10}()

julia> @btime loop_over_ranges($r)
  429.462 ns (0 allocations: 0 bytes)
1.4301429914966868

julia> @btime loop_over_ranges2(5,10)
  566.620 ns (0 allocations: 0 bytes)
1.4301429914966868


Do you use each instance more than once? Do you know the ranges ahead of time?

I’ll have around 10 to 20 instances, I think, and the range won’t change within an instance. Each one is solved using a heuristic, so the instance will be a parameter of various functions before I change to another one.