I think the reduction kernels in GPUArrays can act on lazy Broadcasted objects, so you can probably make them do this for you:
julia> begin
n = 10
A = randn(n, n)
l = randn(n)
f = ((i, j),) -> (i, j, A[i, j]^2 + (1 - l[i])*(1 + l[j]))
op = (x, y) -> x[3] > y[3] ? x : y
col = Iterators.product(1:n, 1:n) # simplified from product(1:k, (k+1):n), just make views of A, l as necc.
i, j, volf = mapreduce(f, op, col; init=(0, 0, -Inf))
end
(7, 3, 13.250911638285407)
julia> argmax(@. A^2 + (1 - l)*(1 + l'))
CartesianIndex(7, 3)
julia> Meta.@lower @. A^2 + (1 - l)*(1 + l')
:($(Expr(:thunk, CodeInfo(
@ none within `top-level scope`
1 β %1 = +
β %2 = ^
β %3 = A
β %4 = Core.apply_type(Base.Val, 2)
β %5 = (%4)()
β %6 = Base.broadcasted(Base.literal_pow, %2, %3, %5)
β %7 = *
β %8 = Base.broadcasted(-, 1, l)
β %9 = +
β %10 = var"'"(l)
β %11 = Base.broadcasted(%9, 1, %10)
β %12 = Base.broadcasted(%7, %8, %11)
β %13 = Base.broadcasted(%1, %6, %12)
β %14 = Base.materialize(%13)
βββ return %14
))))
julia> function lazy(A, l)
x6 = Base.broadcasted(Base.literal_pow, ^, A, Val(2))
x8 =Base.broadcasted(-, 1, l)
x11 = Base.broadcasted(+, 1, l')
x12 = Base.broadcasted(*, x8, x11)
x13 = Base.broadcasted(+, x6, x12)
end
lazy (generic function with 1 method)
# eager
julia> argmax(Base.materialize(lazy(A, l)))
CartesianIndex(7, 3)
julia> using JLArrays
julia> argmax(Base.materialize(lazy(jl(A), jl(l))))
CartesianIndex(7, 3)
# lazy
julia> maximum(lazy(A, l)) # just iterating, I believe
13.250911638285407
julia> maximum(x for x in lazy(A, l))
13.250911638285407
julia> maximum(lazy(jl(A), jl(l))) # using GPUArrays reduction, as iteration fails
13.250911638285407
julia> maximum(x for x in lazy(jl(A), jl(l)))
ERROR: Scalar indexing is disallowed.
# argmax
julia> argmax(lazy(A, l))
ERROR: MethodError: no method matching keys(::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{β¦}, Nothing, typeof(+), Tuple{β¦}})
julia> Base.keys(bc::Base.Broadcast.Broadcasted) = CartesianIndices(axes(bc))
julia> argmax(lazy(A, l))
CartesianIndex(7, 3)
julia> argmax(lazy(jl(A), jl(l)))
ERROR: Scalar indexing is disallowed.
# better idea
julia> function lazy3(A, l)
a1, a2 = axes(A)
bc = lazy(A, l)
x14 = Base.broadcasted(tuple, bc, a1, a2')
end
lazy3 (generic function with 1 method)
julia> maximum(lazy3(A, l))
(13.250911638285407, 7, 3)
julia> maximum(lazy3(jl(A), jl(l)))
ERROR: MethodError: no method matching typemin(::Type{Tuple{Float64, Int64, Int64}})
Stacktrace:
[1] neutral_element(::typeof(max), T::Type)
@ GPUArrays ~/.julia/packages/GPUArrays/ouBUA/src/host/mapreduce.jl:25
[2] _mapreduce(f::typeof(identity), op::typeof(max), As::Base.Broadcast.Broadcasted{β¦}; dims::Colon, init::Nothing)
@ GPUArrays ~/.julia/packages/GPUArrays/ouBUA/src/host/mapreduce.jl:49
...
julia> Base.typemin(::Type{Tuple{T,I,J}}) where {T,I,J} = map(typemin, (T,I,J)) # piracy... could overload GPUArrays .neutral_element instead
julia> maximum(lazy3(jl(A), jl(l)))
(13.250911638285407, 7, 3)