Derivatives of a production function

I have a production function with several CES levels:

function F(L2, A; λ=ones(4)./2, ρ=ones(4)./2)
    #L2 is 3x2, A is 1x1, λ and ρ are vectors of length 4
    λ1 = λ[1:2]
    λ2 = λ[3:4]
    ρ1 = ρ[1:2]
    ρ2 = ρ[3:4]

    L1 = Array{Float64}(undef, 2)

    for k in 1:2
        L1[k] = (λ2[2]*(λ2[1]*L2[1,k]^ρ2[1] + (1 - λ2[1])*L2[2,k]^ρ2[1])^(ρ2[2]/ρ2[1]) + (1 - λ2[2])*L2[3,k]^ρ2[2])^(1/ρ2[2])
    end

    Q = (λ1[2]*(λ1[1]*A[1]^ρ1[1] + (1 - λ1[1])*L1[1]^ρ1[1])^(ρ2[1]/ρ1[1]) + (1 - λ1[2])*L1[2]^ρ2[2])^(1/ρ1[2])
    return Q
end

Consider L2[k] as the amount of a type of labor in occupation k. I’m interested in finding the derivatives of Q with respect to L2, that determines the wages for that type of labor.

I’m wondering if anyone has any suggestions for going about that. I have been playing with Zygote.jl but have not completely figured it out. Can the other differentiation packages (ReverseDiff.jl, FiniteDiff.jl) work with this kind of function?

Zygote will not work for you the way it’s implemented now as you are mutating L1. ForwardDiff.jl should work well though

ForwardDiff.gradient(L2 -> F(L2, A; kwargs...), L2)

Note that defining L1 like this

 L1 = similar(L2, 2)

will be required then

2 Likes

Since k only goes to 2, you could make L1 a Tuple instead of an Array.

L1 = ntuple(k -> (λ2[2]*(λ2[1]*L2[1,k]^ρ2[1] + (1 - λ2[1])*L2[2,k]^ρ2[1])^(ρ2[2]/ρ2[1]) + (1 - λ2[2])*L2[3,k]^ρ2[2])^(1/ρ2[2]),  2)

This avoids mutating an array, so should work with Zygote (although I have not tested.)

1 Like

Good point, it will reduce the allocations by a small amount as well. In this case, though, the dimensions are so small, and all operations are scalar operations, that forward diff is likely to be faster anyways.

I tried to post a MWE, my real application has arrays of size 200 x 30 and several levels of CES aggregation. That leads me to ask about how best to organize the function so that I can differentiate it.

L2 would be that 200 x 30 array. For my sanity, I organized that Array using something like:

L21 = L2[1:10, 1:5]
L22 = L2[11:20, 11:5]
...

then use the aggregators on those Lij Arrays. Of course, I’d need to initialize those Lij Arrays first, how do I do that?

Can I organize the function like that and still be able to differentiate with respect to L2?

Ok, if your arrays are that large the advice on forward vs. reverse diff might change. Without having an example that runs and is representative of the actual problem it’s difficult to advice you.

Ok, here it is in all its glory:

function f(t, A, prodinput) # have to include parameters as kwargs here
    # A is a vector of length 30, prodinput is 160 x 30
    HSmale25 = prodinput[1:10, :]
    HSmale35 = prodinput[11:20, :]
    HSmale45 = prodinput[21:30, :]
    HSmale55 = prodinput[31:40, :]
    HSfemale25 = prodinput[41:50, :]
    HSfemale35 = prodinput[51:60, :]
    HSfemale45 = prodinput[61:70, :]
    HSfemale55 = prodinput[71:80, :]
    COLmale25 = prodinput[81:90, :]
    COLmale35 = prodinput[91:100, :]
    COLmale45 = prodinput[101:110, :]
    COLmale55 = prodinput[111:120, :]
    COLfemale25 = prodinput[121:130, :]
    COLfemale35 = prodinput[131:140, :]
    COLfemale45 = prodinput[141:150, :]
    COLfemale55 = prodinput[151:160, :]

    for i in 1:3, k in 1:2 # skilled: occupations 1,2
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 3:6 # services: occupations 3-6
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 7:10 # services: occupations 7-10
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 1:10, j in 1:5
        τ[i,k,j] = τ[i,1,j]
        σ[i,k,j] = σ[i,1,j]
        ν[i,k,j] = ν[i,1,j]
        ϖ[i,k,j] = ϖ[i,1,j]
    end

    for t in 1:szT
        # equations 8a-8d
        for k in 1:szK
            HSmale[k, t] = (logit(τ[1,k,:],t)*HSmale25[k, t]^ρ[7] + logit(τ[2,k,:],t)*HSmale35[k, t]^ρ[7] + logit(τ[3,k,:],t)*HSmale45[k, t]^ρ[7] + (1-logit(τ[1,k,:],t)-logit(τ[2,k,:],t)-logit(τ[3,k,:],t))*HSmale55[k, t])^(1/ρ[7])
            HSfemale[k, t] = (logit(σ[1,k,:],t)*HSfemale25[k, t]^ρ[7] + logit(σ[2,k,:],t)*HSfemale35[k, t]^ρ[7] + logit(σ[3,k,:],t)*HSfemale45[k, t]^ρ[7] + (1-logit(σ[1,k,:],t)-logit(σ[2,k,:],t)-logit(σ[3,k,:],t))*HSfemale55[k, t])^(1/ρ[7])
            COLmale[k, t] = (logit(ν[1,k,:],t)*COLmale25[k, t]^ρ[7] + logit(ν[2,k,:],t)*COLmale35[k, t]^ρ[7] + logit(ν[3,k,:],t)*COLmale45[k, t]^ρ[7] + (1-logit(ν[1,k,:],t)-logit(ν[2,k,:],t)-logit(ν[3,k,:],t))*COLmale55[k, t])^(1/ρ[7])
            COLfemale[k, t] = (logit(ϖ[1,k,:],t)*COLfemale25[k, t]^ρ[7] + logit(ϖ[2,k,:],t)*COLfemale35[k, t]^ρ[7] + logit(ϖ[3,k,:],t)*COLfemale45[k, t]^ρ[7] + (1-logit(ϖ[1,k,:],t)-logit(ϖ[2,k,:],t)-logit(ϖ[3,k,:],t))*COLfemale55[k, t])^(1/ρ[7])
        end

        for k in 1:szK # eqs 7
            HS[k, t] = (logit(ξ[k,:],t)*HSmale[k, t]^ρ[6] + (1-logit(ξ[k,:],t))*HSfemale[k, t]^ρ[6])^(1/ρ[6])
            COL[k, t] = (logit(γ[k,:],t)*COLmale[k, t]^ρ[6] + (1-logit(γ[k,:],t))*COLfemale[k, t]^ρ[6])^(1/ρ[6])
        end

        for k in 1:szK # eq 6
            EMPk[k, t] = (logit(μ[k,:],t)*HS[k, t]^ρ[5] + (1-logit(μ[k,:],t))*COL[k, t]^ρ[5])^(1/ρ[5])
        end

        # eqs 5
        EMPu[1, t] = (logit(λ[3,:],t)*EMPk[3, t]^ρ[3] + logit(λ[4,:],t)*EMPk[4, t]^ρ[3] + logit(λ[5,:],t)*EMPk[5, t]^ρ[3] + (1-logit(λ[3,:],t)-logit(λ[4,:],t)-logit(λ[5,:],t))*EMPk[6, t]^ρ[3])^(1/ρ[3])
        EMPu[2, t] = (logit(λ[7,:],t)*EMPk[7, t]^ρ[4] + logit(λ[8,:],t)*EMPk[8, t]^ρ[4] + logit(λ[9,:],t)*EMPk[9, t]^ρ[4] + (1-logit(λ[7,:],t)-logit(λ[8,:],t)-logit(λ[9,:],t))*EMPk[10, t]^ρ[4])^(1/ρ[4])

        # eqs 4
        EMP_su[1, t] = (logit(λ[1,:],t)*EMPk[1, t]^ρ[1] + (1-logit(λ[1,:],t))*EMPk[2, t]^ρ[1])^(1/ρ[1])
        EMP_su[2, t] = (logit(λ[2,:],t)*EMPu[1, t]^ρ[2] + (1-logit(λ[2,:],t))*EMPu[2, t]^ρ[2])^(1/ρ[2])
    end

    Y = β(t; b=b)*(λY[2]*(λY[1]*A[t]^ρY[1] + (1 - λY[1])*EMP_su[1,t]^ρY[1])^(ρY[2]/ρY[1]) + (1-λY[2])*EMP_su[2,t]^ρY[2])^(1/ρY[2])

    return Y
end

Now that I look at it after giving it a facelift, I think it can work with ForwardDiff to obtain the derivatives wrt prodinput, no?

I implemented something like this a few years ago in Matlab (the code is in my github repo as shared/CesNestedLH.m.

There, I defined a CES object that is just a plain vanilla CES which can compute its derivatives and knows its weights and elasticity of substitution. The marginal products have the usual closed form solutions.

Then there is a NestedCES object that knows its weights and elasticity and contains a bunch of CES objects for the nested CES parts.

Computing derivatives then simply involves applying the chain rule.

One could generalize this to a more flexible nested production function where the nested ones do not have to be CES. That would be something useful to write up as a package, I think.

1 Like

It is still lacking something in order to be a minimal working example (MWE), representative input. What is the definition of t,A,prodinput? This is important in order to

  1. Let people actually run the code.
  2. It tells people about representative sizes for the inputs.
1 Like

I’ll take a look at that, thanks!

Ok, I reworked the function. Here I’m posting an example that should work without problems. What I’m interested in knowing is whether the way in which I am initializing the various Arrays inside the function f would not yield any problems when computing the derivatives with respect to prodinput or A. I can compute the derivatives, but I don’t know if they are correct or there is a problem with “tracing” the inputs to the function. (and if anyone can also suggest some speedups would be great)

using ForwardDiff
using ReverseDiff
using LinearAlgebra

szK = 10
szT = 30
szPoly = 5
A = rand(szT)
prodinput = rand(16*szK, szT)
τ = rand(3,szK,szPoly)
σ = rand(3,szK,szPoly)
ν = rand(3,szK,szPoly)
ϖ = rand(3,szK,szPoly)
ξ = rand(szK,szPoly)
γ = rand(szK,szPoly)
μ = rand(szK,szPoly)
λ = rand(9,szPoly)
λY = rand(2,szPoly)

ρ = rand(7)
ρY = rand(2)

b = rand(2)

function β(t; b=b)
    b[1] + b[2]*t
end

function logit(x, t; min=0, max=1, n=szPoly)
    vt = [t^i for i in 0:n-1]
    exvt = exp(dot(x, vt))
    return (max - min) * (exvt/(1 + exvt)) + min
end

function tab(x, t; min=0, max=1, n=szPoly)
    vt = [t^i for i in 0:n-1]
    pol = dot(x, vt)
    return (max + min)/2 + (max - min)/2 * ((2pol)/(1 + pol^2))
end

function f(t, A, prodinput; transf=tab) # have to include parameters as kwargs here
    # A is a vector of length 30, prodinput is 160 x 30
    HSmale25 = prodinput[1:10, :]
    HSmale35 = prodinput[11:20, :]
    HSmale45 = prodinput[21:30, :]
    HSmale55 = prodinput[31:40, :]
    HSfemale25 = prodinput[41:50, :]
    HSfemale35 = prodinput[51:60, :]
    HSfemale45 = prodinput[61:70, :]
    HSfemale55 = prodinput[71:80, :]
    COLmale25 = prodinput[81:90, :]
    COLmale35 = prodinput[91:100, :]
    COLmale45 = prodinput[101:110, :]
    COLmale55 = prodinput[111:120, :]
    COLfemale25 = prodinput[121:130, :]
    COLfemale35 = prodinput[131:140, :]
    COLfemale45 = prodinput[141:150, :]
    COLfemale55 = prodinput[151:160, :]

    HSmale = similar(HSmale25)
    HSfemale = similar(HSfemale25)
    COLmale = similar(COLmale25)
    COLfemale = similar(COLfemale25)

    HS = similar(HSmale)
    COL = similar(COLmale)

    EMPk = similar(HS)
    EMPu = similar(EMPk, (2, szT))
    EMP_su = similar(EMPu, (2, szT))

    for i in 1:3, k in 1:2 # skilled: occupations 1,2
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 3:6 # services: occupations 3-6
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 7:10 # services: occupations 7-10
        τ[i,k,1] = τ[i,1,1]
        σ[i,k,1] = σ[i,1,1]
        ν[i,k,1] = ν[i,1,1]
        ϖ[i,k,1] = ϖ[i,1,1]
    end

    for i in 1:3, k in 1:10, j in 1:5
        τ[i,k,j] = τ[i,1,j]
        σ[i,k,j] = σ[i,1,j]
        ν[i,k,j] = ν[i,1,j]
        ϖ[i,k,j] = ϖ[i,1,j]
    end

#    for t in 1:szT
        # equations 8a-8d
        for k in 1:szK
            τ1 = transf(τ[1,k,:],t)
            τ2 = transf(τ[2,k,:],t; max=1-τ1)
            τ3 = transf(τ[3,k,:],t; max=1-τ1-τ2)
            τ4 = 1 - τ1 - τ2 - τ3
            σ1 = transf(σ[1,k,:],t)
            σ2 = transf(σ[2,k,:],t; max=1-σ1)
            σ3 = transf(σ[3,k,:],t; max=1-σ1-σ2)
            σ4 = 1 - σ1 - σ2 - σ3
            ν1 = transf(ν[1,k,:],t)
            ν2 = transf(ν[2,k,:],t; max=1-ν1)
            ν3 = transf(ν[3,k,:],t; max=1-ν1-ν2)
            ν4 = 1 - ν1 - ν2 - ν3
            ϖ1 = transf(ϖ[1,k,:],t)
            ϖ2 = transf(ϖ[2,k,:],t; max=1-ϖ1)
            ϖ3 = transf(ϖ[3,k,:],t; max=1-ϖ1-ϖ2)
            ϖ4 = 1 - ϖ1 - ϖ2 - ϖ3
            HSmale[k, t] = (τ1*HSmale25[k, t]^ρ[7] + τ2*HSmale35[k, t]^ρ[7] + τ3*HSmale45[k, t]^ρ[7] + τ4*HSmale55[k, t])^(1/ρ[7])
            HSfemale[k, t] = (σ1*HSfemale25[k, t]^ρ[7] + σ2*HSfemale35[k, t]^ρ[7] + σ3*HSfemale45[k, t]^ρ[7] + σ4*HSfemale55[k, t])^(1/ρ[7])
            COLmale[k, t] = (ν1*COLmale25[k, t]^ρ[7] + ν2*COLmale35[k, t]^ρ[7] + ν3*COLmale45[k, t]^ρ[7] + ν4*COLmale55[k, t])^(1/ρ[7])
            COLfemale[k, t] = (ϖ1*COLfemale25[k, t]^ρ[7] + ϖ2*COLfemale35[k, t]^ρ[7] + ϖ3*COLfemale45[k, t]^ρ[7] + ϖ4*COLfemale55[k, t])^(1/ρ[7])
        end

        for k in 1:szK # eqs 7
            ξ1 = transf(ξ[k,:],t)
            γ1 = transf(γ[k,:],t)
            HS[k, t] = (ξ1*HSmale[k, t]^ρ[6] + (1-ξ1)*HSfemale[k, t]^ρ[6])^(1/ρ[6])
            COL[k, t] = (γ1*COLmale[k, t]^ρ[6] + (1-γ1)*COLfemale[k, t]^ρ[6])^(1/ρ[6])
        end

        for k in 1:szK # eq 6
            μ1 = transf(μ[k,:],t)
            EMPk[k, t] = (μ1*HS[k, t]^ρ[5] + (1-μ1)*COL[k, t]^ρ[5])^(1/ρ[5])
        end

        # eqs 5
        λ3 = transf(λ[3,:],t)
        λ4 = transf(λ[4,:],t; max=1-λ3)
        λ5 = transf(λ[5,:],t; max=1-λ3-λ4)
        λ6 = 1 - λ3 - λ4 - λ5
        λ7 = transf(λ[7,:],t)
        λ8 = transf(λ[8,:],t; max=1-λ7)
        λ9 = transf(λ[9,:],t; max=1-λ7-λ8)
        λ10 = 1 - λ7 - λ8 - λ9
        EMPu[1, t] = (λ3*EMPk[3, t]^ρ[3] + λ4*EMPk[4, t]^ρ[3] + λ5*EMPk[5, t]^ρ[3] + λ6*EMPk[6, t]^ρ[3])^(1/ρ[3])
        EMPu[2, t] = (λ7*EMPk[7, t]^ρ[4] + λ8*EMPk[8, t]^ρ[4] + λ9*EMPk[9, t]^ρ[4] + λ10*EMPk[10, t]^ρ[4])^(1/ρ[4])

        # eqs 4
        λ1 = transf(λ[1,:],t)
        λ2 = transf(λ[2,:],t)
        EMP_su[1, t] = (λ1*EMPk[1, t]^ρ[1] + (1-λ1)*EMPk[2, t]^ρ[1])^(1/ρ[1])
        EMP_su[2, t] = (λ2*EMPu[1, t]^ρ[2] + (1-λ2)*EMPu[2, t]^ρ[2])^(1/ρ[2])
#    end

    λY1 = transf(λY[1,:],t)
    λY2 = transf(λY[2,:],t)
    Y = β(t; b=b)*(λY2*(λY1*A[t]^ρY[1] + (1 - λY1)*EMP_su[1,t]^ρY[1])^(ρY[2]/ρY[1]) + (1-λY2)*EMP_su[2,t]^ρY[2])^(1/ρY[2])

    return Y
end

When you use either ReverseDiff or ForwardDiff, the input to the function will be of a special type. To make sure it works, all variables that are affected by the input must be initialized such that the special type carries over. If you happen to initialize an array to the wrong type, you’ll get a convert error when assigning into it.

If you want to verify a gradient obtained by AD, you can try FiniteDifferences.jl and compare the result.

2 Likes

Thank you for the clarification. So I take that ReverseDiff and ForwardDiff are pretty fool-proof, which is pretty great!