From 437f93d39b969b2cc14b0e8b5e3c705bde2201f0 Mon Sep 17 00:00:00 2001
From: Jonathan Maack <jonathan.maack@nrel.gov>
Date: Wed, 10 Apr 2024 15:48:20 -0600
Subject: [PATCH 1/3] Initial implementation of allowing user to provide matrix
 vector product rule

---
 src/linear.jl | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/linear.jl b/src/linear.jl
index 080f67b..97c4391 100644
--- a/src/linear.jl
+++ b/src/linear.jl
@@ -66,12 +66,13 @@ This version is for linear equations Ay = b
 - `A::matrix`, `b::vector`: components of linear system ``A y = b``
 - `lsolve::function`: lsolve(A, b). Function to solve the linear system, default is backslash operator.
 - `Af::factorization`: An optional factorization of A, useful to override default factorize, or if multiple linear solves will be performed with same A matrix.
+- `matvec_mul`: Function to compute ``A*y`` for a vector ``y``. Defaults to the julia multipy operator.
 """
-implicit_linear(A, b; lsolve=linear_solve, Af=nothing) = _implicit_linear(A, b, lsolve, Af)
+implicit_linear(A, b; lsolve=linear_solve, Af=nothing, matvec_mul=*) = _implicit_linear(A, b, lsolve, matvec_mul, Af)
 
 
 # If no AD, just solve normally.
-_implicit_linear(A, b, lsolve, Af) = isnothing(Af) ? lsolve(A, b) : lsolve(Af, b)
+_implicit_linear(A, b, lsolve, matvec_mul, Af) = isnothing(Af) ? lsolve(A, b) : lsolve(Af, b)
 # function implicit_linear(A, b, lsolve, Af, cache)
 #     if isnothing(cache)
 #         if isnothing(Af)
@@ -90,9 +91,9 @@ _implicit_linear(A, b, lsolve, Af) = isnothing(Af) ? lsolve(A, b) : lsolve(Af, b
 # end
 
 # catch three cases where one or both contain duals
-_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af) where {T} = linear_dual(A, b, lsolve, Af, T)
-_implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af) where {T} = linear_dual(A, b, lsolve, Af, T)
-_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af) where {T} = linear_dual(A, b, lsolve, Af, T)
+_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
+_implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
+_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
 # implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
 # implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
 # implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
@@ -110,7 +111,7 @@ _implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af) where {
 # implicit_linear!(ydot, A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af) where {T} = linear_dual!(ydot, A, b, lsolve, Af, T)
 
 # Both A and b contain duals
-function linear_dual(A, b, lsolve, Af, T)
+function linear_dual(A, b, lsolve, matvec_mul, Af, T)
 
     # unpack dual numbers (if not dual numbers, since only one might be, just returns itself)
     bv = fd_value(b)
@@ -122,7 +123,7 @@ function linear_dual(A, b, lsolve, Af, T)
     yv = lsolve(Afact, bv)
 
     # extract Partials of b - A * y  i.e., bdot - Adot * y  (since y does not contain duals)
-    rhs = fd_partials(b - A*yv)
+    rhs = fd_partials(b - matvec_mul(A,yv))
 
     # solve for new derivatives
     ydot = lsolve(Afact, rhs)
@@ -182,7 +183,7 @@ end
 
 
 # Provide a ChainRule rule for reverse mode
-function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, Af)
+function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, matvec_mul, Af)
 
     # save factorization
     Afact = isnothing(Af) ? factorize(ReverseDiff.value(A)) : Af
@@ -192,16 +193,16 @@ function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, Af)
 
     function implicit_pullback(ybar)
         u = lsolve(Afact', ybar)
-        return NoTangent(), -u*y', u, NoTangent(), NoTangent()
+        return NoTangent(), -u*y', u, NoTangent(), NoTangent(), NoTangent()
     end
 
     return y, implicit_pullback
 end
 
 # register above rule for ReverseDiff
-ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b, lsolve, Af)
-ReverseDiff.@grad_from_chainrules _implicit_linear(A, b::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, lsolve, Af)
-ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b::Union{ReverseDiff.TrackedArray, AbstractVector{<:ReverseDiff.TrackedReal}}, lsolve, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b, lsolve, matvec_mul, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A, b::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, lsolve, matvec_mul, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b::Union{ReverseDiff.TrackedArray, AbstractVector{<:ReverseDiff.TrackedReal}}, lsolve, matvec_mul, Af)
 
 
 # function implicit_linear_inplace(A, b, y, Af)

From 39ad9516995ea6dcbc8cdd234506fb76c295f883 Mon Sep 17 00:00:00 2001
From: Jonathan Maack <jonathan.maack@nrel.gov>
Date: Thu, 11 Apr 2024 08:49:54 -0600
Subject: [PATCH 2/3] Change keyword to mmul

---
 src/linear.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/linear.jl b/src/linear.jl
index 97c4391..c918837 100644
--- a/src/linear.jl
+++ b/src/linear.jl
@@ -66,13 +66,13 @@ This version is for linear equations Ay = b
 - `A::matrix`, `b::vector`: components of linear system ``A y = b``
 - `lsolve::function`: lsolve(A, b). Function to solve the linear system, default is backslash operator.
 - `Af::factorization`: An optional factorization of A, useful to override default factorize, or if multiple linear solves will be performed with same A matrix.
-- `matvec_mul`: Function to compute ``A*y`` for a vector ``y``. Defaults to the julia multipy operator.
+- `mmul::function`: mmul(A, y). Function to compute the matrix-vector product ``A*y`` for a vector ``y``. Defaults to Julia's multiplication operator `*`.
 """
-implicit_linear(A, b; lsolve=linear_solve, Af=nothing, matvec_mul=*) = _implicit_linear(A, b, lsolve, matvec_mul, Af)
+implicit_linear(A, b; lsolve=linear_solve, Af=nothing, mmul=*) = _implicit_linear(A, b, lsolve, mmul, Af)
 
 
 # If no AD, just solve normally.
-_implicit_linear(A, b, lsolve, matvec_mul, Af) = isnothing(Af) ? lsolve(A, b) : lsolve(Af, b)
+_implicit_linear(A, b, lsolve, mmul, Af) = isnothing(Af) ? lsolve(A, b) : lsolve(Af, b)
 # function implicit_linear(A, b, lsolve, Af, cache)
 #     if isnothing(cache)
 #         if isnothing(Af)
@@ -91,9 +91,9 @@ _implicit_linear(A, b, lsolve, matvec_mul, Af) = isnothing(Af) ? lsolve(A, b) :
 # end
 
 # catch three cases where one or both contain duals
-_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
-_implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
-_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, matvec_mul, Af) where {T} = linear_dual(A, b, lsolve, matvec_mul, Af, T)
+_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, mmul, Af) where {T} = linear_dual(A, b, lsolve, mmul, Af, T)
+_implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, mmul, Af) where {T} = linear_dual(A, b, lsolve, mmul, Af, T)
+_implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, mmul, Af) where {T} = linear_dual(A, b, lsolve, mmul, Af, T)
 # implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
 # implicit_linear(A, b::AbstractArray{<:ForwardDiff.Dual{T}}, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
 # implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af, cache) where {T} = isnothing(cache) ? linear_dual(A, b, lsolve, Af, T) : linear_dual(A, b, lsolve, Af, T, cache)
@@ -111,7 +111,7 @@ _implicit_linear(A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, matvec_mul,
 # implicit_linear!(ydot, A::AbstractArray{<:ForwardDiff.Dual{T}}, b, lsolve, Af) where {T} = linear_dual!(ydot, A, b, lsolve, Af, T)
 
 # Both A and b contain duals
-function linear_dual(A, b, lsolve, matvec_mul, Af, T)
+function linear_dual(A, b, lsolve, mmul, Af, T)
 
     # unpack dual numbers (if not dual numbers, since only one might be, just returns itself)
     bv = fd_value(b)
@@ -123,7 +123,7 @@ function linear_dual(A, b, lsolve, matvec_mul, Af, T)
     yv = lsolve(Afact, bv)
 
     # extract Partials of b - A * y  i.e., bdot - Adot * y  (since y does not contain duals)
-    rhs = fd_partials(b - matvec_mul(A,yv))
+    rhs = fd_partials(b - mmul(A,yv))
 
     # solve for new derivatives
     ydot = lsolve(Afact, rhs)
@@ -183,7 +183,7 @@ end
 
 
 # Provide a ChainRule rule for reverse mode
-function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, matvec_mul, Af)
+function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, mmul, Af)
 
     # save factorization
     Afact = isnothing(Af) ? factorize(ReverseDiff.value(A)) : Af
@@ -200,9 +200,9 @@ function ChainRulesCore.rrule(::typeof(_implicit_linear), A, b, lsolve, matvec_m
 end
 
 # register above rule for ReverseDiff
-ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b, lsolve, matvec_mul, Af)
-ReverseDiff.@grad_from_chainrules _implicit_linear(A, b::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, lsolve, matvec_mul, Af)
-ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b::Union{ReverseDiff.TrackedArray, AbstractVector{<:ReverseDiff.TrackedReal}}, lsolve, matvec_mul, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b, lsolve, mmul, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A, b::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, lsolve, mmul, Af)
+ReverseDiff.@grad_from_chainrules _implicit_linear(A::Union{ReverseDiff.TrackedArray, AbstractArray{<:ReverseDiff.TrackedReal}}, b::Union{ReverseDiff.TrackedArray, AbstractVector{<:ReverseDiff.TrackedReal}}, lsolve, mmul, Af)
 
 
 # function implicit_linear_inplace(A, b, y, Af)

From fad0e0d0da4da8233f466d76f3ce10755155a742 Mon Sep 17 00:00:00 2001
From: Jonathan Maack <jonathan.maack@nrel.gov>
Date: Thu, 11 Apr 2024 09:13:33 -0600
Subject: [PATCH 3/3] Add test for user defined matrix multiplication

---
 test/runtests.jl | 65 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 30d1457..0aad926 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -292,6 +292,69 @@ end
     @test all(isapprox.(J1, J2, atol=3e-12))
 end
 
+@testset "linear_user_mmul" begin
+
+    count = 0
+
+    function my_multiply(A, x)
+        (m, n) = size(A)
+        T = promote_type(eltype(A), eltype(x))
+        y = zeros(T, m)
+        for j in 1:n
+            for i in 1:m
+                y[i] += A[i, j] * x[j]
+            end
+        end
+        # Provide a way to make sure this function was called
+        count += 1
+        return y
+    end
+
+    function test(a)
+        A = a[1] * [1.0 2.0 3.0; 4.1 5.3 6.4; 7.4 8.6 9.7]
+        b = 2.0 * a[2:4]
+        x = implicit_linear(A, b; mmul=my_multiply)
+        z = 2 * x
+        return z
+    end
+
+    function test2(a)
+        A = [1.0 2.0 3.0; 4.1 5.3 6.4; 7.4 8.6 9.7]
+        b = 2.0 * a[2:4]
+        x = implicit_linear(A, b; mmul=my_multiply)
+        z = 2 * x
+        return z
+    end
+
+    function test3(a)
+        A = a[1] * [1.0 2.0 3.0; 4.1 5.3 6.4; 7.4 8.6 9.7]
+        b = 2.0 * ones(3)
+        x = implicit_linear(A, b; mmul=my_multiply)
+        z = 2 * x
+        return z
+    end
+
+    a = [1.2, 2.3, 3.1, 4.3]
+    J1 = ForwardDiff.jacobian(test, a)
+    J2 = ReverseDiff.jacobian(test, a)
+
+    @test count == 1
+    @test all(isapprox.(J1, J2, atol=3e-12))
+
+    J1 = ForwardDiff.jacobian(test2, a)
+    J2 = ReverseDiff.jacobian(test2, a)
+
+    @test count == 2
+    @test all(isapprox.(J1, J2, atol=3e-12))
+
+    J1 = ForwardDiff.jacobian(test3, a)
+    J2 = ReverseDiff.jacobian(test3, a)
+
+    @test count == 3
+    @test all(isapprox.(J1, J2, atol=3e-12))
+
+end
+
 @testset "1d (also parameters)" begin
 
     residual(y, x, p) = y/x[1] + x[2]*cos(y)
@@ -991,4 +1054,4 @@ end
     @test all(isapprox.(J1, J2, atol=1e-15))
     @test all(isapprox.(J1, Jfd, atol=1e-9))
 
-end
\ No newline at end of file
+end