Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add COCG method for complex symmetric linear systems #289

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 40 additions & 21 deletions src/cg.jl
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import Base: iterate
using Printf
export cg, cg!, CGIterable, PCGIterable, cg_iterator!, CGStateVariables
export cg, cg!, cocg, cocg!, CGIterable, PCGIterable, cg_iterator!, CGStateVariables

mutable struct CGIterable{matT, solT, vecT, numT <: Real}
mutable struct CGIterable{matT, solT, vecT, numT <: Real, paramT <: Number, dotT <: AbstractDot}
A::matT
x::solT
r::vecT
c::vecT
u::vecT
tol::numT
residual::numT
prev_residual::numT
ρ_prev::paramT
maxiter::Int
mv_products::Int
dotproduct::dotT
end

mutable struct PCGIterable{precT, matT, solT, vecT, numT <: Real, paramT <: Number}
mutable struct PCGIterable{precT, matT, solT, vecT, numT <: Real, paramT <: Number, dotT <: AbstractDot}
Pl::precT
A::matT
x::solT
Expand All @@ -24,9 +25,10 @@ mutable struct PCGIterable{precT, matT, solT, vecT, numT <: Real, paramT <: Numb
u::vecT
tol::numT
residual::numT
ρ::paramT
ρ_prev::paramT
maxiter::Int
mv_products::Int
dotproduct::dotT
end

@inline converged(it::Union{CGIterable, PCGIterable}) = it.residual ≤ it.tol
Expand All @@ -47,18 +49,20 @@ function iterate(it::CGIterable, iteration::Int=start(it))
end

# u := r + βu (almost an axpy)
β = it.residual^2 / it.prev_residual^2
ρ = isa(it.dotproduct, ConjugatedDot) ? it.residual^2 : _norm(it.r, it.dotproduct)^2
β = ρ / it.ρ_prev

it.u .= it.r .+ β .* it.u

# c = A * u
mul!(it.c, it.A, it.u)
α = it.residual^2 / dot(it.u, it.c)
α = ρ / _dot(it.u, it.c, it.dotproduct)

# Improve solution and residual
it.ρ_prev = ρ
it.x .+= α .* it.u
it.r .-= α .* it.c

it.prev_residual = it.residual
it.residual = norm(it.r)

# Return the residual at item and iteration number as state
Expand All @@ -78,18 +82,17 @@ function iterate(it::PCGIterable, iteration::Int=start(it))
# Apply left preconditioner
ldiv!(it.c, it.Pl, it.r)

ρ_prev = it.ρ
it.ρ = dot(it.c, it.r)

# u := c + βu (almost an axpy)
β = it.ρ / ρ_prev
ρ = _dot(it.r, it.c, it.dotproduct)
β = ρ / it.ρ_prev
it.u .= it.c .+ β .* it.u

# c = A * u
mul!(it.c, it.A, it.u)
α = it.ρ / dot(it.u, it.c)
α = ρ / _dot(it.u, it.c, it.dotproduct)

# Improve solution and residual
it.ρ_prev = ρ
it.x .+= α .* it.u
it.r .-= α .* it.c

Expand Down Expand Up @@ -122,7 +125,8 @@ function cg_iterator!(x, A, b, Pl = Identity();
reltol::Real = sqrt(eps(real(eltype(b)))),
maxiter::Int = size(A, 2),
statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
initially_zero::Bool = false)
initially_zero::Bool = false,
dotproduct::AbstractDot = ConjugatedDot())
u = statevars.u
r = statevars.r
c = statevars.c
Expand All @@ -143,14 +147,12 @@ function cg_iterator!(x, A, b, Pl = Identity();
# Return the iterable
if isa(Pl, Identity)
return CGIterable(A, x, r, c, u,
tolerance, residual, one(residual),
maxiter, mv_products
)
tolerance, residual, one(eltype(r)),
maxiter, mv_products, dotproduct)
else
return PCGIterable(Pl, A, x, r, c, u,
tolerance, residual, one(eltype(x)),
maxiter, mv_products
)
tolerance, residual, one(eltype(r)),
maxiter, mv_products, dotproduct)
end
end

Expand Down Expand Up @@ -211,6 +213,7 @@ function cg!(x, A, b;
statevars::CGStateVariables = CGStateVariables(zero(x), similar(x), similar(x)),
verbose::Bool = false,
Pl = Identity(),
dotproduct::AbstractDot = ConjugatedDot(),
kwargs...)
history = ConvergenceHistory(partial = !log)
history[:abstol] = abstol
Expand All @@ -219,7 +222,7 @@ function cg!(x, A, b;

# Actually perform CG
iterable = cg_iterator!(x, A, b, Pl; abstol = abstol, reltol = reltol, maxiter = maxiter,
statevars = statevars, kwargs...)
statevars = statevars, dotproduct = dotproduct, kwargs...)
if log
history.mvps = iterable.mv_products
end
Expand All @@ -237,3 +240,19 @@ function cg!(x, A, b;

log ? (iterable.x, history) : iterable.x
end

"""
cocg(A, b; kwargs...) -> x, [history]

Same as [`cocg!`](@ref), but allocates a solution vector `x` initialized with zeros.
dkarrasch marked this conversation as resolved.
Show resolved Hide resolved
"""
# Allocate a zero initial guess and delegate to the in-place variant; passing
# `initially_zero = true` lets the iterator skip the first matrix-vector product.
function cocg(A, b; kwargs...)
    x = zerox(A, b)
    return cocg!(x, A, b; initially_zero = true, kwargs...)
end

"""
cocg!(x, A, b; kwargs...) -> x, [history]

Same as [`cg!`](@ref), but uses the unconjugated dot product (`xᵀy`) instead of the usual,
conjugated dot product (`x'y`) in the algorithm. It is for solving linear systems with
matrices `A` that are complex-symmetric (`Aᵀ == A`) rather than Hermitian (`A' == A`).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
matrices `A` that are complex-symmetric (`Aᵀ == A`) rahter than Hermitian (`A' == A`).
matrices `A` that are complex-symmetric (`Aᵀ == A`) rather than Hermitian (`A' == A`).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for finding out the typo! Will correct this in the next commit.

"""
# COCG is CG with the unconjugated dot product, so simply forward to `cg!`
# with the `UnconjugatedDot` dispatch tag.
function cocg!(x, A, b; kwargs...)
    return cg!(x, A, b; dotproduct = UnconjugatedDot(), kwargs...)
end
15 changes: 14 additions & 1 deletion src/common.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import LinearAlgebra: ldiv!, \

export Identity
export Identity, ConjugatedDot, UnconjugatedDot

#### Type-handling
"""
Expand Down Expand Up @@ -30,3 +30,16 @@ struct Identity end
\(::Identity, x) = copy(x)
ldiv!(::Identity, x) = x
ldiv!(y, ::Identity, x) = copyto!(y, x)

"""
Dispatch tags selecting the inner product used by the Krylov iterations:
`ConjugatedDot` gives the usual Hermitian inner product `x'y` (ordinary CG),
while `UnconjugatedDot` gives the unconjugated product `xᵀy` (COCG, for
complex-symmetric systems).
"""
abstract type AbstractDot end
struct ConjugatedDot <: AbstractDot end
struct UnconjugatedDot <: AbstractDot end

_norm(x, ::ConjugatedDot) = norm(x)
_dot(x, y, ::ConjugatedDot) = dot(x, y)

# sqrt(xᵀx); may be complex for complex `x` (no conjugation is applied).
_norm(x, ::UnconjugatedDot) = sqrt(mapreduce(xₖ -> xₖ^2, +, x))
# Reshape to vectors via `@view(x[:])` so `transpose` also works for
# multi-dimensional arrays; allocating, but faster than sum(prod, zip(x, y)).
_dot(x, y, ::UnconjugatedDot) = transpose(@view(x[:])) * @view(y[:])
Copy link
Member

@haampie haampie Mar 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_dot(x, y, ::UnconjugatedDot) = transpose(@view(x[:])) * @view(y[:]) # allocating, but faster than sum(prod, zip(x,y))
_dot(x, y, ::UnconjugatedDot) = transpose(x) * y

this shouldn't allocate, at least not on julia 1.5 and above; so let's drop that comment.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is allocating when x and y are not vectors; see my previous comment #289 (comment). The test in the comment was done with Julia 1.5.4. Could you confirm if it is non-allocating for x and y that are multi-dimensional arrays?

If it is allocating, how about changing the comment in the code from # allocating, but ... to # allocating for non-vectorial x and y, but ...?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but I'm not following why you're using view(x, :) in the first place. x and y are assumed to behave as vectors; we have no block methods.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and the only thing that might allocate if julia is not smart enough is the array wrapper, but it's O(1) not O(n), so not worth documenting

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but I'm not following why you're using view(x, :) in the first place. x and y are assumed to behave as vectors; we have no block methods.

What if x and y are multi-dimensional arrays? It is quite common for iterative methods to handle such x and y with matrix-free operators A acting on them.

It is true that multi-dimensional arrays x and y are vector-like in the sense that vector operators such as dot(x, y) are defined on them. Other methods in IterativeSolvers must have worked fine so far because they relied only on such vector operators. Unfortunately the unconjugated dot product is not one of such vector operators in Julia, so I had to turn those vector-like objects into actual vectors using the wrapper @view.

If the unconjugated dot product had been one of those operators required for making objects behave as vectors, I wouldn't have had to go through these complications...

and the only thing that might allocate if julia is not smart enough is the array wrapper, but it's O(1) not O(n), so not worth documenting

Fair enough. I will remove the comment in the next commit.

Copy link
Member

@haampie haampie Mar 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CG method only solves Ax=b right now where x and b are vectors, not AX=B for multiple right-hand sides. I think we have to reserve B::AbstractMatrix for the use case of block methods if they ever get implemented.

Orthogonal to that is how you internally represent your data. If you want X and B to be a tensor because that maps well to your PDE finite differences scheme, then the user should still wrap it in a vector type

struct MyTensorWithVectorSemantics{T} <: AbstractVector{T}
  X::Array{T, 3}
end

and define the interface for it.

Or you don't and you call cg(my_operator, reshape(B, :)) where my_operator restores the shape to B's.

But generally we assume the interface of vectors in iterative methods, and later we can think about supporting block methods to solve AX=B for multiple right-hand sides, where we assume a matrix interface.

Copy link
Author

@wsshin wsshin Mar 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should support multiple right-hand sides with the same cg() interface. The current cg() implementation interprets a non-vector b (including b::AbstractMatrix) as a single right-hand-side column vector, as long as * is defined between the given A and b. I think supporting such usage in the high-level interface in IterativeSolvers.jl was the conclusion people reached after the lengthy discussion in #2 about duck-typing.

That said, my current proposal

_norm(x, ::UnconjugatedDot) = sqrt(sum(xₖ->xₖ^2, x))
_dot(x, y, ::UnconjugatedDot) = transpose(@view(x[:])) * @view(y[:])

is not the perfect solution that supports the most general type of x either: _norm assumes iterable x and _dot assumes reshapable x, and the Banach space element mentioned in #289 (comment) satisfies neither condition. The proposal just makes cocg() works for the most popular cases of x::AbstractArray without requiring the users to define _norm(x, ::UnconjugatedDot) and _dot(x, y, ::UnconjugatedDot) separately. For other types of x, the users must define _norm and _dot for the type.

(I think the best way to solve the present issue might be to define a lazy wrapper Conjugate like Transpose or Adjoint in the standard library and rewrite LinearAlgebra:dot to interpret dot(Conjugate(x), y) as the unconjugated dot product.)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@haampie and @stevengj, any thoughts on this?

42 changes: 34 additions & 8 deletions test/cg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ Random.seed!(1234321)
@testset "Small full system" begin
n = 10

@testset "Matrix{$T}" for T in (Float32, Float64, ComplexF32, ComplexF64)
@testset "Matrix{$T}, conjugated dot product" for T in (Float32, Float64, ComplexF32, ComplexF64)
A = rand(T, n, n)
A = A' * A + I
b = rand(T, n)
reltol = √eps(real(T))

x,ch = cg(A, b; reltol=reltol, maxiter=2n, log=true)
@test isa(ch, ConvergenceHistory)
@test norm(A*x - b) / norm(b) ≤ reltol
@test A*x ≈ b rtol=reltol
@test ch.isconverged

# If you start from the exact solution, you should converge immediately
Expand All @@ -50,6 +50,32 @@ Random.seed!(1234321)
x0 = cg(A, zeros(T, n))
@test x0 == zeros(T, n)
end

@testset "Matrix{$T}, unconjugated dot product" for T in (Float32, Float64, ComplexF32, ComplexF64)
    # Build a complex-symmetric (Aᵀ == A), diagonally-dominated system so that
    # COCG is applicable; note A is generally NOT Hermitian for complex T.
    A = rand(T, n, n)
    A = A + transpose(A) + 15I
    x = ones(T, n)
    b = A * x

    reltol = √eps(real(T))

    # Solve without preconditioner
    x1, his1 = cocg(A, b, reltol = reltol, maxiter = 100, log = true)
    @test isa(his1, ConvergenceHistory)
    @test A*x1 ≈ b rtol=reltol

    # With an initial guess (solution is written into `x_guess` in place)
    x_guess = rand(T, n)
    x2, his2 = cocg!(x_guess, A, b, reltol = reltol, maxiter = 100, log = true)
    @test isa(his2, ConvergenceHistory)
    @test x2 == x_guess
    @test A*x2 ≈ b rtol=reltol

    # Do an exact LU decomp of a nearby matrix, used as a preconditioner
    F = lu(A + rand(T, n, n))
    x3, his3 = cocg(A, b, Pl = F, maxiter = 100, reltol = reltol, log = true)
    @test A*x3 ≈ b rtol=reltol
end
end

@testset "Sparse Laplacian" begin
Expand All @@ -64,24 +90,24 @@ end
@testset "SparseMatrixCSC{$T, $Ti}" for T in (Float64, Float32), Ti in (Int64, Int32)
xCG = cg(A, rhs; reltol=reltol, maxiter=100)
xJAC = cg(A, rhs; Pl=P, reltol=reltol, maxiter=100)
@test norm(A * xCG - rhs) ≤ reltol
@test norm(A * xJAC - rhs) ≤ reltol
@test A*xCG rhs rtol=reltol
@test A*xJAC rhs rtol=reltol
end

Af = LinearMap(A)
@testset "Function" begin
xCG = cg(Af, rhs; reltol=reltol, maxiter=100)
xJAC = cg(Af, rhs; Pl=P, reltol=reltol, maxiter=100)
@test norm(A * xCG - rhs) ≤ reltol
@test norm(A * xJAC - rhs) ≤ reltol
@test A*xCG rhs rtol=reltol
@test A*xJAC rhs rtol=reltol
end

@testset "Function with specified starting guess" begin
x0 = randn(size(rhs))
xCG, hCG = cg!(copy(x0), Af, rhs; abstol=abstol, reltol=0.0, maxiter=100, log=true)
xJAC, hJAC = cg!(copy(x0), Af, rhs; Pl=P, abstol=abstol, reltol=0.0, maxiter=100, log=true)
@test norm(A * xCG - rhs) ≤ reltol
@test norm(A * xJAC - rhs) ≤ reltol
@test A*xCG rhs rtol=reltol
@test A*xJAC rhs rtol=reltol
@test niters(hJAC) == niters(hCG)
end
end
Expand Down
14 changes: 14 additions & 0 deletions test/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ end
@test ldiv!(y, P, copy(x)) == x
end

@testset "Vector{$T}, conjugated and unconjugated dot products" for T in (ComplexF32, ComplexF64)
    n = 100
    x = rand(T, n)
    y = rand(T, n)

    # Conjugated dot product: matches LinearAlgebra's `x'y` / `norm`
    @test IterativeSolvers._norm(x, ConjugatedDot()) ≈ sqrt(x'x)
    @test IterativeSolvers._dot(x, y, ConjugatedDot()) ≈ x'y

    # Unconjugated dot product: `xᵀy`, no conjugation of the first argument
    @test IterativeSolvers._norm(x, UnconjugatedDot()) ≈ sqrt(transpose(x) * x)
    @test IterativeSolvers._dot(x, y, UnconjugatedDot()) ≈ transpose(x) * y
end

end

DocMeta.setdocmeta!(IterativeSolvers, :DocTestSetup, :(using IterativeSolvers); recursive=true)
Expand Down