feat(Analysis/Convex): Birkhoff's theorem and doubly stochastic matri…

…ces (#16278)
leanprover-community · Oct 8, 2024 · 2e8a2b2 · 2e8a2b2
1 parent dde8da6
commit 2e8a2b2
Show file tree

Hide file tree

Showing 3 changed files with 313 additions and 0 deletions.
diff --git a/Mathlib.lean b/Mathlib.lean
@@ -1096,6 +1096,7 @@ import Mathlib.Analysis.ConstantSpeed
 import Mathlib.Analysis.Convex.AmpleSet
 import Mathlib.Analysis.Convex.Basic
 import Mathlib.Analysis.Convex.Between
+import Mathlib.Analysis.Convex.Birkhoff
 import Mathlib.Analysis.Convex.Body
 import Mathlib.Analysis.Convex.Caratheodory
 import Mathlib.Analysis.Convex.Combination
@@ -2354,6 +2355,7 @@ import Mathlib.Data.Matrix.CharP
 import Mathlib.Data.Matrix.ColumnRowPartitioned
 import Mathlib.Data.Matrix.Composition
 import Mathlib.Data.Matrix.DMatrix
+import Mathlib.Data.Matrix.DoublyStochastic
 import Mathlib.Data.Matrix.DualNumber
 import Mathlib.Data.Matrix.Hadamard
 import Mathlib.Data.Matrix.Invertible

diff --git a/Mathlib/Analysis/Convex/Birkhoff.lean b/Mathlib/Analysis/Convex/Birkhoff.lean
@@ -0,0 +1,172 @@
+/-
+Copyright (c) 2024 Bhavik Mehta. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Bhavik Mehta
+-/
+
+import Mathlib.Analysis.Convex.Combination
+import Mathlib.Combinatorics.Hall.Basic
+import Mathlib.Data.Matrix.DoublyStochastic
+import Mathlib.Tactic.Linarith
+
+/-!
+# Birkhoff's theorem
+
+## Main statements
+
+* `doublyStochastic_eq_sum_perm`: If `M` is a doubly stochastic matrix, then it is a convex
+  combination of permutation matrices.
+* `doublyStochastic_eq_convexHull_perm`: The set of doubly stochastic matrices is the convex hull
+  of the permutation matrices.
+
+## TODO
+
+* Show that the extreme points of doubly stochastic matrices are the permutation matrices.
+* Show that for `x y : n → R`, `x` is majorized by `y` if and only if there is a doubly stochastic
+  matrix `M` such that `M *ᵥ y = x`.
+
+## Tags
+
+Doubly stochastic, Birkhoff's theorem, Birkhoff-von Neumann theorem
+-/
+
+open Finset Function Matrix
+
+variable {R n : Type*} [Fintype n] [DecidableEq n]
+
+section LinearOrderedSemifield
+
+variable [LinearOrderedSemifield R] {M : Matrix n n R}
+
+/--
+If M is a positive scalar multiple of a doubly stochastic matrix, then there is a permutation matrix
+whose support is contained in the support of M.
+-/
+private lemma exists_perm_eq_zero_implies_eq_zero [Nonempty n] {s : R} (hs : 0 < s)
+    (hM : ∃ M' ∈ doublyStochastic R n, M = s • M') :
+    ∃ σ : Equiv.Perm n, ∀ i j, M i j = 0 → σ.permMatrix R i j = 0 := by
+  rw [exists_mem_doublyStochastic_eq_smul_iff hs.le] at hM
+  let f (i : n) : Finset n := univ.filter (M i · ≠ 0)
+  have hf (A : Finset n) : A.card ≤ (A.biUnion f).card := by
+    have (i) : ∑ j ∈ f i, M i j = s := by simp [sum_subset (filter_subset _ _), hM.2.1]
+    have h₁ : ∑ i ∈ A, ∑ j ∈ f i, M i j = A.card * s := by simp [this]
+    have h₂ : ∑ i, ∑ j ∈ A.biUnion f, M i j = (A.biUnion f).card * s := by
+      simp [sum_comm (t := A.biUnion f), hM.2.2, mul_comm s]
+    suffices A.card * s ≤ (A.biUnion f).card * s by exact_mod_cast le_of_mul_le_mul_right this hs
+    rw [← h₁, ← h₂]
+    trans ∑ i ∈ A, ∑ j ∈ A.biUnion f, M i j
+    · refine sum_le_sum fun i hi => ?_
+      exact sum_le_sum_of_subset_of_nonneg (subset_biUnion_of_mem f hi) (by simp [*])
+    · exact sum_le_sum_of_subset_of_nonneg (by simp) fun _ _ _ => sum_nonneg fun j _ => hM.1 _ _
+  obtain ⟨g, hg, hg'⟩ := (all_card_le_biUnion_card_iff_exists_injective f).1 hf
+  rw [Finite.injective_iff_bijective] at hg
+  refine ⟨Equiv.ofBijective g hg, fun i j hij => ?_⟩
+  simp only [PEquiv.toMatrix_apply, Option.mem_def, ite_eq_right_iff, one_ne_zero, imp_false,
+    Equiv.toPEquiv_apply, Equiv.ofBijective_apply, Option.some.injEq]
+  rintro rfl
+  simpa [f, hij] using hg' i
+
+end LinearOrderedSemifield
+
+section LinearOrderedField
+
+variable [LinearOrderedField R] {M : Matrix n n R}
+
+/--
+If M is a scalar multiple of a doubly stochastic matrix, then it is a conical combination of
+permutation matrices. This is most useful when M is a doubly stochastic matrix, in which case
+the combination is convex.
+
+This particular formulation is chosen to make the inductive step easier: we no longer need to
+rescale each time a permutation matrix is subtracted.
+-/
+private lemma doublyStochastic_sum_perm_aux (M : Matrix n n R)
+    (s : R) (hs : 0 ≤ s)
+    (hM : ∃ M' ∈ doublyStochastic R n, M = s • M') :
+    ∃ w : Equiv.Perm n → R, (∀ σ, 0 ≤ w σ) ∧ ∑ σ, w σ • σ.permMatrix R = M := by
+  rcases isEmpty_or_nonempty n
+  case inl => exact ⟨1, by simp, Subsingleton.elim _ _⟩
+  set d : ℕ := (Finset.univ.filter fun i : n × n => M i.1 i.2 ≠ 0).card with ← hd
+  clear_value d
+  induction d using Nat.strongRecOn generalizing M s
+  case ind d ih =>
+  rcases eq_or_lt_of_le hs with rfl | hs'
+  case inl =>
+    use 0
+    simp only [zero_smul, exists_and_right] at hM
+    simp [hM]
+  obtain ⟨σ, hσ⟩ := exists_perm_eq_zero_implies_eq_zero hs' hM
+  obtain ⟨i, hi, hi'⟩ := exists_min_image _ (fun i => M i (σ i)) univ_nonempty
+  rw [exists_mem_doublyStochastic_eq_smul_iff hs] at hM
+  let N : Matrix n n R := M - M i (σ i) • σ.permMatrix R
+  have hMi' : 0 < M i (σ i) := (hM.1 _ _).lt_of_ne' fun h => by
+    simpa [Equiv.toPEquiv_apply] using hσ _ _ h
+  let s' : R := s - M i (σ i)
+  have hs' : 0 ≤ s' := by
+    simp only [s', sub_nonneg, ← hM.2.1 i]
+    exact single_le_sum (fun j _ => hM.1 i j) (by simp)
+  have : ∃ M' ∈ doublyStochastic R n, N = s' • M' := by
+    rw [exists_mem_doublyStochastic_eq_smul_iff hs']
+    simp only [sub_apply, smul_apply, PEquiv.toMatrix_apply, Equiv.toPEquiv_apply, Option.mem_def,
+      Option.some.injEq, smul_eq_mul, mul_ite, mul_one, mul_zero, sub_nonneg,
+      sum_sub_distrib, sum_ite_eq, mem_univ, ↓reduceIte, N]
+    refine ⟨fun i' j => ?_, by simp [hM.2.1], by simp [← σ.eq_symm_apply, hM]⟩
+    split
+    case isTrue h => exact (hi' i' (by simp)).trans_eq (by rw [h])
+    case isFalse h => exact hM.1 _ _
+  have hd' : (univ.filter fun i : n × n => N i.1 i.2 ≠ 0).card < d := by
+    rw [← hd]
+    refine card_lt_card ?_
+    rw [ssubset_iff_of_subset (monotone_filter_right _ _)]
+    · simp only [ne_eq, mem_filter, mem_univ, true_and, Decidable.not_not, Prod.exists]
+      refine ⟨i, σ i, hMi'.ne', ?_⟩
+      simp [N, Equiv.toPEquiv_apply]
+    · rintro ⟨i', j'⟩ hN' hM'
+      dsimp at hN' hM'
+      simp only [sub_apply, hM', smul_apply, PEquiv.toMatrix_apply, Equiv.toPEquiv_apply,
+        Option.mem_def, Option.some.injEq, smul_eq_mul, mul_ite, mul_one, mul_zero, zero_sub,
+        neg_eq_zero, ite_eq_right_iff, Classical.not_imp, N] at hN'
+      obtain ⟨rfl, _⟩ := hN'
+      linarith [hi' i' (by simp)]
+  obtain ⟨w, hw, hw'⟩ := ih _ hd' _ s' hs' this rfl
+  refine ⟨w + fun σ' => if σ' = σ then M i (σ i) else 0, ?_⟩
+  simp only [Pi.add_apply, add_smul, sum_add_distrib, hw', ite_smul, zero_smul,
+    sum_ite_eq', mem_univ, ↓reduceIte, N, sub_add_cancel, and_true]
+  intro σ'
+  split <;> simp [add_nonneg, hw, hM.1]
+
+/--
+If M is a doubly stochastic matrix, then it is an convex combination of permutation matrices. Note
+`doublyStochastic_eq_convexHull_permMatrix` shows `doublyStochastic n` is exactly the convex hull of
+the permutation matrices, and this lemma is instead most useful for accessing the coefficients of
+each permutation matrices directly.
+-/
+lemma exists_eq_sum_perm_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) :
+    ∃ w : Equiv.Perm n → R, (∀ σ, 0 ≤ w σ) ∧ ∑ σ, w σ = 1 ∧ ∑ σ, w σ • σ.permMatrix R = M := by
+  rcases isEmpty_or_nonempty n
+  case inl => exact ⟨fun _ => 1, by simp, by simp, Subsingleton.elim _ _⟩
+  obtain ⟨w, hw1, hw3⟩ := doublyStochastic_sum_perm_aux M 1 (by simp) ⟨M, hM, by simp⟩
+  refine ⟨w, hw1, ?_, hw3⟩
+  inhabit n
+  have : ∑ j, ∑ σ : Equiv.Perm n, w σ • σ.permMatrix R default j = 1 := by
+    simp only [← smul_apply (m := n), ← Finset.sum_apply, hw3]
+    rw [sum_row_of_mem_doublyStochastic hM]
+  simpa [sum_comm (γ := n), Equiv.toPEquiv_apply] using this
+
+/--
+**Birkhoff's theorem**
+The set of doubly stochastic matrices is the convex hull of the permutation matrices.  Note
+`exists_eq_sum_perm_of_mem_doublyStochastic` gives a convex weighting of each permutation matrix
+directly.  To show `doublyStochastic n` is convex, use `convex_doublyStochastic`.
+-/
+theorem doublyStochastic_eq_convexHull_permMatrix :
+    doublyStochastic R n = convexHull R {σ.permMatrix R | σ : Equiv.Perm n} := by
+  refine (convexHull_min ?g1 convex_doublyStochastic).antisymm' fun M hM => ?g2
+  case g1 =>
+    rintro x ⟨h, rfl⟩
+    exact permMatrix_mem_doublyStochastic
+  case g2 =>
+    obtain ⟨w, hw1, hw2, hw3⟩ := exists_eq_sum_perm_of_mem_doublyStochastic hM
+    exact mem_convexHull_of_exists_fintype w (·.permMatrix R) hw1 hw2 (by simp) hw3
+
+end LinearOrderedField
diff --git a/Mathlib/Data/Matrix/DoublyStochastic.lean b/Mathlib/Data/Matrix/DoublyStochastic.lean
@@ -0,0 +1,139 @@
+/-
+Copyright (c) 2024 Bhavik Mehta. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Bhavik Mehta
+-/
+
+import Mathlib.Analysis.Convex.Basic
+import Mathlib.LinearAlgebra.Matrix.Permutation
+
+/-!
+# Doubly stochastic matrices
+
+## Main definitions
+
+* `doublyStochastic`: a square matrix is doubly stochastic if all entries are nonnegative, and left
+  or right multiplication by the vector of all 1s gives the vector of all 1s. Equivalently, all
+  row and column sums are equal to 1.
+
+## Main statements
+
+* `convex_doublyStochastic`: The set of doubly stochastic matrices is convex.
+* `permMatrix_mem_doublyStochastic`: Any permutation matrix is doubly stochastic.
+
+## TODO
+
+Define the submonoids of row-stochastic and column-stochastic matrices.
+Show that the submonoid of doubly stochastic matrices is the meet of them, or redefine it as such.
+
+## Tags
+
+Doubly stochastic, Birkhoff's theorem, Birkhoff-von Neumann theorem
+-/
+
+open Finset Function Matrix
+
+variable {R n : Type*} [Fintype n] [DecidableEq n]
+
+section OrderedSemiring
+variable [OrderedSemiring R] {M : Matrix n n R}
+
+/--
+A square matrix is doubly stochastic iff all entries are nonnegative, and left or right
+multiplication by the vector of all 1s gives the vector of all 1s.
+-/
+def doublyStochastic (R n : Type*) [Fintype n] [DecidableEq n] [OrderedSemiring R] :
+    Submonoid (Matrix n n R) where
+  carrier := {M | (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1 ∧ 1 ᵥ* M = 1 }
+  mul_mem' {M N} hM hN := by
+    refine ⟨fun i j => sum_nonneg fun i _ => mul_nonneg (hM.1 _ _) (hN.1 _ _), ?_, ?_⟩
+    next => rw [← mulVec_mulVec, hN.2.1, hM.2.1]
+    next => rw [← vecMul_vecMul, hM.2.2, hN.2.2]
+  one_mem' := by simp [zero_le_one_elem]
+
+lemma mem_doublyStochastic :
+    M ∈ doublyStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1 ∧ 1 ᵥ* M = 1 :=
+  Iff.rfl
+
+lemma mem_doublyStochastic_iff_sum :
+    M ∈ doublyStochastic R n ↔
+      (∀ i j, 0 ≤ M i j) ∧ (∀ i, ∑ j, M i j = 1) ∧ ∀ j, ∑ i, M i j = 1 := by
+  simp [funext_iff, doublyStochastic, mulVec, vecMul, dotProduct]
+
+/-- Every entry of a doubly stochastic matrix is nonnegative. -/
+lemma nonneg_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) {i j : n} : 0 ≤ M i j :=
+  hM.1 _ _
+
+/-- Each row sum of a doubly stochastic matrix is 1. -/
+lemma sum_row_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) (i : n) : ∑ j, M i j = 1 :=
+  (mem_doublyStochastic_iff_sum.1 hM).2.1 _
+
+/-- Each column sum of a doubly stochastic matrix is 1. -/
+lemma sum_col_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) (j : n) : ∑ i, M i j = 1 :=
+  (mem_doublyStochastic_iff_sum.1 hM).2.2 _
+
+/-- A doubly stochastic matrix multiplied with the all-ones column vector is 1. -/
+lemma mulVec_one_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) : M *ᵥ 1 = 1 :=
+  (mem_doublyStochastic.1 hM).2.1
+
+/-- The all-ones row vector multiplied with a doubly stochastic matrix is 1. -/
+lemma one_vecMul_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) : 1 ᵥ* M = 1 :=
+  (mem_doublyStochastic.1 hM).2.2
+
+/-- Every entry of a doubly stochastic matrix is less than or equal to 1. -/
+lemma le_one_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) {i j : n} :
+    M i j ≤ 1 := by
+  rw [← sum_row_of_mem_doublyStochastic hM i]
+  exact single_le_sum (fun k _ => hM.1 _ k) (mem_univ j)
+
+/-- The set of doubly stochastic matrices is convex. -/
+lemma convex_doublyStochastic : Convex R (doublyStochastic R n : Set (Matrix n n R)) := by
+  intro x hx y hy a b ha hb h
+  simp only [SetLike.mem_coe, mem_doublyStochastic_iff_sum] at hx hy ⊢
+  simp [add_nonneg, ha, hb, mul_nonneg, hx, hy, sum_add_distrib, ← mul_sum, h]
+
+/-- Any permutation matrix is doubly stochastic. -/
+lemma permMatrix_mem_doublyStochastic {σ : Equiv.Perm n} :
+    σ.permMatrix R ∈ doublyStochastic R n := by
+  rw [mem_doublyStochastic_iff_sum]
+  refine ⟨fun i j => ?g1, ?g2, ?g3⟩
+  case g1 => aesop
+  case g2 => simp [Equiv.toPEquiv_apply]
+  case g3 => simp [Equiv.toPEquiv_apply, ← Equiv.eq_symm_apply]
+
+end OrderedSemiring
+
+section LinearOrderedSemifield
+
+variable [LinearOrderedSemifield R] {M : Matrix n n R}
+
+/--
+A matrix is `s` times a doubly stochastic matrix iff all entries are nonnegative, and all row and
+column sums are equal to `s`.
+
+This lemma is useful for the proof of Birkhoff's theorem - in particular because it allows scaling
+by nonnegative factors rather than positive ones only.
+-/
+lemma exists_mem_doublyStochastic_eq_smul_iff {M : Matrix n n R} {s : R} (hs : 0 ≤ s) :
+    (∃ M' ∈ doublyStochastic R n, M = s • M') ↔
+      (∀ i j, 0 ≤ M i j) ∧ (∀ i, ∑ j, M i j = s) ∧ (∀ j, ∑ i, M i j = s) := by
+  classical
+  constructor
+  case mp =>
+    rintro ⟨M', hM', rfl⟩
+    rw [mem_doublyStochastic_iff_sum] at hM'
+    simp only [smul_apply, smul_eq_mul, ← mul_sum]
+    exact ⟨fun i j => mul_nonneg hs (hM'.1 _ _), by simp [hM']⟩
+  rcases eq_or_lt_of_le hs with rfl | hs
+  case inl =>
+    simp only [zero_smul, exists_and_right, and_imp]
+    intro h₁ h₂ _
+    refine ⟨⟨1, Submonoid.one_mem _⟩, ?_⟩
+    ext i j
+    specialize h₂ i
+    rw [sum_eq_zero_iff_of_nonneg (by simp [h₁ i])] at h₂
+    exact h₂ _ (by simp)
+  rintro ⟨hM₁, hM₂, hM₃⟩
+  exact ⟨s⁻¹ • M, by simp [mem_doublyStochastic_iff_sum, ← mul_sum, hs.ne', inv_mul_cancel₀, *]⟩
+
+end LinearOrderedSemifield