diff --git a/Mathlib.lean b/Mathlib.lean index 7caab9ae96b88..6917161a2ee67 100644 --- a/Mathlib.lean +++ b/Mathlib.lean @@ -1096,6 +1096,7 @@ import Mathlib.Analysis.ConstantSpeed import Mathlib.Analysis.Convex.AmpleSet import Mathlib.Analysis.Convex.Basic import Mathlib.Analysis.Convex.Between +import Mathlib.Analysis.Convex.Birkhoff import Mathlib.Analysis.Convex.Body import Mathlib.Analysis.Convex.Caratheodory import Mathlib.Analysis.Convex.Combination @@ -2354,6 +2355,7 @@ import Mathlib.Data.Matrix.CharP import Mathlib.Data.Matrix.ColumnRowPartitioned import Mathlib.Data.Matrix.Composition import Mathlib.Data.Matrix.DMatrix +import Mathlib.Data.Matrix.DoublyStochastic import Mathlib.Data.Matrix.DualNumber import Mathlib.Data.Matrix.Hadamard import Mathlib.Data.Matrix.Invertible diff --git a/Mathlib/Analysis/Convex/Birkhoff.lean b/Mathlib/Analysis/Convex/Birkhoff.lean new file mode 100644 index 0000000000000..a754f5db135c9 --- /dev/null +++ b/Mathlib/Analysis/Convex/Birkhoff.lean @@ -0,0 +1,172 @@ +/- +Copyright (c) 2024 Bhavik Mehta. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Bhavik Mehta +-/ + +import Mathlib.Analysis.Convex.Combination +import Mathlib.Combinatorics.Hall.Basic +import Mathlib.Data.Matrix.DoublyStochastic +import Mathlib.Tactic.Linarith + +/-! +# Birkhoff's theorem + +## Main statements + +* `doublyStochastic_eq_sum_perm`: If `M` is a doubly stochastic matrix, then it is a convex + combination of permutation matrices. +* `doublyStochastic_eq_convexHull_perm`: The set of doubly stochastic matrices is the convex hull + of the permutation matrices. + +## TODO + +* Show that the extreme points of doubly stochastic matrices are the permutation matrices. +* Show that for `x y : n → R`, `x` is majorized by `y` if and only if there is a doubly stochastic + matrix `M` such that `M *ᵥ y = x`. + +## Tags + +Doubly stochastic, Birkhoff's theorem, Birkhoff-von Neumann theorem +-/ + +open Finset Function Matrix + +variable {R n : Type*} [Fintype n] [DecidableEq n] + +section LinearOrderedSemifield + +variable [LinearOrderedSemifield R] {M : Matrix n n R} + +/-- +If M is a positive scalar multiple of a doubly stochastic matrix, then there is a permutation matrix +whose support is contained in the support of M. +-/ +private lemma exists_perm_eq_zero_implies_eq_zero [Nonempty n] {s : R} (hs : 0 < s) + (hM : ∃ M' ∈ doublyStochastic R n, M = s • M') : + ∃ σ : Equiv.Perm n, ∀ i j, M i j = 0 → σ.permMatrix R i j = 0 := by + rw [exists_mem_doublyStochastic_eq_smul_iff hs.le] at hM + let f (i : n) : Finset n := univ.filter (M i · ≠ 0) + have hf (A : Finset n) : A.card ≤ (A.biUnion f).card := by + have (i) : ∑ j ∈ f i, M i j = s := by simp [sum_subset (filter_subset _ _), hM.2.1] + have h₁ : ∑ i ∈ A, ∑ j ∈ f i, M i j = A.card * s := by simp [this] + have h₂ : ∑ i, ∑ j ∈ A.biUnion f, M i j = (A.biUnion f).card * s := by + simp [sum_comm (t := A.biUnion f), hM.2.2, mul_comm s] + suffices A.card * s ≤ (A.biUnion f).card * s by exact_mod_cast le_of_mul_le_mul_right this hs + rw [← h₁, ← h₂] + trans ∑ i ∈ A, ∑ j ∈ A.biUnion f, M i j + · refine sum_le_sum fun i hi => ?_ + exact sum_le_sum_of_subset_of_nonneg (subset_biUnion_of_mem f hi) (by simp [*]) + · exact sum_le_sum_of_subset_of_nonneg (by simp) fun _ _ _ => sum_nonneg fun j _ => hM.1 _ _ + obtain ⟨g, hg, hg'⟩ := (all_card_le_biUnion_card_iff_exists_injective f).1 hf + rw [Finite.injective_iff_bijective] at hg + refine ⟨Equiv.ofBijective g hg, fun i j hij => ?_⟩ + simp only [PEquiv.toMatrix_apply, Option.mem_def, ite_eq_right_iff, one_ne_zero, imp_false, + Equiv.toPEquiv_apply, Equiv.ofBijective_apply, Option.some.injEq] + rintro rfl + simpa [f, hij] using hg' i + +end LinearOrderedSemifield + +section LinearOrderedField + +variable [LinearOrderedField R] {M : Matrix n n R} + +/-- +If M is a scalar multiple of a doubly stochastic matrix, then it is a conical combination of +permutation matrices. This is most useful when M is a doubly stochastic matrix, in which case +the combination is convex. + +This particular formulation is chosen to make the inductive step easier: we no longer need to +rescale each time a permutation matrix is subtracted. +-/ +private lemma doublyStochastic_sum_perm_aux (M : Matrix n n R) + (s : R) (hs : 0 ≤ s) + (hM : ∃ M' ∈ doublyStochastic R n, M = s • M') : + ∃ w : Equiv.Perm n → R, (∀ σ, 0 ≤ w σ) ∧ ∑ σ, w σ • σ.permMatrix R = M := by + rcases isEmpty_or_nonempty n + case inl => exact ⟨1, by simp, Subsingleton.elim _ _⟩ + set d : ℕ := (Finset.univ.filter fun i : n × n => M i.1 i.2 ≠ 0).card with ← hd + clear_value d + induction d using Nat.strongRecOn generalizing M s + case ind d ih => + rcases eq_or_lt_of_le hs with rfl | hs' + case inl => + use 0 + simp only [zero_smul, exists_and_right] at hM + simp [hM] + obtain ⟨σ, hσ⟩ := exists_perm_eq_zero_implies_eq_zero hs' hM + obtain ⟨i, hi, hi'⟩ := exists_min_image _ (fun i => M i (σ i)) univ_nonempty + rw [exists_mem_doublyStochastic_eq_smul_iff hs] at hM + let N : Matrix n n R := M - M i (σ i) • σ.permMatrix R + have hMi' : 0 < M i (σ i) := (hM.1 _ _).lt_of_ne' fun h => by + simpa [Equiv.toPEquiv_apply] using hσ _ _ h + let s' : R := s - M i (σ i) + have hs' : 0 ≤ s' := by + simp only [s', sub_nonneg, ← hM.2.1 i] + exact single_le_sum (fun j _ => hM.1 i j) (by simp) + have : ∃ M' ∈ doublyStochastic R n, N = s' • M' := by + rw [exists_mem_doublyStochastic_eq_smul_iff hs'] + simp only [sub_apply, smul_apply, PEquiv.toMatrix_apply, Equiv.toPEquiv_apply, Option.mem_def, + Option.some.injEq, smul_eq_mul, mul_ite, mul_one, mul_zero, sub_nonneg, + sum_sub_distrib, sum_ite_eq, mem_univ, ↓reduceIte, N] + refine ⟨fun i' j => ?_, by simp [hM.2.1], by simp [← σ.eq_symm_apply, hM]⟩ + split + case isTrue h => exact (hi' i' (by simp)).trans_eq (by rw [h]) + case isFalse h => exact hM.1 _ _ + have hd' : (univ.filter fun i : n × n => N i.1 i.2 ≠ 0).card < d := by + rw [← hd] + refine card_lt_card ?_ + rw [ssubset_iff_of_subset (monotone_filter_right _ _)] + · simp only [ne_eq, mem_filter, mem_univ, true_and, Decidable.not_not, Prod.exists] + refine ⟨i, σ i, hMi'.ne', ?_⟩ + simp [N, Equiv.toPEquiv_apply] + · rintro ⟨i', j'⟩ hN' hM' + dsimp at hN' hM' + simp only [sub_apply, hM', smul_apply, PEquiv.toMatrix_apply, Equiv.toPEquiv_apply, + Option.mem_def, Option.some.injEq, smul_eq_mul, mul_ite, mul_one, mul_zero, zero_sub, + neg_eq_zero, ite_eq_right_iff, Classical.not_imp, N] at hN' + obtain ⟨rfl, _⟩ := hN' + linarith [hi' i' (by simp)] + obtain ⟨w, hw, hw'⟩ := ih _ hd' _ s' hs' this rfl + refine ⟨w + fun σ' => if σ' = σ then M i (σ i) else 0, ?_⟩ + simp only [Pi.add_apply, add_smul, sum_add_distrib, hw', ite_smul, zero_smul, + sum_ite_eq', mem_univ, ↓reduceIte, N, sub_add_cancel, and_true] + intro σ' + split <;> simp [add_nonneg, hw, hM.1] + +/-- +If M is a doubly stochastic matrix, then it is an convex combination of permutation matrices. Note +`doublyStochastic_eq_convexHull_permMatrix` shows `doublyStochastic n` is exactly the convex hull of +the permutation matrices, and this lemma is instead most useful for accessing the coefficients of +each permutation matrices directly. +-/ +lemma exists_eq_sum_perm_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) : + ∃ w : Equiv.Perm n → R, (∀ σ, 0 ≤ w σ) ∧ ∑ σ, w σ = 1 ∧ ∑ σ, w σ • σ.permMatrix R = M := by + rcases isEmpty_or_nonempty n + case inl => exact ⟨fun _ => 1, by simp, by simp, Subsingleton.elim _ _⟩ + obtain ⟨w, hw1, hw3⟩ := doublyStochastic_sum_perm_aux M 1 (by simp) ⟨M, hM, by simp⟩ + refine ⟨w, hw1, ?_, hw3⟩ + inhabit n + have : ∑ j, ∑ σ : Equiv.Perm n, w σ • σ.permMatrix R default j = 1 := by + simp only [← smul_apply (m := n), ← Finset.sum_apply, hw3] + rw [sum_row_of_mem_doublyStochastic hM] + simpa [sum_comm (γ := n), Equiv.toPEquiv_apply] using this + +/-- +**Birkhoff's theorem** +The set of doubly stochastic matrices is the convex hull of the permutation matrices. Note +`exists_eq_sum_perm_of_mem_doublyStochastic` gives a convex weighting of each permutation matrix +directly. To show `doublyStochastic n` is convex, use `convex_doublyStochastic`. +-/ +theorem doublyStochastic_eq_convexHull_permMatrix : + doublyStochastic R n = convexHull R {σ.permMatrix R | σ : Equiv.Perm n} := by + refine (convexHull_min ?g1 convex_doublyStochastic).antisymm' fun M hM => ?g2 + case g1 => + rintro x ⟨h, rfl⟩ + exact permMatrix_mem_doublyStochastic + case g2 => + obtain ⟨w, hw1, hw2, hw3⟩ := exists_eq_sum_perm_of_mem_doublyStochastic hM + exact mem_convexHull_of_exists_fintype w (·.permMatrix R) hw1 hw2 (by simp) hw3 + +end LinearOrderedField diff --git a/Mathlib/Data/Matrix/DoublyStochastic.lean b/Mathlib/Data/Matrix/DoublyStochastic.lean new file mode 100644 index 0000000000000..8bc91f3996116 --- /dev/null +++ b/Mathlib/Data/Matrix/DoublyStochastic.lean @@ -0,0 +1,139 @@ +/- +Copyright (c) 2024 Bhavik Mehta. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Bhavik Mehta +-/ + +import Mathlib.Analysis.Convex.Basic +import Mathlib.LinearAlgebra.Matrix.Permutation + +/-! +# Doubly stochastic matrices + +## Main definitions + +* `doublyStochastic`: a square matrix is doubly stochastic if all entries are nonnegative, and left + or right multiplication by the vector of all 1s gives the vector of all 1s. Equivalently, all + row and column sums are equal to 1. + +## Main statements + +* `convex_doublyStochastic`: The set of doubly stochastic matrices is convex. +* `permMatrix_mem_doublyStochastic`: Any permutation matrix is doubly stochastic. + +## TODO + +Define the submonoids of row-stochastic and column-stochastic matrices. +Show that the submonoid of doubly stochastic matrices is the meet of them, or redefine it as such. + +## Tags + +Doubly stochastic, Birkhoff's theorem, Birkhoff-von Neumann theorem +-/ + +open Finset Function Matrix + +variable {R n : Type*} [Fintype n] [DecidableEq n] + +section OrderedSemiring +variable [OrderedSemiring R] {M : Matrix n n R} + +/-- +A square matrix is doubly stochastic iff all entries are nonnegative, and left or right +multiplication by the vector of all 1s gives the vector of all 1s. +-/ +def doublyStochastic (R n : Type*) [Fintype n] [DecidableEq n] [OrderedSemiring R] : + Submonoid (Matrix n n R) where + carrier := {M | (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1 ∧ 1 ᵥ* M = 1 } + mul_mem' {M N} hM hN := by + refine ⟨fun i j => sum_nonneg fun i _ => mul_nonneg (hM.1 _ _) (hN.1 _ _), ?_, ?_⟩ + next => rw [← mulVec_mulVec, hN.2.1, hM.2.1] + next => rw [← vecMul_vecMul, hM.2.2, hN.2.2] + one_mem' := by simp [zero_le_one_elem] + +lemma mem_doublyStochastic : + M ∈ doublyStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1 ∧ 1 ᵥ* M = 1 := + Iff.rfl + +lemma mem_doublyStochastic_iff_sum : + M ∈ doublyStochastic R n ↔ + (∀ i j, 0 ≤ M i j) ∧ (∀ i, ∑ j, M i j = 1) ∧ ∀ j, ∑ i, M i j = 1 := by + simp [funext_iff, doublyStochastic, mulVec, vecMul, dotProduct] + +/-- Every entry of a doubly stochastic matrix is nonnegative. -/ +lemma nonneg_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) {i j : n} : 0 ≤ M i j := + hM.1 _ _ + +/-- Each row sum of a doubly stochastic matrix is 1. -/ +lemma sum_row_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) (i : n) : ∑ j, M i j = 1 := + (mem_doublyStochastic_iff_sum.1 hM).2.1 _ + +/-- Each column sum of a doubly stochastic matrix is 1. -/ +lemma sum_col_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) (j : n) : ∑ i, M i j = 1 := + (mem_doublyStochastic_iff_sum.1 hM).2.2 _ + +/-- A doubly stochastic matrix multiplied with the all-ones column vector is 1. -/ +lemma mulVec_one_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) : M *ᵥ 1 = 1 := + (mem_doublyStochastic.1 hM).2.1 + +/-- The all-ones row vector multiplied with a doubly stochastic matrix is 1. -/ +lemma one_vecMul_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) : 1 ᵥ* M = 1 := + (mem_doublyStochastic.1 hM).2.2 + +/-- Every entry of a doubly stochastic matrix is less than or equal to 1. -/ +lemma le_one_of_mem_doublyStochastic (hM : M ∈ doublyStochastic R n) {i j : n} : + M i j ≤ 1 := by + rw [← sum_row_of_mem_doublyStochastic hM i] + exact single_le_sum (fun k _ => hM.1 _ k) (mem_univ j) + +/-- The set of doubly stochastic matrices is convex. -/ +lemma convex_doublyStochastic : Convex R (doublyStochastic R n : Set (Matrix n n R)) := by + intro x hx y hy a b ha hb h + simp only [SetLike.mem_coe, mem_doublyStochastic_iff_sum] at hx hy ⊢ + simp [add_nonneg, ha, hb, mul_nonneg, hx, hy, sum_add_distrib, ← mul_sum, h] + +/-- Any permutation matrix is doubly stochastic. -/ +lemma permMatrix_mem_doublyStochastic {σ : Equiv.Perm n} : + σ.permMatrix R ∈ doublyStochastic R n := by + rw [mem_doublyStochastic_iff_sum] + refine ⟨fun i j => ?g1, ?g2, ?g3⟩ + case g1 => aesop + case g2 => simp [Equiv.toPEquiv_apply] + case g3 => simp [Equiv.toPEquiv_apply, ← Equiv.eq_symm_apply] + +end OrderedSemiring + +section LinearOrderedSemifield + +variable [LinearOrderedSemifield R] {M : Matrix n n R} + +/-- +A matrix is `s` times a doubly stochastic matrix iff all entries are nonnegative, and all row and +column sums are equal to `s`. + +This lemma is useful for the proof of Birkhoff's theorem - in particular because it allows scaling +by nonnegative factors rather than positive ones only. +-/ +lemma exists_mem_doublyStochastic_eq_smul_iff {M : Matrix n n R} {s : R} (hs : 0 ≤ s) : + (∃ M' ∈ doublyStochastic R n, M = s • M') ↔ + (∀ i j, 0 ≤ M i j) ∧ (∀ i, ∑ j, M i j = s) ∧ (∀ j, ∑ i, M i j = s) := by + classical + constructor + case mp => + rintro ⟨M', hM', rfl⟩ + rw [mem_doublyStochastic_iff_sum] at hM' + simp only [smul_apply, smul_eq_mul, ← mul_sum] + exact ⟨fun i j => mul_nonneg hs (hM'.1 _ _), by simp [hM']⟩ + rcases eq_or_lt_of_le hs with rfl | hs + case inl => + simp only [zero_smul, exists_and_right, and_imp] + intro h₁ h₂ _ + refine ⟨⟨1, Submonoid.one_mem _⟩, ?_⟩ + ext i j + specialize h₂ i + rw [sum_eq_zero_iff_of_nonneg (by simp [h₁ i])] at h₂ + exact h₂ _ (by simp) + rintro ⟨hM₁, hM₂, hM₃⟩ + exact ⟨s⁻¹ • M, by simp [mem_doublyStochastic_iff_sum, ← mul_sum, hs.ne', inv_mul_cancel₀, *]⟩ + +end LinearOrderedSemifield