Fix the merge; correct the naming convention in some instances; style…

… tweaks.
leanprover-community · Jun 21, 2024 · 9b16f4e · 9b16f4e
1 parent f12a91d
commit 9b16f4e
Show file tree

Hide file tree

Showing 2 changed files with 85 additions and 43 deletions.
diff --git a/Mathlib/Tactic.lean b/Mathlib/Tactic.lean
@@ -119,7 +119,6 @@ import Mathlib.Tactic.Linter.HashCommandLinter
 import Mathlib.Tactic.Linter.Lint
 import Mathlib.Tactic.Linter.Style
 import Mathlib.Tactic.Linter.TextBased
-import Mathlib.Tactic.Linter.TextBased
 import Mathlib.Tactic.Measurability
 import Mathlib.Tactic.Measurability.Init
 import Mathlib.Tactic.MkIffOfInductiveProp

diff --git a/Mathlib/Tactic/Linter/TextBased.lean b/Mathlib/Tactic/Linter/TextBased.lean
@@ -4,7 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
 Authors: Michael Rothgang
 -/
 
-import Batteries.Data.String.Basic
+import Batteries.Data.String.Matcher
 import Mathlib.Init.Data.Nat.Notation
 
 /-!
@@ -14,12 +14,12 @@ This file defines various mathlib linters which are based on reading the source
 In practice, only style linters will have this form.
 All of these have been rewritten from the `lint-style.py` script.
 
-For now, this only contains the linter for the copyright and author headers:
+For now, this only contains the linters for the copyright and author headers and large files:
 further linters will be ported in subsequent PRs.
 
 -/
 
-open Lean Elab
+open Lean Elab System
 
 /-- Possible errors that text-based linters can report. -/
 -- We collect these in one inductive type to centralise error reporting.
@@ -29,6 +29,10 @@ inductive StyleError where
   | copyright (context : Option String)
   /-- Malformed authors line in the copyright header -/
   | authors
+  /-- The current file was too large: this error contains the current number of lines
+  as well as a size limit (slightly larger). On future runs, this linter will allow this file
+  to grow up to this limit. -/
+  | fileTooLong (number_lines : ℕ) (new_size_limit : ℕ) : StyleError
   deriving BEq
 
 /-- Create the underlying error message for a given `StyleError`. -/
@@ -37,13 +41,16 @@ def StyleError.errorMessage (err : StyleError) : String := match err with
   | StyleError.copyright none => s!"Malformed or missing copyright header"
   | StyleError.authors =>
     "Authors line should look like: 'Authors: Jean Dupont, Иван Иванович Иванов'"
+  | StyleError.fileTooLong current_size size_limit =>
+      s!"{size_limit} file contains {current_size} lines, try to split it up"
 
-/-- The error code for a given style error. Keep this in sync with `parse?_style_error` below! -/
+/-- The error code for a given style error. Keep this in sync with `parse?_errorContext` below! -/
 -- FUTURE: we're matching the old codes in `lint-style.py` for compatibility;
 -- in principle, we could also print something more readable.
 def StyleError.errorCode (err : StyleError) : String := match err with
   | StyleError.copyright _ => "ERR_COP"
   | StyleError.authors => "ERR_AUT"
+  | StyleError.fileTooLong _ _ => "ERR_NUM_LIN"
 
 /-- Context for a style error: the actual error, the line number in the file we're reading
 and the path to the file. -/
@@ -53,21 +60,20 @@ structure ErrorContext where
   /-- The line number of the error -/
   lineNumber : ℕ
   /-- The path to the file which was linted -/
-  path : System.FilePath
+  path : FilePath
 
 /-- The parts of a `StyleError` which are considered when matching against the existing
   style exceptions: for example, we ignore the particular line length of a "line too long" error. -/
 def StyleError.normalise (err : StyleError) : StyleError := match err with
+  -- NB: keep this in sync with `parse?_errorContext` below.
+  | StyleError.fileTooLong _ _ => StyleError.fileTooLong 0 0
   -- We do *not* care about the *kind* of wrong copyright.
-  -- NB: keep this in sync with `parse?_style_error` below.
   | StyleError.copyright _ => StyleError.copyright ""
   | _ => err
 
 /-- Careful: we do not want to compare `ErrorContexts` exactly; we ignore some details. -/
 instance : BEq ErrorContext where
   beq ctx ctx' :=
-      -- XXX: `lint-style.py` was calling `resolve()` on the path before before comparing them
-      -- should we also do so?
       ctx.path == ctx'.path
       -- We completely ignore line numbers of errors. Not sure if this is best.
       -- We normalise errors before comparing them.
@@ -84,31 +90,42 @@ def outputMessage (errctx : ErrorContext) : String :=
 
 /-- Try parsing an `ErrorContext` from a string: return `some` if successful, `none` otherwise. -/
 def parse?_errorContext (line : String) : Option ErrorContext := Id.run do
-  let parts := line.split (fun c ↦ c == ' ')
+  let parts := line.split (· == ' ')
   match parts with
-    | filename :: ":" :: "line" :: _line_number :: ":" :: error_code :: ":" :: _error_message =>
-      -- Turn the filename into a path. XXX: is there a nicer way to do this?
-      -- Invariant: `style-exceptions.txt` always contains Unix paths
-      -- (because, for example, in practice it is updated by CI, which runs on unix).
-      -- Hence, splitting and joining on "/" is actually somewhat safe.
-      let path : System.FilePath := System.mkFilePath (filename.split (fun c ↦ c == '/'))
+    | filename :: ":" :: "line" :: _line_number :: ":" :: error_code :: ":" :: error_message =>
+      -- Turn the filename into a path. In general, this is ambiguous if we don't know if we're
+      -- dealing with e.g. Windows or POSIX paths. In our setting, this is fine, since no path
+      -- component contains any path separator.
+      let path := mkFilePath (filename.split (FilePath.pathSeparators.contains ·))
       -- Parse the error kind from the error code, ugh.
-      -- NB: keep this in sync with `StyleError.error_code` above!
+      -- NB: keep this in sync with `StyleError.errorCode` above!
       let err : Option StyleError := match error_code with
         -- Use default values for parameters which are normalised.
         -- NB: keep this in sync with `normalise` above!
         | "ERR_COP" => some (StyleError.copyright "")
         | "ERR_AUT" => some (StyleError.authors)
+        | "ERR_NUM_LIN" =>
+          -- Parse the error message in the script. `none` indicates invalid input.
+          match (error_message.get? 0, error_message.get? 3) with
+          | (some limit, some current) =>
+            match (String.toNat? limit, String.toNat? current) with
+            | (some size_limit, some current_size) =>
+              some (StyleError.fileTooLong current_size size_limit)
+            | _ => none
+          | _ => none
         | _ => none
       -- Omit the line number, as we don't use it anyway.
       err.map fun e ↦ (ErrorContext.mk e 0 path)
-    -- We ignore any lines which don't match the particular format.
+    -- It would be nice to print an error on any line which doesn't match the above format,
+    -- but is awkward to do so (this `def` is not in any IO monad). Hopefully, this is not necessary
+    -- anyway as the style exceptions file is mostly automatically generated.
     | _ => none
 
 /-- Parse all style exceptions for a line of input.
 Return an array of all exceptions which could be parsed: invalid input is ignored. -/
 def parseStyleExceptions (lines : Array String) : Array ErrorContext := Id.run do
-  Array.filterMap (fun line ↦ parse?_errorContext line) lines
+  -- We treat all lines starting with "--" as a comment and ignore them.
+  Array.filterMap (parse?_errorContext ·) (lines.filter (fun line ↦ !line.startsWith "--"))
 
 /-- Print information about all errors encountered to standard output. -/
 def formatErrors (errors : Array ErrorContext) : IO Unit := do
@@ -141,8 +158,8 @@ def copyrightHeaderLinter : TextbasedLinter := fun lines ↦ Id.run do
   -- The header should start and end with blank comments.
   let _ := match (start.get? 0, start.get? 4) with
   | (some "/-", some "-/") => none
-  | (some "/-", _) => return Array.mkArray1 (StyleError.copyright none, 4)
-  | _ => return Array.mkArray1 (StyleError.copyright none, 0)
+  | (some "/-", _) => return #[(StyleError.copyright none, 4)]
+  | _ => return #[(StyleError.copyright none, 0)]
 
   -- If this is given, we go over the individual lines one by one,
   -- and provide some context on what is mis-formatted (if anything).
@@ -175,53 +192,79 @@ def isImportsOnlyFile (lines : Array String) : Bool :=
   -- this is in fact not necessary. (It is needed for `Tactic/Linter.lean`, though.)
   lines.all (fun line ↦ line.startsWith "import " || line == "" || line.startsWith "-- ")
 
+/-- Error if a collection of lines is too large. "Too large" means more than 1500 lines
+**and** longer than an optional previous limit.
+If the file is too large, return a matching `StyleError`, which includes a new size limit
+(which is somewhat larger than the current size). -/
+def checkFileLength (lines : Array String) (existing_limit : Option ℕ) : Option StyleError :=
+  Id.run do
+  if lines.size > 1500 then
+    let is_larger : Bool := match existing_limit with
+    | some mark => lines.size > mark
+    | none => true
+    if is_larger then
+      -- We add about 200 lines of slack to the current file size: small PRs will be unaffected,
+      -- but sufficiently large PRs will get nudged towards splitting up this file.
+      return some (StyleError.fileTooLong lines.size ((Nat.div lines.size 100) * 100 + 200))
+  none
+
 end
 
 /-- All text-based linters registered in this file. -/
-def allLinters : Array TextbasedLinter := Array.mk
-  [copyrightHeaderLinter]
+def allLinters : Array TextbasedLinter := #[copyrightHeaderLinter]
 
 /-- Read a file, apply all text-based linters and print the formatted errors.
-`exceptions` are any previous style exceptions.
+`sizeLimit` is any pre-existing limit on this file's size.
+`exceptions` are any other style exceptions.
 Return `true` if there were new errors (and `false` otherwise). -/
-def lintFile (path : System.FilePath) (exceptions : Array ErrorContext) : IO Bool := do
+def lintFile (path : FilePath) (sizeLimit : Option ℕ)
+  (exceptions : Array ErrorContext) : IO Bool := do
   let lines ← IO.FS.lines path
   -- We don't need to run any checks on imports-only files.
   -- NB. The Python script used to still run a few linters; this is in fact not necessary.
   if isImportsOnlyFile lines then
     return false
-  let all_output := (Array.map (fun lint ↦
+  let mut errors := #[]
+  if let some (StyleError.fileTooLong n limit) := checkFileLength lines sizeLimit then
+    errors := #[ErrorContext.mk (StyleError.fileTooLong n limit) 1 path]
+  let allOutput := (Array.map (fun lint ↦
     (Array.map (fun (e, n) ↦ ErrorContext.mk e n path)) (lint lines))) allLinters
   -- This this list is not sorted: for github, this is fine.
-  let errors := (Array.flatten all_output).filter (fun e ↦ !exceptions.contains e)
+  errors := allOutput.flatten.filter (fun e ↦ !exceptions.contains e)
   formatErrors errors
   return errors.size > 0
 
 /-- Lint all files referenced in a given import-only file.
 Return the number of files which had new style errors. -/
-def lintAllFiles (path : System.FilePath) : IO UInt32 := do
-  -- Read all module names in Mathlib from the file at `path`.
+def lintAllFiles (path : FilePath) : IO UInt32 := do
+  -- Read all module names from the file at `path`.
   let allModules ← IO.FS.lines path
   -- Read the style exceptions file.
   -- We also have a `nolints` file with manual exceptions for the linter.
-  let exceptions_file ← IO.FS.lines (System.mkFilePath ["scripts/style-exceptions.txt"])
-  let mut style_exceptions := parseStyleExceptions exceptions_file
-  let nolints ← IO.FS.lines (System.mkFilePath ["scripts/nolints-style.txt"])
-  style_exceptions := style_exceptions.append (parseStyleExceptions nolints)
-  let mut number_error_files := 0
+  let exceptions ← IO.FS.lines (mkFilePath ["scripts", "style-exceptions.txt"])
+  let mut styleExceptions := parseStyleExceptions exceptions
+  let nolints ← IO.FS.lines (mkFilePath ["scripts", "nolints-style.txt"])
+  styleExceptions := styleExceptions.append (parseStyleExceptions nolints)
+
+  let mut numberErrorFiles := 0
   for module in allModules do
     let module := module.stripPrefix "import "
     -- Convert the module name to a file name, then lint that file.
-    let path := (System.mkFilePath (module.split fun c ↦ (c == '.'))).addExtension "lean"
-    let e ← lintFile path style_exceptions
-    if e then
-      number_error_files := number_error_files + 1
-  return number_error_files
+    let path := (mkFilePath (module.split (· == '.'))).addExtension "lean"
+    -- Find all size limits for this given file.
+    -- If several size limits are given (unlikely in practice), we use the first one.
+    let sizeLimits := (styleExceptions.filter (·.path == path)).filterMap (fun errctx ↦
+      match errctx.error with
+      | StyleError.fileTooLong _ limit => some limit
+      | _ => none)
+    if ← lintFile path (sizeLimits.get? 0) styleExceptions then
+      numberErrorFiles := numberErrorFiles + 1
+  return numberErrorFiles
 
 /-- The entry point to the `lake exe lint_style` command. -/
 def main : IO UInt32 := do
-  let mut number_error_files := 0
+  let mut numberErrorFiles := 0
   for s in ["Archive.lean", "Counterexamples.lean", "Mathlib.lean"] do
-    let n ← lintAllFiles (System.mkFilePath [s])
-    number_error_files := number_error_files + n
-  return number_error_files
+    let n ← lintAllFiles (mkFilePath [s])
+    numberErrorFiles := numberErrorFiles + n
+  return numberErrorFiles