Skip to content

Commit

Permalink
refactor: tweak and move functionality for getting all modules out of…
Browse files Browse the repository at this point in the history
… `scripts` (#13339)

With #13245, two scripts in `scripts` would like to use very similar information:
move it to a central place instead (a file in `Mathlib/Util`). Suggestions for better locations are welcome!

Also, split the `getAll` function in two pieces, getting the file names and transforming this into module names.
While #11853 only requires the latter, #13245 needs the former (which is computed anyway).
Only accessible the converted module names and converting them back to files would be absurd.

- [x] depends on: #11853 which adds these utility functions
  • Loading branch information
grunweg committed Jun 1, 2024
1 parent de56413 commit bffb043
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 51 deletions.
1 change: 1 addition & 0 deletions Mathlib.lean
Original file line number Diff line number Diff line change
Expand Up @@ -4290,6 +4290,7 @@ import Mathlib.Util.CountHeartbeats
import Mathlib.Util.Delaborators
import Mathlib.Util.DischargerAsTactic
import Mathlib.Util.Export
import Mathlib.Util.GetAllModules
import Mathlib.Util.IncludeStr
import Mathlib.Util.LongNames
import Mathlib.Util.MemoFix
Expand Down
72 changes: 72 additions & 0 deletions Mathlib/Util/GetAllModules.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/-
Copyright (c) 2024 Damiano Testa. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Mario Carneiro, Kim Morrison, Damiano Testa
-/

import Lean.Util.Path
import Lake.CLI.Main


/-!
# Utility functions for finding all `.lean` files or modules in a project.
TODO:
`getLeanLibs` contains a hard-coded choice of which dependencies should be built and which ones
should not. Could this be made more structural and robust, possibly with extra `Lake` support?
-/

open Lean System.FilePath

/-- `getAllFiles git ml` takes all `.lean` files in the directory `ml`
(recursing into sub-directories) and returns the `Array` of `String`s
```
#[file₁, ..., fileₙ]
```
of all their file names.
The input `git` is a `Bool`ean flag:
* `true` means that the command uses `git ls-files` to find the relevant files;
* `false` means that the command recursively scans all dirs searching for `.lean` files.
-/
def getAllFiles (git : Bool) (ml : String) : IO (Array System.FilePath) := do
let ml.lean := addExtension ⟨ml⟩ "lean" -- for example, `Mathlib.lean`
let allModules : Array System.FilePath ← (do
if git then
let mlDir := ml.push pathSeparator -- for example, `Mathlib/`
let allLean ← IO.Process.run { cmd := "git", args := #["ls-files", mlDir ++ "*.lean"] }
return (((allLean.dropRightWhile (· == '\n')).splitOn "\n").map (⟨·⟩)).toArray
else do
let all ← walkDir ml
return all.filter (·.extension == some "lean"))
let files := (allModules.erase ml.lean).qsort (·.toString < ·.toString)
let existingFiles ← files.mapM fun f => do
-- this check is helpful in case the `git` option is on and a local file has been removed
if ← pathExists f then
return f
else return ""
return existingFiles.filter (· != "")

/-- Like `getAllFiles`, but return an array of *module* names instead,
i.e. names of the form `"Mathlib.Algebra.Algebra.Basic"`. -/
def getAllModules (git : Bool) (ml : String) : IO (Array String) := do
let files ← getAllFiles git ml
return ← files.mapM fun f => do
return (← moduleNameOfFileName f none).toString

open Lake in
/-- `getLeanLibs` returns the names (as an `Array` of `String`s) of all the libraries
on which the current project depends.
If the current project is `mathlib`, then it excludes the libraries `Cache` and `LongestPole` and
it includes `Mathlib/Tactic`. -/
def getLeanLibs : IO (Array String) := do
let (elanInstall?, leanInstall?, lakeInstall?) ← findInstall?
let config ← MonadError.runEIO <| mkLoadConfig { elanInstall?, leanInstall?, lakeInstall? }
let ws ← MonadError.runEIO (MainM.runLogIO (loadWorkspace config)).toEIO
let package := ws.root
let libs := (package.leanLibs.map (·.name)).map (·.toString)
return if package.name == `mathlib then
libs.erase "Cache" |>.erase "LongestPole" |>.push ("Mathlib".push pathSeparator ++ "Tactic")
else
libs
2 changes: 2 additions & 0 deletions lakefile.lean
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ require importGraph from git "https://github.com/leanprover-community/import-gra
@[default_target]
lean_lib Mathlib

-- NB. When adding further libraries, check if they should be excluded from `getLeanLibs` in
-- `Mathlib/Util/GetAllModules.lean`.
lean_lib Cache
lean_lib LongestPole
lean_lib Archive
Expand Down
53 changes: 2 additions & 51 deletions scripts/mk_all.lean
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,17 @@ Released under Apache 2.0 license as described in the file LICENSE.
Authors: Yaël Dillies, Damiano Testa
-/
import Cli.Basic
import Lean.Util.Path
import Lake.CLI.Main
import Mathlib.Util.GetAllModules

/-!
# Script to create a file importing all files from a folder
This file declares a command to gather all Lean files from a folder into a single Lean file.
TODO:
`getLeanLibs` contains a hard-coded choice of which dependencies should be built and which ones
should not. Could this be made more structural and robust, possibly with extra `Lake` support?
-/

open Lean System.FilePath

/-- `getAll git ml` takes all `.lean` files in the dir `ml` (recursing into sub-dirs) and
returns the `Array` of `String`s
```
#[file₁, ..., fileₙ]
```
where each `fileᵢ` is of the form `"Mathlib.Algebra.Algebra.Basic"`.
The input `git` is a `Bool`ean flag:
* `true` means that the command uses `git ls-files` to find the relevant files;
* `false` means that the command recursively scans all dirs searching for `.lean` files.
-/
def getAll (git : Bool) (ml : String) : IO (Array String) := do
let ml.lean := addExtension ⟨ml⟩ "lean" -- for example, `Mathlib.lean`
let allModules : Array System.FilePath ← (do
if git then
let mlDir := ml.push pathSeparator -- for example, `Mathlib/`
let allLean ← IO.Process.run { cmd := "git", args := #["ls-files", mlDir ++ "*.lean"] }
return (((allLean.dropRightWhile (· == '\n')).splitOn "\n").map (⟨·⟩)).toArray
else do
let all ← walkDir ml
return all.filter (·.extension == some "lean"))
let files := (allModules.erase ml.lean).qsort (·.toString < ·.toString)
let withImport ← files.mapM fun f => do
-- this check is helpful in case the `git` option is on and a local file has been removed
if ← pathExists f then
return (← moduleNameOfFileName f none).toString
else return ""
return withImport.filter (· != "")

open Lake in
/-- `getLeanLibs` returns the names (as an `Array` of `String`s) of all the libraries
on which the current project depends.
If the current project is `mathlib`, then it excludes the libraries `Cache` and `LongestPole` and
it includes `Mathlib/Tactic`. -/
def getLeanLibs : IO (Array String) := do
let (elanInstall?, leanInstall?, lakeInstall?) ← findInstall?
let config ← MonadError.runEIO <| mkLoadConfig { elanInstall?, leanInstall?, lakeInstall? }
let ws ← MonadError.runEIO (MainM.runLogIO (loadWorkspace config)).toEIO
let package := ws.root
let libs := (package.leanLibs.map (·.name)).map (·.toString)
return if package.name == `mathlib then
libs.erase "Cache" |>.erase "LongestPole" |>.push ("Mathlib".push pathSeparator ++ "Tactic")
else
libs

open IO.FS IO.Process Name Cli in
/-- Implementation of the `mk_all` command line program.
The exit code is the number of files that the command updates/creates. -/
Expand All @@ -80,7 +31,7 @@ def mkAllCLI (args : Parsed) : IO UInt32 := do
let mut updates := 0
for d in libs.reverse do -- reverse to create `Mathlib/Tactic.lean` before `Mathlib.lean`
let fileName := addExtension d "lean"
let allFiles ← getAll git d
let allFiles ← getAllModules git d
let fileContent := ("\n".intercalate (allFiles.map ("import " ++ ·)).toList).push '\n'
if !(← pathExists fileName) then
IO.println s!"Creating '{fileName}'"
Expand Down

0 comments on commit bffb043

Please sign in to comment.