Skip to content

Commit

Permalink
Add CI workflow file, lean and shell script
Browse files Browse the repository at this point in the history
  • Loading branch information
adomani committed May 19, 2024
1 parent d874bdf commit 2eb3523
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/mathlib_stats.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: Post Mathlib report on Zulip
on:
schedule:
- cron: "0 0 * * Sun" # post at midnight on Sunday UTC

jobs:
mathlib-report:
runs-on: pr
steps:
runs-on: ubuntu-latest
steps:

- name: Checkout master branch
uses: adomani/get_mathlib4_with_cache@v1

- id: mathlib_stats
run: |
git checkout origin/adomani/weekly_change_report scripts/mathlib_stats.sh scripts/count_decls.lean
printf $'summary<<EOF\n%s\nEOF' "$(./scripts/mathlib_stats.sh)" >> "$GITHUB_OUTPUT"
- name: Post report on Zulip
uses: zulip/github-actions-zulip/send-message@v1
with:
api-key: ${{ secrets.ZULIP_API_KEY }}
email: '[email protected]'
organization-url: 'https://leanprover.zulipchat.com'
to: 'mathlib4'
type: 'stream'
topic: 'Mathlib weekly change report'
content: ${{ steps.mathlib_stats.outputs.summary }}
46 changes: 46 additions & 0 deletions .github/workflows/mathlib_stats_label.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This workflow is triggered by the label `test-ci`. It is meant to be useful to debug
# the PR, since I think that the cron job cannot run on a PR.
name: Add comment on PR and post to Zulip
on:
pull_request:
types:
- labeled
schedule:
- cron: "*/5 * * * *"

jobs:
add-comment:
if: github.event.label.name == 'test-ci'
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:

- name: Checkout master branch
uses: adomani/get_mathlib4_with_cache@v1

- id: mathlib_stats
run: |
git checkout origin/adomani/periodic_reports_dev_custom_action scripts/mathlib_stats.sh scripts/count_decls.lean
printf $'summary<<EOF\n%s\nEOF' "$(./scripts/mathlib_stats.sh)" >> "$GITHUB_OUTPUT"
printf '%s\n' "$(./scripts/mathlib_stats.sh)"
- name: Add comment
run: gh pr comment "$NUMBER" --body "$BODY"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_REPO: ${{ github.repository }}
NUMBER: ${{ github.event.pull_request.number }}
BODY: ${{ steps.mathlib_stats.outputs.summary }}

- name: Post report on Zulip
uses: zulip/github-actions-zulip/send-message@v1
with:
api-key: ${{ secrets.ZULIP_API_KEY }}
email: '[email protected]'
organization-url: 'https://leanprover.zulipchat.com'
to: 'mathlib4'
type: 'stream'
topic: 'Mathlib weekly change report'
content: ${{ steps.mathlib_stats.outputs.summary }}
86 changes: 86 additions & 0 deletions scripts/count_decls.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/-
Copyright (c) 2024 Kyle Miller. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kyle Miller, Damiano Testa
-/
import Mathlib

/-!
# `count_decls` -- a tally of declarations in `Mathlib`
This file is used by the `Periodic reports` script to generate a tally of declarations
in `Mathlib`, `Batteries` and core.
Source: https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Metamathematics.3A.20Theorems.20by.20Domain.20Areas/near/374090639
-/

namespace PeriodicReports
open Lean Elab Meta

/-- A structure to tally declaration types:
* `thms` are theorems;
* `preds` are predicates;
* `types` are types;
* `data` is everything else.
-/
structure Tally where
thms : HashSet Name := {}
data : HashSet Name := {}
preds : HashSet Name := {}
types : HashSet Name := {}

/-- `toString t` produces a string where each line is either
* the (human-readable) name of a field of `t : Tally`, or
* the name of a single declaration followed by `,`.
The script to extract comparison data relies on the format
* lines ending with `,` or not,
* single declaration per line.
-/
def toString (t : Tally) : String :=
let print (h : HashSet Name) : String :=
let tot := h.toList.map (·.toString)
String.intercalate ",\n" tot ++ ","
s!"\
Theorems
{print t.thms}
Data
{print t.data}
Predicates
{print t.preds}
Types
{print t.types}"

variable (mods : HashSet Nat) (env : Environment) in
/-- Extend a `Tally` by the ConstantInfo `c`. It is written to work with `Lean.SMap.foldM`. -/
def updateTally (s : Tally) (n : Name) (c : ConstantInfo) :
MetaM Tally := do
if c.isUnsafe || (← n.isBlackListed) then return s else
let typ := c.type
if (← isProp typ) then
return {s with thms := s.thms.insert n}
else
let exp ← forallTelescopeReducing typ fun _ e => return e
if exp.isType then
return {s with types := s.types.insert n}
else
if exp.isSort then
return {s with preds := s.preds.insert n}
else
return {s with data := s.data.insert n}

/-- extends a `Tally` all the `ConstantInfos` in the environment. -/
def mkTally (s : Tally) : MetaM Tally := do
let env ← getEnv
let consts := env.constants
consts.foldM updateTally s

/-- `count_decls` prints a tally of theorems, types, predicates and data in
`Mathlib`, `Batteries` and core. -/
elab "count_decls" : command => do
let (s, _) ← Command.liftCoreM do MetaM.run do (mkTally {})
IO.println (toString s)

--count_decls

end PeriodicReports
174 changes: 174 additions & 0 deletions scripts/mathlib_stats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env bash

: <<'DOC_MODULE'
This file contains code to compute certain "global" statistics of Mathlib.
The information is typically posted to Zulip in #mathlib4 "Mathlib weekly change report".
The main discussion for these statistics is [this thread](https://leanprover.zulipchat.com/#narrow/stream/113488-general).
Further suggestions of possible statistics are
* Top five popular tactics since last post
* Net change in number of lines of Mathlib
* Number of new declarations
* Number of invisible (autogenerated) declarations
* Number of docstrings
* Longest proof (expression)
* Longest proof (syntax)
* Most popular starting tactic
* Most popular finishing tactic
* New modules with docstrings
* Longest pole
* Slowest proofs
* Slowest files
* Number of instances
* Number of simp lemmas
* Number of classes
* Number of linters
* Number of environment extensions
DOC_MODULE

statsURL="https://leanprover-community.github.io/mathlib_stats.html"
mlURL="https://github.com/leanprover-community/mathlib4"

## results in 'hash YYYY-MM-DD'
hashAndDate="$(git log master --since='one week ago' --date=short --pretty="%H %ad" | tail -1)"

## just the commit hash
oldCommit="${hashAndDate/% */}"
oldCommitURL="[${oldCommit:0:10}](${mlURL}/commit/${oldCommit})"

currentCommit="$(git rev-parse HEAD)"
currentCommitURL="[${currentCommit:0:10}](${mlURL}/commit/${currentCommit})"
## just the date
date=${hashAndDate/#* /}

#####################
# Git-based reports #
#####################

## 'x files changed, y insertions(+), z deletions(-)'
gdiff="$(git diff --shortstat "${oldCommit}"..."${currentCommit}")"
gcompare="${mlURL}/compare/${oldCommit}...${currentCommit}"

printf -v today '%(%Y-%m-%d)T\n' -1

## insertions-deletions
#net=$(awk -v gd="${gdiff}" 'BEGIN{
# tot=0
# n=split(gd, gda, " ")
# for(i=2; i<=n; i++) {
# if(gda[i]+0 == gda[i]){ tot=gda[i]-tot }
# }
# print -tot }')

# produce output `(+X -Y ~Z)` for the files added (+), deleted (-), modified (~)
filesPlusMinus="$(git diff --name-status "${oldCommit}"..."${currentCommit}" |
awk '/^A/ { added++ }
/^M/ { modified++}
/^D/ {deleted++} END{
printf("(+%d -%d ~%d)\n", added, deleted, modified)
}')"

net=$(git diff --shortstat "${oldCommit}"..."${currentCommit}" |
awk -v filesPlusMinus="${filesPlusMinus}" 'BEGIN{ con=0 }{
for(i=1; i<=NF; i++) {
if($i+0 == $i){ con++; nums[con]=$i } ## nums= [files changed, insertions, deletions]
}
if(con != 3) { print "Expected 3 fields from `git diff`" }
printf("%s files changed %s, %s lines changed (+%s -%s)\n",
nums[1], filesPlusMinus, nums[2]-nums[3], nums[2], nums[3]) }')

######################
# Lean-based reports #
######################

# produces a string of the form
# ```
# Theorems
# <one_decl_name_per_row>,
# ...
# Predicates
# <one_decl_name_per_row>,
# ...
# ```
getCountDecls () {
sed 's=^--\(count_decls\)=\1=' scripts/count_decls.lean | lake env lean --stdin
}

# `tallyCountDecls file` extracts from `file`
# Theorems xx
# Predicates yy
# ...
tallyCountDecls () {
# `count` is the index of the various `kind`s -- the fields of `Tally`
awk 'BEGIN{ count=0 }
# lines that do not end in `,` represent kinds and we accumulate them in `kind`
# we also start a tally of them in `acc[count]`
/[^,]$/ { count++; kind[count]=$0; acc[count]=0; }
# lines that end in `,` represent declarations to be tallied
/,$/ { acc[count]++ } END{
for(t=1; t<=count; t++) { printf("%s %s\n", kind[t], acc[t]) }
}' "${1}"
}

# `getKind kind file` extracts all declaration names of kind `kind` from `file`
getKind () {
awk -v typ="${1}$" 'BEGIN{ found=0 }
/[^,]$/ { found=0 }
(found == 1) { print $0 }
($0 ~ typ) { found=1 }' "${2}" | sort
}

# the output of `count_decls`
newDeclsTots="$(getCountDecls)"

# the tally of the output of `count_decls`
newDecls="$(echo "${newDeclsTots}" | tallyCountDecls -)"
# Theorems 73590...
git checkout -q "${oldCommit}"
# 'detached HEAD' state

lake exe cache get > /dev/null
# stuff gets downloaded

# just in case some part of the cache is corrupted
lake build --quiet

# update the `count_decls` and `mathlib_stats` scripts to the latest version
git checkout -q origin/adomani/periodic_reports_dev_custom_action scripts/count_decls.lean scripts/mathlib_stats.sh

# the output of `count_decls`
oldDeclsTots="$(getCountDecls)"

# the tally of the output of `count_decls`
oldDecls="$(echo "${oldDeclsTots}" | tallyCountDecls -)"
# Theorems 73152...

# produce the `+X -Y` report for the declarations in each category
plusMinus="$(for typ in $(echo "$newDeclsTots" | grep "[^,]$" | tr '\n' ' ');
do
comm -123 --total <(
echo "${newDeclsTots}" | getKind "${typ}$" -) <(
echo "${oldDeclsTots}" | getKind "${typ}$" -)
done)"

# produces the table summary of the declarations split by kind
declSummary="$(paste -d' ' <(echo "${newDecls}") <(echo "${oldDecls}") <(echo "${plusMinus}") |
LC_ALL=en_US.UTF-8 awk 'BEGIN{ print "|Type|Total|%|\n|:-:|:-:|:-:|" }{
printf("| %s | %'"'"'d (+%'"'"'d -%'"'"'d) | %4.2f%% |\n", $1, $2, $5, $6, ($2-$4)*100/$2)
}'
)"

## final report
printf -- '---\n\n## Weekly stats ([%s...%(%Y-%m-%d)T](%s))\n\n' "${date}" -1 "${gcompare}"

printf -- '%s\n\n' "${declSummary}"

printf -- '%s\n\n' "${net}"

printf -- 'Reference commits: old %s, new %s.\n\n' "${oldCommitURL}" "${currentCommitURL}"

printf -- 'Take also a look at the [`Mathlib` stats page](%s).\n' "${statsURL}"

0 comments on commit 2eb3523

Please sign in to comment.