Commit

Release v2.0.0 (#545)
* Support for logging with Aim (#534)

* Update template to Lightning 2.0 (#548)

* Update pre-commit hooks (#549)

* Refactor utils (#541)

* Add option for pytorch 2.0 model compilation (#550)

* Update `README.md` (#551)

---------

Co-authored-by: Mattie Tesfaldet <[email protected]>
Co-authored-by: Johnny <[email protected]>
3 people authored Mar 18, 2023
1 parent adc6afe commit df6a17f
Showing 30 changed files with 287 additions and 244 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -149,3 +149,6 @@ configs/local/default.yaml
/data/
/logs/
.env

# Aim logging
.aim
24 changes: 12 additions & 12 deletions .pre-commit-config.yaml
@@ -3,7 +3,7 @@ default_language_version:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.4.0
hooks:
# list of supported hooks: https://pre-commit.com/hooks.html
- id: trailing-whitespace
@@ -19,7 +19,7 @@ repos:

# python code formatting
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 23.1.0
hooks:
- id: black
args: [--line-length, "99"]
@@ -33,21 +33,21 @@ repos:

# python upgrading syntax to newer version
- repo: https://github.com/asottile/pyupgrade
rev: v2.32.1
rev: v3.3.1
hooks:
- id: pyupgrade
args: [--py38-plus]

# python docstring formatting
- repo: https://github.com/myint/docformatter
rev: v1.4
rev: v1.5.1
hooks:
- id: docformatter
args: [--in-place, --wrap-summaries=99, --wrap-descriptions=99]

# python check (PEP8), programming errors and code complexity
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
rev: 6.0.0
hooks:
- id: flake8
args:
@@ -60,28 +60,28 @@ repos:

# python security linter
- repo: https://github.com/PyCQA/bandit
rev: "1.7.1"
rev: "1.7.5"
hooks:
- id: bandit
args: ["-s", "B101"]

# yaml formatting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1
rev: v3.0.0-alpha.6
hooks:
- id: prettier
types: [yaml]
exclude: "environment.yaml"

# shell scripts linter
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.8.0.4
rev: v0.9.0.2
hooks:
- id: shellcheck

# md formatting
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.14
rev: 0.7.16
hooks:
- id: mdformat
args: ["--number"]
@@ -94,7 +94,7 @@ repos:

# word spelling linter
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
rev: v2.2.4
hooks:
- id: codespell
args:
@@ -103,13 +103,13 @@ repos:

# jupyter notebook cell output clearing
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
rev: 0.6.1
hooks:
- id: nbstripout

# jupyter notebook linting
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.4.0
rev: 1.6.3
hooks:
- id: nbqa-black
args: ["--line-length=99"]
49 changes: 35 additions & 14 deletions README.md
@@ -28,8 +28,8 @@ _Suggestions are always welcome!_

**Why you might want to use it:**

✅ Speed <br>
Rapidly iterate over models, datasets, tasks and experiments on different accelerators like multi-GPUs or TPUs.
✅ Save on boilerplate <br>
Easily add new models, datasets, tasks, experiments, and train on different accelerators, like multi-GPU, TPU or SLURM clusters.

✅ Education <br>
Thoroughly commented. You can use this repo as a learning resource.
@@ -46,7 +46,10 @@ Lightning and Hydra are still evolving and integrate many libraries, which means
The template is not really designed for building data pipelines that depend on each other; it's more efficient to use it for model prototyping on ready-to-use data.

❌ Overfitted to simple use case <br>
The configuration setup is built with simple lightning training in mind. You might need to put in some effort to adjust it for different use cases, e.g. lightning lite.
The configuration setup is built with simple lightning training in mind. You might need to put in some effort to adjust it for different use cases, e.g. lightning fabric.

❌ Might not support your workflow <br>
For example, you can't resume a hydra-based multirun or hyperparameter search.

> **Note**: _Keep in mind this is an unofficial community project._
@@ -319,9 +322,6 @@ python train.py debug=overfit
# raise exception if there are any numerical anomalies in tensors, like NaN or +/-inf
python train.py +trainer.detect_anomaly=true

# log second gradient norm of the model
python train.py +trainer.track_grad_norm=2

# use only 20% of the data
python train.py +trainer.limit_train_batches=0.2 \
+trainer.limit_val_batches=0.2 +trainer.limit_test_batches=0.2
@@ -435,6 +435,12 @@ pre-commit run -a

> **Note**: Apply pre-commit hooks to do things like auto-formatting code and configs, performing code analysis or removing output from jupyter notebooks. See [# Best Practices](#best-practices) for more.
Update pre-commit hook versions in `.pre-commit-config.yaml` with:

```bash
pre-commit autoupdate
```

</details>

<details>
@@ -818,7 +824,7 @@ You can use different optimization frameworks integrated with Hydra, like [Optuna]

The `optimization_results.yaml` will be available under the `logs/task_name/multirun` folder.

This approach doesn't support advanced techniques like pruning - for more sophisticated search, you should probably write a dedicated optimization task (without the multirun feature).
This approach doesn't support resuming an interrupted search or advanced techniques like pruning - for more sophisticated search and workflows, you should probably write a dedicated optimization task (without the multirun feature).
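For such workflows, a standalone script driving Optuna directly is one option; a rough sketch under that assumption (the `train_with_lr` helper is hypothetical and would wrap the template's training entrypoint):

```python
import optuna


def objective(trial: optuna.Trial) -> float:
    # sample a hyperparameter for this trial
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    val_acc = train_with_lr(lr)  # hypothetical helper wrapping the template's train()
    return val_acc


# persistent storage + study name make the search resumable after interruption
study = optuna.create_study(
    direction="maximize",
    storage="sqlite:///optuna.db",
    study_name="mnist_search",
    load_if_exists=True,
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=25)
```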

<br>

@@ -889,10 +895,13 @@ def on_train_start(self):
## Best Practices

<details>
<summary><b>Use Miniconda for GPU environments</b></summary>
<summary><b>Use Miniconda</b></summary>

It's usually unnecessary to install the full anaconda environment; miniconda should be enough (it weighs around 80 MB).

A big advantage of conda is that it allows installing packages without requiring certain compilers or libraries to be available on the system (since it installs precompiled binaries), so it often makes it easier to install some dependencies, e.g. cudatoolkit for GPU support.

It's usually unnecessary to install the full anaconda environment; miniconda should be enough.
It often makes it easier to install some dependencies, like cudatoolkit for GPU support. It also allows you to access your environments globally.
It also allows you to access your environments globally, which might be more convenient than creating a new local environment for every project.

Example installation:

@@ -901,6 +910,12 @@ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
```

Update conda:

```bash
conda update -n base -c defaults conda
```

Create new conda environment:

```bash
@@ -934,6 +949,12 @@ To reformat all files in the project use command:
pre-commit run -a
```

To update hook versions in [.pre-commit-config.yaml](.pre-commit-config.yaml) use:

```bash
pre-commit autoupdate
```

</details>

<details>
@@ -1035,7 +1056,7 @@ The style guide is available [here](https://pytorch-lightning.readthedocs.io/en/
def training_step_end():
...
def training_epoch_end():
def on_train_epoch_end():
...
def validation_step():
@@ -1044,7 +1065,7 @@ The style guide is available [here](https://pytorch-lightning.readthedocs.io/en/
def validation_step_end():
...
def validation_epoch_end():
def on_validation_epoch_end():
...
def test_step():
@@ -1053,7 +1074,7 @@ The style guide is available [here](https://pytorch-lightning.readthedocs.io/en/
def test_step_end():
...
def test_epoch_end():
def on_test_epoch_end():
...
def configure_optimizers():
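These renames reflect the Lightning 2.0 hook API; a minimal skeleton using the new names (a sketch for illustration, not the template's actual module):

```python
from lightning.pytorch import LightningModule


class LitModel(LightningModule):
    def training_step(self, batch, batch_idx):
        ...

    def on_train_epoch_end(self):
        # replaces `training_epoch_end`; no longer receives per-step outputs
        ...

    def on_validation_epoch_end(self):
        # replaces `validation_epoch_end`
        ...

    def on_test_epoch_end(self):
        # replaces `test_epoch_end`
        ...
```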
@@ -1245,7 +1266,7 @@ git clone https://github.com/YourGithubName/your-repo-name
cd your-repo-name

# create conda environment and install dependencies
conda env create -f environment.yaml
conda env create -f environment.yaml -n myenv

# activate conda environment
conda activate myenv
4 changes: 2 additions & 2 deletions configs/callbacks/early_stopping.yaml
@@ -1,9 +1,9 @@
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.EarlyStopping.html
# https://pytorch-lightning.readthedocs.io/en/latest/api/lightning.callbacks.EarlyStopping.html

# Monitor a metric and stop training when it stops improving.
# Look at the above link for more detailed information.
early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
_target_: lightning.pytorch.callbacks.EarlyStopping
monitor: ??? # quantity to be monitored, must be specified !!!
min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
patience: 3 # number of checks with no improvement after which training will be stopped
4 changes: 2 additions & 2 deletions configs/callbacks/model_checkpoint.yaml
@@ -1,9 +1,9 @@
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.ModelCheckpoint.html
# https://pytorch-lightning.readthedocs.io/en/latest/api/lightning.callbacks.ModelCheckpoint.html

# Save the model periodically by monitoring a quantity.
# Look at the above link for more detailed information.
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
_target_: lightning.pytorch.callbacks.ModelCheckpoint
dirpath: null # directory to save the model file
filename: null # checkpoint filename
monitor: null # name of the logged metric which determines when model is improving
4 changes: 2 additions & 2 deletions configs/callbacks/model_summary.yaml
@@ -1,7 +1,7 @@
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichModelSummary.html
# https://pytorch-lightning.readthedocs.io/en/latest/api/lightning.callbacks.RichModelSummary.html

# Generates a summary of all layers in a LightningModule with rich text formatting.
# Look at the above link for more detailed information.
model_summary:
_target_: pytorch_lightning.callbacks.RichModelSummary
_target_: lightning.pytorch.callbacks.RichModelSummary
max_depth: 1 # the maximum depth of layer nesting that the summary will include
4 changes: 2 additions & 2 deletions configs/callbacks/rich_progress_bar.yaml
@@ -1,6 +1,6 @@
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.RichProgressBar.html
# https://pytorch-lightning.readthedocs.io/en/latest/api/lightning.callbacks.RichProgressBar.html

# Create a progress bar with rich text formatting.
# Look at the above link for more detailed information.
rich_progress_bar:
_target_: pytorch_lightning.callbacks.RichProgressBar
_target_: lightning.pytorch.callbacks.RichProgressBar
2 changes: 2 additions & 0 deletions configs/experiment/example.yaml
@@ -36,3 +36,5 @@ logger:
wandb:
tags: ${tags}
group: "mnist"
aim:
experiment: "mnist"
28 changes: 28 additions & 0 deletions configs/logger/aim.yaml
@@ -0,0 +1,28 @@
# https://aimstack.io/

# example usage in lightning module:
# https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py

# open the Aim UI with the following command (run in the folder containing the `.aim` folder):
# `aim up`

aim:
_target_: aim.pytorch_lightning.AimLogger
repo: ${paths.root_dir} # .aim folder will be created here
# repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#

# aim allows grouping runs under an experiment name
experiment: null # any string, set to "default" if not specified

train_metric_prefix: "train/"
val_metric_prefix: "val/"
test_metric_prefix: "test/"

# sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
system_tracking_interval: 10 # set to null to disable system metrics tracking

# enable/disable logging of system params such as installed packages, git info, env vars, etc.
log_system_params: true

# enable/disable tracking console logs (default value is true)
capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
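Hydra instantiates this logger from the `_target_` above; an equivalent manual setup looks roughly like this (a sketch, with literal values standing in for the interpolated config entries):

```python
from aim.pytorch_lightning import AimLogger
from lightning.pytorch import Trainer

# values mirror the config defaults; "." stands in for ${paths.root_dir}
logger = AimLogger(
    repo=".",  # the .aim folder will be created here
    experiment="mnist",  # groups runs, as in configs/experiment/example.yaml
    train_metric_prefix="train/",
    val_metric_prefix="val/",
    test_metric_prefix="test/",
)
trainer = Trainer(logger=logger)
```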
2 changes: 1 addition & 1 deletion configs/logger/comet.yaml
@@ -1,7 +1,7 @@
# https://www.comet.ml

comet:
_target_: pytorch_lightning.loggers.comet.CometLogger
_target_: lightning.pytorch.loggers.comet.CometLogger
api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
save_dir: "${paths.output_dir}"
project_name: "lightning-hydra-template"
2 changes: 1 addition & 1 deletion configs/logger/csv.yaml
@@ -1,7 +1,7 @@
# csv logger built in lightning

csv:
_target_: pytorch_lightning.loggers.csv_logs.CSVLogger
_target_: lightning.pytorch.loggers.csv_logs.CSVLogger
save_dir: "${paths.output_dir}"
name: "csv/"
prefix: ""
2 changes: 1 addition & 1 deletion configs/logger/mlflow.yaml
@@ -1,7 +1,7 @@
# https://mlflow.org

mlflow:
_target_: pytorch_lightning.loggers.mlflow.MLFlowLogger
_target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
# experiment_name: ""
# run_name: ""
tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
2 changes: 1 addition & 1 deletion configs/logger/neptune.yaml
@@ -1,7 +1,7 @@
# https://neptune.ai

neptune:
_target_: pytorch_lightning.loggers.neptune.NeptuneLogger
_target_: lightning.pytorch.loggers.neptune.NeptuneLogger
api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
project: username/lightning-hydra-template
# name: ""
2 changes: 1 addition & 1 deletion configs/logger/tensorboard.yaml
@@ -1,7 +1,7 @@
# https://www.tensorflow.org/tensorboard/

tensorboard:
_target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
_target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
save_dir: "${paths.output_dir}/tensorboard/"
name: null
log_graph: False
2 changes: 1 addition & 1 deletion configs/logger/wandb.yaml
@@ -1,7 +1,7 @@
# https://wandb.ai

wandb:
_target_: pytorch_lightning.loggers.wandb.WandbLogger
_target_: lightning.pytorch.loggers.wandb.WandbLogger
# name: "" # name of the run (normally generated by wandb)
save_dir: "${paths.output_dir}"
offline: False
5 changes: 3 additions & 2 deletions configs/train.yaml
@@ -33,8 +33,6 @@ task_name: "train"
# tags to help you identify your experiments
# you can overwrite this in experiment configs
# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
# appending lists from command line is currently not supported :(
# https://github.com/facebookresearch/hydra/issues/1547
tags: ["dev"]

# set False to skip model training
@@ -44,6 +42,9 @@ train: True
# lightning chooses best weights based on the metric specified in checkpoint callback
test: True

# compile model for faster training with pytorch 2.0
compile: False

# simply provide checkpoint path to resume training
ckpt_path: null

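The new `compile` flag presumably gates a `torch.compile` call in the training entrypoint; a minimal sketch of how such a flag could be consumed (the helper below is illustrative, not the template's actual code):

```python
import torch


def maybe_compile(model, cfg):
    # torch.compile requires PyTorch >= 2.0
    if cfg.get("compile"):
        model = torch.compile(model)
    return model
```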
2 changes: 1 addition & 1 deletion configs/trainer/default.yaml
@@ -1,4 +1,4 @@
_target_: pytorch_lightning.Trainer
_target_: lightning.pytorch.trainer.Trainer

default_root_dir: ${paths.output_dir}

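All `_target_` paths in the configs move from the `pytorch_lightning` namespace to the unified `lightning` package; user code migrating alongside would swap imports the same way (a sketch; `val/loss` is an assumed metric name, not prescribed by the template):

```python
# before (Lightning < 2.0):
#   from pytorch_lightning import Trainer
#   from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

# after (Lightning 2.0), matching the updated `_target_` paths above
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

trainer = Trainer(
    callbacks=[EarlyStopping(monitor="val/loss"), ModelCheckpoint()],
)
```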