Merge pull request #27 from aai-institute/feature/callbacks

Add callbacks.
aai-institute · Feb 6, 2024 · dd39745 · dd39745
2 parents ec3a761 + 9a23765
commit dd39745
Show file tree

Hide file tree

Showing 6 changed files with 300 additions and 84 deletions.
diff --git a/notebooks/superresolution.ipynb b/notebooks/superresolution.ipynb
diff --git a/src/continuity/callbacks/__init__.py b/src/continuity/callbacks/__init__.py
@@ -0,0 +1,113 @@
+"""
+`continuity.callbacks`
+
+Callbacks for training in Continuity.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict
+import matplotlib.pyplot as plt
+
+
+class Callback(ABC):
+    """
+    Callback base class for `fit` method of `Operator`.
+
+    All three hooks below are declared with `@abstractmethod`, so a concrete
+    subclass has to implement `__call__`, `on_train_begin` and
+    `on_train_end` before it can be instantiated.
+    """
+
+    @abstractmethod
+    def __call__(self, epoch, logs: Dict[str, float]):
+        """Callback function.
+        Called at the end of each epoch.
+
+        Args:
+            epoch: Current epoch.
+            logs: Dictionary of logs, mapping a log name to its value for
+                this epoch.
+
+        Raises:
+            NotImplementedError: Always, when this base implementation
+                itself is executed (e.g. through `super()`).
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def on_train_begin(self):
+        """Called at the beginning of training.
+
+        The base class provides no behaviour; subclasses define it.
+        """
+
+    @abstractmethod
+    def on_train_end(self):
+        """Called at the end of training.
+
+        The base class provides no behaviour; subclasses define it.
+        """
+
+
+class PrintTrainingLoss(Callback):
+    """
+    Callback to print training loss.
+
+    After every epoch a single status line with the training loss and the
+    epoch duration is written to standard output via `print`.
+    """
+
+    def __init__(self):
+        # No state of its own; only the base class initialiser is run.
+        super().__init__()
+
+    def __call__(self, epoch: int, logs: Dict[str, float]):
+        """Callback function.
+        Called at the end of each epoch.
+
+        Args:
+            epoch: Current epoch.
+            logs: Dictionary of logs. The entries "loss/train" and
+                "seconds_per_epoch" are read by subscript, so both must be
+                present.
+        """
+        loss_train = logs["loss/train"]
+        seconds_per_epoch = logs["seconds_per_epoch"]
+
+        # The leading "\r" moves back to the start of the line and end=""
+        # suppresses the newline, so each epoch overwrites the previous
+        # status line instead of adding a new one.
+        print(
+            f"\rEpoch {epoch}:  loss/train = {loss_train:.4e}  "
+            f"({seconds_per_epoch:.2f} s/epoch)",
+            end="",
+        )
+
+    def on_train_begin(self):
+        """Called at the beginning of training. Does nothing."""
+
+    def on_train_end(self):
+        """Called at the end of training.
+
+        Prints an empty string, which terminates the status line that
+        `__call__` left without a trailing newline.
+        """
+        print("")
+
+
+class LearningCurve(Callback):
+    """
+    Callback to plot learning curve.
+
+    Values of the selected log entries are collected after every epoch and
+    plotted with matplotlib once training has ended.
+
+    Args:
+        keys: List of keys to plot. Default is ["loss/train"].
+    """
+
+    def __init__(self, keys: Optional[List[str]] = None):
+        # `None` stands in for the default so no list object is shared
+        # between instances.
+        if keys is None:
+            keys = ["loss/train"]
+
+        self.keys = keys
+        # Creates `self.losses` right away, so the history exists even if
+        # `on_train_begin` has not been called from outside yet.
+        self.on_train_begin()
+        super().__init__()
+
+    def __call__(self, epoch: int, logs: Dict[str, float]):
+        """Callback function.
+        Called at the end of each epoch.
+
+        Args:
+            epoch: Current epoch. Not used by this callback.
+            logs: Dictionary of logs. For every configured key that is
+                present, the value is appended to that key's history; keys
+                missing from `logs` are skipped without an error.
+        """
+        for key in self.keys:
+            if key in logs:
+                self.losses[key].append(logs[key])
+
+    def on_train_begin(self):
+        """Called at the beginning of training.
+
+        Resets the history to an empty list per configured key, discarding
+        anything recorded before.
+        """
+        self.losses = {key: [] for key in self.keys}
+
+    def on_train_end(self):
+        """Called at the end of training.
+
+        Draws one curve per configured key on the current matplotlib
+        figure, with a logarithmic y-axis, axis labels and a legend, and
+        then calls `plt.show()`.
+        """
+        for key in self.keys:
+            vals = self.losses[key]
+            # The x-axis counts recorded values starting at 1; it is not
+            # taken from the `epoch` argument passed to `__call__`.
+            epochs = list(range(1, len(vals) + 1))
+            plt.plot(epochs, vals)
+
+        plt.yscale("log")
+        plt.xlabel("Epoch")
+        plt.ylabel("Loss")
+        # Legend entries follow the order of `self.keys`, matching the
+        # order in which the curves were plotted above.
+        plt.legend(self.keys)
+        plt.show()
diff --git a/src/continuity/data/__init__.py b/src/continuity/data/__init__.py
@@ -23,7 +23,7 @@ def get_device() -> torch.device:
         Device.
     """
     device = torch.device("cpu")
-    use_mps_backend = os.environ.get("USE_MPS_BACKEND", True).lower() in ("true", "1")
+    use_mps_backend = os.environ.get("USE_MPS_BACKEND", "True").lower() in ("true", "1")
 
     if use_mps_backend and torch.backends.mps.is_available():
         device = torch.device("mps")

diff --git a/src/continuity/operators/operator.py b/src/continuity/operators/operator.py
@@ -3,10 +3,10 @@
 import torch
 from abc import abstractmethod
 from time import time
-from typing import Optional
+from typing import Optional, List
 from torch import Tensor
-from torch.utils.tensorboard import SummaryWriter
 from continuity.data import device, DataSet
+from continuity.callbacks import Callback, PrintTrainingLoss
 from continuity.operators.losses import Loss, MSELoss
 
 
@@ -47,20 +47,32 @@ def compile(self, optimizer: torch.optim.Optimizer, loss_fn: Optional[Loss] = No
 
         # Print number of model parameters
         num_params = sum(p.numel() for p in self.parameters())
-        print(f"Model parameters: {num_params}")
+        print(f"Model parameters: {num_params}   Device: {device}")
 
     def fit(
-        self, dataset: DataSet, epochs: int, writer: Optional[SummaryWriter] = None
+        self,
+        dataset: DataSet,
+        epochs: int,
+        callbacks: Optional[List[Callback]] = None,
     ):
         """Fit operator to data set.
 
         Args:
             dataset: Data set.
             epochs: Number of epochs.
-            writer: Tensorboard-like writer for loss visualization.
+            callbacks: List of callbacks.
         """
+        # Default callback
+        if callbacks is None:
+            callbacks = [PrintTrainingLoss()]
+
+        # Call on_train_begin
+        for callback in callbacks:
+            callback.on_train_begin()
+
+        # Train
         for epoch in range(epochs + 1):
-            mean_loss = 0
+            loss_train = 0
 
             start = time()
             for i in range(len(dataset)):
@@ -76,21 +88,24 @@ def closure(x=x, u=u, y=y, v=v):
                 self.optimizer.param_groups[0]["lr"] *= 0.999
 
                 # Compute mean loss
-                mean_loss += self.loss_fn(self, x, u, y, v).detach().item()
+                loss_train += self.loss_fn(self, x, u, y, v).detach().item()
 
             end = time()
-            mean_loss /= len(dataset)
-
-            if writer is not None:
-                writer.add_scalar("Loss/train", mean_loss, epoch)
-
-            iter_per_second = len(dataset) / (end - start)
-            print(
-                f"\rEpoch {epoch}:  loss = {mean_loss:.4e}  "
-                f"({iter_per_second:.2f} it/s)",
-                end="",
-            )
-        print("")
+            seconds_per_epoch = end - start
+            loss_train /= len(dataset)
+
+            # Callbacks
+            logs = {
+                "loss/train": loss_train,
+                "seconds_per_epoch": seconds_per_epoch,
+            }
+
+            for callback in callbacks:
+                callback(epoch, logs)
+
+        # Call on_train_end
+        for callback in callbacks:
+            callback.on_train_end()
 
     def loss(self, x: Tensor, u: Tensor, y: Tensor, v: Tensor) -> Tensor:
         """Evaluate loss function.

diff --git a/src/continuity/plotting/__init__.py b/src/continuity/plotting/__init__.py
@@ -30,8 +30,12 @@ def plot(x: Tensor, u: Tensor, ax: Optional[Axis] = None):
     dim = x.shape[-1]
     assert dim in [1, 2], "Only supports `d = 1,2`"
 
+    # Move to cpu
+    x = x.cpu().detach().numpy()
+    u = u.cpu().detach().numpy()
+
     if dim == 1:
-        ax.plot(x, u, "k.")
+        ax.plot(x, u, ".")
 
     if dim == 2:
         xx, yy = x[:, 0], x[:, 1]

diff --git a/tests/test_convolution.py b/tests/test_convolution.py
@@ -1,5 +1,6 @@
 import torch
 import matplotlib.pyplot as plt
+from continuity.data import device
 from continuity.data.datasets import Sine
 from continuity.operators import ContinuousConvolution
 from continuity.plotting import plot
@@ -20,7 +21,8 @@ def test_convolution():
     # Kernel
     def dirac(x, y):
         dist = ((x - y) ** 2).sum(dim=-1)
-        return torch.isclose(dist, torch.zeros(1)).to(torch.float32)
+        zero = torch.zeros(1, device=device)
+        return torch.isclose(dist, zero).to(torch.float32)
 
     # Operator
     operator = ContinuousConvolution(
@@ -30,7 +32,7 @@ def dirac(x, y):
     )
 
     # Create tensors
-    y = torch.linspace(-1, 1, num_evals).unsqueeze(-1)
+    y = torch.linspace(-1, 1, num_evals).unsqueeze(-1).to(device)
 
     # Apply operator
     v = operator(x, u, y)
@@ -41,7 +43,7 @@ def dirac(x, y):
     # Plotting
     fig, ax = plt.subplots(1, 1)
     plot(x, u, ax=ax)
-    plt.plot(x, v, "o")
+    plot(x, v, ax=ax)
     fig.savefig(f"test_convolution.png")
 
     # For num_sensors == num_evals, we get v = u / num_sensors.