Lint

Signed-off-by: Olaf Lipinski <[email protected]>
olipinski committed on Nov 7, 2023
commit f69b95d (1 parent: 18a58c9)

Showing 5 changed files with 37 additions and 16 deletions.
diff --git a/emlangkit/language.py b/emlangkit/language.py
@@ -305,7 +305,12 @@ def boundaries(self, return_count: bool = False, return_mean: bool = False):
 
         return self.__boundaries
 
-    def random_boundaries(self, return_count: bool = False, return_mean: bool = False, recompute: bool = False):
+    def random_boundaries(
+        self,
+        return_count: bool = False,
+        return_mean: bool = False,
+        recompute: bool = False,
+    ):
         if self.__random_boundaries is None and not recompute:
             if self.__boundaries is None:
                 self.boundaries()
@@ -345,7 +350,12 @@ def segments(self, return_ids: bool = False, return_hashed_segments: bool = Fals
 
         return self.__segments
 
-    def random_segments(self, return_ids: bool = False, return_hashed_segments: bool = False, recompute: bool = False):
+    def random_segments(
+        self,
+        return_ids: bool = False,
+        return_hashed_segments: bool = False,
+        recompute: bool = False,
+    ):
         if self.__random_segments is None and not recompute:
             if self.__random_boundaries is None and not recompute:
                 self.random_boundaries()
@@ -362,7 +372,11 @@ def random_segments(self, return_ids: bool = False, return_hashed_segments: bool
             return self.__random_segments, self.__random_hashed_segments
 
         if return_ids and return_hashed_segments:
-            return self.__random_segments, self.__random_segment_ids, self.__random_hashed_segments
+            return (
+                self.__random_segments,
+                self.__random_segment_ids,
+                self.__random_hashed_segments,
+            )
 
         return self.__random_segments
 
@@ -381,7 +395,9 @@ def has_stats(self, compute_topsim: bool = False) -> dict:
             # and has no effect on the distance measurement
             if compute_topsim:
                 padded_hashed_segments = utils.pad_jagged(self.__hashed_segments)
-                padded_random_hashed_segments = utils.pad_jagged(self.__random_hashed_segments)
+                padded_random_hashed_segments = utils.pad_jagged(
+                    self.__random_hashed_segments
+                )
 
             self.__has_stats = {
                 "vocab_size": len(self.__segment_ids),
@@ -390,15 +406,19 @@ def has_stats(self, compute_topsim: bool = False) -> dict:
                 # We use hamming here, as the segments could contain multiple characters
                 # So editdistance would give us a worse estimate
                 "topographic_similarity": metrics.compute_topographic_similarity(
-                    padded_hashed_segments, self.observations, message_dist_metric="hamming"
+                    padded_hashed_segments,
+                    self.observations,
+                    message_dist_metric="hamming",
                 )
                 if compute_topsim
                 else None,
                 "random_vocab_size": len(self.__random_segment_ids),
                 "random_zla": random_zla,
                 "random_zipf": random_freq,
                 "random_topographic_similarity": metrics.compute_topographic_similarity(
-                    padded_random_hashed_segments, self.observations, message_dist_metric="hamming"
+                    padded_random_hashed_segments,
+                    self.observations,
+                    message_dist_metric="hamming",
                 )
                 if compute_topsim
                 else None,

diff --git a/emlangkit/metrics/topsim.py b/emlangkit/metrics/topsim.py
@@ -1,5 +1,5 @@
 """Calculate topographic similarity for a given language."""
-from typing import Tuple, Literal
+from typing import Literal, Tuple
 
 import editdistance
 import numpy as np
@@ -8,7 +8,10 @@
 
 
 def compute_topographic_similarity(
-    messages: np.ndarray, observations: np.ndarray, observations_dist_metric: str = "hamming", message_dist_metric: str = "editdistance"
+    messages: np.ndarray,
+    observations: np.ndarray,
+    observations_dist_metric: str = "hamming",
+    message_dist_metric: str = "editdistance",
 ) -> Tuple[float, float]:
     """
     Calculate the topographic similarity between the given messages and observations.

diff --git a/emlangkit/utils/__init__.py b/emlangkit/utils/__init__.py
@@ -1,6 +1,4 @@
 """Root __init__ of the utils."""
 from emlangkit.utils.array_ops import pad_jagged
 
-__all__ = [
-    "pad_jagged"
-]
+__all__ = ["pad_jagged"]
diff --git a/emlangkit/utils/array_ops.py b/emlangkit/utils/array_ops.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 
+
 def pad_jagged(array: np.ndarray, fill: int = 0) -> np.ndarray:
     """
     Append the minimal required amount of a given integer at the end of each array, such that it looses its jagedness.
@@ -21,7 +22,7 @@ def pad_jagged(array: np.ndarray, fill: int = 0) -> np.ndarray:
 
     """
     maxlen = max(len(r) for r in array)
-    padded = np.full((len(array), maxlen),fill_value=fill)
+    padded = np.full((len(array), maxlen), fill_value=fill)
     for enu, row in enumerate(array):
-        padded[enu, :len(row)] += row
-    return padded
\ No newline at end of file
+        padded[enu, : len(row)] += row
+    return padded
diff --git a/tests/test_language.py b/tests/test_language.py
@@ -86,11 +86,10 @@ def test_language_metrics():
     lang.conditional_entropy()
     lang.boundaries(return_count=True, return_mean=True)
     lang.random_boundaries(return_count=True, return_mean=True)
-    lang.segments(return_ids=True,return_hashed_segments=True)
+    lang.segments(return_ids=True, return_hashed_segments=True)
     lang.random_segments(return_ids=True, return_hashed_segments=True)
     lang.has_stats(compute_topsim=True)
 
     # Test recomputing random stats
     lang.random_boundaries(recompute=True)
     lang.random_segments(recompute=True)
-