Merge pull request #356 from iver56/ij/docs

Improve documentation
iver56 · Sep 30, 2024 · 7b0fcc0 · 7b0fcc0
2 parents f068c54 + dded985
commit 7b0fcc0
Show file tree

Hide file tree

Showing 25 changed files with 69 additions and 65 deletions.
diff --git a/audiomentations/augmentations/add_color_noise.py b/audiomentations/augmentations/add_color_noise.py
@@ -174,7 +174,7 @@ def __init__(
     def randomize_parameters(self, samples: np.ndarray, sample_rate: int):
         super().randomize_parameters(samples, sample_rate)
         if self.parameters["should_apply"]:
-            # Pick SNR in decibel scale
+            # Pick SNR in Decibel scale
             snr = random.uniform(self.min_snr_db, self.max_snr_db)
 
             # Pick f_decay

diff --git a/audiomentations/augmentations/add_gaussian_snr.py b/audiomentations/augmentations/add_gaussian_snr.py
@@ -10,7 +10,7 @@
 class AddGaussianSNR(BaseWaveformTransform):
     """
     Add Gaussian noise to the input. A random Signal-to-Noise Ratio (SNR) will be picked
-    uniformly in the decibel scale. This aligns with human hearing, which is more
+    uniformly in the Decibel scale. This aligns with human hearing, which is more
     logarithmic than linear.
     """
 

diff --git a/audiomentations/augmentations/apply_impulse_response.py b/audiomentations/augmentations/apply_impulse_response.py
@@ -46,8 +46,6 @@ def __init__(
         self.__load_ir = functools.lru_cache(maxsize=lru_cache_size)(self.__load_ir)
         self.leave_length_unchanged = leave_length_unchanged
 
-        self.leave_length_unchanged = leave_length_unchanged
-
     @staticmethod
     def __load_ir(file_path, sample_rate, mono):
         return load_sound_file(file_path, sample_rate, mono=mono)
@@ -58,7 +56,8 @@ def randomize_parameters(self, samples: NDArray[np.float32], sample_rate: int):
             self.parameters["ir_file_path"] = random.choice(self.ir_files)
 
     def apply(self, samples: NDArray[np.float32], sample_rate: int):
-        load_mono_ir = samples.ndim == 1 # determine if ir should load as mono
+        # Determine if the impulse response should be loaded as mono
+        load_mono_ir = samples.ndim == 1
         ir, sample_rate2 = self.__load_ir(self.parameters["ir_file_path"], sample_rate, mono=load_mono_ir)
         if sample_rate != sample_rate2:
             # This will typically not happen, as librosa should automatically resample the
@@ -72,7 +71,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
         samples_original_dim = samples.ndim
         samples, ir = np.atleast_2d(samples), np.atleast_2d(ir)
 
-        # Preallocate the the output array
+        # Preallocate the output array
         output_shape = (samples.shape[0], samples.shape[1] + ir.shape[1] - 1)
         signal_ir = np.empty(output_shape, dtype=samples.dtype)
 

diff --git a/audiomentations/augmentations/band_pass_filter.py b/audiomentations/augmentations/band_pass_filter.py
@@ -31,10 +31,10 @@ def __init__(
         :param max_rolloff: Maximum filter roll-off (in dB/octave)
             Must be a multiple of 6
         :param zero_phase: Whether filtering should be zero phase.
-            When this is set to `True` it will not affect the phase of the
+            When this is set to `True`, it will not affect the phase of the
             input signal but will sound 3 dB lower at the cutoff frequency
             compared to the non-zero phase case (6 dB vs 3 dB). Additionally,
-            it is 2 times slower than in the non-zero phase case. If you
+            it is twice as slow as the non-zero phase case. If you
             absolutely want no phase distortions (e.g. want to augment an
             audio file with lots of transients, like a drum track), set
             this to `True`.

diff --git a/audiomentations/augmentations/band_stop_filter.py b/audiomentations/augmentations/band_stop_filter.py
@@ -36,12 +36,12 @@ def __init__(
         :param max_rolloff: Maximum filter roll-off (in dB/octave)
             Must be a multiple of 6
         :param zero_phase: Whether filtering should be zero phase.
-            When this is set to `true` it will not affect the phase of the
+            When this is set to `True`, it will not affect the phase of the
             input signal but will sound 3 dB lower at the cutoff frequency
             compared to the non-zero phase case (6 dB vs 3 dB). Additionally,
-            it is 2 times slower than in the non-zero phase case. If you
+            it is twice as slow as the non-zero phase case. If you
             absolutely want no phase distortions (e.g. want to augment a
-            drum track), set this to `true`.
+            drum track), set this to `True`.
         :param p: The probability of applying this transform
         """
         super().__init__(

diff --git a/audiomentations/augmentations/base_butterword_filter.py b/audiomentations/augmentations/base_butterword_filter.py
@@ -188,7 +188,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int = None):
             cutoff_freq = self.parameters["cutoff_freq"]
             nyquist_freq = sample_rate // 2
             if cutoff_freq > nyquist_freq:
-                # Ensure that the cutoff frequency does not exceed the nyquist
+                # Ensure that the cutoff frequency does not exceed the Nyquist
                 # frequency to avoid an exception from scipy
                 cutoff_freq = nyquist_freq * 0.9999
             sos = butter(
@@ -206,7 +206,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int = None):
             )
             nyquist_freq = sample_rate // 2
             if high_freq > nyquist_freq:
-                # Ensure that the upper critical frequency does not exceed the nyquist
+                # Ensure that the upper critical frequency does not exceed the Nyquist
                 # frequency to avoid an exception from scipy
                 high_freq = nyquist_freq * 0.9999
             sos = butter(

diff --git a/audiomentations/augmentations/high_pass_filter.py b/audiomentations/augmentations/high_pass_filter.py
@@ -26,12 +26,12 @@ def __init__(
         :param max_rolloff: Maximum filter roll-off (in dB/octave)
             Must be a multiple of 6
         :param zero_phase: Whether filtering should be zero phase.
-            When this is set to `true` it will not affect the phase of the
+            When this is set to `True`, it will not affect the phase of the
             input signal but will sound 3 dB lower at the cutoff frequency
             compared to the non-zero phase case (6 dB vs. 3 dB). Additionally,
-            it is 2 times slower than in the non-zero phase case. If you
+            it is twice as slow as the non-zero phase case. If you
             absolutely want no phase distortions (e.g. want to augment a
-            drum track), set this to `true`.
+            drum track), set this to `True`.
         :param p: The probability of applying this transform
         """
         super().__init__(

diff --git a/audiomentations/augmentations/high_shelf_filter.py b/audiomentations/augmentations/high_shelf_filter.py
@@ -16,7 +16,7 @@ class HighShelfFilter(BaseWaveformTransform):
     A high shelf filter is a filter that either boosts (increases amplitude) or cuts
     (decreases amplitude) frequencies above a certain center frequency. This transform
     applies a high-shelf filter at a specific center frequency in hertz.
-    The gain at nyquist frequency is controlled by `{min,max}_gain_db` (note: can be positive or negative!).
+    The gain at Nyquist frequency is controlled by `{min,max}_gain_db` (note: can be positive or negative!).
     Filter coefficients are taken from the W3 Audio EQ Cookbook: https://www.w3.org/TR/audio-eq-cookbook/
     """
 
@@ -35,8 +35,8 @@ def __init__(
         """
         :param min_center_freq: The minimum center frequency of the shelving filter
         :param max_center_freq: The maximum center frequency of the shelving filter
-        :param min_gain_db: The minimum gain at the nyquist frequency
-        :param max_gain_db: The maximum gain at the nyquist frequency
+        :param min_gain_db: The minimum gain at the Nyquist frequency
+        :param max_gain_db: The maximum gain at the Nyquist frequency
         :param min_q: The minimum quality factor Q. The higher the Q, the steeper the
             transition band will be.
         :param max_q: The maximum quality factor Q. The higher the Q, the steeper the
@@ -120,7 +120,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
         nyquist_freq = sample_rate // 2
         center_freq = self.parameters["center_freq"]
         if center_freq > nyquist_freq:
-            # Ensure that the center frequency is below the nyquist
+            # Ensure that the center frequency is below the Nyquist
             # frequency to avoid filter instability
             center_freq = nyquist_freq * 0.9999
 

diff --git a/audiomentations/augmentations/limiter.py b/audiomentations/augmentations/limiter.py
@@ -15,15 +15,14 @@
 class Limiter(BaseWaveformTransform):
     """
     A simple audio limiter (dynamic range compression).
-    Note: This transform also delays the signal by a fraction of the attack time.
     """
 
     supports_multichannel = True
 
     def __init__(
         self,
-        min_threshold_db: float = -24,
-        max_threshold_db: float = -2,
+        min_threshold_db: float = -24.0,
+        max_threshold_db: float = -2.0,
         min_attack: float = 0.0005,
         max_attack: float = 0.025,
         min_release: float = 0.05,
@@ -36,8 +35,8 @@ def __init__(
         The attack time is how quickly the limiter kicks in once the audio signal starts exceeding the threshold.
         The release time determines how quickly the limiter stops working after the signal drops below the threshold.
 
-        :param min_threshold_db: Minimum threshold in decibels
-        :param max_threshold_db: Maximum threshold in decibels
+        :param min_threshold_db: Minimum threshold in Decibels
+        :param max_threshold_db: Maximum threshold in Decibels
         :param min_attack: Minimum attack time in seconds
         :param max_attack: Maximum attack time in seconds
         :param min_release: Minimum release time in seconds

diff --git a/audiomentations/augmentations/low_pass_filter.py b/audiomentations/augmentations/low_pass_filter.py
@@ -26,12 +26,12 @@ def __init__(
         :param max_rolloff: Maximum filter roll-off (in dB/octave)
             Must be a multiple of 6
         :param zero_phase: Whether filtering should be zero phase.
-            When this is set to `true` it will not affect the phase of the
+            When this is set to `True`, it will not affect the phase of the
             input signal but will sound 3 dB lower at the cutoff frequency
             compared to the non-zero phase case (6 dB vs. 3 dB). Additionally,
-            it is 2 times slower than in the non-zero phase case. If you
+            it is twice as slow as the non-zero phase case. If you
             absolutely want no phase distortions (e.g. want to augment a
-            drum track), set this to `true`.
+            drum track), set this to `True`.
         :param p: The probability of applying this transform
         """
         super().__init__(

diff --git a/audiomentations/augmentations/low_shelf_filter.py b/audiomentations/augmentations/low_shelf_filter.py
@@ -119,7 +119,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
         nyquist_freq = sample_rate // 2
         center_freq = self.parameters["center_freq"]
         if center_freq > nyquist_freq:
-            # Ensure that the center frequency is below the nyquist
+            # Ensure that the center frequency is below the Nyquist
             # frequency to avoid filter instability
             center_freq = nyquist_freq * 0.9999
 

diff --git a/audiomentations/augmentations/trim.py b/audiomentations/augmentations/trim.py
@@ -14,7 +14,7 @@ class Trim(BaseWaveformTransform):
 
     def __init__(self, top_db: float = 30.0, p: float = 0.5):
         """
-        :param top_db: The threshold (in decibels) below reference to consider as silence
+        :param top_db: The threshold (in Decibels) below reference to consider as silence
         :param p: The probability of applying this transform
         """
         super().__init__(p)

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -323,7 +323,7 @@ These are **breaking changes**. The following example shows how you can adapt yo
 * When looking for audio files in `AddImpulseResponse`, `AddBackgroundNoise`
   and `AddShortNoises`, follow symlinks by default.
 * When using the new parameters `min_snr_in_db` and `max_snr_in_db` in `AddGaussianSNR`,
-  SNRs will be picked uniformly in _the decibel scale_ instead of in the linear amplitude
+  SNRs will be picked uniformly in _the Decibel scale_ instead of in the linear amplitude
   ratio scale. The new behavior aligns more with human hearing, which is not linear.
 
 ### Fixed

diff --git a/docs/waveform_transforms/add_background_noise.md b/docs/waveform_transforms/add_background_noise.md
@@ -26,7 +26,7 @@ Here are some examples of datasets that can be downloaded and used as background
 ## Input-output example
 
 Here we add some music to a speech recording, targeting a signal-to-noise ratio (SNR) of
-5 decibels (dB), which means that the speech (_signal_) is 5 dB louder than the music (_noise_).
+5 Decibels (dB), which means that the speech (_signal_) is 5 dB louder than the music (_noise_).
 
 ![Input-output waveforms and spectrograms](AddBackgroundNoise.webp)
 

diff --git a/docs/waveform_transforms/add_gaussian_snr.md b/docs/waveform_transforms/add_gaussian_snr.md
@@ -4,7 +4,7 @@ _Added in v0.7.0_
 
 The `AddGaussianSNR` transform injects Gaussian noise into an audio signal. It applies
 a **Signal-to-Noise Ratio (SNR)** that is chosen randomly from a **uniform distribution on the
-decibel scale**. This choice is consistent with the nature of human hearing, which is
+Decibel scale**. This choice is consistent with the nature of human hearing, which is
 logarithmic rather than linear.
 
 **SNR** is a common measure used in science and engineering to compare the level of a

diff --git a/docs/waveform_transforms/apply_impulse_response.md b/docs/waveform_transforms/apply_impulse_response.md
@@ -46,7 +46,7 @@ will slow down execution, and because some high frequencies may get lost.
 
 ## Input-output example
 
-Here we make a dry speech recording quite reverbant by convolving it with a room impulse response
+Here we make a dry speech recording quite reverberant by convolving it with a room impulse response.
 
 ![Input-output waveforms and spectrograms](ApplyImpulseResponse.webp)
 

diff --git a/docs/waveform_transforms/band_pass_filter.md b/docs/waveform_transforms/band_pass_filter.md
@@ -46,19 +46,19 @@ augmented_sound = transform(my_waveform_ndarray, sample_rate=48000)
 [`max_bandwidth_fraction`](#max_bandwidth_fraction){ #max_bandwidth_fraction }: `float` • range: [0.0, 2.0]
 :   :octicons-milestone-24: Default: `1.99`. Maximum bandwidth relative to center frequency
 
-[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
+[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
-[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
+[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
 [`zero_phase`](#zero_phase){ #zero_phase }: `bool`
 :   :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
-    When this is set to `True` it will not affect the phase of the input signal but will
+    When this is set to `True`, it will not affect the phase of the input signal but will
     sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
-    vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
+    vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
     you absolutely want no phase distortions (e.g. want to augment an audio file with
     lots of transients, like a drum track), set this to `True`.
 

diff --git a/docs/waveform_transforms/band_stop_filter.md b/docs/waveform_transforms/band_stop_filter.md
@@ -36,24 +36,24 @@ can hear that the timbre is different in the transformed sound than in the origi
 :   :octicons-milestone-24: Default: `4000.0`. Maximum center frequency in hertz
 
 [`min_bandwidth_fraction`](#min_bandwidth_fraction){ #min_bandwidth_fraction }: `float`
-:   :octicons-milestone-24: Default: `0.5`. Minimum bandwidth relative to center frequency
+:   :octicons-milestone-24: Default: `0.5`. Minimum bandwidth fraction relative to center frequency
 
 [`max_bandwidth_fraction`](#max_bandwidth_fraction){ #max_bandwidth_fraction }: `float`
-:   :octicons-milestone-24: Default: `1.99`. Maximum bandwidth relative to center frequency
+:   :octicons-milestone-24: Default: `1.99`. Maximum bandwidth fraction relative to center frequency
 
-[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
+[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
-[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
+[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
 [`zero_phase`](#zero_phase){ #zero_phase }: `bool`
 :   :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
-    When this is set to `True` it will not affect the phase of the input signal but will
+    When this is set to `True`, it will not affect the phase of the input signal but will
     sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
-    vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
+    vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
     you absolutely want no phase distortions (e.g. want to augment an audio file with
     lots of transients, like a drum track), set this to `True`.
 

diff --git a/docs/waveform_transforms/high_pass_filter.md b/docs/waveform_transforms/high_pass_filter.md
@@ -15,17 +15,17 @@ Can also be set for zero-phase filtering (will result in a 6 dB drop at cutoff).
 
 [`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
 [`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave).
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
 [`zero_phase`](#zero_phase){ #zero_phase }: `bool`
 :   :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
-    When this is set to `True` it will not affect the phase of the input signal but will
+    When this is set to `True`, it will not affect the phase of the input signal but will
     sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
-    vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
+    vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
     you absolutely want no phase distortions (e.g. want to augment an audio file with
     lots of transients, like a drum track), set this to `True`.
 

diff --git a/docs/waveform_transforms/low_pass_filter.md b/docs/waveform_transforms/low_pass_filter.md
@@ -13,19 +13,19 @@ Can also be set for zero-phase filtering (will result in a 6 dB drop at cutoff).
 [`max_cutoff_freq`](#max_cutoff_freq){ #max_cutoff_freq }: `float` • unit: hertz
 :   :octicons-milestone-24: Default: `7500.0`. Maximum cutoff frequency
 
-[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
+[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
-[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
+[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
 :   :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
-    Must be a multiple of 6
+    Must be a multiple of 6 (or 12 if `zero_phase` is `True`)
 
 [`zero_phase`](#zero_phase){ #zero_phase }: `bool`
 :   :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
-    When this is set to `True` it will not affect the phase of the input signal but will
+    When this is set to `True`, it will not affect the phase of the input signal but will
     sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
-    vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
+    vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
     you absolutely want no phase distortions (e.g. want to augment an audio file with
     lots of transients, like a drum track), set this to `True`.
 

diff --git a/docs/waveform_transforms/normalize.md b/docs/waveform_transforms/normalize.md
@@ -8,6 +8,12 @@ Also known as peak normalization.
 
 # Normalize API
 
+[`apply_to`](#apply_to){ #apply_to }: `str` • choices: `"all"`, `"only_too_loud_sounds"`
+:   :octicons-milestone-24: Default: `"all"`. Defines the criterion for applying the transform.
+
+    * `"all"`: Apply peak normalization to all inputs
+    * `"only_too_loud_sounds"`: Apply peak normalization only to inputs where the maximum absolute peak is greater than 1
+
 [`p`](#p){ #p }: `float` • range: [0.0, 1.0]
 :   :octicons-milestone-24: Default: `0.5`. The probability of applying this transform.