Skip to content

Commit

Permalink
Merge pull request #356 from iver56/ij/docs
Browse files Browse the repository at this point in the history
Improve documentation
  • Loading branch information
iver56 authored Sep 30, 2024
2 parents f068c54 + dded985 commit 7b0fcc0
Show file tree
Hide file tree
Showing 25 changed files with 69 additions and 65 deletions.
2 changes: 1 addition & 1 deletion audiomentations/augmentations/add_color_noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def __init__(
def randomize_parameters(self, samples: np.ndarray, sample_rate: int):
super().randomize_parameters(samples, sample_rate)
if self.parameters["should_apply"]:
# Pick SNR in decibel scale
# Pick SNR in Decibel scale
snr = random.uniform(self.min_snr_db, self.max_snr_db)

# Pick f_decay
Expand Down
2 changes: 1 addition & 1 deletion audiomentations/augmentations/add_gaussian_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
class AddGaussianSNR(BaseWaveformTransform):
"""
Add Gaussian noise to the input. A random Signal to Noise Ratio (SNR) will be picked
uniformly in the decibel scale. This aligns with human hearing, which is more
uniformly in the Decibel scale. This aligns with human hearing, which is more
logarithmic than linear.
"""

Expand Down
7 changes: 3 additions & 4 deletions audiomentations/augmentations/apply_impulse_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ def __init__(
self.__load_ir = functools.lru_cache(maxsize=lru_cache_size)(self.__load_ir)
self.leave_length_unchanged = leave_length_unchanged

self.leave_length_unchanged = leave_length_unchanged

@staticmethod
def __load_ir(file_path, sample_rate, mono):
return load_sound_file(file_path, sample_rate, mono=mono)
Expand All @@ -58,7 +56,8 @@ def randomize_parameters(self, samples: NDArray[np.float32], sample_rate: int):
self.parameters["ir_file_path"] = random.choice(self.ir_files)

def apply(self, samples: NDArray[np.float32], sample_rate: int):
load_mono_ir = samples.ndim == 1 # determine if ir should load as mono
# Determine if the impulse response should be loaded as mono
load_mono_ir = samples.ndim == 1
ir, sample_rate2 = self.__load_ir(self.parameters["ir_file_path"], sample_rate, mono=load_mono_ir)
if sample_rate != sample_rate2:
# This will typically not happen, as librosa should automatically resample the
Expand All @@ -72,7 +71,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
samples_original_dim = samples.ndim
samples, ir = np.atleast_2d(samples), np.atleast_2d(ir)

# Preallocate the the output array
# Preallocate the output array
output_shape = (samples.shape[0], samples.shape[1] + ir.shape[1] - 1)
signal_ir = np.empty(output_shape, dtype=samples.dtype)

Expand Down
4 changes: 2 additions & 2 deletions audiomentations/augmentations/band_pass_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ def __init__(
:param max_rolloff: Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
:param zero_phase: Whether filtering should be zero phase.
When this is set to `True` it will not affect the phase of the
When this is set to `True`, it will not affect the phase of the
input signal but will sound 3 dB lower at the cutoff frequency
compared to the non-zero phase case (6 dB vs 3 dB). Additionally,
it is 2 times slower than in the non-zero phase case. If you
it is twice as slow as the non-zero phase case. If you
absolutely want no phase distortions (e.g. want to augment an
audio file with lots of transients, like a drum track), set
this to `True`.
Expand Down
6 changes: 3 additions & 3 deletions audiomentations/augmentations/band_stop_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ def __init__(
:param max_rolloff: Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
:param zero_phase: Whether filtering should be zero phase.
When this is set to `true` it will not affect the phase of the
When this is set to `True`, it will not affect the phase of the
input signal but will sound 3 dB lower at the cutoff frequency
compared to the non-zero phase case (6 dB vs 3 dB). Additionally,
it is 2 times slower than in the non-zero phase case. If you
it is twice as slow as the non-zero phase case. If you
absolutely want no phase distortions (e.g. want to augment a
drum track), set this to `true`.
drum track), set this to `True`.
:param p: The probability of applying this transform
"""
super().__init__(
Expand Down
4 changes: 2 additions & 2 deletions audiomentations/augmentations/base_butterword_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int = None):
cutoff_freq = self.parameters["cutoff_freq"]
nyquist_freq = sample_rate // 2
if cutoff_freq > nyquist_freq:
# Ensure that the cutoff frequency does not exceed the nyquist
# Ensure that the cutoff frequency does not exceed the Nyquist
# frequency to avoid an exception from scipy
cutoff_freq = nyquist_freq * 0.9999
sos = butter(
Expand All @@ -206,7 +206,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int = None):
)
nyquist_freq = sample_rate // 2
if high_freq > nyquist_freq:
# Ensure that the upper critical frequency does not exceed the nyquist
# Ensure that the upper critical frequency does not exceed the Nyquist
# frequency to avoid an exception from scipy
high_freq = nyquist_freq * 0.9999
sos = butter(
Expand Down
6 changes: 3 additions & 3 deletions audiomentations/augmentations/high_pass_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def __init__(
:param max_rolloff: Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
:param zero_phase: Whether filtering should be zero phase.
When this is set to `true` it will not affect the phase of the
When this is set to `True`, it will not affect the phase of the
input signal but will sound 3 dB lower at the cutoff frequency
compared to the non-zero phase case (6 dB vs. 3 dB). Additionally,
it is 2 times slower than in the non-zero phase case. If you
it is twice as slow as the non-zero phase case. If you
absolutely want no phase distortions (e.g. want to augment a
drum track), set this to `true`.
drum track), set this to `True`.
:param p: The probability of applying this transform
"""
super().__init__(
Expand Down
8 changes: 4 additions & 4 deletions audiomentations/augmentations/high_shelf_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class HighShelfFilter(BaseWaveformTransform):
A high shelf filter is a filter that either boosts (increases amplitude) or cuts
(decreases amplitude) frequencies above a certain center frequency. This transform
applies a high-shelf filter at a specific center frequency in hertz.
The gain at nyquist frequency is controlled by `{min,max}_gain_db` (note: can be positive or negative!).
The gain at Nyquist frequency is controlled by `{min,max}_gain_db` (note: can be positive or negative!).
Filter coefficients are taken from the W3 Audio EQ Cookbook: https://www.w3.org/TR/audio-eq-cookbook/
"""

Expand All @@ -35,8 +35,8 @@ def __init__(
"""
:param min_center_freq: The minimum center frequency of the shelving filter
:param max_center_freq: The maximum center frequency of the shelving filter
:param min_gain_db: The minimum gain at the nyquist frequency
:param max_gain_db: The maximum gain at the nyquist frequency
:param min_gain_db: The minimum gain at the Nyquist frequency
:param max_gain_db: The maximum gain at the Nyquist frequency
:param min_q: The minimum quality factor Q. The higher the Q, the steeper the
transition band will be.
:param max_q: The maximum quality factor Q. The higher the Q, the steeper the
Expand Down Expand Up @@ -120,7 +120,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
nyquist_freq = sample_rate // 2
center_freq = self.parameters["center_freq"]
if center_freq > nyquist_freq:
# Ensure that the center frequency is below the nyquist
# Ensure that the center frequency is below the Nyquist
# frequency to avoid filter instability
center_freq = nyquist_freq * 0.9999

Expand Down
9 changes: 4 additions & 5 deletions audiomentations/augmentations/limiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@
class Limiter(BaseWaveformTransform):
"""
A simple audio limiter (dynamic range compression).
Note: This transform also delays the signal by a fraction of the attack time.
"""

supports_multichannel = True

def __init__(
self,
min_threshold_db: float = -24,
max_threshold_db: float = -2,
min_threshold_db: float = -24.0,
max_threshold_db: float = -2.0,
min_attack: float = 0.0005,
max_attack: float = 0.025,
min_release: float = 0.05,
Expand All @@ -36,8 +35,8 @@ def __init__(
The attack time is how quickly the limiter kicks in once the audio signal starts exceeding the threshold.
The release time determines how quickly the limiter stops working after the signal drops below the threshold.
:param min_threshold_db: Minimum threshold in decibels
:param max_threshold_db: Maximum threshold in decibels
:param min_threshold_db: Minimum threshold in Decibels
:param max_threshold_db: Maximum threshold in Decibels
:param min_attack: Minimum attack time in seconds
:param max_attack: Maximum attack time in seconds
:param min_release: Minimum release time in seconds
Expand Down
6 changes: 3 additions & 3 deletions audiomentations/augmentations/low_pass_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def __init__(
:param max_rolloff: Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
:param zero_phase: Whether filtering should be zero phase.
When this is set to `true` it will not affect the phase of the
When this is set to `True`, it will not affect the phase of the
input signal but will sound 3 dB lower at the cutoff frequency
compared to the non-zero phase case (6 dB vs. 3 dB). Additionally,
it is 2 times slower than in the non-zero phase case. If you
it is twice as slow as the non-zero phase case. If you
absolutely want no phase distortions (e.g. want to augment a
drum track), set this to `true`.
drum track), set this to `True`.
:param p: The probability of applying this transform
"""
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion audiomentations/augmentations/low_shelf_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def apply(self, samples: NDArray[np.float32], sample_rate: int):
nyquist_freq = sample_rate // 2
center_freq = self.parameters["center_freq"]
if center_freq > nyquist_freq:
# Ensure that the center frequency is below the nyquist
# Ensure that the center frequency is below the Nyquist
# frequency to avoid filter instability
center_freq = nyquist_freq * 0.9999

Expand Down
2 changes: 1 addition & 1 deletion audiomentations/augmentations/trim.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Trim(BaseWaveformTransform):

def __init__(self, top_db: float = 30.0, p: float = 0.5):
"""
:param top_db: The threshold (in decibels) below reference to consider as silence
:param top_db: The threshold (in Decibels) below reference to consider as silence
:param p: The probability of applying this transform
"""
super().__init__(p)
Expand Down
2 changes: 1 addition & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ These are **breaking changes**. The following example shows how you can adapt yo
* When looking for audio files in `AddImpulseResponse`, `AddBackgroundNoise`
and `AddShortNoises`, follow symlinks by default.
* When using the new parameters `min_snr_in_db` and `max_snr_in_db` in `AddGaussianSNR`,
SNRs will be picked uniformly in _the decibel scale_ instead of in the linear amplitude
SNRs will be picked uniformly in _the Decibel scale_ instead of in the linear amplitude
ratio scale. The new behavior aligns more with human hearing, which is not linear.

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion docs/waveform_transforms/add_background_noise.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Here are some examples of datasets that can be downloaded and used as background
## Input-output example

Here we add some music to a speech recording, targeting a signal-to-noise ratio (SNR) of
5 decibels (dB), which means that the speech (_signal_) is 5 dB louder than the music (_noise_).
5 Decibels (dB), which means that the speech (_signal_) is 5 dB louder than the music (_noise_).

![Input-output waveforms and spectrograms](AddBackgroundNoise.webp)

Expand Down
2 changes: 1 addition & 1 deletion docs/waveform_transforms/add_gaussian_snr.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ _Added in v0.7.0_

The `AddGaussianSNR` transform injects Gaussian noise into an audio signal. It applies
a **Signal-to-Noise Ratio (SNR)** that is chosen randomly from a **uniform distribution on the
decibel scale**. This choice is consistent with the nature of human hearing, which is
Decibel scale**. This choice is consistent with the nature of human hearing, which is
logarithmic rather than linear.

**SNR** is a common measure used in science and engineering to compare the level of a
Expand Down
2 changes: 1 addition & 1 deletion docs/waveform_transforms/apply_impulse_response.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ will slow down execution, and because some high frequencies may get lost.

## Input-output example

Here we make a dry speech recording quite reverbant by convolving it with a room impulse response
Here we make a dry speech recording quite reverberant by convolving it with a room impulse response.

![Input-output waveforms and spectrograms](ApplyImpulseResponse.webp)

Expand Down
12 changes: 6 additions & 6 deletions docs/waveform_transforms/band_pass_filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,19 @@ augmented_sound = transform(my_waveform_ndarray, sample_rate=48000)
[`max_bandwidth_fraction`](#max_bandwidth_fraction){ #max_bandwidth_fraction }: `float` • range: [0.0, 2.0]
: :octicons-milestone-24: Default: `1.99`. Maximum bandwidth relative to center frequency

[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`zero_phase`](#zero_phase){ #zero_phase }: `bool`
: :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
When this is set to `True` it will not affect the phase of the input signal but will
When this is set to `True`, it will not affect the phase of the input signal but will
sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
you absolutely want no phase distortions (e.g. want to augment an audio file with
lots of transients, like a drum track), set this to `True`.

Expand Down
16 changes: 8 additions & 8 deletions docs/waveform_transforms/band_stop_filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,24 @@ can hear that the timbre is different in the transformed sound than in the origi
: :octicons-milestone-24: Default: `4000.0`. Maximum center frequency in hertz

[`min_bandwidth_fraction`](#min_bandwidth_fraction){ #min_bandwidth_fraction }: `float`
: :octicons-milestone-24: Default: `0.5`. Minimum bandwidth relative to center frequency
: :octicons-milestone-24: Default: `0.5`. Minimum bandwidth fraction relative to center frequency

[`max_bandwidth_fraction`](#max_bandwidth_fraction){ #max_bandwidth_fraction }: `float`
: :octicons-milestone-24: Default: `1.99`. Maximum bandwidth relative to center frequency
: :octicons-milestone-24: Default: `1.99`. Maximum bandwidth fraction relative to center frequency

[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`zero_phase`](#zero_phase){ #zero_phase }: `bool`
: :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
When this is set to `True` it will not affect the phase of the input signal but will
When this is set to `True`, it will not affect the phase of the input signal but will
sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
you absolutely want no phase distortions (e.g. want to augment an audio file with
lots of transients, like a drum track), set this to `True`.

Expand Down
8 changes: 4 additions & 4 deletions docs/waveform_transforms/high_pass_filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ Can also be set for zero-phase filtering (will result in a 6 dB drop at cutoff).

[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
: :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
: :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave).
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`zero_phase`](#zero_phase){ #zero_phase }: `bool`
: :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
When this is set to `True` it will not affect the phase of the input signal but will
When this is set to `True`, it will not affect the phase of the input signal but will
sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
you absolutely want no phase distortions (e.g. want to augment an audio file with
lots of transients, like a drum track), set this to `True`.

Expand Down
12 changes: 6 additions & 6 deletions docs/waveform_transforms/low_pass_filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ Can also be set for zero-phase filtering (will result in a 6 dB drop at cutoff).
[`max_cutoff_freq`](#max_cutoff_freq){ #max_cutoff_freq }: `float` • unit: hertz
: :octicons-milestone-24: Default: `7500.0`. Maximum cutoff frequency

[`min_rolloff`](#min_rolloff){ #min_rolloff }: `float` • unit: Decibels/octave
[`min_rolloff`](#min_rolloff){ #min_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `12`. Minimum filter roll-off (in dB/octave).
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`max_rolloff`](#max_rolloff){ #max_rolloff }: `float` • unit: Decibels/octave
[`max_rolloff`](#max_rolloff){ #max_rolloff }: `int` • unit: Decibels/octave
: :octicons-milestone-24: Default: `24`. Maximum filter roll-off (in dB/octave)
Must be a multiple of 6
Must be a multiple of 6 (or 12 if `zero_phase` is `True`)

[`zero_phase`](#zero_phase){ #zero_phase }: `bool`
: :octicons-milestone-24: Default: `False`. Whether filtering should be zero phase.
When this is set to `True` it will not affect the phase of the input signal but will
When this is set to `True`, it will not affect the phase of the input signal but will
sound 3 dB lower at the cutoff frequency compared to the non-zero phase case (6 dB
vs. 3 dB). Additionally, it is 2 times slower than in the non-zero phase case. If
vs. 3 dB). Additionally, it is twice as slow as the non-zero phase case. If
you absolutely want no phase distortions (e.g. want to augment an audio file with
lots of transients, like a drum track), set this to `True`.

Expand Down
6 changes: 6 additions & 0 deletions docs/waveform_transforms/normalize.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ Also known as peak normalization.

# Normalize API

[`apply_to`](#apply_to){ #apply_to }: `str` • choices: `"all"`, `"only_too_loud_sounds"`
: :octicons-milestone-24: Default: `"all"`. Defines the criterion for applying the transform.

* `"all"`: Apply peak normalization to all inputs
* `"only_too_loud_sounds"`: Apply peak normalization only to inputs where the maximum absolute peak is greater than 1

[`p`](#p){ #p }: `float` • range: [0.0, 1.0]
: :octicons-milestone-24: Default: `0.5`. The probability of applying this transform.

Expand Down
Loading

0 comments on commit 7b0fcc0

Please sign in to comment.