Skip to content

Commit

Permalink
refactor: add gather_dimensions for proportion exceeding
Browse files Browse the repository at this point in the history
  • Loading branch information
aidanjgriffiths committed Dec 11, 2023
1 parent fd860d1 commit 9e31845
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 119 deletions.
120 changes: 1 addition & 119 deletions src/scores/continuous/flip_flop_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import xarray as xr

from scores.functions import angular_difference
from scores.processing import binary_discretise
from scores.processing import proportion_exceeding
from scores.typing import XarrayLike
from scores.utils import DimensionError, check_dims, dims_complement

Expand Down Expand Up @@ -433,121 +433,3 @@ def flip_flop_index_proportion_exceeding(
flip_flop_exceeding.attrs = flip_flop_data.attrs

return flip_flop_exceeding


def proportion_exceeding(data: XarrayLike, thresholds: Iterable, dims: Optional[Iterable] = None):
    """
    Computes, for each threshold, the fraction of `data` that is greater than
    or equal to that threshold.

    This is a thin wrapper that delegates to `_binary_discretise_proportion`
    using the `">="` mode.

    Args:
        data (xarray.Dataset or xarray.DataArray): The values that are
            compared against `thresholds`.
        thresholds (iterable): The thresholds that `data` is tested against.
        dims (Optional[iterable]): Names of the dimensions of `data` to keep
            in the output. Every other dimension of `data` is collapsed.

    Returns:
        An xarray data object of the same type as `data`, with dimensions
        `dims` + 'threshold'. Each value is the proportion of `data` that is
        greater than or equal to the corresponding threshold.
    """
    exceedance_mode = ">="
    proportion = _binary_discretise_proportion(data, thresholds, exceedance_mode, dims=dims)
    return proportion


def _binary_discretise_proportion(
    data: XarrayLike,
    thresholds: Iterable,
    mode: str,
    dims: Optional[Iterable] = None,
    abs_tolerance: Optional[float] = None,
    autosqueeze: bool = False,
):
    """
    Returns the proportion of `data` in each category. The categories are
    defined by the relationship of data to threshold as specified by
    the operation `mode`.

    Args:
        data (xarray.Dataset or xarray.DataArray): The data to convert
            into 0 and 1 according to the thresholds before calculating the
            proportion.
        thresholds (iterable): The threshold(s) that `data` is compared
            against using `mode`.
        mode (str): Specifies the required relation of `data` to `thresholds`
            for a value to fall in the 'event' category (i.e. assigned to 1).
            Allowed modes are:

            - '>=' values in `data` greater than or equal to the
              corresponding threshold are assigned as 1.
            - '>' values in `data` greater than the corresponding threshold
              are assigned as 1.
            - '<=' values in `data` less than or equal to the corresponding
              threshold are assigned as 1.
            - '<' values in `data` less than the corresponding threshold
              are assigned as 1.
            - '==' values in `data` equal to the corresponding threshold
              are assigned as 1.
            - '!=' values in `data` not equal to the corresponding threshold
              are assigned as 1.
        dims (Optional[iterable]): Strings corresponding to the dimensions in
            the input xarray data objects that we wish to preserve in the
            output. All other dimensions in the input data objects are
            collapsed. The dimension 'threshold' should not be supplied, it
            will automatically be preserved.
        abs_tolerance (Optional[float]): If supplied, values in data that are
            within abs_tolerance of a threshold are considered to be equal to
            that threshold. This is generally used to correct for floating
            point rounding, e.g. we may want to consider 1.0000000000000002 as
            equal to 1.
        autosqueeze (bool): If True and only one threshold is supplied, then
            the dimension 'threshold' is squeezed out of the output. If
            `thresholds` is float-like, then this is forced to True,
            otherwise defaults to False.

    Returns:
        An xarray data object with the type of `data`, dimension `dims` +
        'threshold'. The values of the output are the proportion of `data` that
        satisfy the relationship to `thresholds` as specified by `mode`.

    Examples:
        >>> data = xr.DataArray([0, 0.5, 0.5, 1])
        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '==')
        <xarray.DataArray (threshold: 3)>
        array([ 0.25, 0.5 , 0.25])
        Coordinates:
          * threshold (threshold) float64 0.0 0.5 1.0
        Attributes:
            discretisation_tolerance: 0
            discretisation_mode: ==
        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '>=')
        <xarray.DataArray (threshold: 3)>
        array([ 1. , 0.75, 0.25])
        Coordinates:
          * threshold (threshold) float64 0.0 0.5 1.0
        Attributes:
            discretisation_tolerance: 0
            discretisation_mode: >=

    See also:
        `scores.processing.binary_discretise`
    """
    # Values are 1 when (data {mode} threshold), and 0 when ~(data {mode} threshold).
    discrete_data = binary_discretise(data, thresholds, mode, abs_tolerance=abs_tolerance, autosqueeze=autosqueeze)

    # The mean of the 0/1 values over the collapsed dimensions is the
    # proportion in each category. The dimensions to collapse are derived from
    # `data`, not from `discrete_data`.
    dims_to_collapse = dims_complement(data, dims=dims)
    proportion = discrete_data.mean(dim=dims_to_collapse)

    # Carry the attributes of the discretised data over to the result.
    proportion.attrs = discrete_data.attrs

    return proportion
126 changes: 126 additions & 0 deletions src/scores/processing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tools for processing data for verification"""
from collections.abc import Iterable
import operator
from typing import Optional, Union

Expand All @@ -7,6 +8,7 @@
import xarray as xr

from scores.typing import FlexibleDimensionTypes, XarrayLike
from scores.utils import gather_dimensions

INEQUALITY_MODES = {
">=": (operator.ge, -1),
Expand Down Expand Up @@ -260,3 +262,127 @@ def update_mask(mask, data_array):

# return matched data objects
return tuple(arg.where(mask) for arg in args)


def proportion_exceeding(
    data: XarrayLike,
    thresholds: Iterable,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
) -> XarrayLike:
    """
    Calculates the proportion of `data` equal to or exceeding `thresholds`.

    This is a thin wrapper around `_binary_discretise_proportion` using the
    `">="` mode.

    Args:
        data: The data (xarray.Dataset or xarray.DataArray) from which
            to calculate the proportion exceeding `thresholds`.
        thresholds: The proportion of values in `data` equal to or exceeding
            each of these thresholds will be calculated.
        reduce_dims: Forwarded, together with `preserve_dims`, to
            `gather_dimensions` to work out which dimensions of `data` are
            averaged over. Presumably the dimensions to collapse; confirm
            against `scores.utils.gather_dimensions`.
        preserve_dims: Forwarded, together with `reduce_dims`, to
            `gather_dimensions`. Presumably the dimensions to keep in the
            output; confirm against `scores.utils.gather_dimensions`.

    Returns:
        An xarray data object with the type of `data`, whose dimensions are
        the dimensions of `data` that were not reduced plus 'threshold'. The
        values are the proportion of `data` that are greater than or equal
        to the corresponding threshold.
    """
    # Pass the dimension arguments by keyword so they cannot be silently
    # swapped if the helper's positional order ever changes.
    return _binary_discretise_proportion(
        data,
        thresholds,
        ">=",
        reduce_dims=reduce_dims,
        preserve_dims=preserve_dims,
    )


def _binary_discretise_proportion(
    data: XarrayLike,
    thresholds: Iterable,
    mode: str,
    reduce_dims: FlexibleDimensionTypes = None,
    preserve_dims: FlexibleDimensionTypes = None,
    abs_tolerance: Optional[float] = None,
    autosqueeze: bool = False,
) -> XarrayLike:
    """
    Returns the proportion of `data` in each category. The categories are
    defined by the relationship of data to threshold as specified by
    the operation `mode`.

    Args:
        data: The data to convert into 0 and 1 according to the thresholds
            before calculating the proportion.
        thresholds: The threshold(s) that `data` is compared against using
            `mode`.
        mode: Specifies the required relation of `data` to `thresholds`
            for a value to fall in the 'event' category (i.e. assigned to 1).
            Allowed modes are:

            - '>=' values in `data` greater than or equal to the
              corresponding threshold are assigned as 1.
            - '>' values in `data` greater than the corresponding threshold
              are assigned as 1.
            - '<=' values in `data` less than or equal to the corresponding
              threshold are assigned as 1.
            - '<' values in `data` less than the corresponding threshold
              are assigned as 1.
            - '==' values in `data` equal to the corresponding threshold
              are assigned as 1.
            - '!=' values in `data` not equal to the corresponding threshold
              are assigned as 1.
        reduce_dims: Forwarded, together with `preserve_dims`, to
            `gather_dimensions` to work out which dimensions of `data` are
            averaged over. Presumably the dimensions to collapse; confirm
            against `scores.utils.gather_dimensions`.
        preserve_dims: Forwarded, together with `reduce_dims`, to
            `gather_dimensions`. Presumably the dimensions to keep in the
            output; confirm against `scores.utils.gather_dimensions`.
            The dimension 'threshold' should not be supplied, it will
            automatically be preserved.
        abs_tolerance: If supplied, values in data that are
            within abs_tolerance of a threshold are considered to be equal to
            that threshold. This is generally used to correct for floating
            point rounding, e.g. we may want to consider 1.0000000000000002 as
            equal to 1.
        autosqueeze: If True and only one threshold is
            supplied, then the dimension 'threshold' is squeezed out of the
            output. If `thresholds` is float-like, then this is forced to
            True, otherwise defaults to False.

    Returns:
        An xarray data object with the type of `data`, whose dimensions are
        the dimensions of `data` that were not reduced plus 'threshold'. The
        values of the output are the proportion of `data` that satisfy the
        relationship to `thresholds` as specified by `mode`.

    Examples:
        >>> data = xr.DataArray([0, 0.5, 0.5, 1])
        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '==')
        <xarray.DataArray (threshold: 3)>
        array([ 0.25, 0.5 , 0.25])
        Coordinates:
          * threshold (threshold) float64 0.0 0.5 1.0
        Attributes:
            discretisation_tolerance: 0
            discretisation_mode: ==
        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '>=')
        <xarray.DataArray (threshold: 3)>
        array([ 1. , 0.75, 0.25])
        Coordinates:
          * threshold (threshold) float64 0.0 0.5 1.0
        Attributes:
            discretisation_tolerance: 0
            discretisation_mode: >=

    See also:
        `scores.processing.binary_discretise`
    """
    # Values are 1 when (data {mode} threshold), and 0 when ~(data {mode} threshold).
    discrete_data = binary_discretise(data, thresholds, mode, abs_tolerance=abs_tolerance, autosqueeze=autosqueeze)

    # The mean of the 0/1 values over the gathered dimensions is the
    # proportion in each category. The dimensions are resolved from `data`
    # (passed as both dimension arguments), not from `discrete_data`.
    dims = gather_dimensions(data.dims, data.dims, reduce_dims, preserve_dims)
    proportion = discrete_data.mean(dim=dims)

    # Carry the attributes of the discretised data over to the result.
    proportion.attrs = discrete_data.attrs

    return proportion

0 comments on commit 9e31845

Please sign in to comment.