refactor: add gather_dimensions for proportion exceeding

nci · Dec 11, 2023 · 9e31845 · 9e31845
1 parent fd860d1
commit 9e31845
Show file tree

Hide file tree

Showing 2 changed files with 127 additions and 119 deletions.
diff --git a/src/scores/continuous/flip_flop_impl.py b/src/scores/continuous/flip_flop_impl.py
@@ -9,7 +9,7 @@
 import xarray as xr
 
 from scores.functions import angular_difference
-from scores.processing import binary_discretise
+from scores.processing import proportion_exceeding
 from scores.typing import XarrayLike
 from scores.utils import DimensionError, check_dims, dims_complement
 
@@ -433,121 +433,3 @@ def flip_flop_index_proportion_exceeding(
     flip_flop_exceeding.attrs = flip_flop_data.attrs
 
     return flip_flop_exceeding
-
-
-def proportion_exceeding(data: XarrayLike, thresholds: Iterable, dims: Optional[Iterable] = None):
-    """
-    Calculates the proportion of `data` equal to or exceeding `thresholds`.
-
-    Args:
-        data (xarray.Dataset or xarray.DataArray): The data from which
-            to calculate the proportion exceeding `thresholds`
-        thresholds (iterable): The proportion of Flip-Flop index results
-            equal to or exceeding these thresholds will be calculated.
-            the flip-flop index.
-        dims (Optional[iterable]): Strings corresponding to the dimensions in the input
-            xarray data objects that we wish to preserve in the output. All other
-            dimensions in the input data objects are collapsed.
-
-    Returns:
-        An xarray data object with the type of `data` and dimensions
-        `dims` + 'threshold'. The values are the proportion of `data`
-        that are greater than or equal to the corresponding threshold.
-
-    """
-    return _binary_discretise_proportion(data, thresholds, ">=", dims=dims)
-
-
-def _binary_discretise_proportion(
-    data: XarrayLike,
-    thresholds: Iterable,
-    mode: str,
-    dims: Optional[Iterable] = None,
-    abs_tolerance: Optional[bool] = None,
-    autosqueeze: Optional[bool] = False,
-):
-    """
-    Returns the proportion of `data` in each category. The categories are
-    defined by the relationship of data to threshold as specified by
-    the operation `mode`.
-
-    Args:
-        data (xarray.Dataset or xarray.DataArray): The data to convert
-           into 0 and 1 according the thresholds before calculating the
-           proportion.
-        thresholds (iterable): The proportion of Flip-Flop index results
-            equal to or exceeding these thresholds will be calculated.
-            the flip-flop index.
-        mode (str): Specifies the required relation of `data` to `thresholds`
-            for a value to fall in the 'event' category (i.e. assigned to 1).
-            Allowed modes are:
-
-            - '>=' values in `data` greater than or equal to the
-              corresponding threshold are assigned as 1.
-            - '>' values in `data` greater than the corresponding threshold
-              are assigned as 1.
-            - '<=' values in `data` less than or equal to the corresponding
-              threshold are assigned as 1.
-            - '<' values in `data` less than the corresponding threshold
-              are assigned as 1.
-            - '==' values in `data` equal to the corresponding threshold
-              are assigned as 1
-            - '!=' values in `data` not equal to the corresponding threshold
-              are assigned as 1.\
-        dims (Optional[iterable]): Strings corresponding to the dimensions in the input
-            xarray data objects that we wish to preserve in the output. All other
-            dimensions in the input data objects are collapsed.
-            The dimension 'threshold' should not be supplied, it will automatically
-            be preserved.
-        abs_tolerance (Optional[float]): If supplied, values in data that are
-            within abs_tolerance of a threshold are considered to be equal to
-            that threshold. This is generally used to correct for floating
-            point rounding, e.g. we may want to consider 1.0000000000000002 as
-            equal to 1.\
-        autosqueeze (Optional[bool]): If True and only one threshold is
-            supplied, then the dimension 'threshold' is squeezed out of the
-            output. If `thresholds` is float-like, then this is forced to
-            True, otherwise defaults to False.
-
-    Returns:
-        An xarray data object with the type of `data`, dimension `dims` +
-        'threshold'. The values of the output are the proportion of `data` that
-        satisfy the relationship to `thresholds` as specified by `mode`.
-
-    Examples:
-
-        >>> data = xr.DataArray([0, 0.5, 0.5, 1])
-
-        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '==')
-        <xarray.DataArray (threshold: 3)>
-        array([ 0.25,  0.5 ,  0.25])
-        Coordinates:
-          * threshold  (threshold) float64 0.0 0.5 1.0
-        Attributes:
-            discretisation_tolerance: 0
-            discretisation_mode: ==
-
-        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '>=')
-        <xarray.DataArray (threshold: 3)>
-        array([ 1.  ,  0.75,  0.25])
-        Coordinates:
-          * threshold  (threshold) float64 0.0 0.5 1.0
-        Attributes:
-            discretisation_tolerance: 0
-            discretisation_mode: >=
-
-    See also:
-        `scores.processing.binary_discretise`
-
-    """
-    # values are 1 when (data {mode} threshold), and 0 when ~(data {mode} threshold).
-    discrete_data = binary_discretise(data, thresholds, mode, abs_tolerance=abs_tolerance, autosqueeze=autosqueeze)
-
-    # the proportion in each category
-    dims_to_collapse = dims_complement(data, dims=dims)
-    proportion = discrete_data.mean(dim=dims_to_collapse)
-
-    # attach attributes
-    proportion.attrs = discrete_data.attrs
-
-    return proportion
diff --git a/src/scores/processing.py b/src/scores/processing.py
@@ -1,4 +1,5 @@
 """Tools for processing data for verification"""
+from collections.abc import Iterable
 import operator
 from typing import Optional, Union
 
@@ -7,6 +8,7 @@
 import xarray as xr
 
 from scores.typing import FlexibleDimensionTypes, XarrayLike
+from scores.utils import gather_dimensions
 
 INEQUALITY_MODES = {
     ">=": (operator.ge, -1),
@@ -260,3 +262,127 @@ def update_mask(mask, data_array):
 
     # return matched data objects
     return tuple(arg.where(mask) for arg in args)
+
+
+def proportion_exceeding(
+    data: XarrayLike,
+    thresholds: Iterable,
+    reduce_dims: FlexibleDimensionTypes = None,
+    preserve_dims: FlexibleDimensionTypes = None,
+):
+    """
+    Calculates the proportion of `data` equal to or exceeding `thresholds`.
+
+    Args:
+        data (xarray.Dataset or xarray.DataArray): The data from which
+            to calculate the proportion exceeding `thresholds`
+        thresholds (iterable): The thresholds to compare `data` against.
+            The proportion of values in `data` equal to or exceeding each
+            threshold will be calculated.
+        reduce_dims: Dimensions of `data` to collapse when calculating the proportion.
+        preserve_dims: Dimensions of `data` to keep in the output. Both arguments
+            are resolved by `scores.utils.gather_dimensions`.
+
+    Returns:
+        An xarray data object with the type of `data` whose dimensions are
+        the preserved dimensions + 'threshold'. The values are the proportion
+        of `data` that are greater than or equal to the corresponding threshold.
+
+    """
+    return _binary_discretise_proportion(data, thresholds, ">=", reduce_dims, preserve_dims)
+
+
+def _binary_discretise_proportion(
+    data: XarrayLike,
+    thresholds: Iterable,
+    mode: str,
+    reduce_dims: FlexibleDimensionTypes = None,
+    preserve_dims: FlexibleDimensionTypes = None,
+    abs_tolerance: Optional[bool] = None,
+    autosqueeze: bool = False,
+):
+    """
+    Returns the proportion of `data` in each category. The categories are
+    defined by the relationship of data to threshold as specified by
+    the operation `mode`.
+
+    Args:
+        data: The data to convert
+           into 0 and 1 according the thresholds before calculating the
+           proportion.
+        thresholds: The thresholds to compare `data` against. The proportion
+            of values in `data` that satisfy `mode` relative to each threshold
+            will be calculated.
+        mode: Specifies the required relation of `data` to `thresholds`
+            for a value to fall in the 'event' category (i.e. assigned to 1).
+            Allowed modes are:
+
+            - '>=' values in `data` greater than or equal to the
+              corresponding threshold are assigned as 1.
+            - '>' values in `data` greater than the corresponding threshold
+              are assigned as 1.
+            - '<=' values in `data` less than or equal to the corresponding
+              threshold are assigned as 1.
+            - '<' values in `data` less than the corresponding threshold
+              are assigned as 1.
+            - '==' values in `data` equal to the corresponding threshold
+              are assigned as 1
+            - '!=' values in `data` not equal to the corresponding threshold
+              are assigned as 1.
+        reduce_dims: Dimensions of `data` to collapse when calculating the
+            proportion.
+        preserve_dims: Dimensions of `data` to keep in the output. Both
+            arguments are resolved by `scores.utils.gather_dimensions`. The dimension
+            'threshold' should not be supplied, it will automatically be preserved.
+        abs_tolerance: If supplied, values in data that are
+            within abs_tolerance of a threshold are considered to be equal to
+            that threshold. This is generally used to correct for floating
+            point rounding, e.g. we may want to consider 1.0000000000000002 as
+            equal to 1.
+        autosqueeze: If True and only one threshold is
+            supplied, then the dimension 'threshold' is squeezed out of the
+            output. If `thresholds` is float-like, then this is forced to
+            True, otherwise defaults to False.
+
+    Returns:
+        An xarray data object with the type of `data`, whose dimensions are the
+        preserved dimensions + 'threshold'. The values are the proportion of `data`
+        that satisfy the relationship to `thresholds` as specified by `mode`.
+
+    Examples:
+
+        >>> data = xr.DataArray([0, 0.5, 0.5, 1])
+
+        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '==')
+        <xarray.DataArray (threshold: 3)>
+        array([ 0.25,  0.5 ,  0.25])
+        Coordinates:
+          * threshold  (threshold) float64 0.0 0.5 1.0
+        Attributes:
+            discretisation_tolerance: 0
+            discretisation_mode: ==
+
+        >>> _binary_discretise_proportion(data, [0, 0.5, 1], '>=')
+        <xarray.DataArray (threshold: 3)>
+        array([ 1.  ,  0.75,  0.25])
+        Coordinates:
+          * threshold  (threshold) float64 0.0 0.5 1.0
+        Attributes:
+            discretisation_tolerance: 0
+            discretisation_mode: >=
+
+    See also:
+        `scores.processing.binary_discretise`
+
+    """
+    # values are 1 when (data {mode} threshold), and 0 when ~(data {mode} threshold).
+    discrete_data = binary_discretise(data, thresholds, mode, abs_tolerance=abs_tolerance, autosqueeze=autosqueeze)
+
+    # The proportion in each category
+    dims = gather_dimensions(data.dims, data.dims, reduce_dims, preserve_dims)
+    proportion = discrete_data.mean(dim=dims)
+
+    # attach attributes
+    proportion.attrs = discrete_data.attrs
+
+    return proportion