Source code for improver.threshold

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Module containing thresholding classes."""

import numbers
from collections.abc import Iterable
from typing import Dict, List, Optional, Tuple, Union

import iris
import numpy as np
from cf_units import Unit
from iris.cube import Cube

from improver import PostProcessingPlugin
from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
    RebadgePercentilesAsRealizations,
)
from improver.metadata.constants import FLOAT_DTYPE
from improver.metadata.probabilistic import (
    format_cell_methods_for_probability,
    probability_is_above_or_below,
)
from improver.metadata.utilities import enforce_time_point_standard
from improver.utilities.cube_manipulation import enforce_coordinate_ordering
from improver.utilities.probability_manipulation import comparison_operator_dict
from improver.utilities.rescale import rescale
from improver.utilities.spatial import (
    create_vicinity_coord,
    distance_to_number_of_grid_cells,
    maximum_within_vicinity,
    rename_vicinity_cube,
)


class Threshold(PostProcessingPlugin):
    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership function
    around the threshold values provided. A cube will be returned with a new
    threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(
        self,
        threshold_values: Optional[Union[float, List[float]]] = None,
        threshold_config: Optional[Dict[str, Union[List[float], str]]] = None,
        fuzzy_factor: Optional[float] = None,
        threshold_units: Optional[str] = None,
        comparison_operator: str = ">",
        collapse_coord: Optional[Union[str, List[str]]] = None,
        collapse_cell_methods: Optional[dict] = None,
        vicinity: Optional[Union[float, List[float]]] = None,
        fill_masked: Optional[float] = None,
    ) -> None:
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)). If fuzzy_factor is not None, fuzzy
        bounds are calculated using the threshold value in the units in which
        it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded
        value of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore,
        in this case, the thresholded exceedance probabilities between
        4.5 mm/hr and 7.5 mm/hr would follow the pattern:

        ::

            Data value  | Probability
            ------------|-------------
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            threshold_values:
                Threshold value or values (e.g. 270K, 300K) to use when
                calculating the probability of the input relative to the
                threshold value(s). The units of these values, e.g. K in the
                example, can be defined using the threshold_units argument or
                are otherwise assumed to match the units of the diagnostic
                being thresholded. threshold_values and threshold_config are
                mutually exclusive arguments; defining both will lead to an
                exception.
            threshold_config (dict):
                Threshold configuration containing threshold values and
                (optionally) fuzzy bounds. Best used in combination with
                'threshold_units'. It should contain a dictionary of strings
                that can be interpreted as floats with the structure:
                "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
                e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
                or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
                Repeated thresholds with different bounds are ignored; only
                the last duplicate will be used. threshold_values and
                threshold_config are mutually exclusive arguments; defining
                both will lead to an exception.
            fuzzy_factor:
                Optional: specifies lower bound for fuzzy membership value
                when multiplied by each threshold. Upper bound is equivalent
                linear distance above threshold.
            threshold_units:
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            comparison_operator:
                Indicates the comparison_operator to use with the threshold,
                e.g. 'ge' or '>=' to evaluate 'data >= threshold' or '<' to
                evaluate 'data < threshold'. When using fuzzy thresholds,
                there is no difference between < and <= or > and >=.
                Valid choices: > >= < <= gt ge lt le.
            collapse_coord:
                A coordinate or list of coordinates over which an average is
                calculated by collapsing these coordinates. The only
                supported options are combinations of "realization" and
                "percentile" with "time"; realization and percentile cannot
                both be collapsed. If "percentile" is included the percentile
                coordinate will be rebadged as a realization coordinate prior
                to collapse. The percentile coordinate needs to be evenly
                spaced around the 50th percentile to allow successful
                conversion from percentiles to realizations and subsequent
                collapsing over the realization coordinate.
            collapse_cell_methods:
                An optional dictionary that describes cell methods to apply
                in relation to the collapsed coordinates. By default the
                threshold method does not return a cell method for collapsed
                coordinates. The dictionary should take the form:
                {"coord_name": "method"} with an entry for every coordinate
                for which a method is desired.
            fill_masked:
                If provided, all masked points in the cube will be replaced
                with the provided value.
            vicinity:
                A list of vicinity radii to use to calculate maximum in
                vicinity thresholded values. This must be done prior to
                realization collapse.

        Raises:
            ValueError: If threshold_config and threshold_values are both set.
            ValueError: If neither threshold_config nor threshold_values are
                set.
            ValueError: If both fuzzy_factor and bounds within the
                threshold_config are set.
            ValueError: If the fuzzy_factor is not strictly between 0 and 1.
            ValueError: If using a fuzzy factor with a threshold of 0.0.
            ValueError: Can only collapse over one or a combination of
                realization and percentile coordinates with the time
                coordinate.
            ValueError: If collapse_coord contains both percentile and
                realization.
        """
        if threshold_config and threshold_values:
            raise ValueError(
                "threshold_config and threshold_values are mutually exclusive "
                "arguments - please provide one or the other, not both"
            )
        if threshold_config is None and threshold_values is None:
            raise ValueError(
                "One of threshold_config or threshold_values must be provided."
            )

        thresholds, fuzzy_bounds = self._set_thresholds(
            threshold_values, threshold_config
        )
        self.thresholds = [thresholds] if np.isscalar(thresholds) else thresholds
        self.threshold_units = (
            None if threshold_units is None else Unit(threshold_units)
        )
        self.threshold_coord_name = None

        # read fuzzy factor or set to 1 (basic thresholding)
        fuzzy_factor_loc = 1.0
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "both a fuzzy_factor and use a threshold_config that "
                    "specifies bounds."
                )
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(fuzzy_factor)
                )
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0, use "
                    "the threshold_config approach instead."
                )
            fuzzy_factor_loc = fuzzy_factor

        if fuzzy_bounds is None:
            self.fuzzy_bounds = self._generate_fuzzy_bounds(fuzzy_factor_loc)
        else:
            self.fuzzy_bounds = (
                [fuzzy_bounds] if isinstance(fuzzy_bounds, tuple) else fuzzy_bounds
            )
            self._check_fuzzy_bounds()

        self.original_units = None
        self.comparison_operator_dict = comparison_operator_dict()
        self.comparison_operator_string = comparison_operator
        self._decode_comparison_operator_string()

        if collapse_coord and not isinstance(collapse_coord, list):
            collapse_coord = [collapse_coord]
        if collapse_coord and not set(collapse_coord).issubset(
            ["percentile", "realization", "time"]
        ):
            raise ValueError(
                "Can only collapse over one or a combination of realization, "
                "percentile, and time coordinates."
            )
        if (
            collapse_coord
            and "percentile" in collapse_coord
            and "realization" in collapse_coord
        ):
            raise ValueError(
                "Cannot collapse over both percentile and realization coordinates."
            )
        self.collapse_coord = collapse_coord

        if collapse_cell_methods:
            if not self.collapse_coord:
                raise ValueError(
                    "Cannot apply cell methods without collapsing a coordinate."
                )
            if not set(collapse_cell_methods.keys()).issubset(self.collapse_coord):
                raise ValueError(
                    "Cell methods can only be defined for coordinates that "
                    "are being collapsed."
                )
        self.collapse_cell_methods = collapse_cell_methods

        self.vicinity = None
        if vicinity:
            if isinstance(vicinity, Iterable):
                self.vicinity = [float(x) for x in vicinity]
            else:
                self.vicinity = [float(vicinity)]

        if fill_masked is not None:
            fill_masked = float(fill_masked)
        self.fill_masked = fill_masked

    @staticmethod
    def _set_thresholds(
        threshold_values: Optional[Union[float, List[float]]],
        threshold_config: Optional[dict],
    ) -> Tuple[List[float], Optional[List[float]]]:
        """
        Interprets a threshold_config dictionary if provided, or ensures that
        a list of thresholds has suitable precision.

        Args:
            threshold_values:
                A list of threshold values or a single threshold value.
            threshold_config:
                A dictionary defining threshold values and optionally upper
                and lower bounds for those values to apply fuzzy thresholding.

        Returns:
            A tuple containing:
                thresholds:
                    A list of threshold values as float64 type.
                fuzzy_bounds:
                    A list of tuples that define the upper and lower bounds
                    associated with each threshold value, these also as
                    float64 type. If these are not set, None is returned
                    instead.
        """
        if threshold_config:
            thresholds = []
            fuzzy_bounds = []
            for key in threshold_config.keys():
                # Ensure thresholds are float64 to avoid rounding errors
                # during possible unit conversion.
                thresholds.append(float(key))
                # If the first threshold has no bounds, fuzzy_bounds is
                # set to None and subsequent bounds checks are skipped
                if threshold_config[key] == "None":
                    fuzzy_bounds = None
                    continue
                fuzzy_bounds.append(tuple(threshold_config[key]))
        else:
            # Ensure thresholds are float64 to avoid rounding errors during
            # possible unit conversion.
            if isinstance(threshold_values, numbers.Number):
                threshold_values = [threshold_values]
            thresholds = [float(x) for x in threshold_values]
            fuzzy_bounds = None
        return thresholds, fuzzy_bounds

    def _generate_fuzzy_bounds(
        self, fuzzy_factor_loc: float
    ) -> List[Tuple[float, float]]:
        """Construct fuzzy bounds from a fuzzy factor. If the fuzzy factor
        is 1, the fuzzy bounds match the threshold values for basic
        thresholding.
        """
        fuzzy_bounds = []
        for thr in self.thresholds:
            lower_thr = thr * fuzzy_factor_loc
            upper_thr = thr * (2.0 - fuzzy_factor_loc)
            if thr < 0:
                lower_thr, upper_thr = upper_thr, lower_thr
            fuzzy_bounds.append((lower_thr, upper_thr))
        return fuzzy_bounds

    def _check_fuzzy_bounds(self) -> None:
        """If fuzzy bounds have been set from the command line, check they
        are consistent with the required thresholds
        """
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            if len(bounds) != 2:
                raise ValueError(
                    "Invalid bounds for one threshold: {}."
                    " Expected 2 floats.".format(bounds)
                )
            if bounds[0] > thr or bounds[1] < thr:
                bounds_msg = (
                    "Threshold must be within bounds: "
                    "!( {} <= {} <= {} )".format(bounds[0], thr, bounds[1])
                )
                raise ValueError(bounds_msg)

    def _add_threshold_coord(self, cube: Cube, threshold: float) -> None:
        """
        Add a scalar threshold-type coordinate with correct name and units
        to a 2D slice containing thresholded data.

        The 'threshold' coordinate will be float64 to avoid rounding errors
        during possible unit conversion.

        Args:
            cube:
                Cube containing thresholded data (1s and 0s)
            threshold:
                Value at which the data has been thresholded
        """
        coord = iris.coords.DimCoord(
            np.array([threshold], dtype="float64"), units=cube.units
        )
        coord.rename(self.threshold_coord_name)
        coord.var_name = "threshold"
        cube.add_aux_coord(coord)

    def _decode_comparison_operator_string(self) -> None:
        """Sets self.comparison_operator based on
        self.comparison_operator_string. This is a dict containing the keys
        'function' and 'spp_string'.
        Raises errors if invalid options are found.

        Raises:
            ValueError: If self.comparison_operator_string does not match a
                defined method.
        """
        try:
            self.comparison_operator = self.comparison_operator_dict[
                self.comparison_operator_string
            ]
        except KeyError:
            msg = (
                f'String "{self.comparison_operator_string}" '
                "does not match any known comparison_operator method"
            )
            raise ValueError(msg)

    def _update_metadata(self, cube: Cube) -> None:
        """Rename the cube and add attributes to the threshold coordinate
        after merging

        Args:
            cube:
                Cube containing thresholded data
        """
        threshold_coord = cube.coord(self.threshold_coord_name)
        threshold_coord.attributes.update(
            {"spp__relative_to_threshold": self.comparison_operator.spp_string}
        )
        # Add any user defined cell_methods.
        if self.collapse_cell_methods:
            for coord, method in self.collapse_cell_methods.items():
                cell_method = iris.coords.CellMethod(method, coord)
                cube.add_cell_method(cell_method)

        if cube.cell_methods:
            format_cell_methods_for_probability(cube, self.threshold_coord_name)

        cube.rename(
            "probability_of_{parameter}_{relative_to}_threshold".format(
                parameter=self.threshold_coord_name,
                relative_to=probability_is_above_or_below(cube),
            )
        )
        cube.units = Unit(1)

    def _calculate_truth_value(
        self, cube: Cube, threshold: float, bounds: Tuple[float, float]
    ) -> np.ndarray:
        """
        Compares the diagnostic values to the threshold value, converting
        units and applying fuzzy bounds as required. Returns the truth value.

        Args:
            cube:
                A cube containing the diagnostic values. The cube rather than
                array is passed in to allow for unit conversion.
            threshold:
                A single threshold value against which to compare the
                diagnostic values.
            bounds:
                The fuzzy bounds used for applying fuzzy thresholding.

        Returns:
            truth_value:
                An array of truth values given by the comparison of the
                diagnostic data to the threshold value. This is returned at
                the default float precision.
        """
        if self.threshold_units is not None:
            cube.convert_units(self.threshold_units)
        # Ensure that the threshold value is the same datatype as the cube
        # data to avoid numerical precision affecting equality comparisons.
        threshold = np.float64(threshold).astype(cube.dtype)

        # if upper and lower bounds are equal, set a deterministic 0/1
        # probability based on exceedance of the threshold
        if bounds[0] == bounds[1]:
            truth_value = self.comparison_operator.function(cube.data, threshold)
        # otherwise, scale exceedance probabilities linearly between 0/1
        # at the min/max fuzzy bounds and 0.5 at the threshold value
        else:
            truth_value = np.where(
                cube.data < threshold,
                rescale(
                    cube.data,
                    data_range=(bounds[0], threshold),
                    scale_range=(0.0, 0.5),
                    clip=True,
                ),
                rescale(
                    cube.data,
                    data_range=(threshold, bounds[1]),
                    scale_range=(0.5, 1.0),
                    clip=True,
                ),
            )
            # Rescaling exposes masked points so use the rescaled data only
            # in the unmasked locations, set masked locations to 0 and
            # reapply masking.
            if np.ma.is_masked(cube.data):
                truth_value = np.where(cube.data.mask, 0, truth_value)
                truth_value = np.ma.masked_array(truth_value, mask=cube.data.mask)

        # if requirement is for probabilities less_than or
        # less_than_or_equal_to the threshold (rather than
        # greater_than or greater_than_or_equal_to), invert
        # the exceedance probability
        if "less_than" in self.comparison_operator.spp_string:
            truth_value = 1.0 - truth_value

        return truth_value.astype(FLOAT_DTYPE)

    def _vicinity_processing(
        self,
        thresholded_cube: Cube,
        truth_value: np.ndarray,
        unmasked: np.ndarray,
        landmask: np.ndarray,
        grid_point_radii: List[int],
        index: int,
    ):
        """
        Apply max in vicinity processing to the thresholded values. The
        resulting modified threshold values are changed in place in
        thresholded_cube.

        Args:
            thresholded_cube:
                The cube into which the resulting values are added.
            truth_value:
                An array of thresholded values prior to the application of
                vicinity processing.
            unmasked:
                Array identifying unmasked data points that should be
                updated.
            landmask:
                A binary grid of the same size as truth_value that
                differentiates between land and sea points to allow the
                different surface types to be processed independently.
            grid_point_radii:
                The vicinity radius to apply expressed as a number of grid
                cells.
            index:
                Index corresponding to the threshold coordinate to identify
                which array we are summing the contribution into.
        """
        for ivic, vicinity in enumerate(grid_point_radii):
            if truth_value.ndim > 2:
                maxes = np.zeros(truth_value.shape, dtype=FLOAT_DTYPE)
                for yxindex in np.ndindex(*truth_value.shape[:-2]):
                    slice_max = maximum_within_vicinity(
                        truth_value[yxindex + (slice(None), slice(None))],
                        vicinity,
                        landmask,
                    )
                    maxes[yxindex] = slice_max
            else:
                maxes = maximum_within_vicinity(truth_value, vicinity, landmask)
            thresholded_cube.data[ivic][index][unmasked] += maxes[unmasked]

    def _create_threshold_cube(self, cube: Cube) -> Cube:
        """
        Create a cube with suitable metadata and zeroed data array for
        storing the thresholded diagnostic data.

        Args:
            cube:
                A template cube from which to take the data shape.

        Returns:
            thresholded_cube:
                A cube of a suitable form for storing and describing the
                thresholded data.
        """
        template = cube.copy(data=np.zeros(cube.shape, dtype=FLOAT_DTYPE))
        if self.threshold_units is not None:
            template.units = self.threshold_units

        thresholded_cube = iris.cube.CubeList()
        for threshold in self.thresholds:
            thresholded = template.copy()
            self._add_threshold_coord(thresholded, threshold)
            thresholded.units = 1
            thresholded_cube.append(thresholded)
        thresholded_cube = thresholded_cube.merge_cube()

        # Promote the threshold coordinate to be dimensional if it is not
        # already.
        if not thresholded_cube.coord_dims(self.threshold_coord_name):
            thresholded_cube = iris.util.new_axis(
                thresholded_cube, self.threshold_coord_name
            )

        if self.vicinity is not None:
            vicinity_coord = create_vicinity_coord(self.vicinity)
            vicinity_expanded = iris.cube.CubeList()
            for vicinity_coord_slice in vicinity_coord:
                thresholded_copy = thresholded_cube.copy()
                thresholded_copy.add_aux_coord(vicinity_coord_slice)
                vicinity_expanded.append(thresholded_copy)
            del thresholded_copy
            thresholded_cube = vicinity_expanded.merge_cube()

            if not thresholded_cube.coords(vicinity_coord.name(), dim_coords=True):
                thresholded_cube = iris.util.new_axis(
                    thresholded_cube, vicinity_coord.name()
                )

        # Ensure the threshold cube has suitable metadata for a
        # probabilistic output.
        self._update_metadata(thresholded_cube)
        return thresholded_cube

    def process(self, input_cube: Cube, landmask: Cube = None) -> Cube:
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon
        whether fuzzy_bounds are supplied. If the plugin has a
        "threshold_units" member, this is used to convert both thresholds
        and fuzzy bounds into the units of the input cube.

        Args:
            input_cube:
                Cube to threshold. The code is dimension-agnostic.
            landmask:
                Cube containing a landmask. Used with vicinity processing
                only.

        Returns:
            Cube after a threshold has been applied, possibly using fuzzy
            bounds, and / or with vicinity processing applied to return a
            maximum in vicinity value. The data within this cube will
            contain values between 0 and 1 to indicate whether a given
            threshold has been exceeded or not.

            The cube meta-data will contain:
                * Input_cube name prepended with
                  probability_of_X_above(or below)_threshold (where X is the
                  diagnostic under consideration)
                * The cube name will be suffixed with _in_vicinity if
                  vicinity processing has been applied.
                * Threshold dimension coordinate with same units as
                  input_cube
                * Threshold attribute ("greater_than",
                  "greater_than_or_equal_to", "less_than", or
                  "less_than_or_equal_to" depending on the operator)
                * Cube units set to (1).

        Raises:
            ValueError: if collapse coordinates are not found in input_cube.
            ValueError: Cannot apply land-mask cube without in-vicinity
                processing.
            ValueError: if a np.nan value is detected within the input cube.
        """
        input_cube_crds = [crd.name() for crd in input_cube.coords()]
        if self.collapse_coord and not set(self.collapse_coord).issubset(
            input_cube_crds
        ):
            raise ValueError(
                "Cannot collapse over coordinates not present in the input_cube."
                f"collapse_coords: {self.collapse_coord}\n"
                f"input_cube_coords: {input_cube_crds}"
            )

        if self.vicinity is None and landmask is not None:
            raise ValueError(
                "Cannot apply land-mask cube without in-vicinity processing"
            )

        if self.fill_masked is not None:
            input_cube.data = np.ma.filled(input_cube.data, self.fill_masked)

        if self.collapse_coord and "percentile" in self.collapse_coord:
            input_cube = RebadgePercentilesAsRealizations()(input_cube)
            self.collapse_coord.remove("percentile")
            self.collapse_coord.append("realization")

        self.original_units = input_cube.units
        self.threshold_coord_name = input_cube.name()

        # Retain only the landmask array as bools.
        if landmask is not None:
            landmask = landmask.data.astype(bool)

        if self.vicinity is not None:
            grid_point_radii = [
                distance_to_number_of_grid_cells(input_cube, radius)
                for radius in self.vicinity
            ]

        # Slice over collapse coords if required and create an empty
        # threshold cube to store the resulting thresholded data.
        if self.collapse_coord is not None:
            input_slices = input_cube.slices_over(self.collapse_coord)
            thresholded_cube = self._create_threshold_cube(
                next(input_cube.slices_over(self.collapse_coord))
            )
        else:
            input_slices = [input_cube]
            thresholded_cube = self._create_threshold_cube(input_cube)

        # Create a zeroed array for storing contributions (i.e. number of
        # unmasked realization values contributing to calculation).
        contribution_total = np.zeros(
            next(thresholded_cube.slices_over(self.threshold_coord_name)).shape,
            dtype=int,
        )

        for cube in input_slices:
            # Tests performed on each slice rather than whole cube to avoid
            # realising all of the data.
            if np.isnan(cube.data).any():
                raise ValueError("Error: NaN detected in input cube data")

            if np.ma.is_masked(cube.data):
                mask = cube.data.mask
                unmasked = ~mask
            else:
                unmasked = np.ones(cube.shape, dtype=bool)

            # All unmasked points contribute 1 to the numerator for
            # calculating a coordinate collapsed truth value. Note that if
            # input_slices above includes the realization/time coordinate
            # (i.e. we are not collapsing that coordinate) then
            # contribution_total will include this extra dimension and our
            # denominator will be 1 at all unmasked points.
            contribution_total += unmasked

            for index, (threshold, bounds) in enumerate(
                zip(self.thresholds, self.fuzzy_bounds)
            ):
                truth_value = self._calculate_truth_value(cube, threshold, bounds)
                if self.vicinity is not None:
                    self._vicinity_processing(
                        thresholded_cube,
                        truth_value,
                        unmasked,
                        landmask,
                        grid_point_radii,
                        index,
                    )
                else:
                    thresholded_cube.data[index][unmasked] += truth_value[unmasked]

        # Any x-y position for which there are no valid contributions must
        # be a masked point in every realization, so we can use this array
        # to modify only unmasked points and reapply a mask to the final
        # result.
        valid = contribution_total.astype(bool)

        # Slice over the array to avoid ballooning the memory required for
        # the denominators through broadcasting.
        enforce_coordinate_ordering(thresholded_cube, self.threshold_coord_name)
        for i, dslice in enumerate(thresholded_cube.data):
            result = np.divide(dslice[valid], contribution_total[valid])
            thresholded_cube.data[i, valid] = result

        if not valid.all():
            thresholded_cube.data = np.ma.masked_array(
                thresholded_cube.data,
                mask=np.broadcast_to(~valid, thresholded_cube.shape),
            )

        # Revert threshold coordinate units to those of the input cube.
        thresholded_cube.coord(self.threshold_coord_name).convert_units(
            self.original_units
        )

        # If we've collapsed a time coordinate we need to add back suitable
        # time coordinates that capture the range of the times collapsed.
        if self.collapse_coord and "time" in self.collapse_coord:
            for crd in ["time", "forecast_period"]:
                time_crd = input_cube.coord(crd).copy().collapsed()
                thresholded_cube.remove_coord(crd)
                thresholded_cube.add_aux_coord(time_crd)
            enforce_time_point_standard(thresholded_cube)

        # Squeeze any single value dimension coordinates to make them scalar.
        thresholded_cube = iris.util.squeeze(thresholded_cube)

        if self.collapse_coord is not None and "realization" in self.collapse_coord:
            thresholded_cube.remove_coord("realization")

        # Re-cast to 32bit now that any unit conversion has already taken
        # place.
        thresholded_cube.coord(var_name="threshold").points = thresholded_cube.coord(
            var_name="threshold"
        ).points.astype(FLOAT_DTYPE)

        if self.vicinity is not None:
            rename_vicinity_cube(thresholded_cube)

        enforce_coordinate_ordering(
            thresholded_cube,
            [
                "time",
                "realization",
                "percentile",
                self.threshold_coord_name,
                "radius_of_vicinity",
            ],
        )
        return thresholded_cube
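

# Illustrative usage sketch, not part of the original module: this private
# helper shows how the Threshold plugin above might be exercised on a small
# synthetic cube. The grid, diagnostic values, and threshold chosen below are
# hypothetical and exist only for illustration.
def _example_basic_threshold() -> Cube:
    """A minimal sketch, assuming a 2x2 latitude/longitude temperature cube
    in kelvin. The returned cube should hold 0/1 probabilities of the data
    meeting or exceeding 273.15 K."""
    data = np.array([[271.0, 274.0], [273.0, 276.0]], dtype=np.float32)
    latitude = iris.coords.DimCoord(
        np.array([50.0, 51.0], dtype=np.float32),
        standard_name="latitude",
        units="degrees",
    )
    longitude = iris.coords.DimCoord(
        np.array([0.0, 1.0], dtype=np.float32),
        standard_name="longitude",
        units="degrees",
    )
    cube = Cube(
        data,
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(latitude, 0), (longitude, 1)],
    )
    # Basic thresholding: no fuzzy bounds, no vicinity, no coordinate
    # collapsing.
    plugin = Threshold(threshold_values=[273.15], comparison_operator=">=")
    return plugin.process(cube)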


class LatitudeDependentThreshold(Threshold):
    """Apply a latitude-dependent threshold truth criterion to a cube.

    Calculates the threshold truth values based on the threshold function
    provided. A cube will be returned with a new threshold dimension
    auxiliary coordinate on the latitude axis.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(
        self,
        threshold_function: callable,
        threshold_units: Optional[str] = None,
        comparison_operator: str = ">",
    ) -> None:
        """
        Sets up latitude-dependent threshold class

        Args:
            threshold_function:
                A function which takes a latitude value (in degrees) and
                returns the desired threshold.
            threshold_units:
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            comparison_operator:
                Indicates the comparison_operator to use with the threshold,
                e.g. 'ge' or '>=' to evaluate 'data >= threshold' or '<' to
                evaluate 'data < threshold'. When using fuzzy thresholds,
                there is no difference between < and <= or > and >=.
                Valid choices: > >= < <= gt ge lt le.
        """
        super().__init__(
            threshold_values=[1],
            threshold_units=threshold_units,
            comparison_operator=comparison_operator,
        )
        if not callable(threshold_function):
            raise TypeError("Threshold must be callable")
        self.threshold_function = threshold_function

    def _add_latitude_threshold_coord(
        self, cube: Cube, threshold: np.ndarray
    ) -> None:
        """
        Add a 1D threshold-type coordinate with correct name and units
        to a 2D slice containing thresholded data.
        Assumes latitude coordinate is always the penultimate one (which
        standardise will have enforced)

        Args:
            cube:
                Cube containing thresholded data (1s and 0s)
            threshold:
                Values at which the data has been thresholded (matches
                cube's y-axis)
        """
        coord = iris.coords.AuxCoord(
            threshold.astype(FLOAT_DTYPE), units=cube.units
        )
        coord.rename(self.threshold_coord_name)
        coord.var_name = "threshold"
        cube.add_aux_coord(coord, data_dims=len(cube.shape) - 2)

    def process(self, input_cube: Cube) -> Cube:
        """Convert each point to a truth value based on provided threshold,
        fuzzy bound, and vicinity values. If the plugin has a
        "threshold_units" member, this is used to convert a copy of the
        input_cube into the units specified.

        Args:
            input_cube:
                Cube to threshold. Must have a latitude coordinate.

        Returns:
            Cube after a threshold has been applied. The data within this
            cube will contain values between 0 and 1 to indicate whether a
            given threshold has been exceeded or not.

            The cube meta-data will contain:
                * Input_cube name prepended with
                  probability_of_X_above(or below)_threshold (where X is the
                  diagnostic under consideration)
                * Threshold dimension coordinate with same units as
                  input_cube
                * Threshold attribute ("greater_than",
                  "greater_than_or_equal_to", "less_than", or
                  "less_than_or_equal_to" depending on the operator)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.
        """
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        self.threshold_coord_name = input_cube.name()

        cube = input_cube.copy()
        if self.threshold_units is not None:
            cube.convert_units(self.threshold_units)

        cube.coord("latitude").convert_units("degrees")
        threshold_variant = cube.coord("latitude").points
        threshold_over_latitude = np.array(self.threshold_function(threshold_variant))

        # Add a scalar axis for the longitude axis so that numpy's array-
        # broadcasting knows what we want to do
        truth_value = self.comparison_operator.function(
            cube.data, np.expand_dims(threshold_over_latitude, 1)
        )

        truth_value = truth_value.astype(FLOAT_DTYPE)

        if np.ma.is_masked(cube.data):
            # update unmasked points only
            cube.data[~input_cube.data.mask] = truth_value[~input_cube.data.mask]
        else:
            cube.data = truth_value

        self._add_latitude_threshold_coord(cube, threshold_over_latitude)
        cube.coord(var_name="threshold").convert_units(input_cube.units)

        self._update_metadata(cube)
        enforce_coordinate_ordering(cube, ["time", "realization", "percentile"])

        return cube
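

# Illustrative usage sketch, not part of the original module: a hypothetical
# example of driving LatitudeDependentThreshold with a simple callable of
# latitude. The grid, values, and the linear threshold function below are
# assumptions made purely for illustration.
def _example_latitude_dependent_threshold() -> Cube:
    """A minimal sketch, assuming degrees latitude on the penultimate axis
    as _add_latitude_threshold_coord requires, with a threshold that
    decreases away from the equator."""
    data = np.full((3, 2), 275.0, dtype=np.float32)
    latitude = iris.coords.DimCoord(
        np.array([0.0, 30.0, 60.0], dtype=np.float32),
        standard_name="latitude",
        units="degrees",
    )
    longitude = iris.coords.DimCoord(
        np.array([0.0, 10.0], dtype=np.float32),
        standard_name="longitude",
        units="degrees",
    )
    cube = Cube(
        data,
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(latitude, 0), (longitude, 1)],
    )
    # Hypothetical threshold function: 280 K at the equator, falling by
    # 0.1 K per degree of latitude.
    plugin = LatitudeDependentThreshold(
        lambda lats: 280.0 - 0.1 * lats, comparison_operator=">="
    )
    return plugin.process(cube)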