Source code for improver.expected_value

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Calculation of expected value from a probability distribution."""

import numpy as np
from iris.coords import CellMethod
from iris.cube import Cube

from improver import PostProcessingPlugin
from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
    RebadgePercentilesAsRealizations,
)
from improver.metadata.probabilistic import (
    find_threshold_coordinate,
    is_percentile,
    is_probability,
)
from improver.utilities.cube_manipulation import collapse_realizations
from improver.utilities.probability_manipulation import to_threshold_inequality


class ExpectedValue(PostProcessingPlugin):
    """Calculation of expected value from a probability distribution"""
    @staticmethod
    def integrate_over_thresholds(cube: Cube) -> Cube:
        """
        Calculation of expected value for threshold data by converting from
        cumulative distribution (CDF) to probability density (PDF), then
        integrating over the threshold dimension.

        Args:
            cube: Probabilistic data with a threshold coordinate.

        Returns:
            Expected value of probability distribution. Same shape as input
            cube but with the threshold coordinate removed.
        """
        # make sure that the threshold direction is "below"
        cube = to_threshold_inequality(cube, above=False)
        # set up threshold values, these will be needed as a multiplier
        # during integration
        threshold_coord = find_threshold_coordinate(cube)
        threshold_coord_idx = cube.dim_coords.index(threshold_coord)
        thresholds = threshold_coord.points
        # check the thresholds are in strictly increasing order
        if np.any(np.diff(thresholds) <= 0.0):
            raise ValueError("threshold coordinate is not strictly increasing")
        # add an extra threshold below/above with zero/one probability to
        # ensure that the PDF integral covers the full CDF probability range
        # thresholds are usually float32 with epsilon of ~= 1.1e-7
        eps = np.finfo(thresholds.dtype).eps
        # for small values (especially exactly zero), pad by at least epsilon;
        # for larger values, use the next representable float number
        thresholds_expanded = np.array(
            [
                thresholds[0] - max(eps, abs(thresholds[0] * eps)),
                *thresholds,
                thresholds[-1] + max(eps, abs(thresholds[-1] * eps)),
            ]
        )
        # expand the data to match the newly added thresholds
        extra_data_shape = list(cube.shape)
        extra_data_shape[threshold_coord_idx] = 1
        data_expanded = np.concatenate(
            [np.zeros(extra_data_shape), cube.data, np.ones(extra_data_shape)],
            axis=threshold_coord_idx,
            dtype=np.float32,
        )
        data_pdf = np.diff(data_expanded, axis=threshold_coord_idx)
        del data_expanded
        # the PDF should never be negative
        if np.any(data_pdf < 0.0):
            raise ValueError(
                "PDF contains negative values - CDF was likely not "
                "monotonically increasing"
            )
        # use the midpoint of each threshold bin to weight the PDF
        threshold_midpoints = thresholds_expanded[:-1] + 0.5 * np.diff(
            thresholds_expanded
        )
        # expand the shape with additional length-1 dimensions so it
        # broadcasts with data_pdf
        thresh_mid_shape = [
            len(threshold_midpoints) if i == threshold_coord_idx else 1
            for i in range(data_pdf.ndim)
        ]
        threshold_midpoints_bcast = threshold_midpoints.reshape(thresh_mid_shape)
        # apply threshold weightings to produce a weighted PDF suitable for
        # integration
        weighted_pdf = data_pdf * threshold_midpoints_bcast
        del data_pdf
        # sum of weighted_pdf is equivalent to midpoint rule integration over
        # the CDF
        ev_data = np.sum(weighted_pdf, axis=threshold_coord_idx, dtype=np.float32)
        del weighted_pdf
        # set up output cube based on input, with the threshold dimension removed
        ev_cube = next(cube.slices_over([threshold_coord])).copy()
        ev_cube.remove_coord(threshold_coord)
        # name and units come from the threshold coordinate
        ev_cube.rename(threshold_coord.name())
        ev_cube.units = threshold_coord.units
        # replace data with calculated values
        ev_cube.data = ev_data
        return ev_cube
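
    # Worked example (editorial sketch, not part of the plugin): for a single
    # point with thresholds [274, 276, 278] K and below-threshold CDF values
    # [0.0, 0.5, 1.0], the epsilon padding adds ~274.0 K with probability 0
    # below and ~278.0 K with probability 1 above. Differencing the padded
    # CDF [0, 0.0, 0.5, 1.0, 1] gives PDF masses [0.0, 0.5, 0.5, 0.0] at bin
    # midpoints of roughly [274.0, 275.0, 277.0, 278.0], so the expected
    # value is 0.5 * 275 + 0.5 * 277 = 276.0 K.
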
    def process(self, cube: Cube) -> Cube:
        """Expected value calculation and metadata updates.

        Args:
            cube: Probabilistic data with a realization, threshold or
                percentile representation.

        Returns:
            Expected value of probability distribution. Same shape as input
            cube but with the realization/threshold/percentile coordinate
            removed.
        """
        if is_probability(cube):
            ev_cube = self.integrate_over_thresholds(cube)
        elif is_percentile(cube):
            ev_cube = collapse_realizations(
                RebadgePercentilesAsRealizations().process(cube)
            )
        else:
            ev_cube = collapse_realizations(cube)
        ev_cube.add_cell_method(CellMethod("mean", coords="realization"))
        return ev_cube
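

# Minimal usage sketch (editorial addition, not part of the IMPROVER module):
# demonstrates the realization branch of ExpectedValue.process on a toy
# three-member ensemble. The cube name, units and coordinate values are
# arbitrary assumptions chosen for illustration; a real workflow would load
# cubes from file rather than building them by hand.
if __name__ == "__main__":
    from iris.coords import DimCoord

    realization = DimCoord(
        np.arange(3, dtype=np.int32), standard_name="realization", units="1"
    )
    x_coord = DimCoord(
        np.array([100.0, 200.0], dtype=np.float32),
        standard_name="projection_x_coordinate",
        units="m",
    )
    cube = Cube(
        np.array([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]], dtype=np.float32),
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(realization, 0), (x_coord, 1)],
    )
    # neither a probability nor a percentile cube, so the plugin takes the
    # collapse_realizations path and should print the ensemble mean [2. 4.]
    print(ExpectedValue().process(cube).data)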