Source code for improver.expected_value

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Calculation of expected value from a probability distribution."""

import numpy as np
from iris.coords import CellMethod
from iris.cube import Cube

from improver import PostProcessingPlugin
from improver.ensemble_copula_coupling.ensemble_copula_coupling import (
    RebadgePercentilesAsRealizations,
)
from improver.metadata.probabilistic import (
    find_threshold_coordinate,
    is_percentile,
    is_probability,
)
from improver.utilities.cube_manipulation import collapse_realizations
from improver.utilities.probability_manipulation import to_threshold_inequality


class ExpectedValue(PostProcessingPlugin):
    """Calculation of expected value from a probability distribution"""
    @staticmethod
    def integrate_over_thresholds(cube: Cube) -> Cube:
        """
        Calculation of expected value for threshold data by converting from
        cumulative distribution (CDF) to probability density (PDF), then
        integrating over the threshold dimension.

        Args:
            cube: Probabilistic data with a threshold coordinate.

        Returns:
            Expected value of probability distribution. Same shape as input
            cube but with the threshold coordinate removed.
        """
        # make sure that the threshold direction is "below"
        cube = to_threshold_inequality(cube, above=False)
        # set up threshold values, these will be needed as a multiplier
        # during integration
        threshold_coord = find_threshold_coordinate(cube)
        threshold_coord_idx = cube.dim_coords.index(threshold_coord)
        thresholds = threshold_coord.points
        # check the thresholds are in strictly increasing order
        if np.any(np.diff(thresholds) <= 0.0):
            raise ValueError("threshold coordinate is not strictly increasing")
        # add an extra threshold below/above with zero/one probability to
        # ensure that the PDF integral covers the full CDF probability range
        # thresholds are usually float32 with epsilon of ~= 1.1e-7
        eps = np.finfo(thresholds.dtype).eps
        # for small values (especially exactly zero), pad by at least epsilon;
        # for larger values, use the next representable float number
        thresholds_expanded = np.array(
            [
                thresholds[0] - max(eps, abs(thresholds[0] * eps)),
                *thresholds,
                thresholds[-1] + max(eps, abs(thresholds[-1] * eps)),
            ]
        )
        # expand the data to match the newly added thresholds
        extra_data_shape = list(cube.shape)
        extra_data_shape[threshold_coord_idx] = 1
        data_expanded = np.concatenate(
            [np.zeros(extra_data_shape), cube.data, np.ones(extra_data_shape)],
            axis=threshold_coord_idx,
            dtype=np.float32,
        )
        data_pdf = np.diff(data_expanded, axis=threshold_coord_idx)
        del data_expanded
        # the PDF should never be negative
        if np.any(data_pdf < 0.0):
            raise ValueError(
                "PDF contains negative values - CDF was likely not "
                "monotonically increasing"
            )
        # use the midpoint of each threshold bin to weight the PDF
        threshold_midpoints = thresholds_expanded[:-1] + 0.5 * np.diff(
            thresholds_expanded
        )
        # expand the shape with additional length-1 dimensions so it
        # broadcasts with data_pdf
        thresh_mid_shape = [
            len(threshold_midpoints) if i == threshold_coord_idx else 1
            for i in range(data_pdf.ndim)
        ]
        threshold_midpoints_bcast = threshold_midpoints.reshape(thresh_mid_shape)
        # apply threshold weightings to produce a weighted PDF suitable for
        # integration
        weighted_pdf = data_pdf * threshold_midpoints_bcast
        del data_pdf
        # sum of weighted_pdf is equivalent to midpoint rule integration over
        # the CDF
        ev_data = np.sum(weighted_pdf, axis=threshold_coord_idx, dtype=np.float32)
        del weighted_pdf
        # set up output cube based on input, with the threshold dimension removed
        ev_cube = next(cube.slices_over([threshold_coord])).copy()
        ev_cube.remove_coord(threshold_coord)
        # name and units come from the threshold coordinate
        ev_cube.rename(threshold_coord.name())
        ev_cube.units = threshold_coord.units
        # replace data with calculated values
        ev_cube.data = ev_data
        return ev_cube
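
    # Worked example (editorial sketch, not part of the plugin): for a single
    # point with thresholds [274, 276, 278] K and below-threshold CDF values
    # [0.0, 0.5, 1.0], the epsilon padding adds ~274.0 K with probability 0
    # below and ~278.0 K with probability 1 above. Differencing the padded
    # CDF [0, 0.0, 0.5, 1.0, 1] gives PDF masses [0.0, 0.5, 0.5, 0.0] at bin
    # midpoints of roughly [274.0, 275.0, 277.0, 278.0], so the expected
    # value is 0.5 * 275 + 0.5 * 277 = 276.0 K.
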
    def process(self, cube: Cube) -> Cube:
        """Expected value calculation and metadata updates.

        Args:
            cube: Probabilistic data with a realization, threshold or
                percentile representation.

        Returns:
            Expected value of probability distribution. Same shape as input
            cube but with the realization/threshold/percentile coordinate
            removed.
        """
        if is_probability(cube):
            ev_cube = self.integrate_over_thresholds(cube)
        elif is_percentile(cube):
            ev_cube = collapse_realizations(
                RebadgePercentilesAsRealizations().process(cube)
            )
        else:
            ev_cube = collapse_realizations(cube)
        ev_cube.add_cell_method(CellMethod("mean", coords="realization"))
        return ev_cube
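

# Minimal usage sketch (editorial addition, not part of the IMPROVER module):
# demonstrates the realization branch of ExpectedValue.process on a toy
# three-member ensemble. The cube name, units and coordinate values are
# arbitrary assumptions chosen for illustration; a real workflow would load
# cubes from file rather than building them by hand.
if __name__ == "__main__":
    from iris.coords import DimCoord

    realization = DimCoord(
        np.arange(3, dtype=np.int32), standard_name="realization", units="1"
    )
    x_coord = DimCoord(
        np.array([100.0, 200.0], dtype=np.float32),
        standard_name="projection_x_coordinate",
        units="m",
    )
    cube = Cube(
        np.array([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]], dtype=np.float32),
        standard_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(realization, 0), (x_coord, 1)],
    )
    # neither a probability nor a percentile cube, so the plugin takes the
    # collapse_realizations path and should print the ensemble mean [2. 4.]
    print(ExpectedValue().process(cube).data)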