Source code for improver.cli.apply_rainforests_calibration

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""CLI to apply rainforests calibration."""

import warnings

from improver import cli



[docs]
@cli.clizefy
@cli.with_output
def process(
    forecast: cli.inputcube,
    *features: cli.inputcube,
    model_config: cli.inputjson,
    output_thresholds: cli.comma_separated_list_of_float = None,
    output_threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    threads: int = None,
    bin_data: bool = False,
):
    """
    Calibrate a forecast cube using the Rainforests method.

    Ensemble forecasts must be in realization representation. Deterministic forecasts
    can be processed to produce a pseudo-ensemble; a realization dimension will be added
    to deterministic forecast cubes if one is not already present.

    This calibration is done in a situation dependent fashion using a series of
    decision-tree models to construct representative error distributions which are
    then used to map each input ensemble member onto an error distribution. The
    error distributions are averaged in probability space, and interpolated to the
    output thresholds.

    It is assumed that the models have been trained using the `>=` comparator; i.e.
    they predict the probability that the error is greater than or equal to the various
    error thresholds. The output probability cube also uses the `>=` comparator.

    Args:
        forecast_cube (iris.cube.Cube):
            Cube containing the forecast to be calibrated; must be as realizations.
        feature_cubes (iris.cube.Cubelist):
            Cubelist containing the feature variables (physical parameters) used as inputs
            to the tree-models for the generation of the associated error distributions.
            Feature cubes are expected to have the same dimensions as forecast_cube, with
            the exception of the realization dimension. Where the feature_cube contains a
            realization dimension this is expected to be consistent, otherwise the cube will
            be broadcast along the realization dimension.
        model_config (dict):
            Dictionary containing RainForests model configuration data.
        output_thresholds (list):
            List of thresholds at which to evaluate output probabilities.
        output_threshold_config (dict):
            Threshold configuration dictionary where the keys are strings representing
            thresholds. The threshold config should follow the same format as that of
            the threshold cli, however here only the threshold keys are used and the
            threshold values are disregarded.
        threshold_units (str):
            Units in which threshold_values are specified. If not provided the units are
            assumed to be the same as those of the input cube. Specifying the units here
            will allow a suitable conversion to match the input units of forecast_cube.
        threads (int):
            Number of threads to use during prediction with tree-model objects.
            If unset, uses the default value set by the model library used.
        bin_data:
            Bin data according to splits used in models. This speeds up prediction
            if there are many data points which fall into the same bins for all threshold models.
            Limits the calculation of common feature values by only calculating them once.

    Returns:
        iris.cube.Cube:
            The forecast cube following calibration.
    """
    from iris.cube import CubeList

    from improver.calibration.rainforest_calibration import ApplyRainForestsCalibration

    if output_threshold_config and output_thresholds:
        raise ValueError(
            "--output-threshold-config and --output-thresholds are mutually exclusive "
            "- please set one or the other, not both"
        )
    if (not output_threshold_config) and (not output_thresholds):
        raise ValueError(
            "One of --output-threshold-config and --output-thresholds must be specified"
        )

    if output_threshold_config:
        message = "Fuzzy bounds are not supported. Values of output-threshold-config \
            will be ignored."
        warnings.warn(message)
        thresholds = [float(key) for key in output_threshold_config.keys()]
    else:
        thresholds = [float(x) for x in output_thresholds]
    return ApplyRainForestsCalibration(
        model_config_dict=model_config, threads=threads, bin_data=bin_data
    ).process(
        forecast,
        CubeList(features),
        output_thresholds=thresholds,
        threshold_units=threshold_units,
    )