Source code for improver.cli.apply_quantile_regression_random_forest

#!/usr/bin/env python
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Script to apply a Quantile Regression Random Forest (QRF) model."""

from improver import cli


@cli.clizefy
@cli.with_output
def process(
    *file_paths: cli.inputpath,
    feature_config: cli.inputjson,
    target_cf_name: str,
    unique_site_id_keys: cli.comma_separated_list = "wmo_id",
    cycletime: str = None,
    forecast_period: int = None,
):
    """Apply a previously trained Quantile Regression Random Forest (QRF) model.

    Loads in arguments for applying a Quantile Regression Random Forest (QRF)
    model which has been previously trained. Two sources of input data must be
    provided: the QRF model and the forecast cube to be calibrated. The output
    is a NetCDF file containing the calibrated forecast.

    Args:
        file_paths (cli.inputpath):
            A list of input paths containing:
            - The path to the pickle file produced by training the QRF model.
            The pickle file contains the QRF model and the transformation and
            pre_transform_addition values if a transformation was applied.
            If no transformation was applied then the transformation and
            pre_transform_addition values will be None and 0, respectively.
            - The path to a NetCDF file containing the forecast to be
            calibrated.
            - Optionally, paths to NetCDF files containing additional
            predictors.
        feature_config (dict):
            Feature configuration defining the features to be used for
            quantile regression. The configuration is a dictionary of strings,
            where the keys are the names of the input cube(s) supplied, and
            the values are a list. This list can contain both computed
            features, such as the mean or standard deviation (std), or static
            features, such as the altitude. The computed features will be
            computed using the cube defined in the dictionary key. If the key
            is the feature itself e.g. a distance to water cube, then the
            value should state "static". This will ensure the cube's data is
            used as the feature. The config will have the structure:
            "DYNAMIC_VARIABLE_CF_NAME": ["FEATURE1", "FEATURE2"] e.g.
            {
            "air_temperature": ["mean", "std", "altitude"],
            "visibility_at_screen_level": ["mean", "std"],
            "distance_to_water": ["static"]
            }
        target_cf_name (str):
            A string containing the CF name of the forecast to be calibrated
            e.g. air_temperature. This will be used to separate it from the
            rest of the feature cubes, if present.
        unique_site_id_keys (str):
            The names of the coordinates that uniquely identify each site,
            given as a comma-separated string, e.g. "wmo_id" or
            "latitude,longitude".
        cycletime (str):
            The cycle time of the forecast to be calibrated in the format
            YYYYMMDDTHHMMZ. If not provided, the first cycle time found in the
            forecast cube will be used.
        forecast_period (int):
            The forecast period of the forecast to be calibrated in seconds.
            If not provided, the first forecast period found in the forecast
            cube will be used.

    Returns:
        iris.cube.Cube:
            The calibrated forecast cube.
    """
    # NOTE(review): imports are kept local to the function, as in the original;
    # presumably this avoids loading the calibration stack when the CLI module
    # is merely imported - confirm against the other improver.cli modules.
    from improver.calibration import split_netcdf_parquet_pickle
    from improver.calibration.load_and_apply_quantile_regression_random_forest import (
        PrepareAndApplyQRF,
    )

    # The second element returned by the splitter is not needed here and is
    # discarded; only the cubes and the QRF descriptors are used.
    cubes, _, qrf_descriptors = split_netcdf_parquet_pickle(file_paths)

    plugin = PrepareAndApplyQRF(
        feature_config=feature_config,
        target_cf_name=target_cf_name,
        unique_site_id_keys=unique_site_id_keys,
        cycletime=cycletime,
        forecast_period=forecast_period,
    )
    return plugin(cubes, qrf_descriptors=qrf_descriptors)