Source code for improver.metadata.utilities

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""General IMPROVER metadata utilities"""

import hashlib
import pprint
import warnings
from typing import Any, Dict, List, Optional, Type, Union

import dask.array as da
import iris
import numpy as np
from cf_units import Unit
from iris.cube import Cube, CubeList
from numpy import ndarray
from numpy.ma.core import MaskedArray

from improver.metadata.constants.attributes import (
    MANDATORY_ATTRIBUTE_DEFAULTS,
    MANDATORY_ATTRIBUTES,
)
from improver.metadata.constants.time_types import TIME_COORDS


def create_new_diagnostic_cube(
    name: str,
    units: Union[Unit, str],
    template_cube: Cube,
    mandatory_attributes: Union[Dict[str, str], Dict],
    optional_attributes: Optional[Union[Dict[str, str], Dict]] = None,
    data: Optional[Union[MaskedArray, ndarray]] = None,
    dtype: Type = np.float32,
) -> Cube:
    """
    Creates a new diagnostic cube with suitable metadata.

    Args:
        name:
            Standard or long name for output cube
        units:
            Units for output cube
        template_cube:
            Cube from which to copy dimensional and auxiliary coordinates
        mandatory_attributes:
            Dictionary containing values for the mandatory attributes
            "title", "source" and "institution".  These are overridden by
            values in the optional_attributes dictionary, if specified.
        optional_attributes:
            Dictionary of optional attribute names and values.  If values for
            mandatory attributes are included in this dictionary they override
            the values of mandatory_attributes.
        data:
            Data array.  If not set, cube is filled with zeros using a lazy
            data object, as this will be overwritten later by the caller
            routine.
        dtype:
            Datatype for dummy cube data if "data" argument is None.

    Returns:
        Cube with correct metadata to accommodate new diagnostic field

    Raises:
        ValueError: If any mandatory attribute is missing after merging the
            mandatory and optional attribute dictionaries.
    """
    # Work on a copy so the caller's mandatory_attributes dictionary is not
    # mutated when the optional attributes are merged in.
    attributes = dict(mandatory_attributes)
    if optional_attributes is not None:
        attributes.update(optional_attributes)

    error_msg = ""
    for attr in MANDATORY_ATTRIBUTES:
        if attr not in attributes:
            error_msg += "{} attribute is required\n".format(attr)
    if error_msg:
        raise ValueError(error_msg)

    if data is None:
        # Lazy zeros placeholder: avoids allocating real data, as the caller
        # is expected to overwrite it.
        data = da.zeros_like(template_cube.core_data(), dtype=dtype)

    # Copy all coordinates, with their dimension mappings, from the template.
    aux_coords_and_dims, dim_coords_and_dims = [
        [
            (coord.copy(), template_cube.coord_dims(coord))
            for coord in getattr(template_cube, coord_type)
        ]
        for coord_type in ("aux_coords", "dim_coords")
    ]

    cube = iris.cube.Cube(
        data,
        units=units,
        attributes=attributes,
        dim_coords_and_dims=dim_coords_and_dims,
        aux_coords_and_dims=aux_coords_and_dims,
    )
    # rename() sets standard_name when valid, long_name otherwise.
    cube.rename(name)
    return cube
def generate_mandatory_attributes(
    diagnostic_cubes: List[Cube], model_id_attr: Optional[str] = None
) -> Dict[str, str]:
    """
    Generate mandatory attributes for new diagnostics that are calculated
    using several different model diagnostics as input. Where every input
    cube agrees on an attribute's value, that value is adopted; otherwise
    the default value is retained.

    Args:
        diagnostic_cubes:
            List of diagnostic cubes used in calculating the new diagnostic
        model_id_attr:
            Name of attribute used to identify source model for blending,
            if required

    Returns:
        Dictionary of mandatory attribute "key": "value" pairs.

    Raises:
        ValueError: If model_id_attr is specified but is absent from, or
            inconsistent across, the input cubes.
    """
    # Sentinel distinguishing "attribute absent" from any real attribute value.
    absent = object()
    source_attrs = [cube.attributes for cube in diagnostic_cubes]
    required = [model_id_attr] if model_id_attr else []
    attributes = MANDATORY_ATTRIBUTE_DEFAULTS.copy()
    for key in MANDATORY_ATTRIBUTES + required:
        candidates = {attrs.get(key, absent) for attrs in source_attrs}
        if absent not in candidates and len(candidates) == 1:
            # Every cube agrees on this attribute: adopt the shared value.
            attributes[key] = candidates.pop()
        elif key in required:
            msg = (
                'Required attribute "{}" is missing or '
                "not the same on all input cubes"
            )
            raise ValueError(msg.format(key))
    return attributes
def generate_hash(data_in: Any) -> str:
    """
    Generate a hash from the data_in that can be used to uniquely identify
    equivalent data_in.

    Args:
        data_in:
            The data from which a hash is to be generated. This can be of any
            type that can be pretty printed.

    Returns:
        A hexadecimal string which is a hash hexdigest of the data as a
        string.
    """
    # Pretty-printing gives a deterministic textual form for arbitrary input.
    serialised = pprint.pformat(data_in)
    return hashlib.sha256(serialised.encode("utf-8")).hexdigest()
def create_coordinate_hash(cube: Cube) -> str:
    """
    Generate a hash based on the input cube's x and y coordinates. This
    acts as a unique identifier for the grid which can be used to allow two
    grids to be compared.

    Args:
        cube:
            The cube from which x and y coordinates will be used to
            generate a hash.

    Returns:
        A hash created using the x and y coordinates of the input cube.
    """
    hashable_data = []
    for axis in ("x", "y"):
        coord = cube.coord(axis=axis)
        hashable_data.extend(
            [
                list(coord.points),
                # NOTE(review): iris coordinate bounds are ndarrays (or None),
                # never a list, so this isinstance check looks permanently
                # False and bounds always contribute None to the hash —
                # confirm whether bounds were intended to be included.
                # Changing it would alter existing model_grid_hash values.
                list(coord.bounds) if isinstance(coord.bounds, list) else None,
                coord.standard_name,
                coord.long_name,
                coord.coord_system,
                coord.units,
            ]
        )
    return generate_hash(hashable_data)
def check_grid_match(cubes: Union[List[Cube], CubeList]) -> None:
    """
    Checks that cubes are on, or originate from, compatible coordinate grids.
    Each cube is first checked for an existing 'model_grid_hash' which can be
    used to encode coordinate information on cubes that do not themselves
    contain a coordinate grid (e.g. spotdata cubes). If this is not found a
    new hash is generated to enable comparison. If the cubes are not
    compatible, an exception is raised to prevent the use of unmatched cubes.

    Args:
        cubes:
            A list of cubes to check for grid compatibility.

    Raises:
        ValueError: Raised if the cubes are not on matching grids as
            identified by the model_grid_hash.
    """

    def _hash_for(cube):
        # Prefer a pre-computed hash attribute; fall back to hashing the
        # cube's own x/y grid coordinates.
        if "model_grid_hash" in cube.attributes:
            return cube.attributes["model_grid_hash"]
        return create_coordinate_hash(cube)

    remaining = iter(cubes)
    reference_hash = _hash_for(next(remaining))
    # any() short-circuits on the first cube whose hash disagrees.
    if any(_hash_for(cube) != reference_hash for cube in remaining):
        raise ValueError(
            "Cubes do not share or originate from the same "
            "grid, so cannot be used together."
        )
def get_model_id_attr(cubes: List[Cube], model_id_attr: str) -> str:
    """
    Gets the specified model ID attribute from a list of input cubes, checking
    that the value is the same on all those cubes in the process.

    Args:
        cubes:
            List of cubes to get the attribute from
        model_id_attr:
            Attribute name

    Returns:
        The unique attribute value

    Raises:
        ValueError: If the attribute is absent from any cube, or takes more
            than one value across the input cubes.
    """
    try:
        values = {cube.attributes[model_id_attr] for cube in cubes}
    except KeyError as error:
        # List every cube lacking the attribute for the error message.
        failing_cubes = [
            cube.name()
            for cube in cubes
            if not cube.attributes.get(model_id_attr, False)
        ]
        raise ValueError(
            f"Model ID attribute {model_id_attr} not present for {', '.join(failing_cubes)}."
        ) from error
    if len(values) != 1:
        raise ValueError(
            f"Attribute {model_id_attr} must be the same for all input cubes. "
            f"{' != '.join(values)}"
        )
    return values.pop()
def enforce_time_point_standard(cube: Cube):
    """
    Enforce the IMPROVER standard of a coordinate point that aligns with the
    upper bound of the period for time, forecast_period, and forecast
    reference time coordinates. Two exceptions are captured. A
    CoordinateNotFoundError allows all the time coordinates to be modified if
    they exist and ignored if they don't. The TypeError allows bounds that
    are set to None to be ignored.

    The cube is modified in place.

    Args:
        cube:
            Cube to enforce the IMPROVER standard on.
    """
    for coord_name in TIME_COORDS:
        try:
            coord = cube.coord(coord_name)
            # Each point becomes the upper bound of its period.
            coord.points = [interval[-1] for interval in coord.bounds]
        except (iris.exceptions.CoordinateNotFoundError, TypeError):
            # Coordinate absent, or its bounds are None: leave untouched.
            pass
def minimum_increment(cube: Cube, default: Optional[float] = None) -> Union[float, int]:
    """
    Determine the minimum increment for the cube data based on the
    'least_significant_digit' attribute. If the attribute is not present, the
    default value is used and a warning issued.

    Args:
        cube:
            The cube for which to determine the minimum increment.
        default:
            The default minimum increment to use if the
            'least_significant_digit' attribute is not present.

    Returns:
        The minimum increment data value, 10**(-least_significant_digit),
        or the default when the attribute is absent.

    Raises:
        ValueError: If the 'least_significant_digit' attribute is not present
            and no default is provided.
    """
    try:
        least_significant_digit = int(cube.attributes["least_significant_digit"])
    except KeyError as error:
        if default is None:
            # Chain from the KeyError so the missing-attribute cause is
            # visible in the traceback.
            raise ValueError(
                f"No 'least_significant_digit' attribute found in {cube.name()} cube and no default provided."
            ) from error
        warnings.warn(
            f"No 'least_significant_digit' attribute found in {cube.name()} cube. Assuming increment of {default}."
        )
        return default
    return 10 ** (-least_significant_digit)