Source code for improver.standardise
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""Plugin to standardise metadata"""
from typing import Any, Dict, List, Optional
import numpy as np
from iris.coords import CellMethod
from iris.cube import Cube, CubeList
from iris.exceptions import CoordinateNotFoundError
from numpy import dtype, ndarray
from improver import BasePlugin
from improver.metadata.amend import amend_attributes
from improver.metadata.check_datatypes import (
_is_time_coord,
check_units,
get_required_dtype,
get_required_units,
)
from improver.metadata.constants.time_types import TIME_COORDS
from improver.utilities.common_input_handle import as_cube
from improver.utilities.round import round_close
[docs]
class StandardiseMetadata(BasePlugin):
"""Plugin to standardise cube metadata"""
[docs]
def __init__(
    self,
    new_name: Optional[str] = None,
    new_units: Optional[str] = None,
    coords_to_remove: Optional[List[str]] = None,
    coord_modification: Optional[Dict[str, float]] = None,
    attributes_dict: Optional[Dict[str, Any]] = None,
):
    """
    Record the user-configurable options for standardising cube metadata.

    Every option defaults to None; the options are simply stored on the
    instance here and are applied later by the process method.

    Args:
        new_name:
            Optional new name for the output cube.
        new_units:
            Optional units to which the output cube is converted.
        coords_to_remove:
            Optional list of names of scalar coordinates to remove from
            the output cube.
        coord_modification:
            Optional dictionary used to directly overwrite the values of
            scalar coordinates. To be used with extreme caution.
            For example {"height": 1.5} sets the height coordinate to a
            value of 1.5m (assuming the original units are m). This can
            be used to align e.g. temperatures defined at slightly
            different heights where the difference is considered small
            enough to ignore. The type is inferred from the value given,
            so 2 results in an integer type whilst 2.0 results in a
            float type.
        attributes_dict:
            Optional dictionary of required attribute updates. Keys are
            attribute names and values are the required changes.
            See improver.metadata.amend.amend_attributes for details.
    """
    # Cube-level options
    self._new_units = new_units
    self._new_name = new_name
    self._attributes_dict = attributes_dict
    # Coordinate-level options
    self._coord_modification = coord_modification
    self._coords_to_remove = coords_to_remove
[docs]
@staticmethod
def _remove_air_temperature_status_flag(cube: Cube) -> Cube:
    """
    Apply the "air_temperature status_flag" coordinate to the cube data as
    NaNs and then remove that coordinate.

    If the cube does not carry the coordinate it is returned unchanged.
    Otherwise every data point at which the flag is non-zero is set to NaN.
    Where a "realization" coordinate exists each realization slice is
    handled separately and the slices are merged back into a single cube.
    See https://github.com/metoppv/improver/pull/1839 for further details.

    Args:
        cube:
            Cube which may carry the status flag coordinate.

    Returns:
        The input cube if no status flag coordinate was found, otherwise
        the merged cube with flagged data set to NaN and the status flag
        coordinate removed.

    Raises:
        ValueError:
            If the status flag coordinate attributes are not of the
            expected form.
        ValueError:
            If the status flag coordinate points are masked.
    """
    coord_name = "air_temperature status_flag"

    try:
        flag_coord = cube.coord(coord_name)
    except CoordinateNotFoundError:
        return cube
    # A falsy coordinate object is treated in the same way as a missing one
    if not flag_coord:
        return cube

    expected_attributes = {
        "flag_meanings": "above_surface_pressure below_surface_pressure",
        "flag_values": np.array([0, 1], dtype="int8"),
    }
    if flag_coord.attributes != expected_attributes:
        raise ValueError(f"'{coord_name}' coordinate is not of the expected form.")

    # Work realization by realization where possible, else on the whole cube
    try:
        cube_slices = cube.slices_over("realization")
    except CoordinateNotFoundError:
        cube_slices = [cube]

    processed = CubeList()
    for cube_slice in cube_slices:
        slice_flags = cube_slice.coord(coord_name)
        if np.ma.is_masked(slice_flags.points):
            raise ValueError(f"'{coord_name}' coordinate has unexpected mask values.")
        # Non-zero flag values mark the data points to be replaced with NaN
        flagged = np.asarray(slice_flags.points).astype(bool)
        cube_slice.data[flagged] = np.nan
        cube_slice.remove_coord(coord_name)
        processed.append(cube_slice)
    return processed.merge_cube()
[docs]
@staticmethod
def _collapse_scalar_dimensions(cube: Cube) -> Cube:
    """
    Demote length-one dimension coordinates to non-dimension coordinates.

    Any dimension coordinate with exactly one point is sliced over, unless
    its name contains "realization", in which case it is left in place.

    Args:
        cube:
            The cube to collapse.

    Returns:
        The cube with its length-one dimensions (other than realization)
        sliced away.
    """
    length_one_dims = [
        dim_coord
        for dim_coord in cube.coords(dim_coords=True)
        if len(dim_coord.points) == 1 and "realization" not in dim_coord.name()
    ]
    for dim_coord in length_one_dims:
        # The coordinate has a single point, so the first slice is the only one
        cube = next(cube.slices_over(dim_coord))
    return cube
[docs]
@staticmethod
def _remove_scalar_coords(cube: Cube, coords_to_remove: List[str]) -> None:
    """
    Remove the named coordinates from the input cube in place.

    Names for which the cube raises CoordinateNotFoundError are ignored.

    Args:
        cube:
            Cube to be updated in place.
        coords_to_remove:
            Names of the coordinates to remove.
    """
    for coord_name in coords_to_remove:
        try:
            cube.remove_coord(coord_name)
        except CoordinateNotFoundError:
            # Nothing to remove for this name; carry on with the next one
            pass
[docs]
@staticmethod
def _modify_scalar_coord_value(
    cube: Cube, coord_modification: Dict[str, float]
) -> None:
    """
    Overwrite the points of each named scalar coordinate with the value
    supplied for it.

    Data types are not enforced here as the subsequent enforcement step
    fulfils that requirement. Units are assumed to be the same as those of
    the original coordinate value. Requests naming a coordinate that is not
    on the cube are silently skipped. Dimension coordinates, values with
    more than one element and time coordinates are all rejected as there is
    greater scope to harm data integrity (i.e. the description of the data
    and the data becoming misaligned).

    Args:
        cube:
            Cube to be updated in place.
        coord_modification:
            Dictionary defining the coordinates (keys) to be modified
            and the values (values) to which they should be set.

    Raises:
        ValueError:
            If a named coordinate is a dimension coordinate.
        ValueError:
            If a supplied value has a length greater than one.
        ValueError:
            If a named coordinate is a time coordinate.
    """
    for coord_name, new_value in coord_modification.items():
        # Coordinates absent from the cube are skipped without complaint
        if not cube.coords(coord_name):
            continue

        if cube.coords(coord_name, dim_coords=True):
            raise ValueError(
                "Modifying dimension coordinate values is not allowed "
                "due to the risk of introducing errors."
            )

        has_many_values = hasattr(new_value, "__len__") and len(new_value) > 1
        if has_many_values:
            raise ValueError(
                "Modifying multi-valued coordinates is not allowed. "
                "This functionality should be used only for very "
                "modest changes to scalar coordinates."
            )

        target_coord = cube.coord(coord_name)
        if _is_time_coord(target_coord):
            raise ValueError("Modifying time coordinates is not allowed.")

        target_coord.points = np.array([new_value])
[docs]
@staticmethod
def _standardise_dtypes_and_units(cube: Cube) -> None:
    """
    Bring the cube data and every coordinate into line with the mandatory
    dtype and unit standards, modifying the cube in place.

    Coordinates named in TIME_COORDS that fail check_units are converted to
    their required units. Coordinate points and bounds are then cast to the
    required dtype, with round_close applied first when that dtype is an
    integer type.

    Args:
        cube:
            Cube to be updated in place.
    """

    def as_correct_dtype(obj: ndarray, required_dtype: dtype) -> ndarray:
        """
        Return the object in the required dtype.

        Args:
            obj:
                The object to be updated.
            required_dtype:
                The dtype required.

        Returns:
            The object itself if it already has the required dtype,
            otherwise a copy cast to that dtype.
        """
        return obj if obj.dtype == required_dtype else obj.astype(required_dtype)

    cube.data = as_correct_dtype(cube.data, get_required_dtype(cube))

    for coord in cube.coords():
        wrong_time_units = coord.name() in TIME_COORDS and not check_units(coord)
        if wrong_time_units:
            coord.convert_units(get_required_units(coord))

        target_dtype = get_required_dtype(coord)
        round_before_cast = np.issubdtype(target_dtype, np.integer)

        # Points and bounds are both cast to the same required dtype
        if round_before_cast:
            coord.points = round_close(coord.points)
        coord.points = as_correct_dtype(coord.points, target_dtype)

        if not coord.has_bounds():
            continue
        if round_before_cast:
            coord.bounds = round_close(coord.bounds)
        coord.bounds = as_correct_dtype(coord.bounds, target_dtype)
[docs]
@staticmethod
def _discard_redundant_cell_methods(cube: Cube) -> None:
    """
    Remove the cell method "point": "time" from the cube if present.

    All other cell methods are retained in their existing order. A cube
    without cell methods is left untouched.

    Args:
        cube:
            Cube to be updated in place.
    """
    if not cube.cell_methods:
        return

    redundant = [CellMethod(method="point", coords="time")]
    cube.cell_methods = [
        cell_method
        for cell_method in cube.cell_methods
        if cell_method not in redundant
    ]
[docs]
@staticmethod
def _remove_long_name_if_standard_name(cube: Cube) -> None:
    """
    Clear the cube's long_name when the cube also has a standard_name.

    Args:
        cube:
            Cube to be updated in place.
    """
    if not cube.standard_name:
        return
    if not cube.long_name:
        return
    cube.long_name = None
[docs]
def process(self, cube: Cube) -> Cube:
    """
    Perform compulsory and user-configurable metadata adjustments.

    The compulsory adjustments are:

    - to apply any `air_temperature status_flag` coordinate to the data as
      NaNs and remove that coordinate;
    - to collapse any scalar dimensions apart from realization (which is
      expected always to be a dimension);
    - to remove cell method ("point": "time");
    - to remove the long_name where a standard_name is also set;
    - to cast the cube data and coordinates into suitable datatypes;
    - to convert time-related metadata into the required units.

    The user-configurable adjustments (removing coordinates, renaming,
    converting units, modifying scalar coordinate values and amending
    attributes) are applied only for the options that were set.

    If the air_temperature data is required, this can be retained by
    removing the `air_temperature status_flag` as part of the standardise
    step so that the process of masking this data with NaNs is bypassed.
    See https://github.com/metoppv/improver/pull/1839 for further
    information.

    Args:
        cube:
            Input cube to be standardised.

    Returns:
        The processed cube.
    """
    cube = as_cube(cube)

    # The `_coords_to_remove` step has to come first. Removing the
    # `air_temperature status_flag` here means the air temperature data is
    # kept for a future calculation rather than being masked with NaNs,
    # as happens in the `_remove_air_temperature_status_flag` step when
    # the flag is still present.
    if self._coords_to_remove:
        self._remove_scalar_coords(cube, self._coords_to_remove)

    cube = self._collapse_scalar_dimensions(
        self._remove_air_temperature_status_flag(cube)
    )

    # User-configurable adjustments, each applied only if requested
    if self._new_name:
        cube.rename(self._new_name)
    if self._new_units:
        cube.convert_units(self._new_units)
    if self._coord_modification:
        self._modify_scalar_coord_value(cube, self._coord_modification)
    if self._attributes_dict:
        amend_attributes(cube, self._attributes_dict)

    self._remove_long_name_if_standard_name(cube)
    self._discard_redundant_cell_methods(cube)

    # Dtype enforcement must follow unit conversion: if the input is an
    # integer field, unit conversion outputs the new data as float64
    self._standardise_dtypes_and_units(cube)
    return cube