Source code for improver.generate_ancillaries.generate_miscellaneous_ancillaries

# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'IMPROVER' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
"""A module for functions that generate ancillary cubes."""

import numpy as np
from geopandas import GeoDataFrame
from iris import Constraint
from iris.cube import Cube, CubeList

from improver.generate_ancillaries.generate_distance_to_feature import DistanceTo
from improver.spotdata.build_spotdata_cube import build_spotdata_cube
from improver.spotdata.neighbour_finding import NeighbourSelection
from improver.spotdata.spot_extraction import SpotExtraction
from improver.spotdata.utilities import extract_site_json
from improver.utilities.spatial import (
    check_if_grid_is_equal_area,
    distance_to_number_of_grid_cells,
)


[docs] def generate_distance_to_ocean( epsg_projection: int, coastline: GeoDataFrame, land: GeoDataFrame, site_cube: Cube ) -> Cube: """Generate a distance to ocean ancillary cube. The DistanceTo plugin can't be used directly because there isn't a GeoDataFrame for the ocean. The DistanceTo class is used with the coastline GeoDataframe to calculate the distance of each site to the coastline. The DistanceTo class is also used to calculate the distance to land for each site. This identifies which sites are land and which are ocean. Sites in the ocean can then be set to 0m as they are in the ocean. Args: epsg_projection: The EPSG code of the coordinate reference system on to which latitude and longitudes will be projected to calculate distances. This is a projected coordinate system in which distances are measured in metres, for example a Lambert Azimuthal Equal Areas projection across the UK, code 3035. coastline: A GeoDataFrame containing the coastline geometry. land: A GeoDataFrame containing the land geometry. site_cube: A cube containing the site locations. There must be latitude and longitude coordinates. Returns: A cube containing the distance to ocean ancillary data. """ distance_to_coastline = DistanceTo( epsg_projection, new_name="distance_to_coastline" )(site_cube, coastline) # As we only care about identifying sites on land (i.e. 0m) we can use a small buffer # to speed up the calculation. distance_to_land = DistanceTo( epsg_projection, new_name="distance_to_land", buffer=10, clip_geometry_flag=True )(site_cube, land) # Set the distance to ocean to 0 for sites that are in the ocean distance_to_ocean = distance_to_coastline.copy(data=distance_to_coastline.data) distance_to_ocean.data[distance_to_land.data != 0] = 0 distance_to_ocean.rename("distance_to_ocean") return distance_to_ocean
[docs] def generate_distance_to_water(distance_to_water_feature: CubeList) -> Cube: """Generate a distance to water ancillary cube. The distance to water is the minimum of all the provided distance to water features, such as rivers, lakes, and oceans. The first cube in distance_to_water_feature is used as a template for the output metadata with the name updated to "distance_to_water". Args: distance_to_water_feature: A CubeList containing distance to water features from sites (i.e. rivers, lakes, and oceans). Each cube should have the same set of sites defined. Returns: A cube containing the distance to water ancillary data. """ # Calculate the minimum distance to water distances_to_features = np.stack([cube.data for cube in distance_to_water_feature]) min_distance = np.min(distances_to_features, axis=0) # Create a new cube for the distance to water distance_to_water = distance_to_water_feature[0].copy(data=min_distance) distance_to_water.rename("distance_to_water") return distance_to_water
[docs] def generate_roughness_length_at_sites( roughness_length: Cube, neighbour_cube: Cube ) -> Cube: """Generate a roughness length ancillary cube at the site locations. This performs a spot extraction of the roughness length data at the site locations and removes time related coordinates. Args: roughness_length: A cube containing the roughness length data. neighbour_cube: A cube containing information about the spot data sites and their grid point neighbours. Returns: A cube containing the roughness length at the site locations. """ roughness_length_spot = SpotExtraction(neighbour_selection_method="nearest")( neighbour_cube, roughness_length ) # Update metadata to remove any time coordinates cube_coord = [coord.name() for coord in roughness_length_spot.coords()] time_coordinates = ["time", "forecast_reference_time", "forecast_period"] for coord in time_coordinates: if coord in cube_coord: roughness_length_spot.remove_coord(coord) return roughness_length_spot
[docs] def generate_land_area_fraction_at_sites( land_cover_cube: Cube, neighbour_cube: Cube, radius: int = 2500 ) -> Cube: """Generate a land area fraction ancillary cube at the site locations by utilising the Corine Land cover. The Corine Land cover is available from https://doi.org/10.2909/960998c1-1870-4e82-8051-6485205ebbac. This function requires the land cover file is provided as an iris cube. A neighbour cube is generated on the native grid of the input land cover cube. This allows us to select the cell, on what ever resolution this data is provided, that is closest to each site location. A box can then be formed about this cell of a given size and the fraction of points within the box that are classified as land can be counted. The returned value is the fraction of the total box size that is classified as land on the input land cover grid. Args: land_cover_cube: A cube containing the Corine Land cover data. The data values should be integers representing different land cover types. neighbour_cube: A cube containing information about the spot data sites. We use this rather than a site list as it contains a completed set of altitudes which have been extracted from orography data on the model domain. These fill in where the site source data may be missing altitude information. radius: The radius in metres of the box about each site location to use to calculate the land area fraction. The default value of 2500m gives a box of approximately 5km x 5km. Returns: A cube containing the land area fraction at the site locations. """ # Determine the grid resolution of the land cover data in metres check_if_grid_is_equal_area(land_cover_cube) cell_radius = distance_to_number_of_grid_cells(land_cover_cube, radius) # Ensure we are working with an x/y grid only xaxis, yaxis = land_cover_cube.coord(axis="x"), land_cover_cube.coord(axis="y") land_cover_cube = next(land_cover_cube.slices([xaxis, yaxis])) # 41-44 is the key for water in the Corine Land cover dataset. land_mask = land_cover_cube.copy(data=np.where(land_cover_cube.data > 40, 0, 1)) # Oceans far from the coast have data value -128 to represent no data land_mask.data = np.where(land_cover_cube.data < 0, 0, land_mask.data) # 48 is the key for complex land surfaces in Corine land_mask.data = np.where(land_cover_cube.data == 48, 1, land_mask.data) # Extract the site definitions from the provided neighbour cube. site_definitions = extract_site_json(neighbour_cube) # If a unique site id is present, we need to extract the name for reuse. default_keys = ["altitude", "latitude", "longitude", "wmo_id"] try: (unique_site_id_key,) = [ key for key in site_definitions[0].keys() if key not in default_keys ] except ValueError: unique_site_id_key = None # Find nearest grid point to each site on the land cover grid. neighbour_plugin = NeighbourSelection(unique_site_id_key=unique_site_id_key) # We are using the land cover data here as both orography and landmask, # but we don't need any orographic information for a nearest neighbour # selection and with a guaranteed set of complete site altitudes provided # by the neighbour cube, so this is okay. neighbours = neighbour_plugin(site_definitions, land_mask, land_mask) kwargs = ( {"unique_site_id": 0, "unique_site_id_key": unique_site_id_key} if unique_site_id_key is not None else {} ) template = build_spotdata_cube( 0, "land_area_fraction", 1, 0, 0, 0, "00000", **kwargs ) # Iris does not concatenate variable length strings, so we need to set # types explicitly in each site cube to ensure these can be concatenated. crd_types = {crd.name(): neighbours.coord(crd).dtype for crd in template.coords()} # Constraints to allow us to extract the x and y grid indices. x_index_con = Constraint(grid_attributes_key="x_index") y_index_con = Constraint(grid_attributes_key="y_index") land_fraction = CubeList() for site in neighbours.slices_over("spot_index"): ix, iy = ( site.extract(x_index_con).data.astype(int).item(), site.extract(y_index_con).data.astype(int).item(), ) subset = land_mask.data[ max(0, ix - cell_radius) : ix + cell_radius + 1, max(0, iy - cell_radius) : iy + cell_radius + 1, ] fraction = subset.sum() / subset.size site_frac = template.copy(data=np.array([fraction], dtype=np.float32)) for crd, crd_type in crd_types.items(): site_frac.coord(crd).points = site.coord(crd).points.astype(crd_type) land_fraction.append(site_frac) return land_fraction.concatenate_cube()