diff --git a/docs/source/api/exports.rst b/docs/source/api/exports.rst index 988c9d4..fdef955 100644 --- a/docs/source/api/exports.rst +++ b/docs/source/api/exports.rst @@ -19,6 +19,8 @@ Exporting .. autofunction:: emiproc.exports.wrf.export_wrf_hourly_emissions +.. autofunction:: emiproc.exports.fluxy.export_fluxy + Gral ---- diff --git a/docs/source/models.rst b/docs/source/models.rst index 5145ab0..ce43a39 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -31,4 +31,18 @@ NetCDF is a common format for storing gridded data. emiproc can export emissions to NetCDF files, which can then be used as input for models. :py:func:`emiproc.exports.rasters.export_raster_netcdf` -:py:func:`emiproc.exports.netcdf.nc_cf_attributes` \ No newline at end of file +:py:func:`emiproc.exports.netcdf.nc_cf_attributes` + + +Fluxy +----- + +Fluxy is a Python package for visualizing inversion model results. +It also supports inventories in the form of NetCDF files. + +See the +`following script `_ +to learn how to export inventories to +fluxy and how to visualize them with fluxy. + +:py:func:`emiproc.exports.fluxy.export_fluxy` \ No newline at end of file diff --git a/emiproc/exports/fluxy.py b/emiproc/exports/fluxy.py new file mode 100644 index 0000000..25c292e --- /dev/null +++ b/emiproc/exports/fluxy.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +import logging +from os import PathLike +from pathlib import Path + +import numpy as np +import xarray as xr +from pyproj import CRS + +from emiproc.exports.utils import get_temporally_scaled_array +from emiproc.grids import RegularGrid +from emiproc.inventories import Inventory +from emiproc.utilities import get_country_mask + +logger = logging.getLogger(__name__) + + +def export_fluxy( + invs: Inventory | list[Inventory], + output_dir: PathLike, + transport_model: str = "emiproc", + frequency: str = "yearly", +) -> None: + """Export emissions to Fluxy format. + + https://github.com/openghg/fluxy + + Fluxy is a python plotting tool for comparing inverse modeling results. + As part of fluxy, it is possible to plot prior emissions. + + The following is required on inventories to be exported to fluxy: + + * The inventory must have a :py:class:`~emiproc.grids.RegularGrid`. + * The inventory must have a year value given. (not None). + * The inventory must have temporal profiles. . + + Fluxy files must have a specifc format. + The files are generated in the following way: + + * __.nc + + + :param invs: Inventory or list of inventories to export. + A list of inventory assumes that you want to plot the emissions over multiple years. + :param output_dir: Directory to export the emissions to. + This directory is the name of the transport model in fluxy. + :param transport_model: The transport model name to "fake". (default: `emiproc`) + + :return: None + + + """ + + output_dir = Path(output_dir) / transport_model + output_dir.mkdir(parents=True, exist_ok=True) + + if isinstance(invs, Inventory): + invs = [invs] + if not isinstance(invs, list): + raise TypeError(f"Expected Inventory or list of Inventory, got {type(invs)}.") + if len(invs) == 0: + raise ValueError("Expected at least one inventory.") + + # Check that the grids are all the same + grids = [inv.grid for inv in invs] + if len(set(grids)) != 1: + raise ValueError( + "All inventories must have the same grid. " + f"Got {[grid.name for grid in grids]}." + ) + grid = grids[0] + assert isinstance(grid, RegularGrid), "Only regular grids are supported." + + # Check the grid is on WGS84 + if not CRS(grid.crs) == CRS("WGS84"): + raise ValueError("The grid must be on WGS84. " f"Got {grid.crs}.") + + def cell_to_lat_lon(ds: xr.Dataset): + ds = ds.assign_coords( + latitude=("cell", np.tile(grid.lat_range, grid.nx)), + longitude=("cell", np.repeat(grid.lon_range, grid.ny)), + ) + # Set the stacked coordinate as a multi-index + ds = ds.set_index(cell=["latitude", "longitude"]) + ds = ds.unstack({"cell": ("latitude", "longitude")}) + return ds + + da_fractions = get_country_mask(grid, return_fractions=True) + da_fractions = cell_to_lat_lon(da_fractions) + + substances = set(sum((inv.substances for inv in invs), [])) + + # First create a template with the grid and the time dimension + ds_template = xr.Dataset( + coords={ + "longitude": ( + "longitude", + grid.lon_range, + { + "standard_name": "longitude", + "long_name": "longitude of grid cell centre", + "units": "degrees_east", + "axis": "X", + }, + ), + "latitude": ( + "latitude", + grid.lat_range, + { + "standard_name": "latitude", + "long_name": "latitude of grid cell centre", + "units": "degrees_north", + "axis": "Y", + }, + ), + "country": ( + "country", + da_fractions["country"].data, + ), + }, + data_vars={ + "country_fraction": ( + ("country", "latitude", "longitude"), + da_fractions.data, + { + "long_name": "fraction of grid cell associated to country", + "units": "1", + "comments": "calculated by emiproc", + }, + ), + }, + ) + + # Check that all inventories have year not equal to None + invs_years = [inv.year for inv in invs] + if None in invs_years: + raise ValueError("All inventories must have a year. " f"Got {invs_years=}.") + # Make sure the years are all different + if len(set(invs_years)) != len(invs_years): + raise ValueError( + "All inventories must have different years. " f"Got {invs_years=}." + ) + + dss = [ + get_temporally_scaled_array( + inv, + time_range=inv.year, + sum_over_cells=False, + ) + for inv in invs + ] + + # Put all together and sum over the categories to have total emissions + ds = xr.concat(dss, dim="time").sum(dim="category") + + # Convert to fluxes (kg yr-1 -> kg m-2 yr-1) + ds /= xr.DataArray(grid.cell_areas, coords={"cell": ds["cell"]}) + + ds = cell_to_lat_lon(ds) + + units = {"units": "kg m-2 yr-1"} + + for sub in substances: + sub_dir = output_dir / sub + sub_dir.mkdir(parents=True, exist_ok=True) + file_stem = "_".join( + [ + transport_model, + sub, + frequency, + ] + ) + da_this = ds.sel(substance=sub).drop_vars("substance").assign_attrs(units) + ds_this = ds_template.assign(flux_total_prior=da_this) + # Calculate the country flux + ds_this = ds_this.assign( + country_flux_total_prior=( + ds_this["flux_total_prior"] * ds_this["country_fraction"] + ) + .sum(dim=["latitude", "longitude"]) + .assign_attrs(units), + ) + + ds_this = ds_this.assign( + **{ + # Assign the same variable names as the prior for the posterior variables + var_prior.replace("prior", "posterior"): ds_this[var_prior] + for var_prior in [ + "flux_total_prior", + "country_flux_total_prior", + ] + } + ) + + ds_this.to_netcdf( + sub_dir / f"{file_stem}.nc", + ) + + logger.info(f"Exported {len(substances)} substances to {output_dir}.") diff --git a/emiproc/inventories/edgar.py b/emiproc/inventories/edgar.py index 30075fb..6fe111e 100644 --- a/emiproc/inventories/edgar.py +++ b/emiproc/inventories/edgar.py @@ -66,6 +66,9 @@ def download_edgar_files( downloaded = [] + data_dir = Path(data_dir) + data_dir.mkdir(parents=True, exist_ok=True) + # Download the files for link in download_links: filename = link.split("/")[-1] @@ -125,6 +128,8 @@ def __init__( """ super().__init__() + self.year = year + nc_file_pattern = Path(nc_file_pattern_or_dir) logger = logging.getLogger(__name__) diff --git a/scripts/exports/edgar_2_fluxy.ipynb b/scripts/exports/edgar_2_fluxy.ipynb new file mode 100644 index 0000000..4b8fbce --- /dev/null +++ b/scripts/exports/edgar_2_fluxy.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EDGAR to fluxy \n", + "\n", + "Example how to convert EDGAR to fluxy format.\n", + "\n", + "Edgar processing taken from the edgar tutorial.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## EDGAR Files\n", + "\n", + "In this section, we will examine the EDGAR NetCDF files.\n", + "\n", + "These files can be downloaded directly from the [EDGAR website](https://edgar.jrc.ec.europa.eu/emissions_data_and_maps). However, the most convenient method is to use the `download_edgar_files` function provided by emiproc, as demonstrated below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from emiproc.inventories.edgar import download_edgar_files\n", + "from emiproc import FILES_DIR\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "plt.style.use(\"default\")\n", + "\n", + "local_dir = FILES_DIR / \"edgar\"\n", + "local_dir.mkdir(exist_ok=True)\n", + "\n", + "years = [2020, 2021, 2022]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for year in years:\n", + " download_edgar_files(local_dir / \"data\"/ str(year), year=year, substances=[\"CH4\", \"CO2\", \"CO2bio\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating code to parse inventory files for every project can be cumbersome.\n", + "\n", + "The goal of emiproc is to provide a straightforward interface for reading emissions inventories. In this case, you can use the `EDGARv8` class, which reads the EDGAR files and loads them as an `Inventory` object.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.inventories.edgar import EDGARv8\n", + "\n", + "# Load the edgar inventory\n", + "\n", + "invs = [EDGARv8(local_dir / \"data\"/ str(year) / \"v8.0_*.nc\", year=year) for year in years]\n", + "invs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remapping the Inventory\n", + "\n", + "In this section, we will remap the inventory to a grid somehwere over europe and also until the east coast of the US.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.grids import RegularGrid\n", + "\n", + "grid = RegularGrid(xmin=-74, xmax=7, ymin=35.5, ymax=71.2, nx=170, ny=120)\n", + "grid" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In some cases, you may already have a grid for your simulation, which you could import into emiproc with appropriate code.\n", + "A [tutorial for developer](\n", + " https://emiproc.readthedocs.io/en/master/contrib/create_grid.html\n", + ")\n", + " is available. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Remap the inventory on the grid\n", + "from emiproc.regrid import remap_inventory\n", + "\n", + "\n", + "remappeds = [remap_inventory(inv, grid, weights_file=local_dir / \".weights_remap_fluxygrid_2\") for inv in invs]\n", + "remappeds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's examine our remapped inventory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.plots import plot_inventory\n", + "\n", + "\n", + "plot_inventory(remappeds[0], total_only=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this domain, the panorama is quite different.\n", + "For example, waste is well-managed in Australia, so its fraction of the total emissions is smaller.\n", + "Maybe Australia exports its waste to other regions such as Asia or Africa ? Who knows ?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Grouping Categories\n", + "\n", + "Many categories are similar and have very small emissions.\n", + "\n", + "In such cases, we can group some categories together to reduce the number of emission sectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.inventories.utils import group_categories\n", + "from emiproc.speciation import merge_substances\n", + "\n", + "group = lambda inv : group_categories(\n", + " merge_substances(inv,{\"CO2\": [\"CO2\", \"CO2bio\"]}),\n", + " categories_group={\n", + " \"agriculture\": [\n", + " \"Agricultural soils\",\n", + " \"Agricultural waste burning\",\n", + " \"Manure management\",\n", + " ],\n", + " \"industry\": [\n", + " \"Chemical processes\",\n", + " \"Power Industry\",\n", + " \"Oil refineries and Transformation industry\",\n", + " \"Fuel exploitation\",\n", + " \"Energy for buildings\",\n", + " \"Combustion for manufacturing\",\n", + " \"Iron and steel production\",\n", + " \"Non energy use of fuels\",\n", + " \"Solvents and products use\",\n", + " \"Non-ferrous metals production\",\n", + " \"Non-metallic minerals production\",\n", + " ],\n", + " \"livestock\": [\"Enteric fermentation\"],\n", + " \"waste\": [\n", + " \"Waste water handling\",\n", + " \"Solid waste incineration\",\n", + " \"Solid waste landfills\",\n", + " ],\n", + " \"transportation\": [\n", + " \"Aviation climbing_and_descent\",\n", + " \"Aviation cruise\",\n", + " \"Aviation landing_and_takeoff\",\n", + " \"Railways, pipelines, off-road transport\",\n", + " \"Shipping\",\n", + " \"Road transportation\",\n", + " ],\n", + " },\n", + ")\n", + "grouppeds = [group(inv) for inv in remappeds]\n", + "grouppeds[0].categories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exporting the Inventory\n", + "\n", + "One of the main features of emiproc is its ability to export the inventory to various types of simulation inputs.\n", + "\n", + "In this example, we will save the inventory to a NetCDF file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.profiles.temporal.io import from_yaml\n", + "\n", + "yml_dir = FILES_DIR / \"profiles\" / \"yamls\"\n", + "profiles = {\n", + " \"transportation\": from_yaml(yml_dir / \"light_daily.yaml\"),\n", + " \"industry\": from_yaml(yml_dir / \"industry_daily.yaml\"),\n", + " \"agriculture\": from_yaml(yml_dir / \"no_factor.yaml\"),\n", + " \"livestock\": from_yaml(yml_dir / \"no_factor.yaml\"),\n", + " \"waste\": from_yaml(yml_dir / \"no_factor.yaml\"),\n", + "}\n", + "\n", + "for inv in grouppeds:\n", + " for category, profile in profiles.items():\n", + " inv.set_profile(profile, category=category)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from emiproc.exports.fluxy import export_fluxy\n", + "\n", + "# Export the inventory to fluxy format\n", + "export_fluxy(grouppeds, local_dir / \"fluxy\" / \"edgar_emiproc\", transport_model=\"EmiprocEdgar\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using fluxy to plot the inventory\n", + "\n", + "First we need to read the data, this is done wit the `read_model_output` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fluxy.io import read_model_output\n", + "\n", + "dss_fluxy = read_model_output(\n", + " \n", + " data_dir=local_dir / \"fluxy\" / \"edgar_emiproc\",\n", + " file_type='flux', # Fluxy can plot also concentrations, but in emiproc we only use fluxes\n", + " species=\"CH4\",\n", + " models=[\"EmiprocEdgar\"],\n", + ")\n", + "dss_fluxy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fluxy.plots.flux_map import plot_flux_map\n", + "\n", + "\n", + "# This function is a simple flux map\n", + "fig = plot_flux_map(\n", + " dss_fluxy,\n", + " species='CO2',\n", + " # Set this to only see the inventory\n", + " # In fluxy they run inversion results, in our case we only have the prior inventory\n", + " only = \"prior\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fluxy.plots.flux_map import plot_flux_map_over_time\n", + "\n", + "\n", + "# This function plots the map over time (years)\n", + "fig = plot_flux_map_over_time(\n", + " dss_fluxy,\n", + " var=\"flux_total_prior\",\n", + " species=\"CO2\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fluxy.plots.flux_timeseries import plot_country_flux\n", + "\n", + "# This function plots timeseries of the total fluxes for each country\n", + "plot_country_flux(\n", + " dss_fluxy,\n", + " species='CO2', \n", + " # Set to false, otherwise it will try to look for files you don't have\n", + " plot_inventory=False, \n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "undefined.undefined.undefined" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/exports/test_fluxy.py b/tests/exports/test_fluxy.py new file mode 100644 index 0000000..7f6719a --- /dev/null +++ b/tests/exports/test_fluxy.py @@ -0,0 +1,71 @@ +from __future__ import annotations +import pytest + +from emiproc import TESTS_DIR +from emiproc.exports.fluxy import export_fluxy +from emiproc.regrid import remap_inventory +from emiproc.tests_utils.temporal_profiles import three_profiles +from emiproc.tests_utils.test_grids import regular_grid +from emiproc.tests_utils.test_inventories import inv + + +def test_export_fluxy_fails_on_non_regular_grid(): + + with pytest.raises(AssertionError): + export_fluxy( + invs=inv, + output_dir=TESTS_DIR / "fluxy", + ) + + +def test_export_fluxy_no_profiles_raises(): + inventory = inv.copy() + inventory.set_crs(regular_grid.crs) + with pytest.raises(ValueError): + export_fluxy( + invs=remap_inventory(inventory, grid=regular_grid), + output_dir=TESTS_DIR / "fluxy", + ) + + +def test_export_fluxy_no_year_raises(): + inventory = inv.copy() + inventory.set_crs(regular_grid.crs) + + inventory.set_profile( + three_profiles[0], + category="test", + ) + inventory.set_profile( + three_profiles[1], + category="adf", + ) + with pytest.raises(ValueError): + export_fluxy( + invs=remap_inventory(inventory, grid=regular_grid), + output_dir=TESTS_DIR / "fluxy", + ) + + +def test_export_fluxy(): + """Main test where it works.""" + + inventory = inv.copy() + inventory.set_crs(regular_grid.crs) + inventory.year = 2020 + + inventory.set_profile( + three_profiles[0], + category="test", + ) + inventory.set_profile( + three_profiles[1], + category="adf", + ) + + export_fluxy( + invs=remap_inventory(inventory, grid=regular_grid), + output_dir=TESTS_DIR / "fluxy", + ) + + assert (TESTS_DIR / "fluxy" / "emiproc" / 'CH4' / "emiproc_CH4_yearly.nc").is_file()