Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 59 additions & 12 deletions openairclim/interpolate_time.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This Pull Request (PR) is from an external repository to the dlr-pa repository. Once the branch is within dlr-pa, reviewers can perform much easier checks with the new code. Otherwise, a fresh clone and install of OpenAirClim from the external repository is needed.

Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"EI_NOx": "NOx",
"dis_per_fuel": "distance",
}

DEFAULT_SPECIES_ORDER = ["fuel", "CO2", "H2O", "NOx", "distance"]

def interpolate(
config: dict, years: np.ndarray, val_dict: dict
Expand Down Expand Up @@ -182,7 +182,6 @@ def apply_scaling(
scaling factors are from evolution file,
time series and scaling factors are interpolated on time_range
before multiplication
TODO: implement scaling for individual species

Args:
config (dict): Configuration dictionary from config
Expand All @@ -196,8 +195,10 @@ def apply_scaling(
"""
# Get inventory years
inv_years = np.array(list(inv_dict.keys()))

# Interpolate scaling factors linearly on time_range
time_range, evo_interp_dict = interp_evolution(config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

works fine, even with higher dimensional scaling factors array and with time steps > 1 year.


# If inventories have been adjusted beforehand, normalize scaling factors to inv_years
if inventories_adjusted:
# Filter evo_interp_dict to inv_years
Expand All @@ -213,18 +214,36 @@ def apply_scaling(
evo_interp_dict["scaling"], evo_filtered_interp_dict["scaling"]
)
evo_interp_dict = {"scaling": evo_norm_scaling_arr}

# Interpolate time series data linearly on time_range
_time_range, interp_dict = interp_linear(config, inv_years, val_dict)
# Multiply time series data by scaling factors

# Get species order for proper scaling
time_dir = config["time"]["dir"]
file_name = config["time"]["file"]
file_path = time_dir + file_name
evolution = xr.load_dataset(file_path)
if "species" in evolution.coords:
species_order = evolution.species.values.tolist()
else:
species_order = DEFAULT_SPECIES_ORDER
# Multiply time series data by correct scaling factors
out_dict = {}
for spec, series_arr in interp_dict.items():
# adapted for multiplication of multi-dimensional arrays
shape_tp = np.shape(np.transpose(series_arr))
scaling = np.transpose(np.resize(evo_interp_dict["scaling"], shape_tp))
out_dict[spec] = np.multiply(scaling, series_arr)
# Find the correct scaling factor index for this species
if spec in species_order:
species_idx = species_order.index(spec)
# Extract the scaling factors for this specific species
scaling_factors = evo_interp_dict["scaling"][:, species_idx]
else:
# If species not found, use fuel scaling as default
scaling_factors = evo_interp_dict["scaling"][:, species_order.index("fuel")]

# Multiply element-wise
out_dict[spec] = series_arr * scaling_factors

return time_range, out_dict


def apply_norm(config, val_dict, inv_dict):
"""Apply normalization on time series,
get data from evolution file and inventories,
Expand Down Expand Up @@ -591,42 +610,61 @@ def scale_inv(inv_dict: dict, scale_dict: dict) -> dict:
Args:
inv_dict (dict): Dictionary of xarray Datasets, keys are years of inventories
scale_dict (dict): Dictionary of scaling factors {"scaling": np.ndarray}
where scaling array has shape (n_years, n_species)

Returns:
dict: Dictionary of xarray Datasets (scaled emission inventories),
keys are years of inventories
"""
# Get array with scaling multipliers
# Get array with scaling multipliers - shape (n_years, n_species)
scale_arr = scale_dict["scaling"]

# Initialize output inventory dictionary
out_inv_dict = {}

# Array index corresponding to inventory years
i = 0
for year, inv in inv_dict.items():
# Get global inventory attributes
inv_attrs = inv.attrs
# Initialize output inventory
out_inv = xr.Dataset()
out_inv = xr.Dataset()
# Iterate over data variables in inventory
for data_key, data_arr in inv.items():
# Get attributes of data variable
data_attrs = data_arr.attrs
data_attrs = data_arr.attrs
# lon, lat, plev: do NOT multiply
if data_key in ["lon", "lat", "plev", "ac"]:
pass
# multiply fuel, species emissions, and distance by scaling multiplier
else:
data_arr = data_arr * scale_arr[i]
# Find the correct scaling factor for this data variable
if data_key in scale_dict["species"]:
species_idx = scale_dict["species"].index(data_key)
scaling_value = scale_arr[i, species_idx]
else:
# If species not found, use fuel scaling as default
warnings.warn(
f"Species '{data_key}' not found in the scaling dict. "
f"Variable will be scaled using fuel scaling.",
UserWarning
)
scaling_value = scale_arr[i, scale_dict["species"].index("fuel")]
data_arr = data_arr * scaling_value

# Add data variable to output inventory
data_arr.attrs = data_attrs
out_inv = out_inv.merge({data_key: data_arr})

# Set global inventory attributes
out_inv.attrs = inv_attrs
out_inv_dict[year] = out_inv
i = i + 1

return out_inv_dict



def scale_inventories(config: dict, inv_dict: dict) -> dict:
"""Applies scaling to a dictionary of emission inventories.
This function first interpolates evolution data variables to time_range from config.
Expand All @@ -651,6 +689,15 @@ def scale_inventories(config: dict, inv_dict: dict) -> dict:
evo_filtered_dict = filter_to_inv_years(
inv_years, time_range, evo_interp_dict
)
# Get species order for proper scaling
time_dir = config["time"]["dir"]
file_name = config["time"]["file"]
file_path = time_dir + file_name
evolution = xr.load_dataset(file_path)
if "species" in evolution.coords:
evo_filtered_dict["species"] = evolution["species"].values.tolist()
else:
evo_filtered_dict["species"] = DEFAULT_SPECIES_ORDER
# Perform actual scaling: Multiply inventory data variables by scaling factors
out_inv_dict = scale_inv(inv_dict, evo_filtered_dict)
return out_inv_dict
88 changes: 79 additions & 9 deletions tests/interpolate_time_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,13 @@ class TestScaleInv:

def test_correct_input(self, inv_dict):
"""Valid input returns dictionary of xr.Dataset, keys are inventory years"""
scale_dict = {"scaling": np.array([1.0, 2.0])}
scale_dict = {
"scaling": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
years = list(inv_dict.keys())
out_dict = oac.scale_inv(inv_dict, scale_dict)
# Test for correct output type
Expand All @@ -240,29 +246,93 @@ def test_correct_input(self, inv_dict):
def test_correct_scaling(self, inv_dict):
"""Test for correct scaling of inventories"""
# Input
scale_dict = {"scaling": np.array([1.0, 2.0])}
inp_2020_fuel_arr = inv_dict[2020].fuel.values
inp_2050_fuel_arr = inv_dict[2050].fuel.values
scale_dict = {
"scaling": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
inp_2020_arrs = {}
inp_2050_arrs = {}
out_2020_arrs = {}
out_2050_arrs = {}
for specie in scale_dict["species"]:
inp_2020_arrs[specie] = inv_dict[2020][specie].values
inp_2050_arrs[specie] = inv_dict[2050][specie].values
inp_2050_lon_arr = inv_dict[2050].lon.values
inp_2050_lat_arr = inv_dict[2050].lat.values
inp_2050_plev_arr = inv_dict[2050].plev.values
# Output
out_dict = oac.scale_inv(inv_dict, scale_dict)
out_2020_fuel_arr = out_dict[2020].fuel.values
out_2050_fuel_arr = out_dict[2050].fuel.values
for specie in scale_dict["species"]:
out_2020_arrs[specie] = out_dict[2020][specie].values
out_2050_arrs[specie] = out_dict[2050][specie].values
out_2050_lon_arr = out_dict[2050].lon.values
out_2050_lat_arr = out_dict[2050].lat.values
out_2050_plev_arr = out_dict[2050].plev.values
# Test for correct scaling of fuel data variable
np.testing.assert_equal(out_2020_fuel_arr, inp_2020_fuel_arr)
np.testing.assert_equal(out_2050_fuel_arr, (2.0 * inp_2050_fuel_arr))
for i, specie in enumerate(scale_dict["species"]):
np.testing.assert_equal(out_2020_arrs[specie], inp_2020_arrs[specie])
expected = scale_dict["scaling"][1, i] * inp_2050_arrs[specie]
np.testing.assert_allclose(out_2050_arrs[specie], expected, rtol=1e-12)

# Test that coordinates remain unchanged
np.testing.assert_equal(out_2050_lon_arr, inp_2050_lon_arr)
np.testing.assert_equal(out_2050_lat_arr, inp_2050_lat_arr)
np.testing.assert_equal(out_2050_plev_arr, inp_2050_plev_arr)

def test_incorrect_input(self, inv_dict):
"""Invalid scale_dict (invalid key) returns KeyError"""
scale_dict = {"invalid_key": np.array([1.0, 2.0])}
scale_dict = {
"invalid_key": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
with pytest.raises(KeyError):
oac.scale_inv(inv_dict, scale_dict)




@pytest.mark.usefixtures("inv_dict")
class TestApplyScaling:
"""Tests for apply_scaling function"""

def test_basic_scaling(self, inv_dict):
"""Test that apply_scaling correctly multiplies by scaling factors"""

# --- Config ---
config = {
"time": {
"dir": "../oac/example/input/",
"file": "time_scaling_example.nc",
"range": [2020, 2051, 1],
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some guidelines and good practices for software testing. I can provide you with some additional documentation on that topic if you like:

  • According to the testing pyramid there are three types: unit tests (base), integration tests (middle) and end-to-end tests (top). The base comes with the highest amount of tests, top has the fewest. This means one should focus on the unit tests. These tests are fast, isolated and check individual aspects of the individual implemented functions.
  • Decouple tests from external dependencies, e.g. other files in the repository. This concept is known as test isolation. This particular test would fail if the utility script generating time_scaling_example.nc has not been executed beforehand. Test isolation can be achieved by the concept of test doubles (e.g. mock objects). If the focus is unit testing instead of end-to-end, maybe the file itself is not needed anymore and it would be enough to code a minimal xarray Dataset which contains an example time evolution.

}

# --- Load scaling file to get species order ---
evolution = xr.load_dataset(config["time"]["dir"] + config["time"]["file"])
species_order = evolution.species.values.tolist()
scaling = evolution.scaling.values
scaling_years = evolution.time.values

# --- Prepare dummy val_dict ---
years = np.array(list(inv_dict.keys()))
val_dict = {spec: np.ones(len(years)) * (i + 1) for i, spec in enumerate(species_order)}

# --- Run apply_scaling ---
time_range, out_dict = oac.apply_scaling(config, val_dict, inv_dict, inventories_adjusted=False)

# --- Basic checks ---
assert isinstance(time_range, np.ndarray)
assert isinstance(out_dict, dict)
assert set(out_dict.keys()) == set(val_dict.keys())
assert all(out_dict[spec].shape == time_range.shape for spec in val_dict)

# --- Check scaling applied correctly ---
for idx, spec in enumerate(species_order):
expected = np.interp(time_range, scaling_years, scaling[:, idx]) * val_dict[spec][0]
np.testing.assert_allclose(out_dict[spec], expected, rtol=1e-12)
50 changes: 32 additions & 18 deletions utils/create_time_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@

# SCALING CONSTANTS
SCALING_TIME = np.arange(1990, 2200, 1)
SCALING_ARR = np.sin(SCALING_TIME * 0.2) * 0.6 + 1.0
SCALING_ARR = SCALING_ARR.astype("float32")
SPECIES = ["fuel", "CO2", "H2O", "NOx", "distance"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the time, I named lists and numpy arrays xxx_arr in OpenAirClim. In addition, species can be a singular and plural word. Therefore, I suggest renaming this list to SPECIES_ARR.

SCALING_DATA = np.vstack([
np.linspace(1.0, 2.0, len(SCALING_TIME)), # fuel: +100% growth
np.linspace(1.0, 1.75, len(SCALING_TIME)), # CO2: +75% growth
np.linspace(1.0, 1.5, len(SCALING_TIME)), # H2O: +50% growth
np.linspace(1.0, 0.8, len(SCALING_TIME)), # NOx: -20% reduction
    np.linspace(1.0, 1.2, len(SCALING_TIME))  # distance: +20% growth
]).astype("float32")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for updating the scaling factors. Looks much more realistic!


# NORMALIZATION CONSTANTS
NORM_TIME = np.array(
Expand Down Expand Up @@ -74,51 +80,59 @@
# TIME SCALING


def plot_time_scaling(scaling_time: np.ndarray, scaling_arr: np.ndarray):
def plot_time_scaling(scaling_time: np.ndarray, scaling: np.ndarray, species: list):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As stated above. I suggest renaming to scaling_arr and species_arr. I used the notation _arr for arrays of any dimension throughout OpenAirClim.

"""
Plots the time scaling factors.

Args:
scaling_time (np.ndarray): The time values for the scaling factors.
scaling_arr (np.ndarray): The scaling factors to plot.
scaling (np.ndarray): 2D array of scaling factors [species, time]
species (list): List of species names

Returns:
None

"""
_fig, ax = plt.subplots()
ax.plot(scaling_time, scaling_arr)
ax.set_xlabel("year")
ax.set_ylabel("scaling factor")
plt.figure(figsize=(8, 4))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To better distinguish the two plots which are opened upon execution of this script, I suggest to add a string identifier, e.g. "Scaling" as first argument to function plt.figure(), see : https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.figure.html
Instead of "Figure 1", a nice name would show up then.

for i, specie in enumerate(species):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good example why the notation _arr is unambiguous. First, the reader can better distinguish the two objects species_arr (np.ndarray) and species (str) in the code. Second, the word specie (without "s" at the end) means something different, a coin ;-)

plt.plot(scaling_time, scaling[i], label=specie)
plt.xlabel("Year")
plt.ylabel("Scaling factor")
plt.title("Scaling factors over time by species")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


def create_time_scaling_xr(
scaling_time: np.ndarray, scaling_arr: np.ndarray
scaling_time: np.ndarray, scaling: np.ndarray, species: list
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above, suggest renaming variables to xxx_arr

) -> xr.Dataset:
"""
Create an xarray dataset containing time scaling factors.

Args:
scaling_time (np.ndarray): The time values for the scaling factors.
scaling_arr (np.ndarray): The scaling factors to plot.
scaling (np.ndarray): 2D array of scaling factors [species, time]
species (list): List of species names

Returns:
xr.Dataset: The xarray dataset containing the time scaling factors.

"""
evolution = xr.Dataset(
data_vars=dict(scaling=(["time"], scaling_arr)),
coords=dict(time=scaling_time),
data_vars=dict(scaling=(["time", "species"], scaling.T)),
coords=dict(time=scaling_time, species=species),
)
evolution.time.attrs = {"units": "years"}
evolution.scaling.attrs = {"species": "all"}
evolution.species.attrs = {"description": "species names"}
evolution.scaling.attrs = {"long_name": "scaling factors", "units": "-"}
evolution.attrs = dict(
Title="Time scaling example",
Convention="CF-XXX",
Type="scaling",
Author="Stefan Völk",
Contact="stefan.voelk@dlr.de",
Author="Stefan Völk, Clémentin Léron",
Contact="openairclim@dlr.de",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you very much for updating the attributes/metadata 👍

)
return evolution

Expand Down Expand Up @@ -170,7 +184,7 @@ def create_time_normalization_xr(
Convention="CF-XXX",
Type="norm",
Author="Stefan Völk",
Contact="stefan.voelk@dlr.de",
Contact="openairclim@dlr.de",
)
return evolution

Expand Down Expand Up @@ -217,9 +231,9 @@ def convert_xr_to_nc(ds: xr.Dataset, file_name: str, out_path: str = OUT_PATH):


if __name__ == "__main__":
scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_ARR)
scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_DATA, SPECIES)
convert_xr_to_nc(scaling_ds, "time_scaling_example")
plot_time_scaling(SCALING_TIME, SCALING_ARR)
plot_time_scaling(SCALING_TIME, SCALING_DATA, SPECIES)
norm_ds = create_time_normalization_xr(
NORM_TIME, FUEL_ARR, EI_CO2_ARR, EI_H2O_ARR, DIS_PER_FUEL_ARR
)
Expand Down
Loading