-
Notifications
You must be signed in to change notification settings - Fork 2
Feature/scaling multi species #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/scaling-multi-species
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
| "EI_NOx": "NOx", | ||
| "dis_per_fuel": "distance", | ||
| } | ||
|
|
||
| DEFAULT_SPECIES_ORDER = ["fuel", "CO2", "H2O", "NOx", "distance"] | ||
|
|
||
| def interpolate( | ||
| config: dict, years: np.ndarray, val_dict: dict | ||
|
|
@@ -182,7 +182,6 @@ def apply_scaling( | |
| scaling factors are from evolution file, | ||
| time series and scaling factors are interpolated on time_range | ||
| before multiplication | ||
| TODO: implement scaling for individual species | ||
|
|
||
| Args: | ||
| config (dict): Configuration dictionary from config | ||
|
|
@@ -196,8 +195,10 @@ def apply_scaling( | |
| """ | ||
| # Get inventory years | ||
| inv_years = np.array(list(inv_dict.keys())) | ||
|
|
||
| # Interpolate scaling factors linearly on time_range | ||
| time_range, evo_interp_dict = interp_evolution(config) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. works fine, even with higher dimensional scaling factors array and with time steps > 1 year. |
||
|
|
||
| # If inventories have been adjusted beforehand, normalize scaling factors to inv_years | ||
| if inventories_adjusted: | ||
| # Filter evo_interp_dict to inv_years | ||
|
|
@@ -213,18 +214,36 @@ def apply_scaling( | |
| evo_interp_dict["scaling"], evo_filtered_interp_dict["scaling"] | ||
| ) | ||
| evo_interp_dict = {"scaling": evo_norm_scaling_arr} | ||
|
|
||
| # Interpolate time series data linearly on time_range | ||
| _time_range, interp_dict = interp_linear(config, inv_years, val_dict) | ||
| # Multiply time series data by scaling factors | ||
|
|
||
| # Get species order for proper scaling | ||
| time_dir = config["time"]["dir"] | ||
| file_name = config["time"]["file"] | ||
| file_path = time_dir + file_name | ||
| evolution = xr.load_dataset(file_path) | ||
| if "species" in evolution.coords: | ||
| species_order = evolution.species.values.tolist() | ||
| else: | ||
| species_order = DEFAULT_SPECIES_ORDER | ||
| # Multiply time series data by correct scaling factors | ||
| out_dict = {} | ||
| for spec, series_arr in interp_dict.items(): | ||
| # adapted for multiplication of multi-dimensional arrays | ||
| shape_tp = np.shape(np.transpose(series_arr)) | ||
| scaling = np.transpose(np.resize(evo_interp_dict["scaling"], shape_tp)) | ||
| out_dict[spec] = np.multiply(scaling, series_arr) | ||
| # Find the correct scaling factor index for this species | ||
| if spec in species_order: | ||
| species_idx = species_order.index(spec) | ||
| # Extract the scaling factors for this specific species | ||
| scaling_factors = evo_interp_dict["scaling"][:, species_idx] | ||
| else: | ||
| # If species not found, use fuel scaling as default | ||
| scaling_factors = evo_interp_dict["scaling"][:, species_order.index("fuel")] | ||
|
|
||
| # Multiply element-wise | ||
| out_dict[spec] = series_arr * scaling_factors | ||
|
|
||
| return time_range, out_dict | ||
|
|
||
|
|
||
| def apply_norm(config, val_dict, inv_dict): | ||
| """Apply normalization on time series, | ||
| get data from evolution file and inventories, | ||
|
|
@@ -591,42 +610,61 @@ def scale_inv(inv_dict: dict, scale_dict: dict) -> dict: | |
| Args: | ||
| inv_dict (dict): Dictionary of xarray Datasets, keys are years of inventories | ||
| scale_dict (dict): Dictionary of scaling factors {"scaling": np.ndarray} | ||
| where scaling array has shape (n_years, n_species) | ||
|
|
||
| Returns: | ||
| dict: Dictionary of xarray Datasets (scaled emission inventories), | ||
| keys are years of inventories | ||
| """ | ||
| # Get array with scaling multipliers | ||
| # Get array with scaling multipliers - shape (n_years, n_species) | ||
| scale_arr = scale_dict["scaling"] | ||
|
|
||
| # Initialize output inventory dictionary | ||
| out_inv_dict = {} | ||
|
|
||
| # Array index corresponding to inventory years | ||
| i = 0 | ||
| for year, inv in inv_dict.items(): | ||
| # Get global inventory attributes | ||
| inv_attrs = inv.attrs | ||
| # Initialize output inventory | ||
| out_inv = xr.Dataset() | ||
| out_inv = xr.Dataset() | ||
| # Iterate over data variables in inventory | ||
| for data_key, data_arr in inv.items(): | ||
| # Get attributes of data variable | ||
| data_attrs = data_arr.attrs | ||
| data_attrs = data_arr.attrs | ||
| # lon, lat, plev: do NOT multiply | ||
| if data_key in ["lon", "lat", "plev", "ac"]: | ||
| pass | ||
| # multiply fuel, species emissions, and distance by scaling multiplier | ||
| else: | ||
| data_arr = data_arr * scale_arr[i] | ||
| # Find the correct scaling factor for this data variable | ||
| if data_key in scale_dict["species"]: | ||
| species_idx = scale_dict["species"].index(data_key) | ||
| scaling_value = scale_arr[i, species_idx] | ||
| else: | ||
| # If species not found, use fuel scaling as default | ||
| warnings.warn( | ||
| f"Species '{data_key}' not found in the scaling dict. " | ||
| f"Variable will be scaled using fuel scaling.", | ||
| UserWarning | ||
| ) | ||
| scaling_value = scale_arr[i, scale_dict["species"].index("fuel")] | ||
| data_arr = data_arr * scaling_value | ||
|
|
||
| # Add data variable to output inventory | ||
| data_arr.attrs = data_attrs | ||
| out_inv = out_inv.merge({data_key: data_arr}) | ||
|
|
||
| # Set global inventory attributes | ||
| out_inv.attrs = inv_attrs | ||
| out_inv_dict[year] = out_inv | ||
| i = i + 1 | ||
|
|
||
| return out_inv_dict | ||
|
|
||
|
|
||
|
|
||
| def scale_inventories(config: dict, inv_dict: dict) -> dict: | ||
| """Applies scaling to a dictionary of emission inventories. | ||
| This function first interpolates evolution data variables to time_range from config. | ||
|
|
@@ -651,6 +689,15 @@ def scale_inventories(config: dict, inv_dict: dict) -> dict: | |
| evo_filtered_dict = filter_to_inv_years( | ||
| inv_years, time_range, evo_interp_dict | ||
| ) | ||
| # Get species order for proper scaling | ||
| time_dir = config["time"]["dir"] | ||
| file_name = config["time"]["file"] | ||
| file_path = time_dir + file_name | ||
| evolution = xr.load_dataset(file_path) | ||
| if "species" in evolution.coords: | ||
| evo_filtered_dict["species"] = evolution["species"].values.tolist() | ||
| else: | ||
| evo_filtered_dict["species"] = DEFAULT_SPECIES_ORDER | ||
| # Perform actual scaling: Multiply inventory data variables by scaling factors | ||
| out_inv_dict = scale_inv(inv_dict, evo_filtered_dict) | ||
| return out_inv_dict | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -229,7 +229,13 @@ class TestScaleInv: | |
|
|
||
| def test_correct_input(self, inv_dict): | ||
| """Valid input returns dictionary of xr.Dataset, keys are inventory years""" | ||
| scale_dict = {"scaling": np.array([1.0, 2.0])} | ||
| scale_dict = { | ||
| "scaling": np.array([ | ||
| [1.0, 1.0, 1.0, 1.0, 1.0], | ||
| [1.0, 1.2, 1.4, 1.6, 1.8] | ||
| ]), | ||
| "species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance'] | ||
| } | ||
| years = list(inv_dict.keys()) | ||
| out_dict = oac.scale_inv(inv_dict, scale_dict) | ||
| # Test for correct output type | ||
|
|
@@ -240,29 +246,93 @@ def test_correct_input(self, inv_dict): | |
| def test_correct_scaling(self, inv_dict): | ||
| """Test for correct scaling of inventories""" | ||
| # Input | ||
| scale_dict = {"scaling": np.array([1.0, 2.0])} | ||
| inp_2020_fuel_arr = inv_dict[2020].fuel.values | ||
| inp_2050_fuel_arr = inv_dict[2050].fuel.values | ||
| scale_dict = { | ||
| "scaling": np.array([ | ||
| [1.0, 1.0, 1.0, 1.0, 1.0], | ||
| [1.0, 1.2, 1.4, 1.6, 1.8] | ||
| ]), | ||
| "species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance'] | ||
| } | ||
| inp_2020_arrs = {} | ||
| inp_2050_arrs = {} | ||
| out_2020_arrs = {} | ||
| out_2050_arrs = {} | ||
| for specie in scale_dict["species"]: | ||
| inp_2020_arrs[specie] = inv_dict[2020][specie].values | ||
| inp_2050_arrs[specie] = inv_dict[2050][specie].values | ||
| inp_2050_lon_arr = inv_dict[2050].lon.values | ||
| inp_2050_lat_arr = inv_dict[2050].lat.values | ||
| inp_2050_plev_arr = inv_dict[2050].plev.values | ||
| # Output | ||
| out_dict = oac.scale_inv(inv_dict, scale_dict) | ||
| out_2020_fuel_arr = out_dict[2020].fuel.values | ||
| out_2050_fuel_arr = out_dict[2050].fuel.values | ||
| for specie in scale_dict["species"]: | ||
| out_2020_arrs[specie] = out_dict[2020][specie].values | ||
| out_2050_arrs[specie] = out_dict[2050][specie].values | ||
| out_2050_lon_arr = out_dict[2050].lon.values | ||
| out_2050_lat_arr = out_dict[2050].lat.values | ||
| out_2050_plev_arr = out_dict[2050].plev.values | ||
| # Test for correct scaling of fuel data variable | ||
| np.testing.assert_equal(out_2020_fuel_arr, inp_2020_fuel_arr) | ||
| np.testing.assert_equal(out_2050_fuel_arr, (2.0 * inp_2050_fuel_arr)) | ||
| for i, specie in enumerate(scale_dict["species"]): | ||
| np.testing.assert_equal(out_2020_arrs[specie], inp_2020_arrs[specie]) | ||
| expected = scale_dict["scaling"][1, i] * inp_2050_arrs[specie] | ||
| np.testing.assert_allclose(out_2050_arrs[specie], expected, rtol=1e-12) | ||
|
|
||
| # Test that coordinates remain unchanged | ||
| np.testing.assert_equal(out_2050_lon_arr, inp_2050_lon_arr) | ||
| np.testing.assert_equal(out_2050_lat_arr, inp_2050_lat_arr) | ||
| np.testing.assert_equal(out_2050_plev_arr, inp_2050_plev_arr) | ||
|
|
||
| def test_incorrect_input(self, inv_dict): | ||
| """Invalid scale_dict (invalid key) returns KeyError""" | ||
| scale_dict = {"invalid_key": np.array([1.0, 2.0])} | ||
| scale_dict = { | ||
| "invalid_key": np.array([ | ||
| [1.0, 1.0, 1.0, 1.0, 1.0], | ||
| [1.0, 1.2, 1.4, 1.6, 1.8] | ||
| ]), | ||
| "species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance'] | ||
| } | ||
| with pytest.raises(KeyError): | ||
| oac.scale_inv(inv_dict, scale_dict) | ||
|
|
||
|
|
||
|
|
||
|
|
||
| @pytest.mark.usefixtures("inv_dict") | ||
| class TestApplyScaling: | ||
| """Tests for apply_scaling function""" | ||
|
|
||
| def test_basic_scaling(self, inv_dict): | ||
| """Test that apply_scaling correctly multiplies by scaling factors""" | ||
|
|
||
| # --- Config --- | ||
| config = { | ||
| "time": { | ||
| "dir": "../oac/example/input/", | ||
| "file": "time_scaling_example.nc", | ||
| "range": [2020, 2051, 1], | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are some guidelines and good practices for software testing. I can provide you with some additional documentation on that topic if you like:
|
||
| } | ||
|
|
||
| # --- Load scaling file to get species order --- | ||
| evolution = xr.load_dataset(config["time"]["dir"] + config["time"]["file"]) | ||
| species_order = evolution.species.values.tolist() | ||
| scaling = evolution.scaling.values | ||
| scaling_years = evolution.time.values | ||
|
|
||
| # --- Prepare dummy val_dict --- | ||
| years = np.array(list(inv_dict.keys())) | ||
| val_dict = {spec: np.ones(len(years)) * (i + 1) for i, spec in enumerate(species_order)} | ||
|
|
||
| # --- Run apply_scaling --- | ||
| time_range, out_dict = oac.apply_scaling(config, val_dict, inv_dict, inventories_adjusted=False) | ||
|
|
||
| # --- Basic checks --- | ||
| assert isinstance(time_range, np.ndarray) | ||
| assert isinstance(out_dict, dict) | ||
| assert set(out_dict.keys()) == set(val_dict.keys()) | ||
| assert all(out_dict[spec].shape == time_range.shape for spec in val_dict) | ||
|
|
||
| # --- Check scaling applied correctly --- | ||
| for idx, spec in enumerate(species_order): | ||
| expected = np.interp(time_range, scaling_years, scaling[:, idx]) * val_dict[spec][0] | ||
| np.testing.assert_allclose(out_dict[spec], expected, rtol=1e-12) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,8 +10,14 @@ | |
|
|
||
| # SCALING CONSTANTS | ||
| SCALING_TIME = np.arange(1990, 2200, 1) | ||
| SCALING_ARR = np.sin(SCALING_TIME * 0.2) * 0.6 + 1.0 | ||
| SCALING_ARR = SCALING_ARR.astype("float32") | ||
| SPECIES = ["fuel", "CO2", "H2O", "NOx", "distance"] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most of the times, I named lists and numpy arrays xxx_arr in OpenAirClim. In addition, species can be a singular and plural word. Therefore, I suggest renaming this list to |
||
| SCALING_DATA = np.vstack([ | ||
| np.linspace(1.0, 2.0, len(SCALING_TIME)), # fuel: +100% growth | ||
| np.linspace(1.0, 1.75, len(SCALING_TIME)), # CO2: +75% growth | ||
| np.linspace(1.0, 1.5, len(SCALING_TIME)), # H2O: +50% growth | ||
| np.linspace(1.0, 0.8, len(SCALING_TIME)), # NOx: -20% reduction | ||
| np.linspace(1.0, 1.2, len(SCALING_TIME)) # distance: +120% growth | ||
| ]).astype("float32") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for updating the scaling factors. Looks much more realistic! |
||
|
|
||
| # NORMALIZATION CONSTANTS | ||
| NORM_TIME = np.array( | ||
|
|
@@ -74,51 +80,59 @@ | |
| # TIME SCALING | ||
|
|
||
|
|
||
| def plot_time_scaling(scaling_time: np.ndarray, scaling_arr: np.ndarray): | ||
| def plot_time_scaling(scaling_time: np.ndarray, scaling: np.ndarray, species: list): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As stated above. I suggest renaming to |
||
| """ | ||
| Plots the time scaling factors. | ||
|
|
||
| Args: | ||
| scaling_time (np.ndarray): The time values for the scaling factors. | ||
| scaling_arr (np.ndarray): The scaling factors to plot. | ||
| scaling (np.ndarray): 2D array of scaling factors [species, time] | ||
| species (list): List of species names | ||
|
|
||
| Returns: | ||
| None | ||
|
|
||
| """ | ||
| _fig, ax = plt.subplots() | ||
| ax.plot(scaling_time, scaling_arr) | ||
| ax.set_xlabel("year") | ||
| ax.set_ylabel("scaling factor") | ||
| plt.figure(figsize=(8, 4)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To better distinguish the two plots which are opened upon execution of this script, I suggest to add a string identifier, e.g. "Scaling" as first argument to function |
||
| for i, specie in enumerate(species): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a good example why the notation |
||
| plt.plot(scaling_time, scaling[i], label=specie) | ||
| plt.xlabel("Year") | ||
| plt.ylabel("Scaling factor") | ||
| plt.title("Scaling factors over time by species") | ||
| plt.legend() | ||
| plt.grid(True) | ||
| plt.tight_layout() | ||
| plt.show() | ||
|
|
||
|
|
||
| def create_time_scaling_xr( | ||
| scaling_time: np.ndarray, scaling_arr: np.ndarray | ||
| scaling_time: np.ndarray, scaling: np.ndarray, species: list | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see above, suggest renaming variables to |
||
| ) -> xr.Dataset: | ||
| """ | ||
| Create an xarray dataset containing time scaling factors. | ||
|
|
||
| Args: | ||
| scaling_time (np.ndarray): The time values for the scaling factors. | ||
| scaling_arr (np.ndarray): The scaling factors to plot. | ||
| scaling (np.ndarray): 2D array of scaling factors [species, time] | ||
| species (list): List of species names | ||
|
|
||
| Returns: | ||
| xr.Dataset: The xarray dataset containing the time scaling factors. | ||
|
|
||
| """ | ||
| evolution = xr.Dataset( | ||
| data_vars=dict(scaling=(["time"], scaling_arr)), | ||
| coords=dict(time=scaling_time), | ||
| data_vars=dict(scaling=(["time", "species"], scaling.T)), | ||
| coords=dict(time=scaling_time, species=species), | ||
| ) | ||
| evolution.time.attrs = {"units": "years"} | ||
| evolution.scaling.attrs = {"species": "all"} | ||
| evolution.species.attrs = {"description": "species names"} | ||
| evolution.scaling.attrs = {"long_name": "scaling factors", "units": "-"} | ||
| evolution.attrs = dict( | ||
| Title="Time scaling example", | ||
| Convention="CF-XXX", | ||
| Type="scaling", | ||
| Author="Stefan Völk", | ||
| Contact="stefan.voelk@dlr.de", | ||
| Author="Stefan Völk, Clémentin Léron", | ||
| Contact="openairclim@dlr.de", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you very much for updating the attributes/metadata 👍 |
||
| ) | ||
| return evolution | ||
|
|
||
|
|
@@ -170,7 +184,7 @@ def create_time_normalization_xr( | |
| Convention="CF-XXX", | ||
| Type="norm", | ||
| Author="Stefan Völk", | ||
| Contact="stefan.voelk@dlr.de", | ||
| Contact="openairclim@dlr.de", | ||
| ) | ||
| return evolution | ||
|
|
||
|
|
@@ -217,9 +231,9 @@ def convert_xr_to_nc(ds: xr.Dataset, file_name: str, out_path: str = OUT_PATH): | |
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_ARR) | ||
| scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_DATA, SPECIES) | ||
| convert_xr_to_nc(scaling_ds, "time_scaling_example") | ||
| plot_time_scaling(SCALING_TIME, SCALING_ARR) | ||
| plot_time_scaling(SCALING_TIME, SCALING_DATA, SPECIES) | ||
| norm_ds = create_time_normalization_xr( | ||
| NORM_TIME, FUEL_ARR, EI_CO2_ARR, EI_H2O_ARR, DIS_PER_FUEL_ARR | ||
| ) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This Pull Request (PR) is from an external repository to the dlr-pa repository. Once the branch is within dlr-pa, reviewers can perform much easier checks with the new code. Otherwise, a fresh clone and install of OpenAirClim from the external repository is needed.