Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 59 additions & 12 deletions openairclim/interpolate_time.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This Pull Request (PR) is from an external repository to the dlr-pa repository. Once the branch is within dlr-pa, reviewers can perform much easier checks with the new code. Otherwise, a fresh clone and install of OpenAirClim from the external repository is needed.

Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"EI_NOx": "NOx",
"dis_per_fuel": "distance",
}

DEFAULT_SPECIES_ORDER = ["fuel", "CO2", "H2O", "NOx", "distance"]

def interpolate(
config: dict, years: np.ndarray, val_dict: dict
Expand Down Expand Up @@ -182,7 +182,6 @@ def apply_scaling(
scaling factors are from evolution file,
time series and scaling factors are interpolated on time_range
before multiplication
TODO: implement scaling for individual species

Args:
config (dict): Configuration dictionary from config
Expand All @@ -196,8 +195,10 @@ def apply_scaling(
"""
# Get inventory years
inv_years = np.array(list(inv_dict.keys()))

# Interpolate scaling factors linearly on time_range
time_range, evo_interp_dict = interp_evolution(config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

works fine, even with higher dimensional scaling factors array and with time steps > 1 year.


# If inventories have been adjusted beforehand, normalize scaling factors to inv_years
if inventories_adjusted:
# Filter evo_interp_dict to inv_years
Expand All @@ -213,18 +214,36 @@ def apply_scaling(
evo_interp_dict["scaling"], evo_filtered_interp_dict["scaling"]
)
evo_interp_dict = {"scaling": evo_norm_scaling_arr}

# Interpolate time series data linearly on time_range
_time_range, interp_dict = interp_linear(config, inv_years, val_dict)
# Multiply time series data by scaling factors

# Get species order for proper scaling
time_dir = config["time"]["dir"]
file_name = config["time"]["file"]
file_path = time_dir + file_name
evolution = xr.load_dataset(file_path)
if "species" in evolution.coords:
species_order = evolution.species.values.tolist()
else:
species_order = DEFAULT_SPECIES_ORDER
# Multiply time series data by correct scaling factors
out_dict = {}
for spec, series_arr in interp_dict.items():
# adapted for multiplication of multi-dimensional arrays
shape_tp = np.shape(np.transpose(series_arr))
scaling = np.transpose(np.resize(evo_interp_dict["scaling"], shape_tp))
out_dict[spec] = np.multiply(scaling, series_arr)
# Find the correct scaling factor index for this species
if spec in species_order:
species_idx = species_order.index(spec)
# Extract the scaling factors for this specific species
scaling_factors = evo_interp_dict["scaling"][:, species_idx]
else:
# If species not found, use fuel scaling as default
scaling_factors = evo_interp_dict["scaling"][:, species_order.index("fuel")]

# Multiply element-wise
out_dict[spec] = series_arr * scaling_factors

return time_range, out_dict


def apply_norm(config, val_dict, inv_dict):
"""Apply normalization on time series,
get data from evolution file and inventories,
Expand Down Expand Up @@ -591,42 +610,61 @@ def scale_inv(inv_dict: dict, scale_dict: dict) -> dict:
Args:
inv_dict (dict): Dictionary of xarray Datasets, keys are years of inventories
scale_dict (dict): Dictionary of scaling factors {"scaling": np.ndarray}
where scaling array has shape (n_years, n_species)

Returns:
dict: Dictionary of xarray Datasets (scaled emission inventories),
keys are years of inventories
"""
# Get array with scaling multipliers
# Get array with scaling multipliers - shape (n_years, n_species)
scale_arr = scale_dict["scaling"]

# Initialize output inventory dictionary
out_inv_dict = {}

# Array index corresponding to inventory years
i = 0
for year, inv in inv_dict.items():
# Get global inventory attributes
inv_attrs = inv.attrs
# Initialize output inventory
out_inv = xr.Dataset()
out_inv = xr.Dataset()
# Iterate over data variables in inventory
for data_key, data_arr in inv.items():
# Get attributes of data variable
data_attrs = data_arr.attrs
data_attrs = data_arr.attrs
# lon, lat, plev: do NOT multiply
if data_key in ["lon", "lat", "plev", "ac"]:
pass
# multiply fuel, species emissions, and distance by scaling multiplier
else:
data_arr = data_arr * scale_arr[i]
# Find the correct scaling factor for this data variable
if data_key in scale_dict["species"]:
species_idx = scale_dict["species"].index(data_key)
scaling_value = scale_arr[i, species_idx]
else:
# If species not found, use fuel scaling as default
warnings.warn(
f"Species '{data_key}' not found in the scaling dict. "
f"Variable will be scaled using fuel scaling.",
UserWarning
)
scaling_value = scale_arr[i, scale_dict["species"].index("fuel")]
data_arr = data_arr * scaling_value

# Add data variable to output inventory
data_arr.attrs = data_attrs
out_inv = out_inv.merge({data_key: data_arr})

# Set global inventory attributes
out_inv.attrs = inv_attrs
out_inv_dict[year] = out_inv
i = i + 1

return out_inv_dict



def scale_inventories(config: dict, inv_dict: dict) -> dict:
"""Applies scaling to a dictionary of emission inventories.
This function first interpolates evolution data variables to time_range from config.
Expand All @@ -651,6 +689,15 @@ def scale_inventories(config: dict, inv_dict: dict) -> dict:
evo_filtered_dict = filter_to_inv_years(
inv_years, time_range, evo_interp_dict
)
# Get species order for proper scaling
time_dir = config["time"]["dir"]
file_name = config["time"]["file"]
file_path = time_dir + file_name
evolution = xr.load_dataset(file_path)
if "species" in evolution.coords:
evo_filtered_dict["species"] = evolution["species"].values.tolist()
else:
evo_filtered_dict["species"] = DEFAULT_SPECIES_ORDER
# Perform actual scaling: Multiply inventory data variables by scaling factors
out_inv_dict = scale_inv(inv_dict, evo_filtered_dict)
return out_inv_dict
88 changes: 79 additions & 9 deletions tests/interpolate_time_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,13 @@ class TestScaleInv:

def test_correct_input(self, inv_dict):
"""Valid input returns dictionary of xr.Dataset, keys are inventory years"""
scale_dict = {"scaling": np.array([1.0, 2.0])}
scale_dict = {
"scaling": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
years = list(inv_dict.keys())
out_dict = oac.scale_inv(inv_dict, scale_dict)
# Test for correct output type
Expand All @@ -240,29 +246,93 @@ def test_correct_input(self, inv_dict):
def test_correct_scaling(self, inv_dict):
"""Test for correct scaling of inventories"""
# Input
scale_dict = {"scaling": np.array([1.0, 2.0])}
inp_2020_fuel_arr = inv_dict[2020].fuel.values
inp_2050_fuel_arr = inv_dict[2050].fuel.values
scale_dict = {
"scaling": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
inp_2020_arrs = {}
inp_2050_arrs = {}
out_2020_arrs = {}
out_2050_arrs = {}
for specie in scale_dict["species"]:
inp_2020_arrs[specie] = inv_dict[2020][specie].values
inp_2050_arrs[specie] = inv_dict[2050][specie].values
inp_2050_lon_arr = inv_dict[2050].lon.values
inp_2050_lat_arr = inv_dict[2050].lat.values
inp_2050_plev_arr = inv_dict[2050].plev.values
# Output
out_dict = oac.scale_inv(inv_dict, scale_dict)
out_2020_fuel_arr = out_dict[2020].fuel.values
out_2050_fuel_arr = out_dict[2050].fuel.values
for specie in scale_dict["species"]:
out_2020_arrs[specie] = out_dict[2020][specie].values
out_2050_arrs[specie] = out_dict[2050][specie].values
out_2050_lon_arr = out_dict[2050].lon.values
out_2050_lat_arr = out_dict[2050].lat.values
out_2050_plev_arr = out_dict[2050].plev.values
# Test for correct scaling of fuel data variable
np.testing.assert_equal(out_2020_fuel_arr, inp_2020_fuel_arr)
np.testing.assert_equal(out_2050_fuel_arr, (2.0 * inp_2050_fuel_arr))
for i, specie in enumerate(scale_dict["species"]):
np.testing.assert_equal(out_2020_arrs[specie], inp_2020_arrs[specie])
expected = scale_dict["scaling"][1, i] * inp_2050_arrs[specie]
np.testing.assert_allclose(out_2050_arrs[specie], expected, rtol=1e-12)

# Test that coordinates remain unchanged
np.testing.assert_equal(out_2050_lon_arr, inp_2050_lon_arr)
np.testing.assert_equal(out_2050_lat_arr, inp_2050_lat_arr)
np.testing.assert_equal(out_2050_plev_arr, inp_2050_plev_arr)

def test_incorrect_input(self, inv_dict):
"""Invalid scale_dict (invalid key) returns KeyError"""
scale_dict = {"invalid_key": np.array([1.0, 2.0])}
scale_dict = {
"invalid_key": np.array([
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.2, 1.4, 1.6, 1.8]
]),
"species": ['fuel', 'CO2', 'H2O', 'NOx', 'distance']
}
with pytest.raises(KeyError):
oac.scale_inv(inv_dict, scale_dict)




@pytest.mark.usefixtures("inv_dict")
class TestApplyScaling:
"""Tests for apply_scaling function"""

def test_basic_scaling(self, inv_dict):
"""Test that apply_scaling correctly multiplies by scaling factors"""

# --- Config ---
config = {
"time": {
"dir": "../oac/example/input/",
"file": "time_scaling_example.nc",
"range": [2020, 2051, 1],
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some guidelines and good practices for software testing. I can provide you with some additional documentation on that topic if you like:

  • According to the testing pyramid there are three types: unit tests (base), integration tests (middle) and end-to-end tests (top). The base comes with the highest amount of tests, top has the fewest. This means one should focus on the unit tests. These tests are fast, isolated and check individual aspects of the individual implemented functions.
  • Decouple tests from external dependencies, e.g. other files in the repository. This concept is known as test isolation. This particular test would fail if the utility script generating time_scaling_example.nc has not been executed beforehand. Test isolation can be achieved by the concept of test doubles (e.g. mock objects). If the focus is unit testing instead of end-to-end, maybe the file itself is not needed anymore and it would be enough to code a minimal xarray Dataset which contains an example time evolution.

}

# --- Load scaling file to get species order ---
evolution = xr.load_dataset(config["time"]["dir"] + config["time"]["file"])
species_order = evolution.species.values.tolist()
scaling = evolution.scaling.values
scaling_years = evolution.time.values

# --- Prepare dummy val_dict ---
years = np.array(list(inv_dict.keys()))
val_dict = {spec: np.ones(len(years)) * (i + 1) for i, spec in enumerate(species_order)}

# --- Run apply_scaling ---
time_range, out_dict = oac.apply_scaling(config, val_dict, inv_dict, inventories_adjusted=False)

# --- Basic checks ---
assert isinstance(time_range, np.ndarray)
assert isinstance(out_dict, dict)
assert set(out_dict.keys()) == set(val_dict.keys())
assert all(out_dict[spec].shape == time_range.shape for spec in val_dict)

# --- Check scaling applied correctly ---
for idx, spec in enumerate(species_order):
expected = np.interp(time_range, scaling_years, scaling[:, idx]) * val_dict[spec][0]
np.testing.assert_allclose(out_dict[spec], expected, rtol=1e-12)
50 changes: 32 additions & 18 deletions utils/create_time_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@

# SCALING CONSTANTS
SCALING_TIME = np.arange(1990, 2200, 1)
SCALING_ARR = np.sin(SCALING_TIME * 0.2) * 0.6 + 1.0
SCALING_ARR = SCALING_ARR.astype("float32")
SPECIES = ["fuel", "CO2", "H2O", "NOx", "distance"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the time, I named lists and numpy arrays xxx_arr in OpenAirClim. In addition, species can be a singular and plural word. Therefore, I suggest renaming this list to SPECIES_ARR.

SCALING_DATA = np.vstack([
np.linspace(1.0, 2.0, len(SCALING_TIME)), # fuel: +100% growth
np.linspace(1.0, 1.75, len(SCALING_TIME)), # CO2: +75% growth
np.linspace(1.0, 1.5, len(SCALING_TIME)), # H2O: +50% growth
np.linspace(1.0, 0.8, len(SCALING_TIME)), # NOx: -20% reduction
    np.linspace(1.0, 1.2, len(SCALING_TIME))  # distance: +20% growth
]).astype("float32")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for updating the scaling factors. Looks much more realistic!


# NORMALIZATION CONSTANTS
NORM_TIME = np.array(
Expand Down Expand Up @@ -74,51 +80,59 @@
# TIME SCALING


def plot_time_scaling(scaling_time: np.ndarray, scaling_arr: np.ndarray):
def plot_time_scaling(scaling_time: np.ndarray, scaling: np.ndarray, species: list):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As stated above. I suggest renaming to scaling_arr and species_arr. I used the notation _arr for arrays of any dimension throughout OpenAirClim.

"""
Plots the time scaling factors.

Args:
scaling_time (np.ndarray): The time values for the scaling factors.
scaling_arr (np.ndarray): The scaling factors to plot.
scaling (np.ndarray): 2D array of scaling factors [species, time]
species (list): List of species names

Returns:
None

"""
_fig, ax = plt.subplots()
ax.plot(scaling_time, scaling_arr)
ax.set_xlabel("year")
ax.set_ylabel("scaling factor")
plt.figure(figsize=(8, 4))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To better distinguish the two plots which are opened upon execution of this script, I suggest to add a string identifier, e.g. "Scaling" as first argument to function plt.figure(), see : https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.figure.html
Instead of "Figure 1", a nice name would show up then.

for i, specie in enumerate(species):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good example why the notation _arr is unambiguous. First, the reader can better distinguish the two objects species_arr (np.ndarray) and species (str) in the code. Second, the word specie (without "s" at the end) means something different, a coin ;-)

plt.plot(scaling_time, scaling[i], label=specie)
plt.xlabel("Year")
plt.ylabel("Scaling factor")
plt.title("Scaling factors over time by species")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


def create_time_scaling_xr(
scaling_time: np.ndarray, scaling_arr: np.ndarray
scaling_time: np.ndarray, scaling: np.ndarray, species: list
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above, suggest renaming variables to xxx_arr

) -> xr.Dataset:
"""
Create an xarray dataset containing time scaling factors.

Args:
scaling_time (np.ndarray): The time values for the scaling factors.
scaling_arr (np.ndarray): The scaling factors to plot.
scaling (np.ndarray): 2D array of scaling factors [species, time]
species (list): List of species names

Returns:
xr.Dataset: The xarray dataset containing the time scaling factors.

"""
evolution = xr.Dataset(
data_vars=dict(scaling=(["time"], scaling_arr)),
coords=dict(time=scaling_time),
data_vars=dict(scaling=(["time", "species"], scaling.T)),
coords=dict(time=scaling_time, species=species),
)
evolution.time.attrs = {"units": "years"}
evolution.scaling.attrs = {"species": "all"}
evolution.species.attrs = {"description": "species names"}
evolution.scaling.attrs = {"long_name": "scaling factors", "units": "-"}
evolution.attrs = dict(
Title="Time scaling example",
Convention="CF-XXX",
Type="scaling",
Author="Stefan Völk",
Contact="stefan.voelk@dlr.de",
Author="Stefan Völk, Clémentin Léron",
Contact="openairclim@dlr.de",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you very much for updating the attributes/metadata 👍

)
return evolution

Expand Down Expand Up @@ -170,7 +184,7 @@ def create_time_normalization_xr(
Convention="CF-XXX",
Type="norm",
Author="Stefan Völk",
Contact="stefan.voelk@dlr.de",
Contact="openairclim@dlr.de",
)
return evolution

Expand Down Expand Up @@ -217,9 +231,9 @@ def convert_xr_to_nc(ds: xr.Dataset, file_name: str, out_path: str = OUT_PATH):


if __name__ == "__main__":
scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_ARR)
scaling_ds = create_time_scaling_xr(SCALING_TIME, SCALING_DATA, SPECIES)
convert_xr_to_nc(scaling_ds, "time_scaling_example")
plot_time_scaling(SCALING_TIME, SCALING_ARR)
plot_time_scaling(SCALING_TIME, SCALING_DATA, SPECIES)
norm_ds = create_time_normalization_xr(
NORM_TIME, FUEL_ARR, EI_CO2_ARR, EI_H2O_ARR, DIS_PER_FUEL_ARR
)
Expand Down
Loading