Sedimark · ShahinAbdoulSoukour · Jun 22, 2025 · Jun 22, 2025 · Jun 22, 2025 · Jun 22, 2025
diff --git a/InteroperabilityEnabler/utils/merge_data.py b/InteroperabilityEnabler/utils/merge_data.py
@@ -5,20 +5,20 @@
 Maintainer: Shahin ABDOUL SOUKOUR - Inria
 """
 
+import numpy as np
 import pandas as pd
 
-
 def merge_predicted_data(df_initial, predicted_df_with_metadata):
     """
     Merge predicted data into the initial DataFrame by matching column names.
-    Add 'null' for missing columns in the predicted data.
+    Fill columns missing from either DataFrame with `np.nan` (a proper null value).
 
     Args:
         df_initial: DataFrame containing the original selected columns.
-        predicted_df_with_metadata: DataFrame containing the predicted data with metadata (column names).
+        predicted_df_with_metadata: DataFrame containing the predicted data with metadata.
 
     Returns:
-        A merged Pandas DataFrame with 'null' for missing columns.
+        A merged Pandas DataFrame with `np.nan` for missing columns.
     """
     try:
         # Get all unique columns from both DataFrames
@@ -29,9 +29,9 @@ def merge_predicted_data(df_initial, predicted_df_with_metadata):
         # Ensure all columns are present in both DataFrames
         for col in all_columns:
             if col not in df_initial.columns:
-                df_initial[col] = "null"
+                df_initial[col] = np.nan  # Use np.nan instead of "null"
             if col not in predicted_df_with_metadata.columns:
-                predicted_df_with_metadata[col] = "null"
+                predicted_df_with_metadata[col] = np.nan  # Use np.nan instead of "null"
 
         # Reorder columns to match the initial DataFrame's order
         df = df_initial[sorted(all_columns)]

diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import numpy as np
 import pytest
 from InteroperabilityEnabler.utils.data_formatter import data_to_dataframe
 from InteroperabilityEnabler.utils.annotation_dataset import add_quality_annotations_to_df
@@ -291,4 +292,5 @@ def test_merge_predicted_data_predicted_missing_column():
     predicted_df = pd.DataFrame({"temperature": [-6.6, -6.1]})  # missing windSpeed
     merged = merge_predicted_data(df_initial, predicted_df)
     assert "windSpeed" in merged.columns
-    assert merged.iloc[2]["windSpeed"] == "null"
+    # NaN never compares equal to anything (even itself), so use pd.isna() instead of ==
+    assert pd.isna(merged.iloc[2]["windSpeed"]) # This checks if the value is NaN