Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions InteroperabilityEnabler/utils/merge_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
Maintainer: Shahin ABDOUL SOUKOUR - Inria
"""

import numpy as np
import pandas as pd


def merge_predicted_data(df_initial, predicted_df_with_metadata):
"""
Merge predicted data into the initial DataFrame by matching column names.
Add 'null' for missing columns in the predicted data.
Add `np.nan` (proper null) for missing columns in the predicted data.

Args:
df_initial: DataFrame containing the original selected columns.
predicted_df_with_metadata: DataFrame containing the predicted data with metadata (column names).
predicted_df_with_metadata: DataFrame containing the predicted data with metadata.

Returns:
A merged Pandas DataFrame with 'null' for missing columns.
A merged Pandas DataFrame with `np.nan` for missing columns.
"""
try:
# Get all unique columns from both DataFrames
Expand All @@ -29,9 +29,9 @@ def merge_predicted_data(df_initial, predicted_df_with_metadata):
# Ensure all columns are present in both DataFrames
for col in all_columns:
if col not in df_initial.columns:
df_initial[col] = "null"
df_initial[col] = np.nan # Use np.nan instead of "null"
if col not in predicted_df_with_metadata.columns:
predicted_df_with_metadata[col] = "null"
predicted_df_with_metadata[col] = np.nan # Use np.nan instead of "null"

# Reorder columns into sorted (alphabetical) name order, not the initial DataFrame's original order
df = df_initial[sorted(all_columns)]
Expand Down
4 changes: 3 additions & 1 deletion tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
import numpy as np
import pytest
from InteroperabilityEnabler.utils.data_formatter import data_to_dataframe
from InteroperabilityEnabler.utils.annotation_dataset import add_quality_annotations_to_df
Expand Down Expand Up @@ -291,4 +292,5 @@ def test_merge_predicted_data_predicted_missing_column():
predicted_df = pd.DataFrame({"temperature": [-6.6, -6.1]}) # missing windSpeed
merged = merge_predicted_data(df_initial, predicted_df)
assert "windSpeed" in merged.columns
assert merged.iloc[2]["windSpeed"] == "null"
# Proper way to check for NaN values
assert pd.isna(merged.iloc[2]["windSpeed"]) # This checks if the value is NaN