Skip to content

Commit 5700556

Browse files
Julien76Julien Hericher
and
Julien Hericher
authored
add interpolation padding method (#2)
* fix warning of incorrect escape * add padding interpolation * add interpolation mode to the calculation options * add changelog and bump package version --------- Co-authored-by: Julien Hericher <[email protected]>
1 parent 85f1169 commit 5700556

File tree

7 files changed

+88
-27
lines changed

7 files changed

+88
-27
lines changed

CHANGES.md

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Changes
2+
3+
## Version 0.1.5:
4+
5+
**date: 2024-09-24**
6+
7+
- Add a new interpolation mode for missing values:
8+
- you can now choose between:
9+
- "padding" -> propagate the last known value in a DataFrame column downward into NaN cells; it only works downward (NaN values at the start won't be filled);
10+
- "linear" -> linearly interpolate missing values based on the last two known values; it works both downward and upward.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ngi-calculations"
3-
version = "0.1.4"
3+
version = "0.1.5"
44
description = "CPT correlations including commonly used empirical correlations"
55
authors = [
66
"Julien Hericher <[email protected]>",

src/ngi_calculations/cpt_correlations/definitions/geo.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ class GeoParameters:
230230
label="normalized cone resistance",
231231
unit="MPa",
232232
symbol="qn",
233-
equation="$q_{net} = 1000 * q_{t} - \sigma_v^{total}$",
233+
equation=r"$q_{net} = 1000 * q_{t} - \sigma_v^{total}$",
234234
value_range=(0, None),
235235
)
236236

@@ -320,7 +320,7 @@ class GeoParameters:
320320
label="Effective vertical stress",
321321
unit="kPa",
322322
symbol="sig_v eff",
323-
equation="$\sigma_v^{eff} = \max( \sigma_v^{total} - u_0 ; 0 )$",
323+
equation=r"$\sigma_v^{eff} = \max( \sigma_v^{total} - u_0 ; 0 )$",
324324
value_range=(0, None),
325325
legend_="σ v,eff",
326326
)
@@ -354,7 +354,7 @@ class GeoParameters:
354354
label="normalized cone resistance",
355355
unit="-",
356356
symbol="Qt",
357-
equation="$Qt = ( 1000 * qt - \sigma_v^{total} ) / \sigma_v^{eff}$",
357+
equation=r"$Qt = ( 1000 * qt - \sigma_v^{total} ) / \sigma_v^{eff}$",
358358
value_range=(0, None),
359359
)
360360

@@ -364,7 +364,7 @@ class GeoParameters:
364364
# label="normalized pressure",
365365
unit="-",
366366
symbol="Bq",
367-
equation="$Bq = ( u_2 - u_0 ) / ( 1000 * qt - \sigma_v^{total} )$",
367+
equation=r"$Bq = ( u_2 - u_0 ) / ( 1000 * qt - \sigma_v^{total} )$",
368368
value_range=(0, None),
369369
)
370370

@@ -382,7 +382,7 @@ class GeoParameters:
382382
label="normalized friction ratio",
383383
unit="%",
384384
symbol="Fr",
385-
equation="$Fr = fs / ( 1000 * qt - \sigma_v^{total} ) * 100\%$",
385+
equation=r"$Fr = fs / ( 1000 * qt - \sigma_v^{total} ) * 100\%$",
386386
value_range=(0, None),
387387
)
388388

@@ -391,23 +391,23 @@ class GeoParameters:
391391
label="soil behavior index",
392392
unit="-",
393393
symbol="Ic",
394-
equation="$Ic = \sqrt{( 3.47 - \log Qt)^2 + (\log Fr + 1.22)^2 }$",
394+
equation=r"$Ic = \sqrt{( 3.47 - \log Qt)^2 + (\log Fr + 1.22)^2 }$",
395395
)
396396

397397
n = GeoParameter(
398398
key="n",
399399
label="exponent for normalized soil behavior index",
400400
unit="-",
401401
symbol="n",
402-
equation="$n = \min \left( 0.381 * Ic + 0.05 * (\sigma_v^{eff} / p_{Atm}) - 0.15 ), 1.0\right)$",
402+
equation=r"$n = \min \left( 0.381 * Ic + 0.05 * (\sigma_v^{eff} / p_{Atm}) - 0.15, 1.0\right)$",
403403
)
404404

405405
Qtn = GeoParameter(
406406
key="Qtn",
407407
label="normalized cone resistance by n exponent",
408408
unit="-",
409409
symbol="Qtn",
410-
equation="$Qtn = Qt * \left( \frac{p_{Atm}}{\sigma_v^{eff}}\right)^{n - 1}$",
410+
equation=r"$Qtn = Qt * \left( \frac{p_{Atm}}{\sigma_v^{eff}}\right)^{n - 1}$",
411411
)
412412

413413
Icn = GeoParameter(
@@ -416,7 +416,7 @@ class GeoParameters:
416416
axis_title="SBT",
417417
unit="-",
418418
symbol="Icn",
419-
equation="$Icn = \sqrt{\left( 3.47 - \\frac{\log Qtn}{\log 10} \\right)^2 + \left(\\frac{\log Fr}{\log 10} + 1.22 \\right)^2 }$",
419+
equation=r"$Icn = \sqrt{\left( 3.47 - \frac{\log Qtn}{\log 10} \right)^2 + \left(\frac{\log Fr}{\log 10} + 1.22 \right)^2 }$",
420420
legend_="Icn (Robertson, 2009)",
421421
value_range=(0, 4.5),
422422
)

src/ngi_calculations/cpt_correlations/methods/cpt_process/calculations.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def _integrate_lab_profile(self):
170170
_df.set_index(depth, drop=False, inplace=True)
171171

172172
# Interpolate the missing values.
173-
_df = interpolate_missing_values(_df, key_col=depth, col_list=lab_cols)
173+
_df = interpolate_missing_values(_df, key_col=depth, col_list=lab_cols, mode=self.options.interpolation_mode)
174174

175175
# # Prevent values for certain lab profiles to go below zero
176176
# _df[GEO.u0.key] = _df[GEO.u0.key].clip(lower=0.0)

src/ngi_calculations/cpt_correlations/methods/cpt_process/options.py

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Literal
2+
13
from pydantic.main import BaseModel
24

35

@@ -6,3 +8,4 @@ class CptProcessOptions(BaseModel):
68
shift_depth: float = 0.0 # Shift CPT depth (in m, negative value shifts upwards)
79
adjust_depth_tilt: bool = False # Adjust depth due to tilt
810
compensate_atm_pressure: bool = False
11+
interpolation_mode: Literal["linear", "padding"] = "linear"

src/ngi_calculations/cpt_correlations/utils/interpolation.py

+38-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,36 @@
1-
from typing import List
1+
from typing import Literal, Optional
22

33
import numpy as np
44
import pandas as pd
55
from scipy.interpolate import interp1d
66

77

8-
def interpolate_missing_values(df: pd.DataFrame, key_col: str, inplace: bool = False, col_list: List[str] = False):
8+
def set_replace_columns(df: pd.DataFrame, col_list: Optional[list[str]]) -> list[str]:
    """Return the names of the columns in which missing values should be filled.

    Args:
        df: The DataFrame to inspect.
        col_list: Explicit column names to use. When empty or ``None``, every
            column of ``df`` containing at least one NaN is selected instead.

    Returns:
        The list of column names to process.
    """
    if col_list:
        return col_list
    # No explicit selection: fall back to every column holding at least one NaN.
    return df.columns[df.isna().any()].tolist()


def interpolate_missing_values__padding_method(
    df: pd.DataFrame, inplace: bool = False, col_list: Optional[list[str]] = None
):
    """Fill NaN values by propagating the last known value downward.

    Leading NaN values (those with no earlier valid value in the column) are
    left untouched, because forward filling has nothing to propagate there.

    Args:
        df: The DataFrame to fill.
        inplace: When True, ``df`` itself is modified and returned; otherwise
            a copy is modified and returned.
        col_list: Columns to fill. When empty or ``None``, every column that
            contains at least one NaN is filled.

    Returns:
        The filled DataFrame (``df`` itself when ``inplace`` is True).
    """
    # Whether to mutate the provided DataFrame or not
    _df = df if inplace else df.copy()

    for col in set_replace_columns(_df, col_list):
        # ``ffill`` is the supported spelling of forward filling;
        # ``interpolate(method="pad")`` is deprecated in recent pandas releases.
        _df[col] = _df[col].ffill()

    return _df
24+
25+
26+
def interpolate_missing_values__linear_method(
27+
df: pd.DataFrame, key_col: str, inplace: bool = False, col_list: list[str] = []
28+
):
29+
# Whether to mutate the provided DataFrame or not
30+
_df = df if inplace else df.copy()
31+
32+
# Get the columns where interpolation should be performed
33+
_cols = set_replace_columns(_df, col_list)
1434

1535
# Store the key col values as a numpy array
1636
key_values = np.array(_df[key_col].values.tolist())
@@ -30,3 +50,17 @@ def interpolate_missing_values(df: pd.DataFrame, key_col: str, inplace: bool = F
3050
_df[col] = f(key_values)
3151

3252
return _df
53+
54+
55+
def interpolate_missing_values(
    df: pd.DataFrame,
    key_col: str = "depth",
    inplace: bool = False,
    col_list: Optional[list[str]] = None,
    mode: Literal["linear", "padding"] = "linear",
):
    """Fill missing values in ``df`` using the selected interpolation mode.

    Args:
        df: The DataFrame to fill.
        key_col: Name of the key column; forwarded to the linear method only.
        inplace: Forwarded to the selected method; when True the provided
            DataFrame is modified instead of a copy.
        col_list: Columns to process. When empty or ``None``, the selected
            method picks every column containing at least one NaN.
        mode: ``"linear"`` delegates to
            ``interpolate_missing_values__linear_method``; ``"padding"``
            delegates to ``interpolate_missing_values__padding_method``.

    Returns:
        The DataFrame returned by the selected method.

    Raises:
        ValueError: If ``mode`` is neither ``"linear"`` nor ``"padding"``.
    """
    # Hand the helpers a list, exactly as the previous ``[]`` default did.
    _cols = [] if col_list is None else col_list

    if mode == "linear":
        return interpolate_missing_values__linear_method(df, key_col, inplace, _cols)
    if mode == "padding":
        return interpolate_missing_values__padding_method(df, inplace, _cols)

    # An unrecognised mode used to fall through to padding silently.
    raise ValueError(f"Unknown interpolation mode {mode!r}; expected 'linear' or 'padding'.")

tests/utils/test_interpolation.py

+26-12
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,29 @@
1414
)
1515

1616

17-
def test_interpolate_missing_values() -> None:
18-
expected = pd.DataFrame(
19-
{
20-
"depth": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
21-
"A": [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
22-
"B": [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
23-
"C": [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
24-
"D": [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
25-
}
26-
)
27-
result = interpolate_missing_values(df, key_col="depth")
28-
pd.testing.assert_frame_equal(result, expected)
17+
class Test_Interpolation:
    """Checks for both modes of ``interpolate_missing_values``."""

    def test_interpolate_missing_values__linear_method(self) -> None:
        """Linear mode is expected to give the same 0..100 ramp in every data column."""
        ramp = [10.0 * step for step in range(11)]
        columns = {"depth": [float(step) for step in range(11)]}
        for name in ("A", "B", "C", "D"):
            columns[name] = list(ramp)
        expected = pd.DataFrame(columns)

        result = interpolate_missing_values(df, key_col="depth", mode="linear")

        pd.testing.assert_frame_equal(result, expected)

    def test_interpolate_missing_values__padding_method(self) -> None:
        """Padding mode is expected to carry values downward and keep leading NaN."""
        nan = np.nan
        expected = pd.DataFrame(
            {
                "depth": [float(step) for step in range(11)],
                "A": [nan] + [10.0] * 8 + [90.0] * 2,
                "B": [0.0] * 9 + [90.0] * 2,
                "C": [0.0] * 10 + [100.0],
                "D": [nan] * 8 + [80.0] * 2 + [100.0],
            }
        )

        result = interpolate_missing_values(df, mode="padding")

        pd.testing.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)