Skip to content

Commit 0b8cc98

Browse files
Julien76Julien Hericher
and
Julien Hericher
authored
feature: add multi cone cpt handling (#5)
* add simplify from simplify py * add url from original implementation * add support for multi cone * improve unique cpt identifier --------- Co-authored-by: Julien Hericher <[email protected]>
1 parent 4e55cb1 commit 0b8cc98

File tree

10 files changed

+287
-156
lines changed

10 files changed

+287
-156
lines changed

src/ngi_calculations/cpt_correlations/methods/cpt_process/calculations.py

+37-9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ngi_calculations.cpt_correlations.definitions.geo import GEO
99
from ngi_calculations.cpt_correlations.definitions.physics import PhysicParameters as PHY
1010
from ngi_calculations.cpt_correlations.methods.cpt_process.options import CptProcessOptions
11+
from ngi_calculations.cpt_correlations.models.cpt_cone import CptCone
1112
from ngi_calculations.cpt_correlations.models.cpt_raw import RawCPT
1213
from ngi_calculations.cpt_correlations.models.lab_data import LabData
1314
from ngi_calculations.cpt_correlations.utils.interpolation import interpolate_missing_values
@@ -30,7 +31,6 @@ def __init__(self, raw_cpt: RawCPT, lab_data: LabData, options: CptProcessOption
3031
self.raw_cpt = raw_cpt
3132
self.lab_data = lab_data
3233
self.options = options
33-
# self.calculate()
3434

3535
@measure("ProcessCPT_Calculation", log_time=False, log_child=False)
3636
@track_execution
@@ -50,6 +50,7 @@ def calculate(self):
5050
self._differential_pressure()
5151
self._normalized_differential_pressure()
5252
self._elevation()
53+
self._set_cone_info()
5354
self._total_cone_resistance()
5455
self._net_cone_resistance()
5556
self._normalized_cone_resistance()
@@ -63,7 +64,7 @@ def calculate(self):
6364
self._filter_data()
6465

6566
def _set_initial_data(self):
66-
self.data = self.raw_cpt.data[self.raw_cpt.columns.all].copy()
67+
self.data = self.raw_cpt.data.copy()
6768
self.data[GEO.u2_raw.key] = self.data[GEO.u2.key]
6869
self.data[GEO.depth_raw.key] = self.data[GEO.depth.key]
6970
self.depth_raw = self.data[GEO.depth.key].values
@@ -164,7 +165,14 @@ def _integrate_lab_profile(self):
164165
_df = _df.drop([x for x in lab_cols if x in _df.columns], axis=1)
165166

166167
# Merge the two Dataframes
167-
_df.astype(np.float64, copy=False)
168+
# Create a type dictionary for all columns
169+
types_dict = {col: np.float64 for col in _df.columns if col != self.options.cpt_identifier and col != depth}
170+
# Add depth type if needed
171+
types_dict[depth] = np.float64
172+
# Apply types
173+
_df = _df.astype(types_dict)
174+
175+
# _df.astype(np.float64, copy=False)
168176
_df = pd.merge(_df, lab_df, on=depth, how="outer")
169177
_df.sort_values(by=depth, inplace=True)
170178
_df.set_index(depth, drop=False, inplace=True)
@@ -231,16 +239,36 @@ def _elevation(self):
231239
# TODO: Where to make available the self.options.elevation?
232240
self.data[GEO.elevation.key] = self.options.elevation - self.data[GEO.depth.key]
233241

242+
def _set_cone_info(self):
    """Write per-row cone and sleeve area ratios into ``self.data``.

    When the column named by ``self.options.cpt_identifier`` is present, each
    row's identifier is looked up in ``self.raw_cpt.cone`` and the matching
    cone's ``cone_area_ratio`` / ``sleeve_area_ratio`` is stored for that row.
    Rows whose identifier has no entry fall back to the values of a
    default-constructed ``CptCone``. When the identifier column is absent,
    both ratio columns are filled with those default values.
    """
    # Build the fallback once instead of instantiating CptCone for every row.
    fallback_cone = CptCone()
    # The cone mapping is optional on RawCPT (it defaults to None); treat a
    # missing mapping as empty so every row simply receives the defaults
    # instead of failing on ``None.get``.
    cones = self.raw_cpt.cone or {}
    identifier_column = self.options.cpt_identifier
    has_identifiers = identifier_column in self.data.columns

    ratio_columns = (
        ("cone_area_ratio", GEO.cone_area_ratio.key),
        ("sleeve_area_ratio", GEO.sleeve_area_ratio.key),
    )
    for attribute, column in ratio_columns:
        if has_identifiers:

            def lookup(identifier, attribute=attribute):
                # Store the numeric ratio, not the CptCone object itself.
                return getattr(cones.get(identifier) or fallback_cone, attribute)

            self.data[column] = self.data[identifier_column].apply(lookup)
        else:
            self.data[column] = getattr(fallback_cone, attribute)
267+
234268
def _total_cone_resistance(self):
235-
self.data[GEO.cone_area_ratio.key] = self.raw_cpt.cone.cone_area_ratio
236-
self.data[GEO.sleeve_area_ratio.key] = self.raw_cpt.cone.sleeve_area_ratio
237269
self.data[GEO.qt.key] = (
238270
1000 * self.data[GEO.qc.key] + self.data[GEO.u2.key] * (1 - self.data[GEO.cone_area_ratio.key])
239271
) / 1000
240-
# self.df[GEO.qt.key] = (
241-
# 1000 * self.df[GEO.qc.key]
242-
# + self.df[GEO.u2.key] * (1 - self.df[GEO.cone_area_ratio.key]) * self.df[GEO.sleeve_area_ratio.key]
243-
# ) / 1000
244272

245273
def _net_cone_resistance(self):
246274
self.data[GEO.qn.key] = self.data[GEO.qt.key] - self.data[GEO.sigVtTotal.key] / 1000

src/ngi_calculations/cpt_correlations/methods/cpt_process/options.py

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ class CptProcessOptions(BaseModel):
99
adjust_depth_tilt: bool = False # Adjust depth due to tilt
1010
compensate_atm_pressure: bool = False
1111
interpolation_mode: Literal["linear", "padding"] = "linear"
12+
cpt_identifier: str = "method_id"

src/ngi_calculations/cpt_correlations/models/cpt_raw.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def all(self):
3434
class RawCPT(CustomBaseModel):
3535
data: pd.DataFrame = Field(..., description="Pandas DataFrame containing raw CPT data")
3636
columns: RawCPTColumns = RawCPTColumns()
37-
cone: CptCone = CptCone()
37+
cone: dict[str, CptCone] | None = None
3838

3939
@field_validator("data")
4040
def validate_data(cls, value):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Description: inspired by https://github.com/omarestrella/simplify.py
2+
3+
4+
def getSquareDistance(p1, p2, xKey, yKey):
    """
    Return the squared Euclidean distance between ``p1`` and ``p2``.

    Both points are mappings; ``xKey`` and ``yKey`` name the entries holding
    the two coordinates.
    """
    offsets = (p1[xKey] - p2[xKey], p1[yKey] - p2[yKey])
    return sum(component * component for component in offsets)
12+
13+
14+
def getSquareSegmentDistance(p, p1, p2, xKey, yKey):
    """
    Return the squared distance from point ``p`` to the segment ``p1``-``p2``.

    All three points are mappings; ``xKey`` and ``yKey`` name the entries
    holding the two coordinates.
    """
    px, py = p[xKey], p[yKey]
    # Start from p1 as the candidate nearest point on the segment.
    near_x, near_y = p1[xKey], p1[yKey]
    seg_x = p2[xKey] - near_x
    seg_y = p2[yKey] - near_y

    # A zero-length segment has no direction: the distance is measured to p1.
    if not (seg_x == 0 and seg_y == 0):
        # Position of the projection of p along the segment (0 at p1, 1 at p2).
        t = ((px - near_x) * seg_x + (py - near_y) * seg_y) / (seg_x * seg_x + seg_y * seg_y)

        if t > 1:
            # Projection falls beyond p2: p2 is the nearest point.
            near_x, near_y = p2[xKey], p2[yKey]
        elif t > 0:
            # Projection falls inside the segment.
            near_x, near_y = near_x + seg_x * t, near_y + seg_y * t

    off_x = px - near_x
    off_y = py - near_y
    return off_x * off_x + off_y * off_y
38+
39+
40+
def simplifyRadialDistance(points, tolerance, xKey="x", yKey="y"):
    """
    Drop points that lie too close to the previously kept point.

    Args:
        points: sequence of mappings holding the coordinates.
        tolerance: squared distance threshold; a point is kept only when its
            squared distance to the last kept point is strictly greater.
        xKey: key of the first coordinate in each point.
        yKey: key of the second coordinate in each point.

    Returns:
        A new list starting with the first point and always ending with the
        last point of the input. An empty input yields an empty list.
    """
    # Nothing to simplify; indexing points[0] below would raise IndexError.
    if not points:
        return []

    prev_point = points[0]
    new_points = [prev_point]
    # Pre-bind so the end-of-line check below is valid for a single point.
    point = prev_point

    # The first point is already kept, so start comparing from the second one.
    for point in points[1:]:
        if getSquareDistance(point, prev_point, xKey, yKey) > tolerance:
            new_points.append(point)
            prev_point = point

    # Make sure the line still ends on the final input point.
    if prev_point != point:
        new_points.append(point)

    return new_points
56+
57+
58+
def simplifyDouglasPeucker(points, tolerance, xKey="x", yKey="y"):
    """
    Simplify a polyline with the Douglas-Peucker algorithm.

    Args:
        points: sequence of mappings holding the coordinates.
        tolerance: squared distance threshold; an interior point is kept when
            it is the farthest from its enclosing segment and its squared
            distance to that segment is strictly greater than this value.
        xKey: key of the first coordinate in each point.
        yKey: key of the second coordinate in each point.

    Returns:
        A new list with the kept points in their original order. The first
        and last input points are always kept. An empty input yields an empty
        list.
    """
    length = len(points)
    # Nothing to mark; flagging the endpoints below would raise IndexError.
    if length == 0:
        return []

    keep = [False] * length
    keep[0] = keep[-1] = True

    # Explicit stack of (first, last) index pairs still to be examined;
    # it replaces recursion and ends cleanly once it is empty.
    pending = [(0, length - 1)]
    while pending:
        first, last = pending.pop()
        max_sqdist = 0
        index = None

        # Only interior points can split a segment; the endpoints lie on it.
        for i in range(first + 1, last):
            sqdist = getSquareSegmentDistance(points[i], points[first], points[last], xKey, yKey)
            if sqdist > max_sqdist:
                index = i
                max_sqdist = sqdist

        # Split only when an interior point actually exceeded the tolerance.
        if index is not None and max_sqdist > tolerance:
            keep[index] = True
            pending.append((first, index))
            pending.append((index, last))

    return [point for point, kept in zip(points, keep) if kept]
109+
110+
111+
def simplify(
    points: list[dict], tolerance: float = 0.1, highestQuality: bool = True, xKey: str = "x", yKey: str = "y"
) -> list[dict]:
    """
    Simplify a polyline given as a list of coordinate mappings.

    The tolerance is squared once and passed to both passes. When
    ``highestQuality`` is false, a radial-distance pass runs first; the
    Douglas-Peucker pass always runs.
    """
    sqtolerance = tolerance * tolerance

    # Optional radial-distance pre-pass, skipped in highest-quality mode.
    candidates = points if highestQuality else simplifyRadialDistance(points, sqtolerance, xKey, yKey)

    return simplifyDouglasPeucker(candidates, sqtolerance, xKey, yKey)

tests/conftest.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@
33

44
import pandas as pd
55
import pytest
6-
from tests.data.calculation_data import Files2, get_data_from_excel_calculation_file
76

87
from ngi_calculations.cpt_correlations.methods.cpt_process.calculations import CPTProcessCalculation
9-
from ngi_calculations.cpt_correlations.models.cpt_cone import CptCone
108
from ngi_calculations.cpt_correlations.models.cpt_raw import RawCPT
119
from ngi_calculations.cpt_correlations.models.lab_data import LabData
10+
from tests.data.calculation_data import Files2, get_data_from_excel_calculation_file
1211

1312

1413
def dict_parametrize(data, **kwargs) -> Callable:
@@ -29,7 +28,7 @@ class FullCase:
2928
@pytest.fixture(params=[Files2.testA], scope="session")
3029
def analysis_from_excel(request):
3130
excel_data = get_data_from_excel_calculation_file(request.param)
32-
raw_cpt = RawCPT(data=excel_data.raw_cpt, cone=CptCone(cone_area_ratio=excel_data.cone_area_ratio))
31+
raw_cpt = RawCPT(data=excel_data.raw_cpt, cone=excel_data.cone)
3332
lab_data = LabData(data=excel_data.lab_data)
3433
processor = CPTProcessCalculation(raw_cpt=raw_cpt, lab_data=lab_data)
3534
return FullCase(raw_cpt=raw_cpt, processor=processor, expected=excel_data.processed_cpt)

tests/data/calculation_data.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import pandas as pd
66

7+
from ngi_calculations.cpt_correlations.models.cpt_cone import CptCone
78
from tests.data.excel_data_column_mapping import (
89
CptProcessedColumns,
910
CptRawColumns,
@@ -83,8 +84,7 @@ class ExcelFileConfig:
8384
lab_data: ExcelDataConfig
8485
raw_cpt: ExcelDataConfig
8586
processed_cpt: ExcelDataConfig
86-
# interpretation: ExcelInterpretation
87-
cone_area_ratio: float
87+
cone: dict[str, CptCone]
8888

8989

9090
@dataclass
@@ -104,7 +104,7 @@ class Files2(Enum):
104104
raw_cpt=ExcelDataConfig(
105105
sheetname="cpt_raw",
106106
start_column="A",
107-
end_column="H",
107+
end_column="I",
108108
header_row=2,
109109
data_start_row=4,
110110
data_end_row=439,
@@ -122,13 +122,17 @@ class Files2(Enum):
122122
processed_cpt=ExcelDataConfig(
123123
sheetname="cpt_processed",
124124
start_column="A",
125-
end_column="AC",
125+
end_column="AF",
126126
header_row=2,
127127
data_start_row=4,
128128
data_end_row=439,
129129
column_mapping=CptProcessedColumns().columns,
130130
),
131-
cone_area_ratio=0.846,
131+
cone={
132+
"aaa": CptCone(cone_area_ratio=0.85, sleeve_area_ratio=1.0),
133+
"bbb": CptCone(cone_area_ratio=0.92, sleeve_area_ratio=0.9),
134+
"ccc": CptCone(cone_area_ratio=0.75, sleeve_area_ratio=0.8),
135+
},
132136
)
133137

134138

@@ -137,8 +141,7 @@ class ExcelData2:
137141
lab_data: pd.DataFrame
138142
raw_cpt: pd.DataFrame
139143
processed_cpt: pd.DataFrame
140-
# interpretation: ExcelInterpretation
141-
cone_area_ratio: float
144+
cone: dict[str, CptCone]
142145

143146

144147
CURRENT_DIR = os.getcwd()
@@ -156,5 +159,5 @@ def get_data_from_excel_calculation_file(file: Files2):
156159
raw_cpt=cpt_raw_data,
157160
lab_data=lab_data,
158161
processed_cpt=cpt_processed_data,
159-
cone_area_ratio=settings.cone_area_ratio,
162+
cone=settings.cone,
160163
)

tests/data/calculations/testA.xlsx

41.9 KB
Binary file not shown.

tests/data/excel_data_column_mapping.py

+32-29
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
class CptRawColumns:
88
columns: dict = field(
99
default_factory=lambda: {
10-
"A": GEO.depth.key,
11-
"B": GEO.qc.key,
12-
"C": GEO.fs.key,
13-
"D": GEO.u2.key,
14-
"E": GEO.temperature.key,
15-
"F": GEO.penetration_rate.key,
16-
"G": GEO.penetration_force.key,
17-
"H": GEO.tilt.key,
10+
"A": "method_id",
11+
"B": GEO.depth.key,
12+
"C": GEO.qc.key,
13+
"D": GEO.fs.key,
14+
"E": GEO.u2.key,
15+
"F": GEO.temperature.key,
16+
"G": GEO.penetration_rate.key,
17+
"H": GEO.penetration_force.key,
18+
"I": GEO.tilt.key,
1819
}
1920
)
2021

@@ -40,26 +41,28 @@ class CptProcessedColumns:
4041
columns: dict = field(
4142
default_factory=lambda: {
4243
**CptRawColumns().columns,
43-
"I": GEO.wc.key,
44-
"J": GEO.WP.key,
45-
"K": GEO.LL.key,
46-
"L": GEO.Ip.key,
47-
"M": GEO.St.key,
48-
"N": GEO.uw.key,
49-
"O": GEO.u0.key,
50-
"P": GEO.sigVtTotal.key,
51-
"Q": GEO.sigVtEff.key,
52-
"R": GEO.qt.key,
53-
"S": GEO.qn.key,
54-
"T": GEO.u_delta.key,
55-
"U": GEO.Fr.key,
56-
"V": GEO.Qt.key,
57-
"W": GEO.Bq.key,
58-
"X": GEO.u_delta_norm.key,
59-
"Y": GEO.Ic.key,
60-
"Z": GEO.n.key,
61-
"AA": GEO.Qtn.key,
62-
"AB": GEO.Icn.key,
63-
"AC": GEO.Rf.key,
44+
"J": GEO.wc.key,
45+
"K": GEO.WP.key,
46+
"L": GEO.LL.key,
47+
"M": GEO.Ip.key,
48+
"N": GEO.St.key,
49+
"O": GEO.uw.key,
50+
"P": GEO.u0.key,
51+
"Q": GEO.cone_area_ratio.key,
52+
"R": GEO.sleeve_area_ratio.key,
53+
"S": GEO.sigVtTotal.key,
54+
"T": GEO.sigVtEff.key,
55+
"U": GEO.qt.key,
56+
"V": GEO.qn.key,
57+
"W": GEO.u_delta.key,
58+
"X": GEO.Fr.key,
59+
"Y": GEO.Qt.key,
60+
"Z": GEO.Bq.key,
61+
"AA": GEO.u_delta_norm.key,
62+
"AB": GEO.Ic.key,
63+
"AC": GEO.n.key,
64+
"AD": GEO.Qtn.key,
65+
"AE": GEO.Icn.key,
66+
"AF": GEO.Rf.key,
6467
}
6568
)

0 commit comments

Comments
 (0)