Skip to content

Commit 3a144fe

Browse files
authored
Merge pull request #71 from GeoOcean/67-swan-generation-bathy
[JTH] merge fixed parameters structure implementation in swan wrapper and more
2 parents e0d6ae7 + eb432ce commit 3a144fe

File tree

13 files changed

+628
-371
lines changed

13 files changed

+628
-371
lines changed

bluemath_tk/core/decorators.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def wrapper(
120120
directional_variables: List[str] = [],
121121
custom_scale_factor: dict = {},
122122
min_number_of_points: int = None,
123+
max_number_of_iterations: int = 10,
123124
normalize_data: bool = True,
124125
):
125126
if data is None:
@@ -133,6 +134,11 @@ def wrapper(
133134
if min_number_of_points is not None:
134135
if not isinstance(min_number_of_points, int) or min_number_of_points <= 0:
135136
raise ValueError("Minimum number of points must be integer and > 0")
137+
if (
138+
not isinstance(max_number_of_iterations, int)
139+
or max_number_of_iterations <= 0
140+
):
141+
raise ValueError("Maximum number of iterations must be integer and > 0")
136142
if not isinstance(normalize_data, bool):
137143
raise TypeError("Normalize data must be a boolean")
138144
return func(

bluemath_tk/datamining/kma.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ def fit(
182182
directional_variables: List[str] = [],
183183
custom_scale_factor: dict = {},
184184
min_number_of_points: int = None,
185+
max_number_of_iterations: int = 10,
185186
normalize_data: bool = True,
186187
) -> None:
187188
"""
@@ -206,6 +207,10 @@ def fit(
206207
min_number_of_points : int, optional
207208
The minimum number of points to consider a cluster.
208209
Default is None.
210+
max_number_of_iterations : int, optional
211+
The maximum number of iterations for the K-Means algorithm.
212+
This is used when min_number_of_points is not None.
213+
Default is 10.
209214
normalize_data : bool, optional
210215
A flag to normalize the data. Default is True.
211216
"""
@@ -248,9 +253,10 @@ def fit(
248253
if np.all(counts >= min_number_of_points):
249254
stable_kma_child = True
250255
number_of_tries += 1
251-
if number_of_tries > 10:
256+
if number_of_tries > max_number_of_iterations:
252257
raise ValueError(
253-
"Failed to find a stable K-Means configuration after 10 attempts."
258+
f"Failed to find a stable K-Means configuration after {max_number_of_iterations} attempts."
259+
"Change max_number_of_iterations or min_number_of_points."
254260
)
255261
self.logger.info(
256262
f"Found a stable K-Means configuration after {number_of_tries} attempts."
@@ -318,6 +324,7 @@ def fit_predict(
318324
directional_variables: List[str] = [],
319325
custom_scale_factor: dict = {},
320326
min_number_of_points: int = None,
327+
max_number_of_iterations: int = 10,
321328
normalize_data: bool = True,
322329
) -> Tuple[pd.DataFrame, pd.DataFrame]:
323330
"""
@@ -337,6 +344,10 @@ def fit_predict(
337344
min_number_of_points : int, optional
338345
The minimum number of points to consider a cluster.
339346
Default is None.
347+
max_number_of_iterations : int, optional
348+
The maximum number of iterations for the K-Means algorithm.
349+
This is used when min_number_of_points is not None.
350+
Default is 10.
340351
normalize_data : bool, optional
341352
A flag to normalize the data. Default is True.
342353
@@ -352,6 +363,7 @@ def fit_predict(
352363
directional_variables=directional_variables,
353364
custom_scale_factor=custom_scale_factor,
354365
min_number_of_points=min_number_of_points,
366+
max_number_of_iterations=max_number_of_iterations,
355367
normalize_data=normalize_data,
356368
)
357369

bluemath_tk/predictor/xwt.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ def fit(
286286

287287
kma: KMA = self.steps.get("kma")
288288
self.num_clusters = kma.num_clusters
289+
# TODO: standarize PCs by first PC variance
290+
# pca.pcs_df / pca.pcs.stds.isel(n_component=0).values
289291
kma_bmus, _kma_bmus_df = kma.fit_predict(
290292
data=pca.pcs_df,
291293
**fit_params.get("kma", {}),

bluemath_tk/topo_bathy/swan_grid.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,26 @@ def generate_grid_parameters(bathy_data: xr.DataArray) -> dict:
2323
"""
2424

2525
return {
26-
"xpc": np.nanmin(bathy_data.lon), # x origin
27-
"ypc": np.nanmin(bathy_data.lat), # y origin
26+
"xpc": int(np.nanmin(bathy_data.lon)), # x origin
27+
"ypc": int(np.nanmin(bathy_data.lat)), # y origin
2828
"alpc": 0, # x-axis direction
29-
"xlenc": np.nanmax(bathy_data.lon)
30-
- np.nanmin(bathy_data.lon), # grid length in x
31-
"ylenc": np.nanmax(bathy_data.lat)
32-
- np.nanmin(bathy_data.lat), # grid length in y
29+
"xlenc": int(
30+
np.nanmax(bathy_data.lon) - np.nanmin(bathy_data.lon)
31+
), # grid length in x
32+
"ylenc": int(
33+
np.nanmax(bathy_data.lat) - np.nanmin(bathy_data.lat)
34+
), # grid length in y
3335
"mxc": len(bathy_data.lon) - 1, # number mesh x, una menos pq si no SWAN peta
3436
"myc": len(bathy_data.lat) - 1, # number mesh y, una menos pq si no SWAN peta
35-
"dxinp": bathy_data.lon[1].values
36-
- bathy_data.lon[0].values, # size mesh x (resolution in x)
37-
"dyinp": bathy_data.lat[1].values
38-
- bathy_data.lat[0].values, # size mesh y (resolution in y)
37+
"xpinp": np.nanmin(bathy_data.lon), # x origin
38+
"ypinp": np.nanmin(bathy_data.lat), # y origin
39+
"alpinp": 0, # x-axis direction
40+
"mxinp": len(bathy_data.lon) - 1, # number mesh x
41+
"myinp": len(bathy_data.lat) - 1, # number mesh y
42+
"dxinp": abs(
43+
bathy_data.lon[1].values - bathy_data.lon[0].values
44+
), # size mesh x (resolution in x)
45+
"dyinp": abs(
46+
bathy_data.lat[1].values - bathy_data.lat[0].values
47+
), # size mesh y (resolution in y)
3948
}

bluemath_tk/waves/binwaves.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,11 @@ def process_kp_coefficients(
110110
print(f"Error processing {input_spec_file} and {output_spec_file}")
111111
print(e)
112112

113-
return (
114-
xr.concat(output_kp_list, dim="case_num")
115-
.fillna(0.0)
116-
.sortby("freq")
117-
.sortby("dir")
118-
)
113+
# Concat files one by one
114+
concatened_kp = output_kp_list[0]
115+
for file in output_kp_list[1:]:
116+
concatened_kp = xr.concat([concatened_kp, file], dim="case_num")
117+
return concatened_kp.fillna(0.0).sortby("freq").sortby("dir")
119118

120119

121120
def reconstruc_spectra(
@@ -149,15 +148,15 @@ def reconstruc_spectra(
149148

150149
# Setup Dask client
151150
if num_workers is None:
152-
num_workers = os.environ.get("BLUEMATH_NUM_WORKERS", 2)
151+
num_workers = os.environ.get("BLUEMATH_NUM_WORKERS", 4)
153152
client = setup_dask_client(n_workers=num_workers, memory_limit=memory_limit)
154153

155154
try:
156155
# Process with controlled chunks
157156
offshore_spectra_chunked = offshore_spectra.chunk(
158-
{"time": chunk_sizes.get("time", 24)}
157+
{"time": chunk_sizes.get("time", 24 * 7)}
159158
)
160-
kp_coeffs_chunked = kp_coeffs.chunk({"site": 1})
159+
kp_coeffs_chunked = kp_coeffs.chunk({"site": 10})
161160
with ProgressBar():
162161
onshore_spectra = (
163162
(offshore_spectra_chunked * kp_coeffs_chunked).sum(dim="case_num")
@@ -250,7 +249,7 @@ def plot_selected_cases_grid(
250249
ax = fig.add_subplot(1, 1, 1, projection="polar")
251250

252251
# prepare data
253-
x = np.append(np.deg2rad(directions - 7.5), np.deg2rad(directions - 7.5)[0])
252+
x = np.append(np.deg2rad(directions), np.deg2rad(directions)[0])
254253
y = np.append(0, frequencies)
255254
z = (
256255
np.array(range(len(frequencies) * len(directions)))

0 commit comments

Comments
 (0)