Skip to content

Commit 8c71860

Browse files
authored
Merge pull request #266 from NatLabRockies/develop
FLASC v2.5
2 parents 95d069b + d0d6df1 commit 8c71860

12 files changed

Lines changed: 1248 additions & 14 deletions

File tree

.github/workflows/continuous-integration-workflow.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
strategy:
1010
fail-fast: false
1111
matrix:
12-
python-version: ["3.9", "3.10", "3.11"]
12+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
1313
os: [ubuntu-latest]
1414

1515
steps:

.github/workflows/deploy-pages.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
- name: Set up Python
2222
uses: actions/setup-python@v4
2323
with:
24-
python-version: "3.10"
24+
python-version: "3"
2525

2626
- name: Install dependencies
2727
run: |

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,5 @@ SMARTEOLE_WakeSteering_ReadMe.xlsx
3232
SMARTEOLE_WakeSteering_Map.pdf
3333
SMARTEOLE-WFC-open-dataset.zip
3434
examples_artificial_data/03_energy_ratio/heterogeneity_layouts.pdf
35+
examples_smarteole/data/SMARTEOLE-LES-simulation-data/data
3536
*.pkl

docs/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Welcome to the documentation of the NLR FLASC repository!
55

66
```{note}
7-
As of FLASC v2.3, FLASC requires `numpy` version 2, following the update in FLORIS v4.3. See the [numpy documentation for details](https://numpy.org/doc/stable/numpy_2_0_migration_guide.html).
7+
As of FLASC v2.5, FLASC requires python v3.10 or greater.
88
```
99

1010
FLASC provides a comprehensive toolkit for wind farm analysis, combining SCADA data processing with advanced wake modeling capabilities. The repository is intended as a community-driven toolbox, available on its [GitHub Repository](https://github.com/NatLabRockies/flasc).

docs/introduction.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ See {cite:p}`Doekemeijer2022a` and {cite:p}`Bay2022a` for practical examples of
6262

6363
If FLASC played a role in your research, please cite it. This software can be cited as:
6464

65-
FLASC. Version 2.4.2 (2026). Available at https://github.com/NatLabRockies/flasc.
65+
FLASC. Version 2.5 (2026). Available at https://github.com/NatLabRockies/flasc.
6666

6767
For LaTeX users:
6868

6969
@misc{flasc2026,
7070
author = {NLR},
71-
title = {FLASC. Version 2.4.2},
71+
title = {FLASC. Version 2.5},
7272
year = {2026},
7373
publisher = {GitHub},
7474
journal = {GitHub repository},

examples_smarteole/12_model_les_wake_loss_validation.ipynb

Lines changed: 402 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
import glob
2+
import os
3+
import zipfile
4+
from datetime import timedelta as td
5+
6+
import numpy as np
7+
import pandas as pd
8+
import xarray as xr
9+
from floris.utilities import wrap_360
10+
from zenodo_get import download as zn_download
11+
12+
from flasc.data_processing.time_operations import df_resample_by_interpolation
13+
14+
15+
class AspireTimeseriesReader:
16+
"""This class is used to read the output .tar.gz files from Whiffle and export
17+
them in various formats.
18+
"""
19+
20+
def __init__(self, aspire_metmast_filelist=[], aspire_turbine_filelist=[], verbose=False):
21+
"""Initialize the class.
22+
23+
Args:
24+
aspire_filelist (list): List of strings, where each entry of the list defines the
25+
path to one of the ASPIRE nc files.
26+
"""
27+
self.metmast_files = aspire_metmast_filelist
28+
self.turbine_files = aspire_turbine_filelist
29+
self.turbine_datasets = None
30+
self.metmast_datasets = None
31+
self.verbose = verbose
32+
33+
# Private functions
34+
def _read_member_as_xarray(self, fn):
35+
"""Read the contents of one of the files within the .tar.gz file as an xarray DataSet.
36+
37+
Args:
38+
fn (str): Filename refering to a turbine ASPIRE datafile.
39+
40+
Returns:
41+
dataset (xarray.Dataset): Contents of the HDF5 file imported as an xarray Dataset.
42+
"""
43+
# Now read the turbine .nc (HDF5) data file as an xarray and convert to a Pandas DataFrame
44+
dataset = xr.open_dataset(fn).load() # Load it into xarray format
45+
dataset.close() # Close dataset after reading to avoid conflicts
46+
47+
if self.verbose:
48+
print(f"Successfully imported the contents of {os.path.basename(fn)}.")
49+
50+
return dataset
51+
52+
def _concatenate_dataframes_and_remove_startup_time(self, df_list):
53+
"""Typically, ASPIRE simulations are performed one day at a time, including several hours of
54+
simulation start-up. This means that each day runs for about 26 hours, and that means there
55+
is an overlap window of about 2 hours between simulations. The start-up period, usually the
56+
first two hours of the simulation, must be removed so that each datafile contains exactly
57+
24 hours of data. This script removes the startup periods and concatenates the Pandas
58+
DataFrames into a single file.
59+
60+
Args:
61+
df_list (list): List where each entry is a Pandas DataFrame containing the simulation
62+
data for one day from ASPIRE. These datasets typically start about 2 hours before the
63+
actual day of simulation. These 2 hours will be removed so that it perfectly connects
64+
with the simulation data of the previous day.
65+
66+
Returns:
67+
df_out (pd.DataFrame): Pandas DataFrame containing multiple days of simulation data,
68+
with the start-up periods and overlapping measurement times removed.
69+
"""
70+
# Here we stitch dataframes together while removing start-up periods
71+
72+
# The dataset usually starts a couple hours before midnight in the day before the
73+
# actual day of the simulation. That is the start-up period. We must remove that
74+
# period from the dataset. Let's do this for the first dataframe separately.
75+
df = df_list[0].copy()
76+
first_day_change = np.where(np.diff([t.day for t in df["time"]]) != 0)[0][0] + 2
77+
df = df.loc[first_day_change::] # Remove start-up period from first file
78+
df_list[0] = df # Update the dataframe with the start-up measurements removed
79+
80+
# Establish the end time of the first simulation, so we can use that to remove start-up
81+
# periods from the next files
82+
t_end_prev_simulation = df.iloc[-1]["time"]
83+
84+
for ii in range(1, len(df_list)):
85+
# For every file after the first, we can see where the previous simulation ended
86+
# and make sure we remove measurement data of this simulation that happens *before*
87+
# the latest simulation time of the previous file. Namely, that period is considered
88+
# the start-up period for this file and should be removed.
89+
df = df_list[ii].copy()
90+
dt = df["time"].diff().median() # Average duration between timesteps
91+
df = df.loc[
92+
df["time"] > t_end_prev_simulation + 0.5 * dt
93+
] # Only keep timesteps at least 5 minutes past the last measurement from previous
94+
95+
# Update the dataframe with the start-up measurements removed
96+
df_list[ii] = df
97+
t_end_prev_simulation = df.iloc[-1]["time"]
98+
99+
# Collect all the outputs together into a single DataFrame and sort it chronologically
100+
df_out = pd.concat(df_list, axis=0).sort_values(by="time").reset_index(drop=True)
101+
return df_out
102+
103+
def get_turbine_hub_heights(self):
104+
"""Extract the turbine hub heights from the imported turbine data files.
105+
106+
Returns:
107+
hub_heights (np.array): Array with length equal to the number of turbines, containing
108+
the hub height value for each wind turbine in the ASPIRE simulation.
109+
"""
110+
if self.turbine_datasets is None:
111+
raise UserWarning(
112+
"Cannot extract turbine hub heights. Please read the files first using "
113+
"get_turbine_data_as_xarrays()."
114+
)
115+
116+
# Extract hub heights from the first file
117+
hub_heights = np.array(self.turbine_datasets[0]["ztur"], dtype=float)
118+
return hub_heights
119+
120+
def get_turbine_data_as_xarrays(self):
121+
"""Import the turbine HDF5 file from each .tar.gz file and format them as xarray Datasets.
122+
123+
Returns:
124+
turbine_datasets: List of xarray Datasets, one for each tarball file, containing
125+
the turbine simulation output data.
126+
"""
127+
turbine_datasets = []
128+
for fn in self.turbine_files:
129+
# Now read the turbine .nc (HDF5) data file as an xarray
130+
turbine_dataset = self._read_member_as_xarray(fn)
131+
turbine_datasets.append(turbine_dataset)
132+
133+
self.turbine_datasets = turbine_datasets
134+
return turbine_datasets
135+
136+
def get_turbine_data_as_dataframe(self, variables=None):
137+
"""Convert the turbine data that is formatted as xarray Datasets to a single Pandas
138+
DataFrame that is easy to investigate and do analysis with.
139+
140+
Args:
141+
variables (list, optional): List of turbine measurement variables that should be
142+
exported from the simulation file. Defaults to ["ptur", "ufsf", "vfsf"].
143+
144+
Returns:
145+
df_out (pd.DataFrame): Pandas DataFrame containing the turbine measurement data in a
146+
wide table format, where there is one column for each turbine and for each variable.
147+
Each rows depicts the timestamp of one set of measurements. The overlapping time windows
148+
of about 2 hours between each day of ASPIRE simulation has been removed so that the
149+
start-up periods are removed and the dataset is monotonically increasing with 10-minute
150+
timesteps.
151+
"""
152+
# Load the turbine data files, if we haven't done that yet
153+
if self.turbine_datasets is None:
154+
self.get_turbine_data_as_xarrays() # Get all turbine data as xarrays
155+
156+
# Default options:
157+
if variables is None:
158+
if "ufsf" in list(self.turbine_datasets[0].data_vars):
159+
variables = ["ptur", "ufsf", "vfsf"]
160+
print(f"WARNING: Using legacy variable naming convention from GRASP, '{variables}'")
161+
elif "Mdfs" in list(self.turbine_datasets[0].data_vars):
162+
variables = ["ptur", "Mdfs", "cosangle", "sinangle"]
163+
else:
164+
raise UserWarning("Unfamiliar variable naming convention in GRASP datafiles.")
165+
166+
# Convert files one by one to Pandas DataFrames
167+
df_list = []
168+
num_turbines = len(self.turbine_datasets[0].turbine)
169+
for turbine_dataset in self.turbine_datasets:
170+
# Convert the data from each file into a 'wide' Pandas DataFrame
171+
df_turbine = turbine_dataset[variables].to_dataframe().unstack()
172+
173+
# Update column names in wide format following FLASC format
174+
columns = []
175+
for var in variables:
176+
columns = columns + [f"{var:s}_{ti:03d}" for ti in range(num_turbines)]
177+
df_turbine.columns = columns
178+
df_turbine = df_turbine.reset_index()
179+
180+
# Finally, append it to a list that we will stitch together later
181+
df_list.append(df_turbine)
182+
183+
# Concatenate all the individual dataframes and deal with overlap in timeseries entries
184+
df_out = self._concatenate_dataframes_and_remove_startup_time(df_list)
185+
return df_out
186+
187+
def construct_flasc_timeseries(self):
188+
""" """
189+
# First, get the turbine information, if we dont have those in memory yet
190+
self.get_turbine_data_as_dataframe()
191+
df_tot = self.get_turbine_data_as_dataframe()
192+
193+
# Calculate turbine wind speed and wind direction, if variables available
194+
n_turbines = len([c for c in df_tot.columns if c.startswith("ptur_")])
195+
df_tot.columns = [c.replace("ptur_", "pow_") for c in df_tot.columns]
196+
197+
dict_out = {}
198+
for ti in range(n_turbines):
199+
u = df_tot[f"cosangle_{ti:03d}"] * df_tot[f"Mdfs_{ti:03d}"]
200+
v = df_tot[f"sinangle_{ti:03d}"] * df_tot[f"Mdfs_{ti:03d}"]
201+
dict_out[f"ws_{ti:03d}"] = np.sqrt(u**2.0 + v**2.0)
202+
dict_out[f"wd_{ti:03d}"] = wrap_360(180.0 + np.rad2deg(np.arctan2(u, v)))
203+
204+
# Append local wind speed and direction measurements to the dataframe
205+
df_tot = pd.concat([df_tot, pd.DataFrame(dict_out)], axis=1)
206+
df_tot = df_tot.drop(columns=[c for c in df_tot.columns if c.startswith("cosangle_")])
207+
df_tot = df_tot.drop(columns=[c for c in df_tot.columns if c.startswith("sinangle_")])
208+
df_tot = df_tot.drop(columns=[c for c in df_tot.columns if c.startswith("Mdfs_")])
209+
210+
return df_tot
211+
212+
213+
if __name__ == "__main__":
214+
# Download files from Zenodo. Note that this may fail in certain VPN
215+
# environments or with certain SSL certificates.
216+
root_path = os.path.dirname(os.path.abspath(__file__))
217+
data_path = os.path.join(root_path, "data")
218+
zn_download("10.5281/zenodo.18888663", output_dir=data_path)
219+
220+
# Unzip the LES timeseries data
221+
path_to_zip_file = os.path.join(data_path, "les_output.zip")
222+
with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
223+
zip_ref.extractall(data_path)
224+
225+
aspire_turbine_files = glob.glob(
226+
os.path.join(data_path, "les_output", "2020", "*", "*", "00", "turbinesOut.les.nc")
227+
)
228+
aspire_turbine_files = np.sort(aspire_turbine_files)
229+
230+
# Load them one at a time
231+
atr = AspireTimeseriesReader(aspire_turbine_filelist=aspire_turbine_files, verbose=True)
232+
df_turbine = atr.get_turbine_data_as_dataframe()
233+
df_les_timeseries = atr.construct_flasc_timeseries()
234+
235+
# Resample to 10 min steps
236+
t0 = (
237+
str(df_les_timeseries["time"].iloc[0])[0:17] + "00"
238+
) # Create time array rounded to nearest 10-min averages
239+
t1 = str(df_les_timeseries["time"].iloc[-1])[0:17] + "00"
240+
time_array = pd.date_range(start=t0, end=t1, freq="10min").tolist()
241+
df_les_timeseries = df_resample_by_interpolation(
242+
df=df_les_timeseries,
243+
time_array=time_array, # Interpolate onto the same timeseries that df_metmast uses
244+
circular_cols=[
245+
c for c in df_les_timeseries.columns if c.startswith("wd_")
246+
], # No variables in turbine measurement dataset that require circular averaging
247+
interp_method="linear", # Use linear interpolation
248+
max_gap=td(
249+
minutes=20
250+
), # Do not interpolate over gaps larger than 20 minutes between measurements
251+
verbose=False,
252+
)
253+
254+
# Save as .csv
255+
fout = os.path.join(root_path, "les_timeseries.csv")
256+
df_les_timeseries.to_csv(fout, index=False, float_format="%.3f")
257+
print("Converted LES timeseries data has been saved to: " + fout)

0 commit comments

Comments
 (0)