Skip to content

Commit 6ed78b5

Browse files
authored
Merge pull request #249 from gavargas22/feat/parallel_processing
Using concurrent futures for processing multiple las files in parallel
2 parents b44e90b + 0e3f812 commit 6ed78b5

File tree

5 files changed

+63
-27
lines changed

5 files changed

+63
-27
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,7 @@ csv-plugin.xml
7171

7272
# temporary test folder
7373
tests/temp
74+
75+
# UV related files
76+
uv.lock
77+
.python-version

pyproject.toml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,3 @@
1-
[build-system]
2-
requires = ["hatchling", "hatch-vcs"]
3-
build-backend = "hatchling.build"
4-
51
[project]
62
name = "welly"
73
dynamic = ["version"]
@@ -22,6 +18,7 @@ classifiers = [
2218
"License :: OSI Approved :: Apache Software License",
2319
"Operating System :: OS Independent"
2420
]
21+
requires-python = ">=3.9"
2522
dependencies = [
2623
"numpy",
2724
"scipy",
@@ -61,3 +58,7 @@ testpaths = ["tests"]
6158
[tool.setuptools_scm]
6259
write_to = "welly/_version.py"
6360
git_describe_command = "git describe --dirty --tags --long --match v* --first-parent"
61+
62+
[build-system]
63+
requires = ["hatchling", "hatch-vcs"]
64+
build-backend = "hatchling.build"

welly/las.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -482,11 +482,11 @@ def file_from_url(url):
482482
text_file (StringIO): an in-memory stream for text.
483483
"""
484484
try:
485-
text_file = StringIO(request.urlopen(url).read().decode())
485+
with request.urlopen(url) as response:
486+
content = response.read().decode()
487+
return content # Return the content directly instead of wrapping in StringIO
486488
except error.HTTPError as e:
487-
raise Exception('Could not retrieve url: ', e)
488-
489-
return text_file
489+
raise Exception(f'Could not retrieve url: {url} - {e}')
490490

491491

492492
def get_las_version(las):

welly/project.py

Lines changed: 41 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,26 @@
2121
from .plot import plot_kdes_project, plot_map_project
2222

2323

24+
def _load_well_from_las(filepath, remap=None, funcs=None, data=True, req=None, alias=None, encoding=None, printfname=None, index=None, **kwargs):
25+
"""Helper function for concurrent well loading."""
26+
try:
27+
# Handle URLs directly in the subprocess to avoid file handle issues
28+
# when passing file objects between processes
29+
return Well.from_las(filepath,
30+
remap=remap,
31+
funcs=funcs,
32+
data=data,
33+
req=req,
34+
alias=alias,
35+
encoding=encoding,
36+
printfname=printfname,
37+
index=index,
38+
**kwargs)
39+
except Exception as e:
40+
print(f"Error loading well {filepath}: {e}")
41+
return None
42+
43+
2444
class Project(object):
2545
"""
2646
Just a list of Well objects.
@@ -163,6 +183,9 @@ def from_las(cls,
163183
Returns:
164184
project. The project object.
165185
"""
186+
import concurrent.futures
187+
from tqdm import tqdm
188+
166189
if max is None:
167190
max = 1e12
168191
if (req is not None) and (alias is None):
@@ -180,20 +203,24 @@ def from_las(cls,
180203
else:
181204
uris = path # It's a list-like of files and/or URLs.
182205

183-
wells = [Well.from_las(f,
184-
remap=remap,
185-
funcs=funcs,
186-
data=data,
187-
req=req,
188-
alias=alias,
189-
encoding=encoding,
190-
printfname=printfname,
191-
index=index,
192-
**kwargs,
193-
)
194-
for i, f in tqdm(enumerate(uris)) if i < max]
195-
196-
return cls(list(filter(None, wells)))
206+
# Limit to the maximum number of wells requested
207+
uris = [f for i, f in enumerate(uris) if i < max]
208+
209+
wells = []
210+
with concurrent.futures.ProcessPoolExecutor() as executor:
211+
# Submit all tasks and create a mapping of futures to original indices
212+
future_to_idx = {executor.submit(_load_well_from_las, uri, remap=remap, funcs=funcs, data=data, req=req, alias=alias, encoding=encoding, printfname=printfname, index=index, **kwargs): i for i, uri in enumerate(uris)}
213+
214+
# Use tqdm to show a progress bar
215+
for future in tqdm(concurrent.futures.as_completed(future_to_idx), total=len(uris), desc="Loading wells"):
216+
try:
217+
well = future.result()
218+
if well is not None:
219+
wells.append(well)
220+
except Exception as e:
221+
print(f"Error loading well: {e}")
222+
223+
return cls(wells, source=path)
197224

198225
def add_canstrat_striplogs(self, path, uwi_transform=None, name='canstrat'):
199226
"""

welly/well.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -298,11 +298,15 @@ def from_las(cls,
298298
if printfname:
299299
print(fname)
300300

301-
# If https URL is passed try reading and formatting it to text file.
302-
if re.match(r'https?://.+\..+/.+?', fname) is not None:
303-
fname = file_from_url(fname)
304-
305-
datasets = from_las(fname, encoding=encoding, **kwargs)
301+
# If https URL is passed, download the content
302+
is_url = re.match(r'https?://.+\..+/.+?', fname) is not None
303+
if is_url:
304+
content = file_from_url(fname)
305+
# Pass the content string directly to from_las
306+
datasets = from_las(content, encoding=encoding, **kwargs)
307+
else:
308+
# Regular file path
309+
datasets = from_las(fname, encoding=encoding, **kwargs)
306310

307311
# Create well from datasets.
308312
well = cls.from_datasets(datasets,

0 commit comments

Comments (0)