Skip to content

Commit 2d16ab0

Browse files
authored
Merge pull request PyProphet#141 from singjc/patch/pq_memory_opt
Patch/pq memory opt
2 parents c850fbd + be1ba51 commit 2d16ab0

File tree

6 files changed

+1390
-307
lines changed

6 files changed

+1390
-307
lines changed

pyprophet/data_handling.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@
1515
def profile(fun):
1616
return fun
1717

18+
def format_bytes(size):
    """
    Render a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the largest
    supported unit (TB) is reached, e.g. ``1536`` -> ``"1.50 KB"``.

    Negative values (such as a decrease in memory usage) are scaled by
    their magnitude and keep their sign, e.g. ``-2048`` -> ``"-2.00 KB"``.

    Parameters
    ----------
    size : int or float
        Number of bytes.

    Returns
    -------
    str
        The formatted value followed by one of B, KB, MB, GB or TB.
    """
    # Pick the unit from the magnitude; otherwise every negative number
    # would pass the "< 1024" test immediately and be reported in bytes.
    sign = "-" if size < 0 else ""
    magnitude = abs(size)
    for unit in ("B", "KB", "MB", "GB"):
        if magnitude < 1024.0:
            return f"{sign}{magnitude:.2f} {unit}"
        magnitude /= 1024.0
    # Anything that survived four divisions is expressed in terabytes.
    return f"{sign}{magnitude:.2f} TB"
24+
1825
# selection of scores with low cross-correlation for metabolomics scoring
1926
def use_metabolomics_scores():
2027
return [
@@ -112,8 +119,12 @@ def create_index_if_not_exists(con, index_name, table_name, column_name):
112119

113120

114121
def is_parquet_file(file_path):
122+
'''
123+
Check if the file is a valid Parquet file.
124+
'''
115125
import pyarrow.parquet as pq
116126
from pyarrow.lib import ArrowInvalid, ArrowIOError
127+
117128
# First check extension
118129
if not os.path.splitext(file_path)[1].lower() in ('.parquet', '.pq'):
119130
return False
@@ -124,6 +135,29 @@ def is_parquet_file(file_path):
124135
return True
125136
except (ArrowInvalid, ArrowIOError, OSError):
126137
return False
138+
139+
def is_valid_split_parquet_dir(path):
    """
    Report whether ``path`` is a usable split-parquet directory.

    Returns True only when ``path`` is an existing directory in which both
    ``precursors_features.parquet`` and ``transition_features.parquet``
    exist as regular files and each one passes ``is_parquet_file``.
    Returns False in every other case.
    """
    if not os.path.isdir(path):
        return False

    expected_names = (
        "precursors_features.parquet",
        "transition_features.parquet",
    )
    candidates = (os.path.join(path, name) for name in expected_names)

    # Evaluated lazily and in order: the first missing or invalid file
    # stops the scan, and the parquet check only runs on existing files.
    return all(
        os.path.isfile(candidate) and is_parquet_file(candidate)
        for candidate in candidates
    )
160+
127161

128162
def get_parquet_column_names(file_path):
129163
"""

0 commit comments

Comments
 (0)