1- from loguru import logger
21import os
32from typing import Optional
43
54import fire
65import pandas as pd
7- import qlib
6+ from loguru import logger
87from tqdm import tqdm
98
9+ import qlib
1010from qlib .data import D
1111
1212
@@ -36,6 +36,7 @@ def __init__(
3636 self .large_step_threshold_price = large_step_threshold_price
3737 self .large_step_threshold_volume = large_step_threshold_volume
3838 self .missing_data_num = missing_data_num
39+ self .qlib_dir = os .path .abspath (os .path .expanduser (qlib_dir ))
3940
4041 if csv_path :
4142 assert os .path .isdir (csv_path ), f"{ csv_path } should be a directory."
@@ -68,6 +69,43 @@ def load_qlib_data(self):
6869 self .data [instrument ] = df
6970 print (df )
7071
72+ # NOTE:
73+ # This check is added due to a known issue in Qlib where feature paths
74+ # are constructed using lowercased instrument names. On case-sensitive
75+ # file systems (e.g. Linux), uppercase directory names under `features/`
76+ # will cause data loading failures.
77+ #
78+ # See: https://github.com/microsoft/qlib/issues/2053
def check_features_dir_lowercase(self) -> Optional[pd.DataFrame]:
    """
    Check whether all subdirectories under `<qlib_dir>/features` are named in lowercase.

    This validation helps prevent data loading issues on case-sensitive
    file systems caused by uppercase instrument directory names.

    Returns
    -------
    Optional[pd.DataFrame]
        A DataFrame with a single column ``non_lowercase_dir`` listing, in
        sorted order, every subdirectory whose name is not entirely
        lowercase. ``None`` is returned when ``self.qlib_dir`` is empty,
        when the `features` directory does not exist (a warning is logged),
        or when every subdirectory name is already lowercase (an info
        message is logged).
    """
    if not self.qlib_dir:
        return None

    features_dir = os.path.join(self.qlib_dir, "features")
    if not os.path.isdir(features_dir):
        logger.warning(f"`features` directory not found under {self.qlib_dir} ")
        return None

    # `os.listdir` yields entries in arbitrary order, so sort the offenders to
    # keep the report stable across runs and platforms. The cheap string
    # comparison runs first so `isdir` is only called for names that are
    # actually candidates; plain files are ignored.
    bad_dirs = sorted(
        name
        for name in os.listdir(features_dir)
        if name != name.lower() and os.path.isdir(os.path.join(features_dir, name))
    )

    if bad_dirs:
        return pd.DataFrame({"non_lowercase_dir": bad_dirs})

    logger.info(f"✅ All subdirectories under `{features_dir} ` are named in lowercase.")
    return None
108+
71109 def check_missing_data (self ) -> Optional [pd .DataFrame ]:
72110 """Check if any data is missing in the DataFrame."""
73111 result_dict = {
@@ -177,11 +215,13 @@ def check_data(self):
177215 check_large_step_changes_result = self .check_large_step_changes ()
178216 check_required_columns_result = self .check_required_columns ()
179217 check_missing_factor_result = self .check_missing_factor ()
218+ check_features_dir_case_result = self .check_features_dir_lowercase ()
180219 if (
181220 check_large_step_changes_result is not None
182221 or check_large_step_changes_result is not None
183222 or check_required_columns_result is not None
184223 or check_missing_factor_result is not None
224+ or check_features_dir_case_result is not None
185225 ):
186226 print (f"\n Summary of data health check ({ len (self .data )} files checked):" )
187227 print ("-------------------------------------------------" )
@@ -197,6 +237,11 @@ def check_data(self):
197237 if isinstance (check_missing_factor_result , pd .DataFrame ):
198238 logger .warning (f"The factor column does not exist or is empty" )
199239 print (check_missing_factor_result )
240+ if isinstance (check_features_dir_case_result , pd .DataFrame ):
241+ logger .warning (
242+ f"Some subdirectories under `{ os .path .join (self .qlib_dir , 'features' )} ` contain uppercase letters, please rename them to lowercase manually."
243+ )
244+ print (check_features_dir_case_result )
200245
201246
202247if __name__ == "__main__" :
0 commit comments