Skip to content

Commit 7fbdb82

Browse files
author
alcholiclg
committed
fix financial data fetcher
1 parent f939e53 commit 7fbdb82

4 files changed

Lines changed: 150 additions & 115 deletions

File tree

ms_agent/tools/findata/akshare_source.py

Lines changed: 73 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ def get_historical_k_data(
175175
return df
176176

177177
except Exception as e:
178-
logger.error(f'Error fetching K-data for {code}: {e}')
179178
raise DataSourceError(f'Failed to fetch K-data: {e}')
180179

181180
def get_stock_basic_info(self, code: str) -> pd.DataFrame:
@@ -190,59 +189,67 @@ def get_stock_basic_info(self, code: str) -> pd.DataFrame:
190189
else:
191190
return self._get_a_share_basic_info(code)
192191

192+
except (NoDataFoundError, DataSourceError):
193+
# Re-raise custom errors without wrapping
194+
raise
193195
except Exception as e:
194-
logger.error(f'Error fetching basic info for {code}: {e}')
196+
# Only wrap unexpected errors
195197
raise DataSourceError(f'Failed to fetch basic info: {e}')
196198

197199
def _get_hk_basic_info(self, code: str) -> pd.DataFrame:
198200
"""Get HK stock basic information"""
199201
clean_code = self._convert_code(code, market='HK')
202+
df_stock_info = pd.DataFrame()
203+
df_business_info = pd.DataFrame()
204+
205+
# Try to fetch base info
200206
try:
201207
df_base_info = ak.stock_hk_spot_em()
202208
stock_info = df_base_info[df_base_info['代码'] == clean_code]
203-
if stock_info.empty:
204-
raise NoDataFoundError(f'No basic info found for {code}')
205-
206-
df_stock_info = pd.DataFrame({
207-
'code': [code],
208-
'code_name': [stock_info['名称'].iloc[0]],
209-
'listingDate': [''], # listing date might not be available
210-
'outDate': [''],
211-
'type': ['2'], # type of stock
212-
'status': ['1']
213-
})
209+
if not stock_info.empty:
210+
df_stock_info = pd.DataFrame({
211+
'code': [code],
212+
'code_name': [stock_info['名称'].iloc[0]],
213+
'listingDate': [''], # listing date might not be available
214+
'outDate': [''],
215+
'type': ['2'], # type of stock
216+
'status': ['1']
217+
})
218+
except Exception:
219+
logger.warning(f'Failed to fetch HK stock base info for {code}')
214220

221+
# Try to fetch business info
222+
try:
215223
df_business_info = ak.stock_zyjs_ths(symbol=clean_code)
216-
if df_business_info.empty or not df_business_info:
217-
raise NoDataFoundError(f'No business info found for {code}')
224+
if not df_business_info.empty:
225+
df_business_info = df_business_info.rename(
226+
columns={
227+
'公司名称': 'company name',
228+
'英文名称': 'english name',
229+
'注册地': 'place of incorporation',
230+
'注册地址': 'registered address',
231+
'公司成立日期': 'date of incorporation',
232+
'所属行业': 'industry',
233+
'董事长': 'chairman',
234+
'公司秘书': 'company secretary',
235+
'员工人数': 'number of employees',
236+
'办公地址': 'office address',
237+
'公司网址': 'website',
238+
'E-MAIL': 'email',
239+
'年结日': 'financial year end',
240+
'联系电话': 'contact number',
241+
'核数师': 'auditor',
242+
'传真': 'fax',
243+
'公司介绍': 'company description'
244+
})
245+
except Exception:
246+
logger.warning(
247+
f'Failed to fetch HK stock business info for {code}')
218248

219-
df_business_info = df_business_info.rename(
220-
columns={
221-
'公司名称': 'company name',
222-
'英文名称': 'english name',
223-
'注册地': 'place of incorporation',
224-
'注册地址': 'registered address',
225-
'公司成立日期': 'date of incorporation',
226-
'所属行业': 'industry',
227-
'董事长': 'chairman',
228-
'公司秘书': 'company secretary',
229-
'员工人数': 'number of employees',
230-
'办公地址': 'office address',
231-
'公司网址': 'website',
232-
'E-MAIL': 'email',
233-
'年结日': 'financial year end',
234-
'联系电话': 'contact number',
235-
'核数师': 'auditor',
236-
'传真': 'fax',
237-
'公司介绍': 'company description'
238-
})
249+
if df_stock_info.empty and df_business_info.empty:
250+
raise NoDataFoundError(f'No basic info found for {code}')
239251

240-
return pd.concat([df_stock_info, df_business_info], axis=1)
241-
242-
except Exception as e:
243-
logger.error(f'Error fetching HK stock basic info for {code}: {e}')
244-
raise DataSourceError(
245-
f'Failed to fetch HK stock basic info for {code}: {e}')
252+
return pd.concat([df_stock_info, df_business_info], axis=1)
246253

247254
def _get_us_basic_info(self, code: str) -> pd.DataFrame:
248255
"""Get US stock basic information"""
@@ -268,7 +275,6 @@ def _get_us_basic_info(self, code: str) -> pd.DataFrame:
268275
return result_df
269276

270277
except Exception as e:
271-
logger.error(f'Error fetching US stock basic info for {code}: {e}')
272278
raise DataSourceError(
273279
f'Error fetching US stock basic info for {code}: {e}')
274280

@@ -302,7 +308,7 @@ def _get_a_share_basic_info(self, code: str) -> pd.DataFrame:
302308
})
303309

304310
df_business_info = ak.stock_zyjs_ths(symbol=clean_code)
305-
if df_business_info.empty or not df_business_info:
311+
if df_business_info.empty:
306312
raise NoDataFoundError(f'No business info found for {code}')
307313

308314
df_business_info = df_business_info.rename(
@@ -319,7 +325,6 @@ def _get_a_share_basic_info(self, code: str) -> pd.DataFrame:
319325
ignore_index=True)
320326

321327
except Exception as e:
322-
logger.error(f'Error fetching A-share basic info for {code}: {e}')
323328
raise DataSourceError(
324329
f'Error fetching A-share basic info for {code}: {e}')
325330

@@ -481,7 +486,6 @@ def _filter_columns(row_df: pd.DataFrame,
481486
logger.warning(
482487
f'Failed to fetch financial_hk_analysis_indicator_em or financial_us_analysis_indicator_em: {e}'
483488
)
484-
ind_df = pd.DataFrame()
485489
result['financial_indicators'] = ind_df
486490

487491
elif code.startswith(('sh.', 'sz.', 'bj.')):
@@ -501,6 +505,7 @@ def _filter_columns(row_df: pd.DataFrame,
501505

502506
for data_type in data_types:
503507
try:
508+
result[data_type] = pd.DataFrame()
504509
if data_type in ('profit', 'operation', 'growth',
505510
'dupont'):
506511
if ind_df.empty:
@@ -513,14 +518,14 @@ def _filter_columns(row_df: pd.DataFrame,
513518

514519
elif data_type == 'balance':
515520
df = ak.stock_balance_sheet_by_report_em(
516-
symbol=clean_code)
521+
symbol=code.replace('.', '').upper())
517522
row = _select_row_by_report(df)
518523
if not row.empty:
519524
result[data_type] = row
520525

521526
elif data_type == 'cash_flow':
522527
df = ak.stock_cash_flow_sheet_by_report_em(
523-
symbol=clean_code)
528+
symbol=code.replace('.', '').upper())
524529
row = _select_row_by_report(df)
525530
if not row.empty:
526531
result[data_type] = row
@@ -533,7 +538,7 @@ def _filter_columns(row_df: pd.DataFrame,
533538
logger.warning(f'Failed to fetch {data_type} data: {e}')
534539
continue
535540

536-
if not result:
541+
if not result or all(df.empty for df in result.values()):
537542
raise NoDataFoundError(
538543
f'No financial data found for {code} ({year}Q{quarter})')
539544

@@ -579,7 +584,6 @@ def get_stock_list(self,
579584
return df
580585

581586
except Exception as e:
582-
logger.error(f'Error fetching stock list: {e}')
583587
raise DataSourceError(f'Failed to fetch stock list: {e}')
584588

585589
def get_trade_dates(self,
@@ -591,6 +595,10 @@ def get_trade_dates(self,
591595
try:
592596
df = ak.tool_trade_date_hist_sina()
593597

598+
# Ensure trade_date is string for comparison
599+
if 'trade_date' in df.columns:
600+
df['trade_date'] = df['trade_date'].astype(str)
601+
594602
if start_date:
595603
df = df[df['trade_date'] >= start_date]
596604
if end_date:
@@ -599,7 +607,6 @@ def get_trade_dates(self,
599607
return df
600608

601609
except Exception as e:
602-
logger.error(f'Error fetching trade dates: {e}')
603610
raise DataSourceError(f'Failed to fetch trade dates: {e}')
604611

605612
def get_macro_data(
@@ -626,8 +633,8 @@ def get_macro_data(
626633
try:
627634
if data_type in ('deposit_rate', 'loan_rate'):
628635
result[data_type] = ak.rate_interbank()
629-
elif data_type in ('required_reserve_ratio', 'shibor'):
630-
logger.warning(
636+
elif data_type in ('required_reserve_ratio'):
637+
raise DataSourceError(
631638
'Required reserve ratio is not supported by AKShare')
632639
continue
633640
elif data_type == 'money_supply_year':
@@ -640,7 +647,8 @@ def get_macro_data(
640647
raise ValueError(f'Invalid data type: {data_type}')
641648

642649
except Exception as e:
643-
logger.error(f'Error fetching {data_type} data: {e}')
650+
logger.warning(f'Failed to fetch {data_type} data: {e}')
651+
result[data_type] = pd.DataFrame()
644652
continue
645653

646654
if not result:
@@ -654,15 +662,18 @@ def _get_money_supply_data_month(
654662
end_date: Optional[str] = None) -> pd.DataFrame:
655663
try:
656664
df = ak.macro_china_money_supply() # from 2008-01 to now
657-
df['月份'] = pd.to_datetime(df['月份'])
665+
df['月份'] = pd.to_datetime(df['月份'].str.replace('月份',
666+
'').str.replace(
667+
'年', '-'))
668+
df['月份'] = df['月份'].dt.to_period('M')
658669
if start_date:
659-
df = df[df['月份'] >= pd.to_datetime(start_date)]
670+
df = df[
671+
df['月份'] >= pd.to_datetime(start_date).strftime('%Y-%m')]
660672
if end_date:
661-
df = df[df['月份'] <= pd.to_datetime(end_date)]
673+
df = df[df['月份'] <= pd.to_datetime(end_date).strftime('%Y-%m')]
662674

663675
return df.sort_values('月份').reset_index(drop=True)
664676
except Exception as e:
665-
logger.error(f'Error fetching monthly money supply data: {e}')
666677
raise DataSourceError(
667678
f'Error fetching monthly money supply data: {e}')
668679

@@ -675,9 +686,8 @@ def _get_money_supply_data_year(
675686
month_df['年'] = month_df['月份'].dt.year
676687
last_in_year = (
677688
month_df.sort_values('月份').groupby(
678-
'年份', as_index=False).tail(1).reset_index(drop=True))
689+
'年', as_index=False).tail(1).reset_index(drop=True))
679690
cols = [
680-
'年份',
681691
'货币和准货币(M2)-数量(亿元)',
682692
'货币和准货币(M2)-同比增长',
683693
'货币(M1)-数量(亿元)',
@@ -686,11 +696,11 @@ def _get_money_supply_data_year(
686696
'流通中的现金(M0)-同比增长',
687697
]
688698
year_df = last_in_year[
689-
['年份'] + [c for c in cols if c in last_in_year.columns]]
699+
['年'] + [c for c in cols if c in last_in_year.columns]]
690700

691701
if start_date:
692-
year_df = year_df[year_df['年份'] >= pd.to_datetime(start_date).year]
702+
year_df = year_df[year_df['年'] >= pd.to_datetime(start_date).year]
693703
if end_date:
694-
year_df = year_df[year_df['年份'] <= pd.to_datetime(end_date).year]
704+
year_df = year_df[year_df['年'] <= pd.to_datetime(end_date).year]
695705

696-
return year_df.sort_values('年份').reset_index(drop=True)
706+
return year_df.sort_values('年').reset_index(drop=True)

ms_agent/tools/findata/baostock_source.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (c) Alibaba, Inc. and its affiliates.
22
import threading
33
from contextlib import contextmanager
4+
from copy import deepcopy
45
from typing import Dict, List, Optional
56

67
import baostock as bs
@@ -310,25 +311,42 @@ def get_macro_data(
310311
result = {}
311312
with baostock_session():
312313
for data_type in data_types:
314+
parsed_extra_kwargs = {}
315+
parsed_start_date = start_date
316+
parsed_end_date = end_date
317+
313318
if data_type == 'deposit_rate':
314319
query_func = bs.query_deposit_rate_data
320+
315321
elif data_type == 'loan_rate':
316322
query_func = bs.query_loan_rate_data
323+
317324
elif data_type == 'required_reserve_ratio':
318325
query_func = bs.query_required_reserve_ratio_data
319326
if not extra_kwargs or not extra_kwargs.get('yearType'):
320-
extra_kwargs['yearType'] = '0'
327+
parsed_extra_kwargs['yearType'] = '0'
328+
else:
329+
parsed_extra_kwargs.update(extra_kwargs)
330+
321331
elif data_type == 'money_supply_month':
322332
query_func = bs.query_money_supply_data_month
333+
parsed_start_date = pd.to_datetime(start_date).strftime(
334+
'%Y-%m')
335+
parsed_end_date = pd.to_datetime(end_date).strftime(
336+
'%Y-%m')
337+
323338
elif data_type == 'money_supply_year':
324339
query_func = bs.query_money_supply_data_year
325-
elif data_type == 'shibor':
326-
query_func = bs.query_shibor_data
340+
parsed_start_date = pd.to_datetime(start_date).strftime(
341+
'%Y')
342+
parsed_end_date = pd.to_datetime(end_date).strftime('%Y')
343+
327344
else:
328345
raise ValueError(f'Invalid data type: {data_type}')
329346

330-
df = self._query_macro_data(query_func, data_type, start_date,
331-
end_date, **extra_kwargs)
347+
df = self._query_macro_data(query_func, data_type,
348+
parsed_start_date, parsed_end_date,
349+
**parsed_extra_kwargs)
332350
result[data_type] = df
333351

334352
if not result:
@@ -341,6 +359,11 @@ def _query_macro_data(self, query_func, data_type: str, start_date: str,
341359
"""Query macro data using provided function (assumes session is already active)"""
342360
logger.info(f'Fetching {data_type} for {start_date} to {end_date}')
343361

344-
rs = query_func(start_date=start_date, end_date=end_date, **kwargs)
345-
return self._query_to_dataframe(
346-
rs, f'{data_type} for {start_date} to {end_date}')
362+
try:
363+
rs = query_func(start_date=start_date, end_date=end_date, **kwargs)
364+
return self._query_to_dataframe(
365+
rs, f'{data_type} for {start_date} to {end_date}')
366+
367+
except Exception as e:
368+
logger.warning(f'Failed to fetch {data_type} data: {e}')
369+
return pd.DataFrame()

0 commit comments

Comments
 (0)