Skip to content

Commit 5fd0d0f

Browse files
authored
Merge pull request #2430 from bretsky/fix/curl-cffi-cookies
Incorporate curl_cffi to avoid rate limiting and cookie errors
2 parents fcecf2a + ad33ab9 commit 5fd0d0f

File tree

8 files changed

24 additions and 50 deletions.

meta.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ requirements:
2626
- frozendict >=2.3.4
2727
- beautifulsoup4 >=4.11.1
2828
- html5lib >=1.1
29+
- curl_cffi >=0.7
2930
- peewee >=3.16.2
3031
- pip
3132
- python
@@ -41,6 +42,7 @@ requirements:
4142
- frozendict >=2.3.4
4243
- beautifulsoup4 >=4.11.1
4344
- html5lib >=1.1
45+
- curl_cffi >=0.7
4446
- peewee >=3.16.2
4547
- python
4648

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,5 @@ beautifulsoup4>=4.11.1
99
peewee>=3.16.2
1010
requests_cache>=1.0
1111
requests_ratelimiter>=0.3.1
12-
scipy>=1.6.3
12+
scipy>=1.6.3
13+
curl_cffi>=0.7

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@
6363
'requests>=2.31', 'multitasking>=0.0.7',
6464
'platformdirs>=2.0.0', 'pytz>=2022.5',
6565
'frozendict>=2.3.4', 'peewee>=3.16.2',
66-
'beautifulsoup4>=4.11.1'],
66+
'beautifulsoup4>=4.11.1', 'curl_cffi>=0.7'],
6767
extras_require={
6868
'nospam': ['requests_cache>=1.0', 'requests_ratelimiter>=0.3.1'],
6969
'repair': ['scipy>=1.6.3'],

yfinance/base.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@
2828

2929
import numpy as np
3030
import pandas as pd
31-
import requests
31+
from curl_cffi import requests
32+
3233

3334
from . import utils, cache
3435
from .data import YfData
@@ -48,7 +49,7 @@
4849
class TickerBase:
4950
def __init__(self, ticker, session=None, proxy=_SENTINEL_):
5051
self.ticker = ticker.upper()
51-
self.session = session
52+
self.session = session or requests.Session(impersonate="chrome")
5253
self._tz = None
5354

5455
self._isin = None

yfinance/data.py

Lines changed: 12 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import random
33
from functools import lru_cache
44

5-
import requests as requests
5+
from curl_cffi import requests
66
from bs4 import BeautifulSoup
77
import datetime
88

@@ -82,7 +82,7 @@ def __init__(self, session=None, proxy=None):
8282
self._cookie_lock = threading.Lock()
8383

8484
self._session, self._proxy = None, None
85-
self._set_session(session or requests.Session())
85+
self._set_session(session or requests.Session(impersonate="chrome"))
8686
self._set_proxy(proxy)
8787

8888
utils.get_yf_logger().debug(f"Using User-Agent: {self.user_agent_headers['User-Agent']}")
@@ -174,14 +174,6 @@ def _load_cookie_basic(self):
174174
return cookie_dict['cookie']
175175

176176
def _get_cookie_basic(self, timeout=30):
177-
if self._cookie is not None:
178-
utils.get_yf_logger().debug('reusing cookie')
179-
return self._cookie
180-
181-
self._cookie = self._load_cookie_basic()
182-
if self._cookie is not None:
183-
return self._cookie
184-
185177
# To avoid infinite recursion, do NOT use self.get()
186178
# - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
187179
response = self._session.get(
@@ -190,31 +182,16 @@ def _get_cookie_basic(self, timeout=30):
190182
timeout=timeout,
191183
allow_redirects=True)
192184

193-
if not response.cookies:
194-
utils.get_yf_logger().debug("response.cookies = None")
195-
return None
196-
self._cookie = list(response.cookies)[0]
197-
if self._cookie == '':
198-
utils.get_yf_logger().debug("list(response.cookies)[0] = ''")
199-
return None
200-
self._save_cookie_basic(self._cookie)
201-
utils.get_yf_logger().debug(f"fetched basic cookie = {self._cookie}")
202-
return self._cookie
203-
204185
def _get_crumb_basic(self, timeout=30):
205186
if self._crumb is not None:
206187
utils.get_yf_logger().debug('reusing crumb')
207188
return self._crumb
208189

209-
cookie = self._get_cookie_basic()
210-
if cookie is None:
211-
return None
212-
190+
self._get_cookie_basic()
213191
# - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
214192
get_args = {
215193
'url': "https://query1.finance.yahoo.com/v1/test/getcrumb",
216194
'headers': self.user_agent_headers,
217-
'cookies': {cookie.name: cookie.value},
218195
'timeout': timeout,
219196
'allow_redirects': True
220197
}
@@ -233,9 +210,9 @@ def _get_crumb_basic(self, timeout=30):
233210

234211
@utils.log_indent_decorator
235212
def _get_cookie_and_crumb_basic(self, timeout):
236-
cookie = self._get_cookie_basic(timeout)
213+
self._get_cookie_basic(timeout)
237214
crumb = self._get_crumb_basic(timeout)
238-
return cookie, crumb
215+
return crumb
239216

240217
def _get_cookie_csrf(self, timeout):
241218
if self._cookie is not None:
@@ -338,7 +315,7 @@ def _get_crumb_csrf(self, timeout=30):
338315

339316
@utils.log_indent_decorator
340317
def _get_cookie_and_crumb(self, timeout=30):
341-
cookie, crumb, strategy = None, None, None
318+
crumb, strategy = None, None
342319

343320
utils.get_yf_logger().debug(f"cookie_mode = '{self._cookie_strategy}'")
344321

@@ -348,16 +325,16 @@ def _get_cookie_and_crumb(self, timeout=30):
348325
if crumb is None:
349326
# Fail
350327
self._set_cookie_strategy('basic', have_lock=True)
351-
cookie, crumb = self._get_cookie_and_crumb_basic(timeout)
328+
crumb = self._get_cookie_and_crumb_basic(timeout)
352329
else:
353330
# Fallback strategy
354-
cookie, crumb = self._get_cookie_and_crumb_basic(timeout)
355-
if cookie is None or crumb is None:
331+
crumb = self._get_cookie_and_crumb_basic(timeout)
332+
if crumb is None:
356333
# Fail
357334
self._set_cookie_strategy('csrf', have_lock=True)
358335
crumb = self._get_crumb_csrf()
359336
strategy = self._cookie_strategy
360-
return cookie, crumb, strategy
337+
return crumb, strategy
361338

362339
@utils.log_indent_decorator
363340
def get(self, url, user_agent_headers=None, params=None, timeout=30):
@@ -382,21 +359,15 @@ def _make_request(self, url, request_method, user_agent_headers=None, body=None,
382359
if 'crumb' in params:
383360
raise Exception("Don't manually add 'crumb' to params dict, let data.py handle it")
384361

385-
cookie, crumb, strategy = self._get_cookie_and_crumb()
362+
crumb, strategy = self._get_cookie_and_crumb()
386363
if crumb is not None:
387364
crumbs = {'crumb': crumb}
388365
else:
389366
crumbs = {}
390-
if strategy == 'basic' and cookie is not None:
391-
# Basic cookie strategy adds cookie to GET parameters
392-
cookies = {cookie.name: cookie.value}
393-
else:
394-
cookies = None
395367

396368
request_args = {
397369
'url': url,
398370
'params': {**params, **crumbs},
399-
'cookies': cookies,
400371
'timeout': timeout,
401372
'headers': user_agent_headers or self.user_agent_headers
402373
}
@@ -412,10 +383,8 @@ def _make_request(self, url, request_method, user_agent_headers=None, body=None,
412383
self._set_cookie_strategy('csrf')
413384
else:
414385
self._set_cookie_strategy('basic')
415-
cookie, crumb, strategy = self._get_cookie_and_crumb(timeout)
386+
crumb, strategy = self._get_cookie_and_crumb(timeout)
416387
request_args['params']['crumb'] = crumb
417-
if strategy == 'basic':
418-
request_args['cookies'] = {cookie.name: cookie.value}
419388
response = request_method(**request_args)
420389
utils.get_yf_logger().debug(f'response code={response.status_code}')
421390

yfinance/multi.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828

2929
import multitasking as _multitasking
3030
import pandas as _pd
31+
from curl_cffi import requests
3132

3233
from . import Ticker, utils
3334
from .data import YfData
@@ -90,6 +91,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
9091
Optional. Always return a MultiIndex DataFrame? Default is True
9192
"""
9293
logger = utils.get_yf_logger()
94+
session = session or requests.Session(impersonate="chrome")
9395

9496
if auto_adjust is None:
9597
# Warn users that default has changed to True

yfinance/scrapers/history.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from math import isclose
77
import time as _time
88
import bisect
9+
from curl_cffi import requests
910

1011
from yfinance import shared, utils
1112
from yfinance.const import _BASE_URL_, _PRICE_COLNAMES_, _SENTINEL_
@@ -19,7 +20,7 @@ def __init__(self, data, ticker, tz, session=None, proxy=_SENTINEL_):
1920
if proxy is not _SENTINEL_:
2021
utils.print_once("YF deprecation warning: set proxy via new config function: yf.set_config(proxy=proxy)")
2122
self._data._set_proxy(proxy)
22-
self.session = session
23+
self.session = session or requests.Session(impersonate="chrome")
2324

2425
self._history_cache = {}
2526
self._history_metadata = None

yfinance/utils.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
import numpy as _np
3636
import pandas as _pd
3737
import pytz as _tz
38-
import requests as _requests
3938
from dateutil.relativedelta import relativedelta
4039
from pytz import UnknownTimeZoneError
4140

@@ -196,7 +195,6 @@ def get_all_by_isin(isin, proxy=const._SENTINEL_, session=None):
196195
# Deferred this to prevent circular imports
197196
from .search import Search
198197

199-
session = session or _requests.Session()
200198
search = Search(query=isin, max_results=1, session=session, proxy=proxy)
201199

202200
# Extract the first quote and news

0 commit comments

Comments
 (0)