Skip to content

Commit ba8da2f

Browse files
Add config option to define database update frequency (#540)
* First attempt to solve the bug * Address suggestions * Fix failing unit test * Few tweaks * Nit changes * Address requested changes * Address requested changes * Nit change * Nit change * Address review --------- Co-authored-by: Martin Molinero <[email protected]>
1 parent 6c6fa92 commit ba8da2f

File tree

4 files changed

+106
-5
lines changed

4 files changed

+106
-5
lines changed

Diff for: lean/components/cloud/data_downloader.py

+46-3
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,38 @@ def _store_local_file(file_content: bytes, file_path: Path):
2727
f.write(file_content)
2828

2929

30+
def parse_timedelta(database_update_frequency: str):
    """Parse the value of the database-update-frequency option into a timedelta.

    Accepted formats are ``DD.HH:MM:SS`` and, for spans shorter than a day, ``HH:MM:SS``.

    :param database_update_frequency: the raw string value of the config option
    :return: the parsed timedelta, or None when the value is a non-date placeholder, i.e. it has no
        '.' or ':' separator or contains no digit at all (for example "-", "_" or "...")
    :raises ValueError: if the value looks like a timespan but its components are missing or not integers
    """
    has_separator = "." in database_update_frequency or ":" in database_update_frequency
    has_digit = any(char.isdigit() for char in database_update_frequency)
    # Placeholders such as "..." or ":" carry separators but no numbers; treat them like "-" or "_"
    # instead of failing while unpacking the components.
    if not has_separator or not has_digit:
        return None

    if database_update_frequency.count(".") == 1:  # Ideally, the format is DD.HH:MM:SS
        days_component, time_component = database_update_frequency.split(".")
        days = int(days_component)
    else:  # However, the format can also be HH:MM:SS
        days = 0
        time_component = database_update_frequency

    # Unpacking raises ValueError unless there are exactly three integer components.
    hours, minutes, seconds = map(int, time_component.split(":"))
    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
41+
42+
3043
class DataDownloader:
3144
"""The DataDownloader is responsible for downloading data from QuantConnect Datasets."""
3245

33-
def __init__(self, logger: Logger, api_client: APIClient, lean_config_manager: LeanConfigManager):
46+
def __init__(self,
             logger: Logger,
             api_client: APIClient,
             lean_config_manager: LeanConfigManager,
             database_update_frequency: str = None):
    """Creates a new DataDownloader instance.

    :param logger: the logger to use to log messages with
    :param api_client: the APIClient instance to use when communicating with the QuantConnect API
    :param lean_config_manager: the LeanConfigManager instance to retrieve the data directory from
    :param database_update_frequency: the value of the config option database-update-frequency,
        or None when the user has not set it
    """
    self._logger = logger
    self._api_client = api_client
    self._lean_config_manager = lean_config_manager
    # Stored as the raw option string; it is only parsed when the database files are updated.
    self.database_update_frequency = database_update_frequency
4362

4463
def update_database_files(self):
4564
"""Will update lean data folder database files if required
@@ -49,9 +68,21 @@ def update_database_files(self):
4968
now = datetime.now()
5069
config = self._lean_config_manager.get_lean_config()
5170
last_update = config["file-database-last-update"] if "file-database-last-update" in config else ''
52-
if not last_update or now - datetime.strptime(last_update, '%m/%d/%Y') > timedelta(days=1):
71+
72+
# The last update date can be in '%m/%d/%Y'(old format) or '%m/%d/%Y %H:%M:%S'(new format)
73+
last_update = self.parse_last_update_date(last_update)
74+
if self.database_update_frequency is None: # The user has not set this parameter yet
75+
self.database_update_frequency = "1.00:00:00"
76+
77+
frequency = parse_timedelta(self.database_update_frequency)
78+
if not frequency:
79+
self._logger.debug(f"Skipping database-update-frequency, frequency is:"
80+
f" {str(self.database_update_frequency)}")
81+
return
82+
self._logger.debug(f"database-update-frequency is: {str(frequency)}")
83+
if not last_update or now - last_update > frequency:
5384
data_dir = self._lean_config_manager.get_data_directory()
54-
self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y')})
85+
self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y %H:%M:%S')})
5586

5687
_store_local_file(self._api_client.data.download_public_file(
5788
"https://raw.githubusercontent.com/QuantConnect/Lean/master/Data/symbol-properties/symbol-properties-database.csv"),
@@ -64,6 +95,9 @@ def update_database_files(self):
6495
pass
6596
else:
6697
self._logger.error(str(e))
98+
except ValueError as e:
99+
self._logger.debug(f"Value of config option database-update-frequency is invalid: {str(e)}. "
100+
f"Database update will be skipped")
67101
except Exception as e:
68102
self._logger.error(str(e))
69103

@@ -113,6 +147,15 @@ def _process_bulk(self, file: Path, destination: Path):
113147
from os import remove
114148
remove(file)
115149

150+
def parse_last_update_date(self, last_update_date: str) -> "datetime | None":
    """Parse the stored last-update timestamp of the database files.

    :param last_update_date: the stored value, either in '%m/%d/%Y' (old format)
        or '%m/%d/%Y %H:%M:%S' (new format); may be empty or None when nothing is stored
    :return: the parsed datetime, or None when the value is empty or matches neither format
    """
    # Nothing stored yet: report "no last update" without handing a non-string to strptime.
    if not last_update_date:
        return None

    for fmt in ('%m/%d/%Y', '%m/%d/%Y %H:%M:%S'):
        try:
            return datetime.strptime(last_update_date, fmt)
        except ValueError:
            # Not this format, try the next one.
            continue

    return None
158+
116159
def remove_suffix(self,input_string, suffix):
117160
if suffix and input_string.endswith(suffix):
118161
return input_string[:-len(suffix)]

Diff for: lean/components/config/cli_config_manager.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,23 @@ def __init__(self, general_storage: Storage, credentials_storage: Storage) -> No
5555
f"The Docker image used when running the research environment ({DEFAULT_RESEARCH_IMAGE} if not set).",
5656
False,
5757
general_storage)
58+
self.database_update_frequency = Option("database-update-frequency",
59+
"How often the databases are updated. "
60+
"The format is DD.HH:MM:SS. If the frequency "
61+
"is less than a day can just be HH:MM:SS. "
62+
"Update can be disabled by setting this option to a non-date"
63+
" value (-, _, ..., etc.). "
64+
"If unset, default value is 1 day",
65+
False,
66+
general_storage)
5867

5968
self.all_options = [
6069
self.user_id,
6170
self.api_token,
6271
self.default_language,
6372
self.engine_image,
64-
self.research_image
73+
self.research_image,
74+
self.database_update_frequency
6575
]
6676

6777
def get_option_by_key(self, key: str) -> Option:

Diff for: lean/container.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,10 @@ def initialize(self,
141141
self.project_manager,
142142
self.project_config_manager,
143143
self.organization_manager)
144-
self.data_downloader = DataDownloader(self.logger, self.api_client, self.lean_config_manager)
144+
self.data_downloader = DataDownloader(self.logger,
145+
self.api_client,
146+
self.lean_config_manager,
147+
self.cli_config_manager.database_update_frequency.get_value())
145148
self.cloud_project_manager = CloudProjectManager(self.api_client,
146149
self.project_config_manager,
147150
self.pull_manager,

Diff for: tests/commands/config/test_set.py

+45
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
from click.testing import CliRunner
1515

1616
from lean.commands import lean
17+
from lean.components.cloud.data_downloader import parse_timedelta
1718
from lean.container import container
19+
from datetime import timedelta
20+
import pytest
1821

1922

2023
def test_config_set_updates_the_value_of_the_option() -> None:
@@ -25,6 +28,48 @@ def test_config_set_updates_the_value_of_the_option() -> None:
2528
assert container.cli_config_manager.user_id.get_value() == "12345"
2629

2730

31+
@pytest.mark.parametrize("raw_frequency, expected", [
    # Values without a '.' or ':' separator are not timespans and parse to None.
    ("_", None),
    ("=", None),
    # Single-digit components, with and without the days part.
    ("1.0:0:0", timedelta(days=1)),
    ("0.1:0:0", timedelta(hours=1)),
    ("0.0:1:0", timedelta(minutes=1)),
    ("0.0:0:1", timedelta(seconds=1)),
    ("1:0:0", timedelta(hours=1)),
    ("0:1:0", timedelta(minutes=1)),
    ("0:0:1", timedelta(seconds=1)),
    # Zero-padded components.
    ("01.00:00:00", timedelta(days=1)),
    ("00.01:00:00", timedelta(hours=1)),
    ("00.00:01:00", timedelta(minutes=1)),
    ("00.00:00:01", timedelta(seconds=1)),
    ("01:00:00", timedelta(hours=1)),
    ("00:01:00", timedelta(minutes=1)),
    ("00:00:01", timedelta(seconds=1)),
    # Mixed padded and unpadded components.
    ("1.00:00:00", timedelta(days=1)),
    ("00.1:00:00", timedelta(hours=1)),
    ("00.00:1:00", timedelta(minutes=1)),
    ("00.00:00:1", timedelta(seconds=1)),
    ("1:00:00", timedelta(hours=1)),
    ("00:1:00", timedelta(minutes=1)),
    ("00:00:1", timedelta(seconds=1)),
    # Several non-zero components at once.
    ("00.1:00:1", timedelta(hours=1, seconds=1)),
    ("00.1:1:1", timedelta(hours=1, minutes=1, seconds=1)),
    ("1:00:1", timedelta(hours=1, seconds=1)),
    ("1:1:1", timedelta(hours=1, minutes=1, seconds=1)),
    ("1.1:1:1", timedelta(days=1, hours=1, minutes=1, seconds=1)),
    ("10.20:30:40", timedelta(days=10, hours=20, minutes=30, seconds=40)),
    ("30.23:59:59", timedelta(days=30, hours=23, minutes=59, seconds=59)),
    ("60.23:59:59", timedelta(days=60, hours=23, minutes=59, seconds=59)),
    ("20:30:40", timedelta(hours=20, minutes=30, seconds=40)),
    ("00:59:59", timedelta(minutes=59, seconds=59)),
    ("00:00:59", timedelta(seconds=59)),
])
def test_set_database_update_frequency_works_with_different_timespans(raw_frequency: str, expected: timedelta) -> None:
    """The option accepts each raw value through the CLI and parse_timedelta maps it to the expected result."""
    result = CliRunner().invoke(lean, ["config", "set", "database-update-frequency", raw_frequency])

    assert result.exit_code == 0

    frequency = parse_timedelta(raw_frequency)
    assert frequency == expected
71+
72+
2873
def test_config_set_aborts_when_no_option_with_given_key_exists() -> None:
2974
result = CliRunner().invoke(lean, ["config", "set", "this-option-does-not-exist", "value"])
3075

0 commit comments

Comments
 (0)