Skip to content

Commit 9fc1f35

Browse files
authored
Add retries to taxonomy reading. (#205)
When we try to load a taxonomy multiple times concurrently, we run into `FileExistsError`s as the various threads try to cache files at the same location.
1 parent 3692396 commit 9fc1f35

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

src/ferc_xbrl_extractor/arelle_interface.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Abstract away interface to Arelle XBRL Library."""
22

33
import io
4+
import time
45
from pathlib import Path
56
from typing import Literal
67

@@ -12,12 +13,22 @@
1213
from pydantic import BaseModel
1314

1415

15-
def _taxonomy_view(taxonomy_source: str | FileSource.FileSource):
16+
def _taxonomy_view(taxonomy_source: str | FileSource.FileSource, max_retries: int = 7):
1617
"""Actually use Arelle to get a taxonomy and its relationships."""
1718
cntlr = Cntlr.Cntlr()
1819
cntlr.startLogging(logFileName="logToPrint")
1920
model_manager = ModelManager.initialize(cntlr)
20-
taxonomy = ModelXbrl.load(model_manager, taxonomy_source)
21+
# Retry the load when it fails with FileExistsError, which happens when
# several concurrent loaders try to cache files at the same location.
for try_count in range(max_retries):
    try:
        cntlr.logger.debug(f"Try #{try_count}: {taxonomy_source=}")
        taxonomy = ModelXbrl.load(model_manager, taxonomy_source)
    except FileExistsError:
        # Out of attempts: re-raise the active exception unchanged.
        if try_count + 1 == max_retries:
            raise
        # Exponential backoff before the next attempt: 2s, 4s, 8s, ...
        backoff = 2 ** (try_count + 1)
        cntlr.logger.warning(f"Failed try #{try_count}, retrying in {backoff}s")
        time.sleep(backoff)
    else:
        # The load succeeded, so stop looping. Using `continue` here would
        # reload the taxonomy on every remaining iteration.
        break
else:
    # Reached only when the loop body never ran (max_retries < 1); fail
    # clearly instead of leaving `taxonomy` unbound for the code below.
    raise ValueError(f"max_retries must be at least 1, got {max_retries}")
2132

2233
view = ViewRelationshipSet(taxonomy, "taxonomy.json", "roles", None, None, None)
2334
view.view(XbrlConst.parentChild, None, None, None)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import concurrent.futures
2+
from unittest.mock import patch
3+
4+
from arelle import Cntlr
5+
6+
from ferc_xbrl_extractor.arelle_interface import load_taxonomy
7+
8+
9+
def test_concurrent_taxonomy_load(tmp_path):
    """Check that two threads can load the same taxonomy concurrently.

    Both loads share a single Arelle controller whose web cache directory is
    set to ``tmp_path`` and cleared first, so the two threads cache the same
    files at the same location.
    """
    cntlr = Cntlr.Cntlr()
    cntlr.webCache.cacheDir = str(tmp_path)
    cntlr.webCache.clear()
    path = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd"
    # Make every controller created inside arelle_interface be the one
    # configured above, so both loads use the same cache directory.
    with patch("ferc_xbrl_extractor.arelle_interface.Cntlr.Cntlr", lambda: cntlr):
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            futures = [executor.submit(load_taxonomy, path) for _ in range(2)]
            done, not_done = concurrent.futures.wait(
                futures, timeout=10, return_when=concurrent.futures.ALL_COMPLETED
            )
            # wait() hands back unfinished futures once the timeout expires;
            # ignoring them would let the test pass without any load finishing.
            assert not not_done, f"{len(not_done)} load(s) still pending after 10s"
            errors = [exc for fut in done if (exc := fut.exception()) is not None]
            assert not errors, f"concurrent taxonomy loads raised: {errors!r}"

0 commit comments

Comments
 (0)