Skip to content

Commit 9529b10

Browse files
committed
do: update concurrent sirene API fetcher
1 parent 4e5d56b commit 9529b10

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

services/public_apis/insee/main.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import math
2+
import json
3+
import asyncio
4+
import aiohttp
5+
import threading
6+
7+
API_URL = "https://api.insee.fr/entreprises/sirene/V3.11/siren"
8+
SECRETS_JSON = "services/public_apis/insee/secrets.insee.sirene_data_provider.json"
9+
10+
RESPONSE_SIZE = 1000
11+
12+
13+
def print_thread_count():
14+
print(f"Active Threads: {len(threading.enumerate())}")
15+
print(f"Threads: {[thread.name for thread in threading.enumerate()]}")
16+
17+
18+
async def fetch_data_chunk(
19+
session: aiohttp.ClientSession,
20+
headers: dict,
21+
begin_at: int = 0,
22+
response_size: int = RESPONSE_SIZE,
23+
):
24+
25+
url = f"{API_URL}?debut={begin_at}&nombre={response_size}"
26+
27+
async with session.get(url=url, headers=headers) as response:
28+
29+
return await response.text()
30+
31+
32+
async def main():
33+
with open(SECRETS_JSON) as secrets_file:
34+
token = json.load(secrets_file)["token"]
35+
36+
headers = {
37+
"Authorization": f"Bearer {token}",
38+
"Accept": "application/json",
39+
}
40+
async with aiohttp.ClientSession() as session:
41+
async with session.get(url=API_URL, headers=headers) as response:
42+
print(f"Status: {response.status}")
43+
44+
total_number_of_rows = int(response.headers["X-Total-Count"])
45+
nb_of_chunks = math.ceil(total_number_of_rows / RESPONSE_SIZE)
46+
47+
# TEMP limiter of queries
48+
if nb_of_chunks > 10:
49+
nb_of_chunks = 10
50+
51+
print_thread_count()
52+
53+
tasks = [
54+
fetch_data_chunk(session=session, headers=headers, begin_at=chunk * RESPONSE_SIZE)
55+
for chunk in range(nb_of_chunks)
56+
]
57+
58+
print_thread_count()
59+
60+
responses = await asyncio.gather(*tasks)
61+
62+
print_thread_count()
63+
64+
print(f"Number of responses : {len(responses)}")
65+
66+
# for payload in responses:
67+
# print(payload)
68+
print("END")
69+
70+
71+
print_thread_count()
72+
asyncio.run(main())

0 commit comments

Comments
 (0)