Skip to content

Commit 31463cc

Browse files
authored
Feat/190 presentation population communes (#194)
2 parents c5b354f + d00008b commit 31463cc

13 files changed

+170
-74
lines changed

common/utils/source_extractors.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,12 @@ async def download(self) -> AsyncGenerator[ExtractionResult, None]:
4747

4848
is_last = False
4949
url = self.url
50+
is_first_page = True
5051

5152
while not is_last:
5253

5354
# yield the request result
54-
result = await self.download_page(url)
55+
result = await self.download_page(url, is_first_page)
5556

5657
is_last = result.is_last
5758

@@ -60,19 +61,26 @@ async def download(self) -> AsyncGenerator[ExtractionResult, None]:
6061
await asyncio.sleep(60 / self.api_config.throttle)
6162

6263
url = result.next_url
64+
is_first_page = False
6365

6466
logger.debug(f"Next page: {result.next_url}")
6567

6668
yield result
6769

68-
async def download_page(self, url: str) -> ExtractionResult:
70+
async def download_page(self, url: str, is_first_page: bool = False) -> ExtractionResult:
6971
"""Downloads data corresponding to the given source model.
7072
The parameters of the request (URL, headers etc) are set using the inherited set_query_parameters method.
7173
"""
7274

7375
# if url has a query string, ignore the dict-defined parameters
7476
url_querystr = urllib.parse.urlparse(url).query
7577
passed_params = self.model.extract_params if url_querystr == "" else None
78+
79+
# For INSEE Melodi API: add page=1 for first request if not already present
80+
# This ensures the API returns proper pagination metadata (next, isLast)
81+
if is_first_page and passed_params is not None and "page" not in passed_params:
82+
passed_params = {**passed_params, "page": 1}
83+
7684
# logger.info(f"querying '{url}'")
7785

7886
success = False

datasources.yaml

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -345,9 +345,9 @@ domains:
345345
format: json
346346
extract_params:
347347
maxResult: 10000
348-
# TIME_PERIOD: 2021
349-
startPeriod: "2009-01-01"
350-
endPeriod: "2025-10-01"
348+
TIME_PERIOD: 2022
349+
# startPeriod: "2009-01-01"
350+
# endPeriod: "2025-10-01"
351351
GEO: # géographies
352352
- COM # communes
353353
- DEP # départements
@@ -387,8 +387,9 @@ domains:
387387
format: json
388388
extract_params:
389389
maxResult: 10000
390-
startPeriod: "2009-01-01"
391-
endPeriod: "2025-10-01"
390+
TIME_PERIOD: 2022
391+
# startPeriod: "2009-01-01"
392+
# endPeriod: "2025-10-01"
392393
GEO: ["COM", "DEP", "REG"]
393394
RP_MEASURE: DWELLINGS # nombre de logements
394395
L_STAY: _T
@@ -420,8 +421,9 @@ domains:
420421
format: json
421422
extract_params:
422423
maxResult: 10000
423-
startPeriod: "2009-01-01"
424-
endPeriod: "2025-10-01"
424+
TIME_PERIOD: 2022
425+
# startPeriod: "2009-01-01"
426+
# endPeriod: "2025-10-01"
425427
GEO: ["COM", "DEP", "REG"]
426428
RP_MEASURE: DWELLINGS
427429
L_STAY: _T
@@ -450,8 +452,9 @@ domains:
450452
format: json
451453
extract_params:
452454
maxResult: 10000
453-
startPeriod: "2009-01-01"
454-
endPeriod: "2025-10-01"
455+
TIME_PERIOD: 2022
456+
# startPeriod: "2009-01-01"
457+
# endPeriod: "2025-10-01"
455458
GEO: ["COM", "DEP", "REG"]
456459
RP_MEASURE: DWELLINGS
457460
L_STAY: _T
@@ -480,8 +483,9 @@ domains:
480483
format: json
481484
extract_params:
482485
maxResult: 10000
483-
startPeriod: "2009-01-01"
484-
endPeriod: "2025-10-01"
486+
TIME_PERIOD: 2022
487+
# startPeriod: "2009-01-01"
488+
# endPeriod: "2025-10-01"
485489
GEO: ["COM", "DEP", "REG"]
486490
RP_MEASURE: DWELLINGS_ROOMS # nombre de pièces
487491
L_STAY: _T
@@ -697,26 +701,4 @@ domains:
697701
name: unzip_load_csv_files
698702
type: notebook
699703

700-
taux_pauvrete_communes:
701-
API: INSEE.statistiques
702-
description: |
703-
Taux de pauvretés au niveau des communes par seuil
704-
Millésime 2021
705-
type: FileExtractor
706-
endpoint: /fichier/7756855/indic-struct-distrib-revenu-2021-COMMUNES_csv.zip
707-
format: zip
708-
preprocessor:
709-
name: unzip_load_csv_files
710-
type: notebook
711704

712-
taux_pauvrete_supra:
713-
API: INSEE.statistiques
714-
description: |
715-
Taux de pauvretés au niveau des arrondissements, departements, région par seuil
716-
Millésime 2021
717-
type: FileExtractor
718-
endpoint: /fichier/7756855/indic-struct-distrib-revenu-2021-SUPRA_csv.zip
719-
format: zip
720-
preprocessor:
721-
name: unzip_load_csv_files
722-
type: notebook

dbt_odis/models/bronze/_odis_bronze__sources.yml

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,7 @@ sources:
122122

123123

124124
- name: presentation_page_epci
125-
description: Source JSON loadée dans le champ data contenant
126-
loaded_at_field: created_at
127-
128-
- name: presentation_page_population_communes
129-
description: Source JSON loadée dans le champ data contenant
125+
description: Source JSON loadée dans le champ data contenant
130126
loaded_at_field: created_at
131127

132128
- name: services_services
@@ -150,11 +146,7 @@ sources:
150146
loaded_at_field : created_at
151147

152148
- name: population_population_totale
153-
description: Source JSON loadée dans le champ data
154-
loaded_at_field: created_at
155-
156-
- name: population_categorie_socio_pro
157-
description: Source JSON loadée dans le champ data
149+
description: Source JSON loadée dans le champ data
158150
loaded_at_field: created_at
159151

160152
- name: population_by_age

dbt_odis/models/bronze/presentation_page_population_communes.sql

Lines changed: 0 additions & 22 deletions
This file was deleted.

dbt_odis/models/gold/_odis_gold__models.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,3 +592,19 @@ models:
592592
- name: densite
593593
description: densité de population, nombre d'habitants au kilomètre carré.
594594
data_type: numeric
595+
- name: gold_presentation_population_communes
596+
description: |
597+
Table contenant le nombre d'habitants par commune. Les grandes villes sont considérées à l'échelle de la commune, pas de l'arrondissement.
598+
Source: utilise les données de population_population_superficie (DS_RP_SERIE_HISTORIQUE) filtrées pour les communes uniquement.
599+
columns:
600+
- name: codgeo
601+
description: Code INSEE de la commune (différent du code postal)
602+
data_type: text
603+
tests:
604+
- assert_big_cities_exist
605+
- name: population_totale
606+
description: Population totale de la commune d'après les données INSEE
607+
data_type: float
608+
- name: year
609+
description: Année de la mesure
610+
data_type: integer

dbt_odis/models/gold/gold_population_by_age_gender_csp.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ with code_geo_final as (
88
case
99
when A.geocode_type = 'REG' then concat('reg', A.geocode)
1010
else A.geocode end
11-
as codegeo,
11+
as codgeo,
1212
A.time_period as "year",
1313
A.POP,
1414
A.POPH,

dbt_odis/models/gold/gold_population_densite.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ with densite as (
77
select
88
-- ajout du préfixe 'reg' pour les régions, pour les différencier des départements (DEP)
99
case
10-
when codegeo_type = 'REG' then concat('reg', codegeo) else codegeo end
11-
as codegeo,
10+
when codgeo_type = 'REG' then concat('reg', codgeo) else codgeo end
11+
as codgeo,
1212
"year",
1313
-- densité au km², mais superficie exprimée en hectares (1 ha = 0,01 km², d'où le facteur 1e-2)
1414
cast(round(population / nullif(superficie * 1e-2, 0), 0) as int) as densite
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{{ config(
2+
tags = ['gold', 'population', 'presentation'],
3+
alias='vw_presentation_population_communes_gold',
4+
) }}
5+
6+
7+
select
8+
codgeo,
9+
population as population_totale,
10+
year
11+
from {{ ref("stg_population_population_superficie") }}
12+
where codgeo_type = 'COM'

dbt_odis/models/silver/stg_population_population_superficie.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
with pivot_pop_sup as
88
(
99
select
10-
split_part("geo", '-', 1) as codegeo_year,
11-
split_part("geo", '-', 2) as codegeo_type, -- REG, COM, DEP, etc.
12-
split_part("geo", '-', 3) as codegeo,
10+
split_part("geo", '-', 1) as codgeo_year,
11+
split_part("geo", '-', 2) as codgeo_type, -- REG, COM, DEP, etc.
12+
split_part("geo", '-', 3) as codgeo,
1313
time_period as "year",
1414
-- pivot population et superficie
1515
cast(max(case when rp_measure = 'POP' then split_part(measure_value, '.', 1) end) as int) as population,

dbt_odis/seeds/_odis_bronze__seeds.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,26 @@ seeds:
5151
Intitulé_FAP86: text
5252
Code_FAP22: text
5353
Intitulé_FAP22: text
54+
5455
- name: corresp_codes_nuances
5556
description: "Correspondance des codes nuances politiques avec leurs libellés"
5657
config:
5758
quote_columns: true
5859
delimiter: ';'
5960
column_types:
6061
code_nuance: text
61-
libelle: text
62+
libelle: text
63+
64+
- name: insee_top_50_communes_par_habitants
65+
description: "Les 50 communes françaises les plus peuplées (classement par nombre d'habitants)."
66+
config:
67+
quote_columns: true
68+
delimiter: ';'
69+
column_types:
70+
AGE: text
71+
GEO: text
72+
GEO_OBJECT: text
73+
RP_MEASURE: text
74+
SEX: text
75+
TIME_PERIOD: float
76+
OBS_VALUE: float

0 commit comments

Comments
 (0)