-
-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathsources.py
More file actions
23 lines (18 loc) · 798 Bytes
/
sources.py
File metadata and controls
23 lines (18 loc) · 798 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""Utilities for helping with World Bank WDI source name standardization.
Usage:
>>> import os
>>> import json
>>> from worldbank_wdi import CONFIGPATH
>>> from worldbank_wdi.sources import get_unstandardized_source_names
>>> l = get_unstandardized_source_names()
>>> with open(os.path.join(CONFIGPATH, "unstandardized_source_names.json"), "w") as f:
...     json.dump([{"rawName": src, "name": "", "dataPublisherSource": ""} for src in l], f, indent=2)
"""
import os
from typing import List
import pandas as pd
from worldbank_wdi import INPATH
def get_unstandardized_source_names() -> List[str]:
    """Return the distinct raw source names listed in the WDI series file.

    Reads ``WDISeries.csv.zip`` from ``INPATH`` and collects the values of
    its ``Source`` column.

    Returns:
        The non-null, de-duplicated ``Source`` values, sorted in ascending
        order.
    """
    # NOTE(review): the file name ends in ".zip" but it is read as gzip --
    # presumably the file is written gzip-compressed upstream; confirm.
    series_path = os.path.join(INPATH, "WDISeries.csv.zip")
    series = pd.read_csv(series_path, compression="gzip")

    # Drop missing entries first so they take no part in de-duplication or
    # sorting.
    unique_sources = series["Source"].dropna().drop_duplicates()
    return unique_sources.sort_values().tolist()