Version 0.1.1

mwouts · web-flow · commit 5f2a872cd0b7 · 2019-04-09T02:22:12.000+02:00
**BugFixes** - Fixed `simplify_index` when the data is a scalar - Non-WDI indicators can now be loaded (#4) - Package also works under Python 2.7 (#1)
diff --git a/.travis.yml b/.travis.yml
@@ -3,6 +3,9 @@ language: python
 python:
   - "3.6"
   - "3.7"
+  - "2.7"
+  - "3.4"
+  - "3.5"
 install:
   # command to install dependencies
   - pip install -r requirements-dev.txt
diff --git a/HISTORY.md b/HISTORY.md
@@ -0,0 +1,16 @@
+Release History
+===============
+
+0.1.1 (2019-04-09)
+------------------
+
+**BugFixes**
+
+- Fixed `simplify_index` when the data is a scalar
+- Non-WDI indicators can now be loaded (#4)
+- Package also works under Python 2.7 (#1)
+
+0.1.0 (2019-04-06)
+------------------
+
+Initial release
diff --git a/README.md b/README.md
@@ -100,7 +100,7 @@ Go to our Binder and run either this [README](https://mybinder.org/v2/gh/mwouts/
 
 ## The World Bank
 
-The [World Bank](https://www.worldbank.org/) Data has a [Data Catalog](https://datacatalog.worldbank.org/), and an interactive [data explorer](https://data.worldbank.org/indicator/sp.pop.totl).
+The [World Bank](https://www.worldbank.org/) has a [Data Catalog](https://datacatalog.worldbank.org/), and an interactive [data explorer](https://data.worldbank.org/indicator/sp.pop.totl).
 
 Third party applications that allow to access the data from various languages are listed [here](https://data.worldbank.org/products/third-party-apps).
 
@@ -110,9 +110,10 @@ The World Bank data is also available in Google's [Data Explorer](https://data.w
 
 ## Python
 
-Alternatively to `world_bank_data`, Python users may find useful the following two packages:
+Alternatively to `world_bank_data`, Python users may find useful the following packages:
 - [`wbpy`](https://github.com/mattduck/wbpy/blob/master/README.rst), nicely documented but last released in 2013.
 - [`wbdata`](https://github.com/oliversherouse/wbdata/blob/master/README.rst), which works well.
+- [`pandas_datareader`](https://pandas-datareader.readthedocs.io/en/latest/readers/world-bank.html)
 
 The reason for which I wrote `world_bank_data` is mostly speed, e.g. I wanted to use the latest version of the World Bank API (v2) and benefit from significant speed improvements. Reimplementing the API also gave me a finer control on the mapping of options.
 
@@ -128,7 +129,7 @@ See also the [Introduction to the wbstats R-package](https://cran.r-project.org/
 
 ## Country and indicator description in non-English languages
 
-The World Bank describes their sources and indicators in a other languages than English. Use either the `language` argument in each of `get_countries`, `get_indicators`, etc, or change the default globally:
+The World Bank describes their sources and indicators in other languages than English. Use either the `language` argument in each of `get_countries`, `get_indicators`, etc, or change the default globally:
 
 ```python
 wb.options.language = 'vi'
diff --git a/examples/A sunburst plot of the world population.ipynb b/examples/A sunburst plot of the world population.ipynb
diff --git a/examples/A sunburst plot of the world population.py b/examples/A sunburst plot of the world population.py
@@ -20,18 +20,24 @@
 
 # +
 import pandas as pd
-import urllib
 import mock
 import plotly.offline as offline
 import world_bank_data as wb
 
+try:
+    # Python 3.6
+    from urllib.request import urlopen
+except ImportError:
+    # Python 2.7
+    from urllib import urlopen
+
 # Only show head and tail of dataframes
 pd.set_option('display.max_rows', 6)
 
 
 # Plotly.js in version 1.46.1
 def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'):
-    return urllib.request.urlopen(url).read().decode('utf-8')
+    return urlopen(url).read().decode('utf-8')
 
 
 with mock.patch('plotly.offline.offline.get_plotlyjs', get_latest_plotlyjs):
@@ -80,7 +86,9 @@ def get_latest_plotlyjs(url='https://cdn.plot.ly/plotly-1.46.1.min.js'):
 
 # And now we can plot the World Population
 offline.iplot(dict(
-    data=[dict(type='sunburst', **all_levels, hoverinfo='text')],
+    data=[dict(type='sunburst', hoverinfo='text', **all_levels)],
     layout=dict(title='World Population (World Bank, 2017)<br>Click on a region to zoom',
                 width=800, height=800)),
     validate=False)
+
+
diff --git a/setup.py b/setup.py
@@ -33,6 +33,9 @@
                  'Intended Audience :: Education',
                  'Intended Audience :: Science/Research',
                  'Programming Language :: Python',
+                 'Programming Language :: Python :: 2.7',
+                 'Programming Language :: Python :: 3.4',
+                 'Programming Language :: Python :: 3.5',
                  'Programming Language :: Python :: 3.6',
                  'Programming Language :: Python :: 3.7')
 )
diff --git a/tests/test_country.py b/tests/test_country.py
@@ -25,7 +25,7 @@ def test_country_language():
 
 def test_two_countries():
     cnt = get_countries(['FRA', 'ITA'])
-    assert cnt.index.to_list() == ['FRA', 'ITA']
+    assert set(cnt.index) == set(['FRA', 'ITA'])
     assert cnt.latitude.dtype == float
     assert_numeric_or_string(cnt)
 
diff --git a/tests/test_indicator.py b/tests/test_indicator.py
@@ -1,4 +1,5 @@
 import pytest
+import numbers
 from world_bank_data import get_indicators, get_series
 from .tools import assert_numeric_or_string
 
@@ -26,6 +27,15 @@ def test_indicators_topic():
     assert_numeric_or_string(idx)
 
 
+def test_indicators_source():
+    idx = get_indicators(source=11)
+    assert len(idx.index) < 2000
+    assert_numeric_or_string(idx)
+
+    with pytest.raises(ValueError):
+        get_indicators(source=21)
+
+
 def test_indicator_most_recent_value():
     idx = get_series('SP.POP.TOTL', mrv=1)
     assert len(idx.index) > 200
@@ -36,6 +46,12 @@ def test_indicator_most_recent_value():
     assert_numeric_or_string(idx_mrv5)
 
 
+def test_non_wdi_indicator():
+    idx = get_series('TX.VAL.MRCH.CD.WB', mrv=1)
+    assert len(idx.index) > 50
+    assert_numeric_or_string(idx)
+
+
 def test_indicator_use_id():
     idx = get_series('SP.POP.TOTL', mrv=1, id_or_value='id', simplify_index=True)
     assert len(idx.index) > 200
@@ -44,12 +60,29 @@ def test_indicator_use_id():
     assert idx.index.names == ['Country']
 
 
+def test_indicator_simplify_scalar():
+    pop = get_series('SP.POP.TOTL', 'CHN', mrv=1, simplify_index=True)
+    assert isinstance(pop, numbers.Number)
+
+
 def test_indicator_date():
     idx = get_series('SP.POP.TOTL', date='2010:2018')
     assert len(idx.index) > 200 * 8
     assert_numeric_or_string(idx)
 
 
+def test_indicator_values():
+    idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True).sort_values(ascending=False)
+    assert len(idx.index) > 200
+    assert idx.index.values[0] == 'World'
+    assert idx.iloc[0] == 7530360149.0
+
+    idx = get_series('SP.POP.TOTL', date='2017', simplify_index=True, id_or_value='id').sort_values(ascending=False)
+    assert len(idx.index) > 200
+    assert idx.index.values[0] == 'WLD'
+    assert idx.iloc[0] == 7530360149.0
+
+
 @pytest.mark.skip('jsonstat format not supported here')
 def test_indicator_monthly():
     idx = get_series('DPANUSSPB', country=['CHN', 'BRA'], date='2012M01:2012M08')
diff --git a/tests/test_language.py b/tests/test_language.py
@@ -1,14 +1,16 @@
+# -*- coding: utf-8 -*-
+
 import re
 import mock
 from world_bank_data import search_countries
 
 
 def test_language():
     assert search_countries(re.compile('ES')).name[0] == 'Spain'
-    assert search_countries(re.compile('ES'), language='es').name[0] == 'España'
+    assert search_countries(re.compile('ES'), language='es').name[0] == u'España'
 
 
 def test_language_through_options():
     assert search_countries(re.compile('ES')).name[0] == 'Spain'
     with mock.patch('world_bank_data.options.language', 'es'):
-        assert search_countries(re.compile('ES')).name[0] == 'España'
+        assert search_countries(re.compile('ES')).name[0] == u'España'
diff --git a/tests/test_others.py b/tests/test_others.py
@@ -5,27 +5,43 @@
 def test_lending_types():
     df = get_lendingtypes()
     assert df.index.names == ['id']
-    assert df.columns.to_list() == ['iso2code', 'value']
+    assert set(df.columns) == set(['iso2code', 'value'])
     assert_numeric_or_string(df)
 
 
 def test_income_levels():
     df = get_incomelevels()
     assert df.index.names == ['id']
-    assert df.columns.to_list() == ['iso2code', 'value']
+    assert set(df.columns) == set(['iso2code', 'value'])
     assert_numeric_or_string(df)
 
 
 def test_topics():
     df = get_topics()
     assert df.index.names == ['id']
-    assert df.columns.to_list() == ['value', 'sourceNote']
+    assert set(df.columns) == set(['value', 'sourceNote'])
     assert_numeric_or_string(df)
 
 
 def test_sources():
     df = get_sources()
     assert df.index.names == ['id']
-    assert df.columns.to_list() == ['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability',
-                                    'metadataavailability', 'concepts']
+    assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability',
+                                   'metadataavailability', 'concepts'])
+    assert_numeric_or_string(df)
+
+
+def test_sources_int():
+    df = get_sources(11)
+    assert df.index.names == ['id']
+    assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability',
+                                   'metadataavailability', 'concepts'])
+    assert_numeric_or_string(df)
+
+
+def test_sources_two_int():
+    df = get_sources([11, 36])
+    assert df.index.names == ['id']
+    assert set(df.columns) == set(['lastupdated', 'name', 'code', 'description', 'url', 'dataavailability',
+                                   'metadataavailability', 'concepts'])
     assert_numeric_or_string(df)
diff --git a/tests/test_region.py b/tests/test_region.py
@@ -24,7 +24,7 @@ def test_one_region_list():
 def test_two_regions():
     reg = get_regions(['AFR', 'ANR'])
     assert 'id' not in reg.columns
-    assert reg.index.to_list() == ['AFR', 'ANR']
+    assert set(reg.index) == set(['AFR', 'ANR'])
     assert_numeric_or_string(reg)
 
 
diff --git a/tests/tools.py b/tests/tools.py
@@ -5,9 +5,8 @@ def assert_numeric_or_string(x):
     """Make sure that the Series or Dataframe in argument only contains simple types"""
     if isinstance(x, pd.Series):
         if x.dtype.kind not in ['i', 'f']:
-            for y in x:
-                assert isinstance(y, str), "Series '{}' is expected to contain " \
-                                           'only numeric or string types, found {}'.format(x.name, y)
+            assert x.apply(type).isin([type(u''), type('')]).all(), \
+                "Series '{}' is neither numeric nor strings".format(x.name)
     else:
         for col in x:
             assert_numeric_or_string(x[col])
diff --git a/world_bank_data/indicator.py b/world_bank_data/indicator.py
@@ -2,9 +2,9 @@
 
 import numpy as np
 import pandas as pd
+import world_bank_data.options as options
 from .request import wb_get, wb_get_table
 from .search import search
-import world_bank_data.options as options
 
 
 def get_indicators(indicator=None, language=None, id_or_value=None, **params):
@@ -37,9 +37,10 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False,
     :param params: Additional parameters for the World Bank API, like date or mrv"""
 
     id_or_value = id_or_value or options.id_or_value
+    params['format'] = 'jsonstat'
 
-    idx = wb_get('country', country, 'indicator', indicator, data_format='jsonstat', **params)
-    idx = idx['WDI']
+    idx = wb_get('country', country, 'indicator', indicator, **params)
+    _, idx = idx.popitem()
 
     dimension = idx.pop('dimension')
     value = idx.pop('value')
@@ -53,9 +54,15 @@ def get_series(indicator, country=None, id_or_value=None, simplify_index=False,
         index = [dim for dim in index if len(dim) != 1]
 
     if len(index) > 1:
+        # Our series is indexed by a multi-index
         index = pd.MultiIndex.from_product(index, names=[dim.name for dim in index])
-    else:
+    elif len(index) == 1:
+        # A simple index is enough
         index = index[0]
+    else:
+        # Index has dimension zero. Data should be a scalar
+        assert len(value) == 1, 'Data has no dimension and was expected to be a scalar'
+        return value[0]
 
     return pd.Series(value, index=index, name=indicator)
 
@@ -65,14 +72,14 @@ def _parse_category(cat, use_labels):
     cat = cat['category']
 
     index = np.array(list(cat['index'].values()))
-    assert np.array_equal(index, np.arange(len(index))), 'Index should be ordered. Please use Python 3.6 or above.'
-
     codes = np.array(list(cat['index'].keys()))
+
+    codes = pd.Series(codes, index=index, name=name).sort_index()
     if not use_labels:
-        return pd.Series(codes, index=index, name=name)
+        return codes
 
     codes2 = np.array(list(cat['label'].keys()))
-    assert np.array_equal(codes, codes2), 'Codes should be identical'
-
     labels = np.array(list(cat['label'].values()))
-    return pd.Series(labels, index=index, name=name)
+    labels = pd.Series(labels, index=codes2, name=name).sort_index()
+
+    return pd.Series(labels.loc[codes].values, index=codes.index, name=name)
diff --git a/world_bank_data/request.py b/world_bank_data/request.py
diff --git a/world_bank_data/version.py b/world_bank_data/version.py