Skip to content

Commit 64a575d

Browse files
authored
feat(geopandas support): return GeoDataFrame if geopandas is installed (#143)
* update(nldi.py): Add CRS information--"EPSG:4236"--to GeoDataFrame objects * feat(geopandas support): return GeoDataFrame if geopandas is installed. * Update tests for geopandas updates
1 parent c434b13 commit 64a575d

File tree

2 files changed

+112
-25
lines changed

2 files changed

+112
-25
lines changed

dataretrieval/nwis.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323

2424
from .utils import query
2525

26+
try:
27+
import geopandas as gpd
28+
except ImportError:
29+
gpd = None
30+
2631
WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/'
2732
WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/'
2833
WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/'
@@ -38,6 +43,7 @@
3843
'water_use',
3944
'ratings',
4045
]
46+
_CRS = "EPSG:4236"
4147

4248

4349
def format_response(
@@ -71,6 +77,14 @@ def format_response(
7177
if service == 'peaks':
7278
df = preformat_peaks_response(df)
7379

80+
if gpd is not None:
81+
if "dec_lat_va" in list(df):
82+
geoms = gpd.points_from_xy(
83+
df.dec_long_va.values,
84+
df.dec_lat_va.values
85+
)
86+
df = gpd.GeoDataFrame(df, geometry=geoms, crs=_CRS)
87+
7488
# check for multiple sites:
7589
if 'datetime' not in df.columns:
7690
# XXX: consider making site_no index

tests/waterservices_test.py

Lines changed: 98 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
)
2323
from dataretrieval.utils import NoSitesError
2424

25+
try:
26+
import geopandas as gpd
27+
except ImportError:
28+
gpd = None
2529

2630
def test_query_waterdata_validation():
2731
"""Tests the validation parameters of the query_waterservices method"""
@@ -80,7 +84,10 @@ def test_get_dv(requests_mock):
8084
response_file_path = 'data/waterservices_dv.txt'
8185
mock_request(requests_mock, request_url, response_file_path)
8286
df, md = get_dv(sites=["01491000", "01645000"], start='2020-02-14', end='2020-02-15')
83-
assert type(df) is DataFrame
87+
88+
if not isinstance(df, DataFrame):
89+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
90+
8491
assert df.size == 8
8592
assert_metadata(requests_mock, request_url, md, site, None, format)
8693

@@ -99,7 +106,9 @@ def test_get_dv_site_value_types(requests_mock, site_input_type_list):
99106
else:
100107
sites = site
101108
df, md = get_dv(sites=sites, start='2020-02-14', end='2020-02-15')
102-
assert type(df) is DataFrame
109+
if not isinstance(df, DataFrame):
110+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
111+
103112
assert df.size == 8
104113

105114

@@ -112,7 +121,9 @@ def test_get_iv(requests_mock):
112121
response_file_path = 'data/waterservices_iv.txt'
113122
mock_request(requests_mock, request_url, response_file_path)
114123
df, md = get_iv(sites=["01491000", "01645000"], start='2019-02-14', end='2020-02-15')
115-
assert type(df) is DataFrame
124+
if not isinstance(df, DataFrame):
125+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
126+
116127
assert df.size == 563380
117128
assert md.url == request_url
118129
assert_metadata(requests_mock, request_url, md, site, None, format)
@@ -132,7 +143,8 @@ def test_get_iv_site_value_types(requests_mock, site_input_type_list):
132143
else:
133144
sites = site
134145
df, md = get_iv(sites=sites, start='2019-02-14', end='2020-02-15')
135-
assert type(df) is DataFrame
146+
if not isinstance(df, DataFrame):
147+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
136148
assert df.size == 563380
137149
assert md.url == request_url
138150

@@ -142,15 +154,26 @@ def test_get_info(requests_mock):
142154
Tests get_info method correctly generates the request url and returns the result in a DataFrame.
143155
Note that only sites and format are passed as query params
144156
"""
157+
size = 24
145158
format = "rdb"
146159
site = '01491000%2C01645000'
147160
parameter_cd = "00618"
148161
request_url = 'https://waterservices.usgs.gov/nwis/site?sites={}&parameterCd={}&siteOutput=Expanded&format={}'.format(site, parameter_cd, format)
149162
response_file_path = 'data/waterservices_site.txt'
150163
mock_request(requests_mock, request_url, response_file_path)
151164
df, md = get_info(sites=["01491000", "01645000"], parameterCd="00618")
152-
assert type(df) is DataFrame
153-
assert df.size == 24
165+
if not isinstance(df, DataFrame):
166+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
167+
168+
if "geometry" in list(df):
169+
geom_type = df.geom_type.unique()
170+
if len(geom_type) > 1 or geom_type[0] != "Point":
171+
raise AssertionError(
172+
f"Geometry type {geom_type} not valid, expecting Point"
173+
)
174+
size += len(df)
175+
176+
assert df.size == size
154177
assert md.url == request_url
155178
assert_metadata(requests_mock, request_url, md, site, [parameter_cd], format)
156179

@@ -167,7 +190,19 @@ def test_get_qwdata(requests_mock):
167190
mock_request(requests_mock, request_url, response_file_path)
168191
with pytest.warns(DeprecationWarning):
169192
df, md = get_qwdata(sites=["01491000", "01645000"])
170-
assert type(df) is DataFrame
193+
if not isinstance(df, DataFrame):
194+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
195+
196+
if "geometry" in list(df):
197+
if not isinstance(df, gpd.GeoDataFrame):
198+
raise AssertionError(f"{type(df)} is not a GeoDataFrame")
199+
200+
geom_type = df.geom_type.unique()
201+
if len(geom_type) > 1 or geom_type[0] != "Point":
202+
raise AssertionError(
203+
f"Geometry type {geom_type} not valid, expecting Point"
204+
)
205+
171206
assert df.size == 1821472
172207
assert_metadata(requests_mock, request_url, md, site, None, format)
173208

@@ -202,7 +237,9 @@ def test_get_gwlevels(requests_mock):
202237
response_file_path = 'data/waterservices_gwlevels.txt'
203238
mock_request(requests_mock, request_url, response_file_path)
204239
df, md = get_gwlevels(sites=[site])
205-
assert type(df) is DataFrame
240+
if not isinstance(df, DataFrame):
241+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
242+
206243
assert df.size == 16
207244
assert_metadata(requests_mock, request_url, md, site, None, format)
208245

@@ -221,7 +258,8 @@ def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list):
221258
else:
222259
sites = site
223260
df, md = get_gwlevels(sites=sites)
224-
assert type(df) is DataFrame
261+
if not isinstance(df, DataFrame):
262+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
225263
assert df.size == 16
226264

227265

@@ -234,7 +272,9 @@ def test_get_discharge_peaks(requests_mock):
234272
response_file_path = 'data/waterservices_peaks.txt'
235273
mock_request(requests_mock, request_url, response_file_path)
236274
df, md = get_discharge_peaks(sites=[site], start='2000-02-14', end='2020-02-15')
237-
assert type(df) is DataFrame
275+
if not isinstance(df, DataFrame):
276+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
277+
238278
assert df.size == 240
239279
assert_metadata(requests_mock, request_url, md, site, None, format)
240280

@@ -255,7 +295,9 @@ def test_get_discharge_peaks_sites_value_types(requests_mock, site_input_type_li
255295
sites = site
256296

257297
df, md = get_discharge_peaks(sites=sites, start='2000-02-14', end='2020-02-15')
258-
assert type(df) is DataFrame
298+
if not isinstance(df, DataFrame):
299+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
300+
259301
assert df.size == 240
260302

261303

@@ -269,7 +311,9 @@ def test_get_discharge_measurements(requests_mock):
269311
response_file_path = 'data/waterdata_measurements.txt'
270312
mock_request(requests_mock, request_url, response_file_path)
271313
df, md = get_discharge_measurements(sites=[site], start='2000-02-14', end='2020-02-15')
272-
assert type(df) is DataFrame
314+
if not isinstance(df, DataFrame):
315+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
316+
273317
assert df.size == 2130
274318
assert_metadata(requests_mock, request_url, md, site, None, format)
275319

@@ -288,7 +332,8 @@ def test_get_discharge_measurements_sites_value_types(requests_mock, site_input_
288332
else:
289333
sites = site
290334
df, md = get_discharge_measurements(sites=sites, start='2000-02-14', end='2020-02-15')
291-
assert type(df) is DataFrame
335+
if not isinstance(df, DataFrame):
336+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
292337
assert df.size == 2130
293338

294339

@@ -300,7 +345,8 @@ def test_get_pmcodes(requests_mock):
300345
response_file_path = 'data/waterdata_pmcodes.txt'
301346
mock_request(requests_mock, request_url, response_file_path)
302347
df, md = get_pmcodes(parameterCd='00618')
303-
assert type(df) is DataFrame
348+
if not isinstance(df, DataFrame):
349+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
304350
assert df.size == 13
305351
assert_metadata(requests_mock, request_url, md, None, None, format)
306352

@@ -319,7 +365,8 @@ def test_get_pmcodes_parameterCd_value_types(requests_mock, parameterCd_input_ty
319365
else:
320366
parameterCd = parameterCd
321367
df, md = get_pmcodes(parameterCd=parameterCd)
322-
assert type(df) is DataFrame
368+
if not isinstance(df, DataFrame):
369+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
323370
assert df.size == 13
324371

325372

@@ -332,7 +379,8 @@ def test_get_water_use_national(requests_mock):
332379
response_file_path = 'data/water_use_national.txt'
333380
mock_request(requests_mock, request_url, response_file_path)
334381
df, md = get_water_use()
335-
assert type(df) is DataFrame
382+
if not isinstance(df, DataFrame):
383+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
336384
assert df.size == 225
337385
assert_metadata(requests_mock, request_url, md, None, None, format)
338386

@@ -369,7 +417,8 @@ def test_get_water_use_national_county_value_types(requests_mock, county_input_t
369417
else:
370418
counties = county
371419
df, md = get_water_use(counties=counties)
372-
assert type(df) is DataFrame
420+
if not isinstance(df, DataFrame):
421+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
373422
assert df.size == 225
374423

375424

@@ -387,7 +436,8 @@ def test_get_water_use_national_county_value_types(requests_mock, category_input
387436
else:
388437
categories = category
389438
df, md = get_water_use(categories=categories)
390-
assert type(df) is DataFrame
439+
if not isinstance(df, DataFrame):
440+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
391441
assert df.size == 225
392442

393443

@@ -400,7 +450,8 @@ def test_get_water_use_allegheny(requests_mock):
400450
response_file_path = 'data/water_use_allegheny.txt'
401451
mock_request(requests_mock, request_url, response_file_path)
402452
df, md = get_water_use(state="PA", counties="003")
403-
assert type(df) is DataFrame
453+
if not isinstance(df, DataFrame):
454+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
404455
assert df.size == 1981
405456
assert_metadata(requests_mock, request_url, md, None, None, format)
406457

@@ -421,13 +472,16 @@ def test_get_ratings(requests_mock):
421472
response_file_path = 'data/waterservices_ratings.txt'
422473
mock_request(requests_mock, request_url, response_file_path)
423474
df, md = get_ratings(site_no=site)
424-
assert type(df) is DataFrame
475+
if not isinstance(df, DataFrame):
476+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
477+
425478
assert df.size == 33
426479
assert_metadata(requests_mock, request_url, md, site, None, format)
427480

428481

429482
def test_what_sites(requests_mock):
430483
"""Tests what_sites method correctly generates the request url and returns the result in a DataFrame"""
484+
size = 2472
431485
format = "rdb"
432486
parameter_cd = '00010%2C00060'
433487
parameter_cd_list = ["00010","00060"]
@@ -437,8 +491,22 @@ def test_what_sites(requests_mock):
437491
mock_request(requests_mock, request_url, response_file_path)
438492

439493
df, md = what_sites(bBox=[-83.0,36.5,-81.0,38.5], parameterCd=parameter_cd_list, hasDataTypeCd="dv")
440-
assert type(df) is DataFrame
441-
assert df.size == 2472
494+
if not isinstance(df, DataFrame):
495+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
496+
497+
if gpd is not None:
498+
if not isinstance(df, gpd.GeoDataFrame):
499+
raise AssertionError(f"{type(df)} is not a GeoDataFrame")
500+
501+
geom_type = df.geom_type.unique()
502+
if len(geom_type) > 1 or geom_type[0] != "Point":
503+
raise AssertionError(
504+
f"Geometry type {geom_type} not valid, expecting Point"
505+
)
506+
507+
size += len(df)
508+
509+
assert df.size == size
442510
assert_metadata(requests_mock, request_url, md, None, parameter_cd_list, format)
443511

444512

@@ -450,7 +518,8 @@ def test_get_stats(requests_mock):
450518
mock_request(requests_mock, request_url, response_file_path)
451519

452520
df, md = get_stats(sites=["01491000", "01645000"])
453-
assert type(df) is DataFrame
521+
if not isinstance(df, DataFrame):
522+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
454523
assert df.size == 51936
455524
assert_metadata(requests_mock, request_url, md, None, None, format)
456525

@@ -468,7 +537,8 @@ def test_get_stats_site_value_types(requests_mock, site_input_type_list):
468537
else:
469538
sites = site
470539
df, md = get_stats(sites=sites)
471-
assert type(df) is DataFrame
540+
if not isinstance(df, DataFrame):
541+
raise AssertionError(f"{type(df)} is not DataFrame base class type")
472542
assert df.size == 51936
473543

474544

@@ -486,7 +556,10 @@ def assert_metadata(requests_mock, request_url, md, site, parameter_cd, format):
486556
with open('data/waterservices_site.txt') as text:
487557
requests_mock.get(site_request_url, text=text.read())
488558
site_info, _ = md.site_info
489-
assert type(site_info) is DataFrame
559+
if not isinstance(site_info, DataFrame):
560+
raise AssertionError(
561+
f"{type(site_info)} is not DataFrame base class type"
562+
)
490563
if parameter_cd is None:
491564
assert md.variable_info is None
492565
else:

0 commit comments

Comments
 (0)