@@ -19,7 +19,7 @@
 import pandas as pd
 import requests
 
-from dataretrieval.utils import BaseMetadata, format_datetime, to_str, update_merge
+from dataretrieval.utils import BaseMetadata, format_datetime, to_str
 
 from .utils import query
 
@@ -834,6 +834,7 @@ def get_iv(
     response = query_waterservices(
         service='iv', format='json', ssl_check=ssl_check, **kwargs
     )
+
     df = _read_json(response.json())
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
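For context, a minimal usage sketch of the helper this hunk touches. The gage number, parameter code, and dates are illustrative, and startDT/endDT are NWIS 'iv' service parameters forwarded through **kwargs:

    from dataretrieval import nwis

    # Fetch instantaneous values for one example gage. get_iv returns a
    # (DataFrame, NWIS_Metadata) tuple; the DataFrame is built by _read_json.
    df, md = nwis.get_iv(
        sites='01646500',      # example gage: Potomac River near Washington, DC
        parameterCd='00060',   # discharge, cubic feet per second
        startDT='2023-01-01',
        endDT='2023-01-02',
    )
    print(df.head())
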
@@ -1304,67 +1305,88 @@ def _read_json(json):
     A custom metadata object
 
     """
-    merged_df = pd.DataFrame()
-
-    for timeseries in json['value']['timeSeries']:
-        site_no = timeseries['sourceInfo']['siteCode'][0]['value']
-        param_cd = timeseries['variable']['variableCode'][0]['value']
-        # check whether min, max, mean record XXX
-        option = timeseries['variable']['options']['option'][0].get('value')
-
-        # loop through each parameter in timeseries.
-        for parameter in timeseries['values']:
-            col_name = param_cd
-            method = parameter['method'][0]['methodDescription']
-
-            # if len(timeseries['values']) > 1 and method:
-            if method:
-                # get method, format it, and append to column name
-                method = method.strip('[]()').lower()
-                col_name = f'{col_name}_{method}'
-
-            if option:
-                col_name = f'{col_name}_{option}'
-
-            record_json = parameter['value']
-
-            if not record_json:
-                # no data in record
-                continue
-            # should be able to avoid this by dumping
-            record_json = str(record_json).replace("'", '"')
-
-            # read json, converting all values to float64 and all qualifiers
-            # Lists can't be hashed, thus we cannot df.merge on a list column
-            record_df = pd.read_json(
-                StringIO(record_json),
-                orient='records',
-                dtype={'value': 'float64', 'qualifiers': 'unicode'},
-                convert_dates=False,
-            )
+    merged_df = pd.DataFrame(columns=['site_no', 'datetime'])
 
-            record_df['qualifiers'] = (
-                record_df['qualifiers'].str.strip('[]').str.replace("'", '')
-            )
-            record_df['site_no'] = site_no
-
-            record_df.rename(
-                columns={
-                    'value': col_name,
-                    'dateTime': 'datetime',
-                    'qualifiers': col_name + '_cd',
-                },
-                inplace=True,
-            )
+    site_list = [
+        ts['sourceInfo']['siteCode'][0]['value'] for ts in json['value']['timeSeries']
+    ]
 
-            if merged_df.empty:
-                merged_df = record_df
+    # create a list of indices marking each change in site no;
+    # for example, [0, 21, 22] would be the site-block boundaries
+    index_list = [0]
+    index_list.extend(
+        [i + 1 for i, (a, b) in enumerate(zip(site_list[:-1], site_list[1:])) if a != b]
+    )
+    index_list.append(len(site_list))
+
+    for i in range(len(index_list) - 1):
+        start = index_list[i]  # e.g., 0
+        end = index_list[i + 1]  # e.g., 21
+
+        # grab a block containing timeseries 0:21,
+        # which are all from the same site
+        site_block = json['value']['timeSeries'][start:end]
+        if not site_block:
+            continue
+
+        site_no = site_block[0]['sourceInfo']['siteCode'][0]['value']
+        site_df = pd.DataFrame(columns=['datetime'])
+
+        for timeseries in site_block:
+            param_cd = timeseries['variable']['variableCode'][0]['value']
+            # check whether min, max, mean record XXX
+            option = timeseries['variable']['options']['option'][0].get('value')
+
+            # loop through each parameter in timeseries and merge into site_df
+            for parameter in timeseries['values']:
+                col_name = param_cd
+                method = parameter['method'][0]['methodDescription']
+
+                # if len(timeseries['values']) > 1 and method:
+                if method:
+                    # get method, format it, and append to column name
+                    method = method.strip('[]()').lower()
+                    col_name = f'{col_name}_{method}'
+
+                if option:
+                    col_name = f'{col_name}_{option}'
+
+                record_json = parameter['value']
+
+                if not record_json:
+                    # no data in record
+                    continue
+                # should be able to avoid this by dumping
+                record_json = str(record_json).replace("'", '"')
+
+                # read json, converting all values to float64 and all qualifiers
+                # Lists can't be hashed, thus we cannot df.merge on a list column
+                record_df = pd.read_json(
+                    StringIO(record_json),
+                    orient='records',
+                    dtype={'value': 'float64', 'qualifiers': 'unicode'},
+                    convert_dates=False,
+                )
+
+                record_df['qualifiers'] = (
+                    record_df['qualifiers'].str.strip('[]').str.replace("'", '')
+                )
 
-            else:
-                merged_df = update_merge(
-                    merged_df, record_df, na_only=True, on=['site_no', 'datetime']
+                record_df.rename(
+                    columns={
+                        'value': col_name,
+                        'dateTime': 'datetime',
+                        'qualifiers': col_name + '_cd',
+                    },
+                    inplace=True,
                 )
 
+                site_df = site_df.merge(record_df, how='outer', on='datetime')
+
+        # done with this site's timeseries
+        site_df['site_no'] = site_no
+        merged_df = pd.concat([merged_df, site_df])
+
     # convert to datetime, normalizing the timezone to UTC when doing so
     if 'datetime' in merged_df.columns:
         merged_df['datetime'] = pd.to_datetime(merged_df['datetime'], utc=True)
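To make the new grouping logic concrete, a small self-contained sketch of the two patterns the rewrite relies on. The site numbers, parameter columns, and timestamps below are toy values, not the library's API:

    import pandas as pd

    # Boundary indices: one entry per change in site number, plus the start
    # and end sentinels, mirroring index_list in _read_json.
    site_list = ['A', 'A', 'A', 'B', 'B', 'C']
    index_list = [0]
    index_list.extend(
        [i + 1 for i, (a, b) in enumerate(zip(site_list[:-1], site_list[1:])) if a != b]
    )
    index_list.append(len(site_list))
    print(index_list)  # [0, 3, 5, 6] -> blocks A = 0:3, B = 3:5, C = 5:6

    # Within one site block, each parameter's records are outer-merged on
    # 'datetime' to build a wide frame; per-site frames are then stacked.
    flow = pd.DataFrame({'datetime': ['t0', 't1'], '00060': [1.0, 2.0]})
    stage = pd.DataFrame({'datetime': ['t1', 't2'], '00065': [3.0, 4.0]})
    site_df = pd.DataFrame(columns=['datetime'])
    for record_df in (flow, stage):
        site_df = site_df.merge(record_df, how='outer', on='datetime')
    site_df['site_no'] = 'A'
    merged_df = pd.concat([site_df], ignore_index=True)
    print(merged_df)

Because rows are merged on 'datetime' only within a single site and the per-site frames are stacked with concat, the old update_merge over the whole frame on ['site_no', 'datetime'] is no longer needed, which is why the import is dropped in the first hunk.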