-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmvm_python.py
More file actions
173 lines (145 loc) · 5.69 KB
/
mvm_python.py
File metadata and controls
173 lines (145 loc) · 5.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#-------------------------------------------------------------------------------
# Name: mvm_python.py
# Purpose: Download data from the MVM web API.
#
# Author: James Sample
#
# Created: 19/10/2018
# Copyright: (c) James Sample and NIVA, 2018
# Licence: <your licence>
#-------------------------------------------------------------------------------
""" Basic Python functions for downloading data from the Swedish water quality
database:
https://miljodata.slu.se/mvm/
"""
def convert_dates(time_string):
""" Modifed from here:
https://stackoverflow.com/a/28507530/505698
"""
import re
from datetime import datetime, timedelta, timezone
epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
ticks, offset = re.match(r'/Date\((\d+)([+-]\d{4})?\)/$', time_string).groups()
utc_dt = epoch + timedelta(milliseconds=int(ticks))
if offset:
offset = int(offset)
hours, minutes = divmod(abs(offset), 100)
if offset < 0:
hours, minutes = -hours, -minutes
dt = utc_dt.astimezone(timezone(timedelta(hours=hours, minutes=minutes)))
return datetime(dt.year, dt.month, dt.day)
else:
return datetime(utc_dt.year, utc_dt.month, utc_date.day)
def get_mvm_token(token_path):
""" Read valid MVM access token from .xlsx
"""
import pandas as pd
import datetime as dt
# Read Excel
df = pd.read_excel(token_path, sheet_name='Sheet1')
# Get most recent token
df.sort_values(by=['Expiry_Date'],
inplace=True,
ascending=False)
token = df['Token'][0]
exp_date = df['Expiry_Date'][0]
# Check valid
if exp_date < dt.datetime.now():
raise ValueError('The token file has no valid tokens.\n'
'Please log-in to MVM and update the tokens in the file:\n\n'
' https://miljodata.slu.se/mvm/')
return token
def query_mvm_station_data(site_id, st_yr, end_yr, token_path):
""" Download data for a specific site using the Miljödata MVM API.
Based on documentation here:
http://miljodata.slu.se/mvm/OpenAPI
Args:
site_id: Int. MD-MVM ID for site of interest
st_yr: Int. Start year of interest
end_yr: Int. End year of interest
public_token: Raw str. Path to .xlsx containing tokens. See
example .xlsx for details
Returns:
Dataframe
"""
import requests
import pandas as pd
import numpy as np
# Get access token
token = get_mvm_token(token_path)
# Build url and get data
url = (r'http://miljodata.slu.se/mvm/ws/ObservationsService.svc/rest'
r'/GetSamplesBySite?token=%s&siteid=%s&fromYear=%s&toYear=%s' % (token,
site_id,
st_yr,
end_yr))
response = requests.get(url)
data = response.json()
# Dict for data of interest
data_dict = {'mvm_id':[],
'station_name':[],
'station_type':[],
'sample_id':[],
'sample_date':[],
'depth1':[],
'depth2':[],
'par_name':[],
'par_desc':[],
'unit':[],
'value':[],
'flag':[]}
# Loop over samples
for samp in data:
# Get sample data
stn_id = int(samp['SiteId'])
stn_name = samp['SiteName']
stn_type = samp['SiteType']
samp_id = int(samp['SampleId'])
date = convert_dates(samp['SampleDate'])
depth1 = samp['MinDepth']
if depth1:
depth1 = float(samp['MinDepth'].replace(',', '.'))
else:
depth1 = np.nan
depth2 = samp['MaxDepth']
if depth2:
depth2 = float(samp['MaxDepth'].replace(',', '.'))
else:
depth2 = np.nan
# Loop over pars
for par in samp['ObservationValues']:
# Get par data
par_name = par['PropertyAbbrevName']
par_desc = par['PropertyName']
par_unit = par['UnitOfMeasureName']
# Deal with LOD flags
par_value = par['ReportedValue']
if par_value:
par_value = par_value.replace(',', '.')
if par_value[0] in ('<', '>'):
flag = par_value[0]
par_value = par_value[1:]
else:
flag = np.nan
par_value = float(par_value)
else:
par_value = np.nan
flag = np.nan
# Add to dict
data_dict['mvm_id'].append(stn_id)
data_dict['station_name'].append(stn_name)
data_dict['station_type'].append(stn_type)
data_dict['sample_id'].append(samp_id)
data_dict['sample_date'].append(date)
data_dict['depth1'].append(depth1)
data_dict['depth2'].append(depth2)
data_dict['par_name'].append(par_name)
data_dict['par_desc'].append(par_desc)
data_dict['unit'].append(par_unit)
data_dict['value'].append(par_value)
data_dict['flag'].append(flag)
df = pd.DataFrame(data_dict)
df.sort_values(by=['sample_date', 'par_name'],
inplace=True)
df.reset_index(inplace=True, drop=True)
return df