# dataFetch.py
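"""Fetch current and forecast river height (NWS AHPS gauge rmdv2), the current
water temperature (USGS site 02035000), and current/forecast air temperature
(Dark Sky / forecast.io), then write the combined readings to
public_html/data.json and a local TinyDB database."""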
import requests
import re
import json
from time import strftime
from bs4 import BeautifulSoup
from tinydb import TinyDB

url = "http://water.weather.gov/ahps2/hydrograph_to_xml.php?gage=rmdv2&output=tabular&time_zone=est"

def river_heights(url):
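    """Scrape the AHPS tabular hydrograph page and return the most recent
    observed height and the first forecasted height, both as strings rounded
    to one decimal place."""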
    response = requests.get(url)  # requests is by far the best library for this kind of pull
    soup = BeautifulSoup(response.text, "lxml")  # lxml is a common default parser; it may need to be installed separately
    observed = []
    forecasted = []
    mytabs = soup.find_all('table')  # the page has three tables: [0] is junk, [1] is observed data, [2] is forecast
    # row-scraping pattern adapted from:
    # http://stackoverflow.com/questions/24593282/web-scraping-daily-tables-into-csv-with-beautifulsoup-in-python
    for trs in mytabs[1].find_all('tr'):
        tds = trs.find_all('td')
        row = [elem.text.strip() for elem in tds]  # keep as str; encoding to bytes would break the regex below under Python 3
        observed.append(row)
    for trs in mytabs[2].find_all('tr'):
        tds = trs.find_all('td')
        row = [elem.text.strip() for elem in tds]
        forecasted.append(row)
    observedNow = observed[2]  # observed is a list of rows; row 2 is the one with the date, height, and flow we want
    forecastedNow = forecasted[2]
    # observedNow[1] / forecastedNow[1] hold the height as a string; the regex pulls out the number
    heightNow = float(re.findall(r"\d+\.\d+", observedNow[1])[0])
    heightTomorrow = float(re.findall(r"\d+\.\d+", forecastedNow[1])[0])
    return format(heightNow, '.1f'), format(heightTomorrow, '.1f')

def riverTemps(url):
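    """Scrape the USGS page for the most recent water temperature (reported in
    Celsius) and return it in Fahrenheit as a string with no decimal places."""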
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")
    # search the page text for the word 'recent'; the values live in those strings,
    # and the 4th match holds the temperature in Celsius
    tempString = soup(text=re.compile('recent'))[3]
    tempC = float(re.findall(r"\d+\.\d+", tempString)[0])  # regex yields a list of numbers; the first is the one we want
    tempF = tempC * 1.8 + 32
    return format(tempF, '.0f')  # round to no decimal places

def darkSkyCallTemps(url):
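    """Call the Dark Sky (forecast.io) API and return the current temperature
    and today's forecast high, both in Fahrenheit with no decimal places."""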
    response = requests.get(url)
    # a successful API call returns status code 200 (OK)
    if response.ok:
        jData = response.json()
        tempForecastF = jData['daily']['data'][0]['temperatureMax']
        tempNowF = jData['currently']['temperature']
        return format(tempNowF, '.0f'), format(tempForecastF, '.0f')
    else:
        # otherwise raise the resulting HTTP error with its description
        response.raise_for_status()

# main script: fetch everything, then write the results out
currentHeight, forecastedHeight = river_heights(url)
currentWater = riverTemps("http://waterdata.usgs.gov/usa/nwis/uv?02035000")
currentTemp, forecastTemp = darkSkyCallTemps("https://api.forecast.io/forecast/1999155451fa19339ca7acb65b970fcf/37.5333,-77.4667")
data = {
    'currentHeight': currentHeight,
    'currentWater': currentWater,
    'currentTemp': currentTemp,
    'forecastHeight': forecastedHeight,
    'forecastWater': 0,  # placeholder; no forecast water temperature is fetched
    'forecastTemp': forecastTemp,
    'dateFetched': strftime("%Y-%m-%d %H:%M:%S")
}
with open('public_html/data.json', 'w') as f:
    json.dump(data, f)
db = TinyDB('db.json')
db.insert(data)
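# For reference, data.json ends up shaped roughly like this (values are
# illustrative examples, not real readings):
# {"currentHeight": "4.5", "currentWater": "68", "currentTemp": "72",
#  "forecastHeight": "4.8", "forecastWater": 0, "forecastTemp": "75",
#  "dateFetched": "2016-07-04 12:00:00"}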