-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscraper.py
executable file
·41 lines (36 loc) · 1.26 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/python
import scraperwiki
import urllib2
import xlrd
import zipfile
import re
from cStringIO import StringIO
# Download the postal-code archive and locate the two spreadsheets inside it.
url = "http://www.posta.sk/subory/322/psc-obci-a-ulic.zip"

# urllib2 responses are not context managers, so close the connection
# explicitly once the whole payload has been buffered in memory.
response = urllib2.urlopen(url)
try:
    archive_file = StringIO(response.read())
finally:
    response.close()

archive = zipfile.ZipFile(archive_file)
archive_namelist = archive.namelist()

# Case-insensitive prefixes of the two archive members this script reads.
r1 = re.compile(r'^obce.', re.IGNORECASE)
r2 = re.compile(r'^ulice.', re.IGNORECASE)


def _first_match(pattern, names):
    """Return the first entry of *names* that *pattern* matches.

    Raises IndexError (the same exception type the original
    ``filter(...)[0]`` lookup produced) when nothing matches, but with a
    message listing the members that were actually found.
    """
    for name in names:
        if pattern.match(name):
            return name
    raise IndexError(
        "no archive member matches %r (members: %r)"
        % (pattern.pattern, names)
    )


obce_filename = _first_match(r1, archive_namelist)
ulice_filename = _first_match(r2, archive_namelist)
# Post codes for cities: copy selected columns of the first worksheet into
# the "towns" table, one saved record per spreadsheet row.
towns_book = xlrd.open_workbook(file_contents=archive.read(obce_filename))
towns_sheet = towns_book.sheet_by_index(0)

# Output field name -> zero-based spreadsheet column it is read from.
town_columns = (
    ('obec', 1),
    ('okres', 2),
    ('psc', 3),
    ('kraj', 7),
)

# Row 0 is skipped (presumably a header row -- confirm against the file).
# NOTE(review): 'obec' is the only unique key, so rows sharing the same
# 'obec' value are treated as one record -- confirm the values are unique.
for row_index in range(1, towns_sheet.nrows):
    record = dict(
        (field, towns_sheet.cell_value(row_index, column))
        for field, column in town_columns
    )
    scraperwiki.sqlite.save(
        unique_keys=['obec'],
        data=record,
        table_name="towns",
    )
# Streets: copy selected columns of the first worksheet into the "streets"
# table, one saved record per spreadsheet row.
streets_book = xlrd.open_workbook(file_contents=archive.read(ulice_filename))
streets_sheet = streets_book.sheet_by_index(0)

# Output field name -> zero-based spreadsheet column it is read from.
street_columns = (
    ('ulica', 1),
    ('psc', 2),
    ('obec', 6),
)

# Row 0 is skipped (presumably a header row -- confirm against the file).
# NOTE(review): 'ulica' is the only unique key, so rows sharing the same
# 'ulica' value are treated as one record even when 'obec' differs --
# confirm this is intended.
for row_index in range(1, streets_sheet.nrows):
    record = dict(
        (field, streets_sheet.cell_value(row_index, column))
        for field, column in street_columns
    )
    scraperwiki.sqlite.save(
        unique_keys=['ulica'],
        data=record,
        table_name="streets",
    )