-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate.py
executable file
·63 lines (45 loc) · 1.28 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python3
# Imports
from urllib import request # Downloading HTML
import re # Parsing with regex
from csv import writer # Export data
# Address with list on ARRL website
URL = 'http://www.arrl.org/international-call-sign-series'
# Output file with parsed list
OUTFILE = 'prefixlist.csv'

# One table row: a bold call sign series cell followed by an allocation cell.
# Raw string, so "\s" is a regex escape and not an invalid string escape.
ROW_REGEX = re.compile(
    r'<tr><td><strong>(.*?)</strong></td>\s*<td>(.*?)</td>\s*</tr>',
    re.MULTILINE)
# Any HTML tag (used to drop links wrapped around the series text)
TAG_REGEX = re.compile(r'<.*?>')


def parse_prefixes(page):
    """Extract call sign series rows from the HTML text of the list page.

    Every table row matched by ROW_REGEX, except the first and the last
    match (table headers), is converted to a three-element list:

        [series start, series end, allocation]

    * HTML tags are removed from the series cell before it is split on '-'.
    * Asterisks and then spaces are stripped from both ends of the start.
    * Non-breaking spaces (U+00A0) are removed from the allocation.
    * A series cell without '-' is treated as a range of one element
      (end == start) instead of raising IndexError.

    page -- decoded HTML text.
    Returns a list of rows ready to be passed to csv.writer.
    """
    rows = []
    # Remove first and last rows (table headers)
    for series, allocation in ROW_REGEX.findall(page)[1:-1]:
        # Remove HTML links, then split range to start and end
        parts = TAG_REGEX.sub('', series).split('-')
        # Delete stars and spaces from the start of the range
        start = parts[0].strip('*').strip(' ')
        # NOTE(review): the end of the range is written as found in the
        # page (no star/space stripping) - confirm this is intended.
        end = parts[1] if len(parts) > 1 else start
        # Remove non-breaking spaces
        rows.append([start, end, allocation.replace(u'\xa0', u'')])
    return rows


def main():
    """Download the list from URL, parse it and export it to OUTFILE."""
    # Download and parse before touching the output file, so that a failed
    # download or parse does not truncate an existing list
    with request.urlopen(URL) as response:
        page = response.read().decode('utf-8')
    rows = parse_prefixes(page)
    # Export data to file; newline='' leaves line endings to the csv writer,
    # explicit encoding keeps non-ASCII names independent of the locale
    with open(OUTFILE, "w", newline='', encoding='utf-8') as f:
        writer(f).writerows(rows)
    # Write message
    print("Exported " + str(len(rows)) + " lines of data to " + OUTFILE)


if __name__ == "__main__":
    main()