-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathote.py
98 lines (81 loc) · 4.1 KB
/
ote.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import requests
import urllib.parse
import json
import csv
import itertools
import sys
# Uppercase Greek alphabet (24 letters) used to build the default wordlist.
alphabet = ['Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω']
print("Should i use ALL Greek Alphabet, or do you wanna specify your wordlist? ")
# Keep asking until a wordlist has been built or the user quits.
while True:
    # Normalise the answer so "a", " A " and "A" are all accepted.
    choice_wl = input("Press A for ALL, or C for custom or Q to quit:").strip().upper()
    if choice_wl == "A":
        print("Using all Alphabet:")
        print(alphabet)
        # Every ordered 3-letter string over the alphabet (with repetition).
        wordlist = ["".join(letters) for letters in itertools.product(alphabet, repeat=3)]
        break
    elif choice_wl == "C":
        filename = input("Enter the filename of the list of words: ")
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                # One word per line; blank lines are dropped so they are not
                # later sent to the server as empty search queries.
                wordlist = [line.strip() for line in f if line.strip()]
        except (OSError, UnicodeDecodeError) as err:
            # A missing or unreadable file returns to the menu instead of
            # ending the program with a traceback.
            print("Could not read the wordlist file:", err)
            continue
        break
    elif choice_wl == "Q":
        print("Quitting the program.")
        sys.exit()
    else:
        print("Invalid option. Please try again.")
# Search endpoint; the three placeholders are filled, in order, with the
# query word, the location and the page index.
url_template = 'https://www.11888.gr/search/white_pages/?query={}&location={}&page={}'
# Number of words to look up, used when printing progress.
allnames = len(wordlist)
# Ask for the location to search in (the prompt shows the expected
# "Kallithea, Attiki" form).
location = input("Vale tin topothesia me tin morfi 'Kallithea, Attiki': ")
# Ask where to write the results; add the .csv extension when it is missing.
filenamecsv = input("Enter a filename for the CSV file: ")
filenamecsv = filenamecsv if filenamecsv.endswith('.csv') else filenamecsv + '.csv'
# Seconds to wait on a request before giving up, so that a stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT = 30

# The location is identical for every request, so percent-encode it once.
encoded_location = urllib.parse.quote(location, safe='')


def _fetch_page(session, encoded_word, page):
    """Return the decoded JSON body of one search-results page.

    The URL is built from the percent-encoded word and location; the original
    code encoded the location but then interpolated the raw string.
    """
    url = url_template.format(encoded_word, encoded_location, page)
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    return response.json()


def _nested_name(mapping, key):
    """Return mapping[key]['name'], or '' when any part is missing or null."""
    return (mapping.get(key) or {}).get('name', '')


def _row_from_result(word, result):
    """Build one CSV row, in header order, from a single search result.

    Missing or null sections yield empty strings instead of raising.
    """
    name = result.get('name') or {}
    address = result.get('address') or {}
    phones = result.get('phones') or []
    # Only the first phone entry supplies the number and its type.
    first_phone = phones[0] if phones else {}
    # Providers are collected from every phone entry that names one.
    providers = [entry.get('provider', '') for entry in phones if 'provider' in entry]
    return [
        word,
        name.get('last', ''),
        name.get('first', ''),
        address.get('street1', ''),
        address.get('number1', ''),
        first_phone.get('number', ''),
        first_phone.get('type', ''),
        _nested_name(address, 'subregion'),
        _nested_name(address, 'municipality'),
        address.get('postal_code', ''),
        ', '.join(providers),
        "https://11888.gr" + (result.get('detail_url') or ''),
    ]


# Query every word in the wordlist and append each result to the CSV file.
# One session is shared by all requests so connections can be reused.
with open(filenamecsv, 'w', encoding='utf-8', newline='') as f, requests.Session() as session:
    writer = csv.writer(f)
    writer.writerow([
        'word', 'lastname', 'firstname', 'street1', 'number1',
        'phone', 'residential', 'subregion', 'municipality',
        'postal_code', 'provider', 'detailed_url'
    ])
    for i, word in enumerate(wordlist):
        encoded_word = urllib.parse.quote(word, safe='')
        progress = round((i / allnames) * 100, 2)
        # The first page also reports how many pages exist for this word.
        data = _fetch_page(session, encoded_word, 0)
        number_of_pages = (data.get('data') or {}).get('total_pages') or 0
        if number_of_pages == 0:
            print(progress, "% ", "skipping", i + 1, "name", word, "has no data")
            continue
        for currentpage in range(number_of_pages):
            # Page 0 is already in hand; only the later pages need a request.
            if currentpage > 0:
                data = _fetch_page(session, encoded_word, currentpage)
            print(progress, "% ", word, 'name', i + 1, 'of', allnames, 'total of', currentpage + 1, '/', number_of_pages, 'pages')
            results = (data.get('data') or {}).get('results') or []
            for result in results:
                writer.writerow(_row_from_result(word, result))