Commit d3d26f0: Port to Python 3 (including poster and wikitools)

Signed-off-by: Elsie Hupp <[email protected]>
Parent: 0cfde9e

52 files changed: +4670 −2489 lines (not all changed files are rendered below)

.travis.yml (+1 −1)

@@ -1,5 +1,5 @@
 language: python
-python: 2.7
+python: 3.8
 install:
 - pip install tox
 script:

.vscode/settings.json (+8)

@@ -0,0 +1,8 @@
+{
+  "python.analysis.extraPaths": [
+    ".",
+    "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages"
+  ],
+  "python.pythonPath": "/usr/local/bin/python3.8",
+  "restructuredtext.confPath": ""
+}

docs/conf.py (+1 −1)

@@ -288,7 +288,7 @@
 #
 # latex_appendices = []

-# It false, will not define \strong, \code, itleref, \crossref ... but only
+# It false, will not define \strong, \code, itleref, \crossref ... but only
 # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added
 # packages.
 #

dumpgenerator.py (+307 −321)

(Large diff; not rendered here.)

gui.py (+15 −17)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2012 WikiTeam

@@ -19,10 +19,8 @@
 import platform
 import random
 import re
-from Tkinter import *
-import ttk
-import tkMessageBox
-import thread
+from tkinter import Tk, ttk, messagebox, Label, W, E, N, S, LabelFrame, Entry, StringVar, OptionMenu, Button, Text, Scrollbar, LEFT, SUNKEN, Menu
+import threading
 import time
 import urllib
 import webbrowser

@@ -102,7 +100,7 @@ def __init__(self, master):
 self.optionmenu11var.set("api.php")
 self.optionmenu11 = OptionMenu(self.labelframe11, self.optionmenu11var, self.optionmenu11var.get(), "index.php")
 self.optionmenu11.grid(row=0, column=2)
-self.button11 = Button(self.labelframe11, text="Check", command=lambda: thread.start_new_thread(self.checkURL, ()), width=5)
+self.button11 = Button(self.labelframe11, text="Check", command=lambda: threading.start_new_threading(self.checkURL, ()), width=5)
 self.button11.grid(row=0, column=3)
 #batch download labelframe
 self.label12 = Label(self.labelframe12, text="Wiki URLs:")

@@ -174,12 +172,12 @@ def __init__(self, master):
 self.tree.heading('status', text='Status')
 self.tree.grid(row=2, column=0, columnspan=9, sticky=W+E+N+S)
 [self.tree.heading(column, text=column, command=lambda: self.treeSortColumn(column=column, reverse=False)) for column in columns]
-#self.tree.bind("<Double-1>", (lambda: thread.start_new_thread(self.downloadDump, ())))
+#self.tree.bind("<Double-1>", (lambda: threading.start_new_threading(self.downloadDump, ())))
 self.tree.tag_configure('downloaded', background='lightgreen')
 self.tree.tag_configure('nodownloaded', background='white')
-self.button21 = Button(self.frame2, text="Load available dumps", command=lambda: thread.start_new_thread(self.loadAvailableDumps, ()), width=15)
+self.button21 = Button(self.frame2, text="Load available dumps", command=lambda: threading.start_new_threading(self.loadAvailableDumps, ()), width=15)
 self.button21.grid(row=3, column=0)
-self.button23 = Button(self.frame2, text="Download selection", command=lambda: thread.start_new_thread(self.downloadDump, ()), width=15)
+self.button23 = Button(self.frame2, text="Download selection", command=lambda: threading.start_new_threading(self.downloadDump, ()), width=15)
 self.button23.grid(row=3, column=4)
 self.button22 = Button(self.frame2, text="Clear list", command=self.deleteAvailableDumps, width=10)
 self.button22.grid(row=3, column=8, columnspan=2)

@@ -213,7 +211,7 @@ def __init__(self, master):
 #end menu

 def blocked(self):
-tkMessageBox.showerror("Error", "There is a task in progress. Please, wait.")
+messagebox.showerror("Error", "There is a task in progress. Please, wait.")

 def checkURL(self):
 if re.search(ur"(?im)^https?://[^/]+\.[^/]+/", self.entry11.get()): #well-constructed URL?, one dot at least, aaaaa.com, but bb.aaaaa.com is allowed too

@@ -234,7 +232,7 @@ def checkURL(self):
 self.entry11.config(background='red')
 self.msg('index.php is incorrect!', level='error')
 else:
-tkMessageBox.showerror("Error", "You have to write a correct api.php or index.php URL.")
+messagebox.showerror("Error", "You have to write a correct api.php or index.php URL.")

 def sumSizes(self, sizes):
 total = 0

@@ -266,15 +264,15 @@ def run(self):
 dumpgenerator.main(params=params)

 #check dump
-"""
+"""

 def msg(self, msg='', level=''):
 levels = { 'ok': 'lightgreen', 'warning': 'yellow', 'error': 'red' }
 if levels.has_key(level.lower()):
-print '%s: %s' % (level.upper(), msg)
+print ('%s: %s' % (level.upper(), msg))
 self.status.config(text='%s: %s' % (level.upper(), msg), background=levels[level.lower()])
 else:
-print msg
+print (msg)
 self.status.config(text=msg, background='grey')

 def treeSortColumn(self, column, reverse=False):

@@ -326,7 +324,7 @@ def downloadDump(self, event=None):
 else:
 self.msg('Problems in %d dumps. Downloaded %d of %d (and %d were previously downloaded).' % (len(items)-(c+d), c, len(items), d), level='error')
 else:
-tkMessageBox.showerror("Error", "You have to select some dumps to download.")
+messagebox.showerror("Error", "You have to select some dumps to download.")
 self.clearAvailableDumps()
 self.showAvailableDumps()
 self.filterAvailableDumps()

@@ -410,7 +408,7 @@ def loadAvailableDumps(self):
 wikifarms_r = re.compile(ur"(%s)" % ('|'.join(wikifarms.keys())))
 c = 0
 for mirror, url, regexp in self.urls:
-print 'Loading data from', mirror, url
+print ('Loading data from', mirror, url)
 self.msg(msg='Please wait... Loading data from %s %s' % (mirror, url))
 f = urllib.urlopen(url)
 m = re.compile(regexp).finditer(f.read())

@@ -452,7 +450,7 @@ def callback(self):
 self.msg("Feature not implemented for the moment. Contributions are welcome.", level='warning')

 def askclose():
-if tkMessageBox.askokcancel("Quit", "Do you really wish to exit?"):
+if messagebox.askokcancel("Quit", "Do you really wish to exit?"):
 root.destroy()

 if __name__ == "__main__":
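
Note: `threading` has no `start_new_threading` function, so the `command=` lambdas above will raise AttributeError when the buttons are clicked, and `dict.has_key()` (still used on the unchanged `msg` lines) no longer exists in Python 3. A minimal sketch of the usual replacements; the `start_in_thread` helper is illustrative, not part of the commit:

import threading

def start_in_thread(func, args=()):
    # Python 3 stand-in for Python 2's thread.start_new_thread(func, args)
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
    return t

# e.g. Button(..., command=lambda: start_in_thread(self.checkURL))
# and: "levels.has_key(level.lower())" becomes "level.lower() in levels"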

launcher.py (+17 −17)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2016 WikiTeam

@@ -28,17 +28,17 @@

 def main():
 if len(sys.argv) < 2:
-print 'python script.py file-with-apis.txt'
+print ('python script.py file-with-apis.txt')
 sys.exit()

-print 'Reading list of APIs from', sys.argv[1]
+print ('Reading list of APIs from', sys.argv[1])
 wikis = open(sys.argv[1], 'r').read().splitlines()
-print '%d APIs found' % (len(wikis))
+print ('%d APIs found' % (len(wikis)))

 for wiki in wikis:
-print "#"*73
-print "# Downloading", wiki
-print "#"*73
+print ("#"*73)
+print ("# Downloading", wiki)
+print ("#"*73)
 wiki = wiki.lower()
 # Make the prefix in standard way; api and index must be defined, not important which is which
 prefix = dumpgenerator.domain2prefix(config={'api': wiki, 'index': wiki})

@@ -52,17 +52,17 @@ def main():
 break #stop searching, dot not explore subdirectories

 if compressed:
-print 'Skipping... This wiki was downloaded and compressed before in', zipfilename
+print ('Skipping... This wiki was downloaded and compressed before in', zipfilename)
 # Get the archive's file list.
 if ( ( ( sys.version_info[0] == 3 ) and ( sys.version_info[1] > 0 ) ) or ( ( sys.version_info[0] == 2 ) and ( sys.version_info[1] > 6 ) ) ):
 archivecontent = subprocess.check_output (['7z', 'l', zipfilename])
 if re.search(ur"%s.+-history\.xml" % (prefix), archivecontent) is None:
 # We should perhaps not create an archive in this case, but we continue anyway.
-print "ERROR: The archive contains no history!"
+print ("ERROR: The archive contains no history!")
 if re.search(ur"Special:Version\.html", archivecontent) is None:
-print "WARNING: The archive doesn't contain Special:Version.html, this may indicate that download didn't finish."
+print ("WARNING: The archive doesn't contain Special:Version.html, this may indicate that download didn't finish.")
 else:
-print "WARNING: Content of the archive not checked, we need python 2.7+ or 3.1+."
+print ("WARNING: Content of the archive not checked, we need python 2.7+ or 3.1+.")
 # TODO: Find a way like grep -q below without doing a 7z l multiple times?
 continue

@@ -81,10 +81,10 @@ def main():
 # such as editthis.info, wiki-site.com, wikkii (adjust the value as needed;
 # typically they don't provide any crawl-delay value in their robots.txt).
 if started and wikidir: #then resume
-print 'Resuming download, using directory', wikidir
-subprocess.call(['python2', 'dumpgenerator.py', '--api={}'.format(wiki), '--xml', '--images', '--resume', '--path={}'.format(wikidir)], shell=False)
+print ('Resuming download, using directory', wikidir)
+subprocess.call(['python3', 'dumpgenerator.py', '--api={}'.format(wiki), '--xml', '--images', '--resume', '--path={}'.format(wikidir)], shell=False)
 else: #download from scratch
-subprocess.call(['python2', 'dumpgenerator.py', '--api={}'.format(wiki), '--xml', '--images'], shell=False)
+subprocess.call(['python3', 'dumpgenerator.py', '--api={}'.format(wiki), '--xml', '--images'], shell=False)
 started = True
 #save wikidir now
 for f in os.listdir('.'):

@@ -98,7 +98,7 @@ def main():
 finished = False
 if started and wikidir and prefix:
 if (subprocess.call (['tail -n 1 %s/%s-history.xml | grep -q "</mediawiki>"' % (wikidir, prefix)], shell=True) ):
-print "No </mediawiki> tag found: dump failed, needs fixing; resume didn't work. Exiting."
+print ("No </mediawiki> tag found: dump failed, needs fixing; resume didn't work. Exiting.")
 else:
 finished = True
 # You can also issue this on your working directory to find all incomplete dumps:

@@ -108,7 +108,7 @@ def main():
 if finished:
 time.sleep(1)
 os.chdir(wikidir)
-print 'Changed directory to', os.getcwd()
+print ('Changed directory to', os.getcwd())
 # Basic integrity check for the xml. The script doesn't actually do anything, so you should check if it's broken. Nothing can be done anyway, but redownloading.
 subprocess.call('grep "<title>" *.xml -c;grep "<page>" *.xml -c;grep "</page>" *.xml -c;grep "<revision>" *.xml -c;grep "</revision>" *.xml -c', shell=True)
 # Make a non-solid archive with all the text and metadata at default compression. You can also add config.txt if you don't care about your computer and user names being published or you don't use full paths so that they're not stored in it.

@@ -123,7 +123,7 @@ def main():
 subprocess.call('7z' + ' a -ms=off -mx=1 ../%s-wikidump.7z.tmp %s-images.txt images/' % (prefix, prefix), shell=True)
 subprocess.call('mv' + ' ../%s-wikidump.7z.tmp ../%s-wikidump.7z' % (prefix, prefix), shell=True)
 os.chdir('..')
-print 'Changed directory to', os.getcwd()
+print ('Changed directory to', os.getcwd())
 time.sleep(1)

 if __name__ == "__main__":
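
Note: two of the retained lines above invite tightening in a follow-up. `sys.version_info` is a comparable tuple, so the nested major/minor checks collapse to tuple comparisons, and the `tail | grep` pipeline has a shell-free equivalent. Also, on Python 3 `subprocess.check_output()` returns bytes, so `archivecontent` needs a `.decode()` before the `re.search()` calls. A sketch under those assumptions (the `dump_finished` helper name is invented here, not part of the commit):

import os
import sys

# Equivalent of the parenthesized version check above:
new_enough = sys.version_info >= (3, 1) or (2, 7) <= sys.version_info < (3, 0)

def dump_finished(wikidir, prefix):
    # Shell-free version of: tail -n 1 <dump> | grep -q "</mediawiki>"
    path = os.path.join(wikidir, '%s-history.xml' % prefix)
    with open(path, 'rb') as f:
        f.seek(max(os.path.getsize(path) - 1024, 0))  # read only the tail
        return b'</mediawiki>' in f.read()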

listsofwikis/mediawiki/checkalive.py (+21 −13)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2011-2012 WikiTeam

@@ -17,27 +17,35 @@

 # Script to check if a list of wikis are alive or dead

-import thread
+import threading
 import time
-import sys
-import urllib2
-import exceptions
+import urllib.request
+from urllib.error import *
+from http.server import BaseHTTPRequestHandler
 import re

 # Configuration
 delay = 30 # Seconds before timing out on request
 limit = 100

 def printapi(api):
-print api, 'is alive'
-open('wikisalive.txt', 'a').write(('%s\n' % api.strip()).encode('utf-8'))
+print (api, 'is alive')
+open('wikisalive.txt', 'a').write(str('%s\n' % api.strip()))

 def checkcore(api):
-req = urllib2.Request(api, None)
+req = urllib.request(api, None)
 try:
-raw = urllib2.urlopen(req, None, delay).read()
-except IOError: # http://docs.python.org/2/howto/urllib2.html#handling-exceptions
-print api, 'is dead or has errors'
+raw = urllib.request.urlopenurlopen(req, None, delay).read()
+except URLError as reason: # https://docs.python.org/3/library/urllib.error.html
+
+if reason.isinstance(HTTPError):
+print (api + 'is dead or has errors because:')
+print ("Error code " + HTTPError.code + ": " + BaseHTTPRequestHandler.responses[HTTPError.code].shortmessage)
+print (BaseHTTPRequestHandler.responses[HTTPError.code].longmessage)
+print ("Reason: " + HTTPError.reason)
+print ("HTTP Headers:\n" + HTTPError.headers)
+else:
+print (api + 'is dead or has errors because:' + reason)
 return
 # RSD is available since 1.17, bug 25648
 rsd = re.search(r'(?:link rel="EditURI".+href=")(?:https?:)?(.+api.php)\?action=rsd', raw)

@@ -60,11 +68,11 @@ def checkcore(api):
 index = domain.group(1) + login.group(1)
 printapi(index)
 else:
-print api, 'is not a MediaWiki wiki'
+print (api, 'is not a MediaWiki wiki')

 def check(apis):
 for api in apis:
-thread.start_new_thread(checkcore, (api,))
+threading.start_new_threading(checkcore, (api,))
 time.sleep(0.1)
 time.sleep(delay+1)
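
Note: the ported `checkcore` above will not run as committed. `urllib.request` is a module (the class is `urllib.request.Request`), `urlopenurlopen` is a typo for `urlopen`, the `isinstance` check is written backwards, and the error details live on the caught exception instance rather than on the `HTTPError` class; `BaseHTTPRequestHandler.responses` maps status codes to plain `(shortmessage, longmessage)` tuples, not objects with those attributes. (The `threading.start_new_threading` call has the same problem noted under gui.py.) A working sketch of the same check, offered as one possible fix rather than what the commit does:

import urllib.request
from urllib.error import HTTPError, URLError
from http.server import BaseHTTPRequestHandler

def checkcore(api, delay=30):
    req = urllib.request.Request(api, None)
    try:
        raw = urllib.request.urlopen(req, None, delay).read()
    except HTTPError as e:  # catch before URLError; HTTPError subclasses it
        short, long_ = BaseHTTPRequestHandler.responses.get(e.code, ('?', '?'))
        print('%s is dead or has errors: HTTP %d (%s)' % (api, e.code, short))
        print(long_)
        return None
    except URLError as e:
        print('%s is dead or has errors: %s' % (api, e.reason))
        return None
    return raw  # continue with the RSD / index.php checks on the response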

listsofwikis/mediawiki/miraheze-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014-2017 WikiTeam developers

@@ -29,7 +29,7 @@ def main():
 m = re.findall(ur'<tr><td>(<del>)?<a href="https://([^>]+?)/">[^<]+</a>', raw)
 m.sort()
 for i in m:
-print 'https://' + i[1] + '/w/api.php'
+print ('https://' + i[1] + '/w/api.php')

 if __name__ == '__main__':
 main()
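
Note: this file, like the other spiders, launcher.py, and gui.py, still carries `ur'...'` literals on unchanged lines; the `ur` prefix is a SyntaxError in Python 3, so these scripts will not even parse until it is dropped. Python 3 strings are already Unicode, so a plain raw string is the equivalent. A sketch using this spider's own regex against a made-up sample line:

import re

# Python 2: ur'...'; Python 3: r'...' (str is Unicode already)
pattern = r'<tr><td>(<del>)?<a href="https://([^>]+?)/">[^<]+</a>'
sample = '<tr><td><a href="https://example.miraheze.org/">Example</a>'  # hypothetical input
print(re.findall(pattern, sample))  # [('', 'example.miraheze.org')]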

listsofwikis/mediawiki/neoseeker-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014-2017 WikiTeam developers

@@ -29,7 +29,7 @@ def main():
 m = re.findall(ur'<li><a href=\'([^>]+?)/wiki/\'>', raw)
 m.sort()
 for i in m:
-print i + '/w/api.php'
+print (i + '/w/api.php')

 if __name__ == '__main__':
 main()

listsofwikis/mediawiki/orain-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014 WikiTeam developers

@@ -28,7 +28,7 @@ def main():
 raw = r.text
 m = re.findall(ur'<tr><td><a href="//([^>]+?)/">[^<]+</a></td></tr>', raw)
 for i in m:
-print 'http://' + i + '/w/api.php'
+print ('http://' + i + '/w/api.php')

 if __name__ == '__main__':
 main()

listsofwikis/mediawiki/referata-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014 WikiTeam developers

@@ -33,7 +33,7 @@ def main():

 m = re.findall(ur'(?im)<h3 class="r"><a href=\"([^ ]+?)" onmouse', raw)
 for i in m:
-print i
+print (i)

 if re.search(ur'id="ofr"', raw): #omitted results, the end
 break

listsofwikis/mediawiki/shoutwiki-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014 WikiTeam developers

@@ -41,7 +41,7 @@ def main():
 for site in jsonsites['query']['listwikis']:
 siteid = int(site['id'])
 siteurl = site['url']
-print siteurl
+print (siteurl)

 if len(jsonsites['query']['listwikis']) == int(swlimit):
 #there are more

listsofwikis/mediawiki/wiki-site-spider.py (+2 −2)

@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 # Copyright (C) 2014 WikiTeam developers

@@ -32,7 +32,7 @@ def main():
 raw = r.text
 m = re.findall(ur'<td><a href="([^>]+?)"', raw)
 for i in m:
-print i
+print (i)

 if __name__ == '__main__':
 main()
