|
9 | 9 |
|
10 | 10 | from __future__ import print_function |
11 | 11 |
|
| 12 | +import argparse |
12 | 13 | import random |
13 | 14 | import time |
14 | 15 |
|
15 | 16 | import wptools |
16 | 17 |
|
# Seconds to sleep between successive page requests.
DEFAULT_DELAY = 3
# Fallback Wikipedia language code when none is supplied.
DEFAULT_LANGUAGE = 'en'

17 | 21 | # connection refused 'ge' |
18 | 22 | # wikidata fail 'zh-min-nan' |
19 | 23 | LANGUAGES = [ |
|
30 | 34 | 'kn', 'ia', 'qu', 'ckb', 'mn', 'arz'] |
31 | 35 |
|
32 | 36 |
|
33 | | -def main(delay=1): |
def languages():
    """
    Return the list of supported Wikipedia language codes.

    Many sites do not fully support the APIs, so we return the
    hand-curated LANGUAGES list rather than querying the live
    wptools site matrix.
    """
    # Removed the commented-out wptools.site()/get_sites() variant
    # that followed the return statement: it was dead code and is
    # preserved in version-control history if ever needed again.
    return LANGUAGES


| 49 | + |
def popular(lang):
    """
    Return the titles of the most-viewed pages for a wiki.

    lang -- language code (e.g. 'en'); when falsy, wptools'
    default site is queried instead.
    """
    site = wptools.site(silent=True)
    wiki = "%s.wikipedia.org" % lang if lang else None
    if wiki is None:
        site.get_info()
    else:
        site.get_info(wiki)
    mostviewed = site.data['mostviewed']
    return [entry['title'] for entry in mostviewed]
37 | 60 |
|
38 | | - print("%d languages" % len(LANGUAGES)) |
| 61 | + |
def print_header(delay, lang, pages):
    """
    Print a banner describing the stress-test configuration.

    delay -- seconds between requests
    lang  -- language code, or None when cycling random languages
    pages -- list of page titles, or a one-element sentinel list
             when running forever
    """
    # A None language means we will rotate through all of them.
    if lang is None:
        langstr = len(languages())
    else:
        langstr = lang

    # A single sentinel entry means "run forever"; show "+".
    pagestr = "+" if len(pages) == 1 else len(pages)

    parts = []
    parts.append("WPTOOLS STRESS TEST")
    parts.append(time.asctime(time.gmtime()))
    parts.append("delay: %d lang: %s pages: %s" % (delay, langstr, pagestr))
    banner = " ".join(parts)

    if len(pages) > 1:
        print("Getting top %s.wikipedia.org pages" % lang)
        for rank, title in enumerate(pages[:10], start=1):
            print(" %d. %s" % (rank, title))

    print("%s\n%s" % (banner, "=" * len(banner)))


| 87 | + |
def main(args):
    """
    GET the most popular pages for a language, or random pages forever.

    args -- argparse.Namespace with:
            delay  int, seconds to sleep between requests
            lang   language code or None
            top    bool, fetch most-viewed pages instead of random ones

    Interrupt with Ctrl-C to stop and print a request/time summary.
    Any other exception re-enables page output, shows the failing
    page, and re-raises.
    """
    delay = args.delay
    top = args.top
    # BUG FIX: the previous `args.lang or 'en'` made the
    # random-language branch below unreachable. Keep lang as None in
    # "forever" mode so a random language is picked per request, and
    # fall back to DEFAULT_LANGUAGE only when fetching top pages.
    lang = args.lang or (DEFAULT_LANGUAGE if top else None)

    start = int(time.time())

    # Sentinel single-entry list keeps the while-loop running forever
    # in random mode; in --top mode it is replaced by real titles.
    pages = ['forever']
    if top:
        pages = popular(lang)

    print_header(delay, lang, pages)

    try:
        count = 0
        requests = 0
        elapsed = 0

        while pages:
            language = lang or random.choice(languages())
            # Construct the page once (the old code built a random
            # page object and immediately discarded it when --top).
            if top:
                page = wptools.page(pages.pop(0), lang=language,
                                    silent=True)
            else:
                page = wptools.page(lang=language, silent=True)

            page.get()

            preview = page.data.get('extext')
            if preview:
                preview = preview.strip().replace("\n", '')[:64]

            url = page.data.get('url')

            elapsed = int(time.time()) - start

            count += 1
            # Robustness: 'requests' may be absent if the fetch failed.
            nrq = len(page.data.get('requests') or [])
            requests += nrq
            # Requests per second; avoid division by zero on the
            # first iteration.
            rps = float(requests) / elapsed if elapsed > 0 else 0.0
            frps = '{:.1f}'.format(rps)

            print("[%d] %d %s %s" % (count, nrq, frps, url))
            print("%s %s" % (page.data.get('wikibase'), preview))

            time.sleep(delay)

    except KeyboardInterrupt:
        print("Done. %d requests %d seconds" % (requests, elapsed))

    except Exception:
        # Narrowed from a bare `except:`; still re-raised after
        # surfacing the failing page for debugging.
        page.flags['silent'] = False
        page.show()
        print("EXCEPTION %d requests %d seconds" % (requests, elapsed))
        raise


| 147 | + |
def parse_args():
    """
    Build and parse the command-line arguments for main().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--delay',
                        type=int,
                        default=DEFAULT_DELAY,
                        help='delay in seconds')
    parser.add_argument('-l', '--lang',
                        help='language code')
    parser.add_argument('-t', '--top',
                        action='store_true',
                        help='get top pages')
    return parser.parse_args()
59 | 158 |
|
60 | 159 |
|
if __name__ == "__main__":
    # Parse CLI options and run the stress test until interrupted.
    main(parse_args())
0 commit comments