Skip to content

Commit fde5e15

Browse files
committed
Merge branch 'continue-categories'
2 parents a79633d + ecad62d commit fde5e15

File tree

4 files changed

+88
-5
lines changed

4 files changed

+88
-5
lines changed

tests/category_cmcontinue.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
# -*- coding:utf-8 -*-
"""
Cached API fixture: a categorymembers response that carries a
"continue" section with a cmcontinue token and one category member.
"""

# Request URL this cached response stands in for.
query = ('https://en.wikipedia.org/w/api.php'
         '?action=query&formatversion=2'
         '&list=categorymembers&cmlimit=500'
         '&cmtitle=Category%3ABAFTA%20winners%20%28people%29')

# Raw JSON body, kept verbatim as a raw string.
response = r"""{
"batchcomplete": true,
"continue": {
"cmcontinue": "page|412745372f4907044b352f2f41273d03424b352f2f412704412745372f490121018f7f8f7f8f808f09|42525291",
"continue": "-||"
},
"query": {
"categorymembers": [
{
"pageid": 22167530,
"ns": 0,
"title": "Allison Abbate"
}
]
}
}"""

# Cache entry assembled from the pieces above.
cache = dict(
    query=query,
    response=response,
    info=dict(content='TEST', status=200),
)

tests/stress.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
import argparse
1313
import random
14+
import sys
1415
import time
1516

1617
import wptools
@@ -77,12 +78,17 @@ def print_header(delay, lang, pages):
7778
msg.append("delay: %d lang: %s pages: %s" % (delay, langstr, pagestr))
7879
msgstr = " ".join(msg)
7980

81+
header = [msgstr]
82+
header.append("=" * len(msgstr))
83+
header.append("Python " + sys.version)
84+
header.append('-' * len(msgstr))
85+
8086
if len(pages) > 1:
8187
print("Getting top %s.wikipedia.org pages" % lang)
8288
for i, title in enumerate(pages[:10]):
8389
print(" %d. %s" % (i + 1, title))
8490

85-
print("%s\n%s" % (msgstr, "=" * len(msgstr)))
91+
print("\n".join(header))
8692

8793

8894
def main(args):

tests/test_basic.py

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import wptools
1010

1111
from . import category
12+
from . import category_cmcontinue
1213
from . import disambiguation
1314
from . import imageinfo
1415
from . import labels_1
@@ -84,16 +85,33 @@ def test_category_get_members(self):
8485
cat = wptools.category('TEST')
8586
cat.cache['category'] = category.cache
8687
cat._set_data('category')
87-
self.assertTrue(len(cat.data['members']), 92)
88+
self.assertEqual(len(cat.data['members']), 68)
89+
self.assertEqual(len(cat.data['subcategories']), 24)
8890
self.assertTrue('requests' not in cat.data)
8991

9092
def test_category_get_members_namespace(self):
9193
cat = wptools.category('TEST', namespace=0)
9294
cat.cache['category'] = category.cache
9395
cat._set_data('category')
94-
self.assertTrue(len(cat.data['members']), 92)
96+
self.assertEqual(len(cat.data['members']), 68)
9597
self.assertTrue('requests' not in cat.data)
9698

99+
def test_category_get_members_continue(self):
    """
    A cmcontinue token in the response is kept in data, appended to
    the next category query, and gone again after a response that
    leaves no token behind
    """
    cat = wptools.category('TEST')

    # first batch: the fixture holds one member and a cmcontinue token
    cat.cache['category'] = category_cmcontinue.cache
    cat._set_data('category')
    self.assertIn('cmcontinue', cat.data)
    self.assertEqual(len(cat.data['members']), 1)

    # the follow-up query must carry the stored token
    qobj = wptools.query.WPToolsQuery()
    qry = cat._query('category', qobj)
    self.assertIn('&cmcontinue=page|', qry)
    self.assertTrue(qry.endswith('|42525291'))

    # second batch: members accumulate and the token is cleared
    cat.cache['category'] = category.cache
    cat._set_data('category')
    self.assertNotIn('cmcontinue', cat.data)
    self.assertEqual(len(cat.data['members']), 69)
114+
97115
def test_category_query(self):
98116
cat = wptools.category('TEST')
99117
qobj = wptools.query.WPToolsQuery()

wptools/category.py

Lines changed: 37 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,24 @@ def __init__(self, *args, **kwargs):
6666
if not pageid and not title:
6767
self.get_random()
6868

69+
def _add_members(self, catmembers):
    """
    Split category members by namespace and accumulate them in data

    Entries with ns == 0 go to data['members'] and entries with
    ns == 14 go to data['subcategories']; entries in any other
    namespace are left out. Existing lists are extended, so repeated
    calls accumulate. 'members' is always set (possibly to an empty
    list); 'subcategories' is only touched when this batch has some.
    """
    pages = []
    subcats = []
    for entry in catmembers:
        if entry['ns'] == 0:
            pages.append(entry)
        elif entry['ns'] == 14:
            subcats.append(entry)

    if 'members' not in self.data:
        self.data.update({'members': pages})
    else:
        self.data['members'].extend(pages)

    # nothing to record when this batch has no subcategories
    if not subcats:
        return

    if 'subcategories' not in self.data:
        self.data.update({'subcategories': subcats})
    else:
        self.data['subcategories'].extend(subcats)
86+
6987
def _query(self, action, qobj):
7088
"""
7189
Form query to enumerate category
@@ -76,17 +94,30 @@ def _query(self, action, qobj):
7694
if action == 'random':
7795
return qobj.random(namespace=14)
7896
elif action == 'category':
79-
return qobj.category(title=title, pageid=pageid)
97+
qry = qobj.category(title=title, pageid=pageid)
98+
if self.data.get('cmcontinue'):
99+
qry += "&cmcontinue=%s" % self.data['cmcontinue']
100+
return qry
80101

81102
def _set_data(self, action):
82103
"""
83104
Set category member data from API response
84105
"""
85106
data = self._load_response(action)
86107

108+
try:
109+
cmcontinue = data.get('continue').get('cmcontinue')
110+
if cmcontinue:
111+
self.data['cmcontinue'] = cmcontinue
112+
del self.cache['category']
113+
except AttributeError:
114+
if 'cmcontinue' in self.data:
115+
del self.data['cmcontinue']
116+
87117
if action == 'category':
88118
members = data.get('query').get('categorymembers')
89-
self.data.update({'members': members})
119+
if members:
120+
self._add_members(members)
90121

91122
if action == 'random':
92123
rand = data['query']['random'][0]
@@ -120,6 +151,10 @@ def get_members(self, show=True, proxy=None, timeout=0):
120151

121152
self._get('category', show, proxy, timeout)
122153

154+
if self.data.get('cmcontinue'):
155+
while self.data.get('cmcontinue'):
156+
self._get('category', show, proxy, timeout)
157+
123158
return self
124159

125160
def get_random(self, show=True, proxy=None, timeout=0):

0 commit comments

Comments
 (0)