66
77from app .task import Task
88from example .testspider import LagouSpider
9+ from example .zhihuspider import BihuSpider
10+ from conf .config import redis_client , mongo_storage
11+ import time
12+ from plogger import get_logger
13+
14+ log = get_logger ('testsdks' )
915
1016class TestTask (Task ):
1117
@@ -22,5 +28,32 @@ def start(self):
2228 for d in spider .result ['job' ].export_sql ('test.test' ):
2329 print (d )
2430
31+
class BihuTask(Task):
    """Crawl one Zhihu member's followee list and queue newly seen members.

    ``self.tasks`` is interpolated into the followees API URL as the member
    identifier. Each fetched page is stored in the ``dev.bihu`` collection
    and every followee's ``url_token`` is offered to the Redis work queue.
    """

    # First followees page for a member; later pages come from ``next_page``.
    _FOLLOWEES_URL = 'https://www.zhihu.com/api/v4/members/{}/followees?offset=0&limit=20'
    # Upper bound on follow-up pages fetched after the first one.
    _MAX_EXTRA_PAGES = 100
    # Pause between consecutive page requests, in seconds.
    _PAGE_DELAY_SECONDS = 3

    def create_task(self, data):
        """Queue every member in *data* that has not been seen before.

        Args:
            data: iterable of mappings, each providing a ``'url_token'`` key.
        """
        for member in data:
            token = member['url_token']
            # 'bihuset' acts as the de-duplication set: the token is pushed
            # onto the 'bihutask' list only when sadd() reports a truthy
            # result (presumably "newly added" — redis-py semantics, confirm).
            if redis_client.sadd('bihuset', token):
                redis_client.lpush('bihutask', token)
        # Report queue/set sizes once per batch rather than once per item,
        # which saves two Redis round-trips for every followee.
        log.info('now task number {} {}'.format(redis_client.llen('bihutask'), redis_client.scard('bihuset')))

    def _crawl_page(self, spider, url):
        """Fetch *url* with *spider*, persist the page and enqueue its members.

        Returns:
            The ``next_page`` value of the fetched result (falsy when the
            listing is exhausted).
        """
        spider.tasks = url
        spider.start()
        page = spider.result['data']
        # NOTE(review): if mongo_storage is a pymongo client, Collection.insert
        # is deprecated in favour of insert_one — confirm before upgrading.
        mongo_storage['dev']['bihu'].insert({'name': self.tasks, 'followee': page['data']})
        self.create_task(page['data'])
        return page['next_page']

    def execute(self):
        """Walk the member's followee pages, first page plus follow-ups.

        Stops when a page reports no ``next_page`` or after
        ``_MAX_EXTRA_PAGES`` follow-up pages, whichever comes first.
        """
        spider = BihuSpider()  # one spider per user (task granularity is a user)
        next_page = self._crawl_page(spider, self._FOLLOWEES_URL.format(self.tasks))
        for _ in range(self._MAX_EXTRA_PAGES):
            if not next_page:
                break
            # Throttle only when another request is actually going to be made.
            time.sleep(self._PAGE_DELAY_SECONDS)
            next_page = self._crawl_page(spider, next_page)
56+
57+
if __name__ == '__main__':
    # Manual run: build a single BihuTask and execute it directly.
    bihu_task = BihuTask(tasks='', group='test', source='ptest')
    bihu_task.execute()
0 commit comments