77Copyright (c) 2016-2017 [email protected] (http://twi1ight.com/)
88See the file 'doc/COPYING' for copying permission
99"""
10+ import os
1011import sys
1112import argparse
12- from multiprocessing import Process
13+ from multiprocessing import Process , Value , Lock , Event
1314
1415from core .utils .log import logger
1516from core .utils .redis_utils import RedisUtils
1617from core .utils .url import URL
1718from core .worker .consumer import Consumer
1819from core .worker .producer import Producer
19- from settings import VERSION , RedisConf , MongoConf
20+ from settings import VERSION , RedisConf , MongoConf , TMPDIR_PATH
2021
2122
2223def cmdparse ():
@@ -52,25 +53,29 @@ def cmdparse():
5253
5354if __name__ == '__main__' :
5455 args = cmdparse ()
55- producer_pool = []
56- consumer_pool = []
5756 redis_handle = RedisUtils (db = args .redis_db )
5857 if args .keepon :
5958 redis_handle .restore_startup_params (args )
6059 logger .info (args )
6160
61+ for f in os .listdir (TMPDIR_PATH ):
62+ os .remove (os .path .join (TMPDIR_PATH , f ))
63+ tspider_context = {}
64+ tspider_context ['live_spider_counts' ] = Value ('i' , 0 )
65+ tspider_context ['task_done' ] = Event ()
66+ tspider_context ['lock' ] = Lock ()
6267 kwargs = {'tld' : args .tld , 'cookie_file' : args .cookie_file ,
6368 'redis_db' : args .redis_db , 'mongo_db' : args .mongo_db }
6469 for _ in range (args .consumer ):
6570 worker = Consumer (** kwargs ).consume
66- proc = Process (name = 'consumer-%d' % _ , target = worker )
71+ proc = Process (name = 'consumer-%d' % _ , target = worker , args = (tspider_context ,))
72+ proc .daemon = True
6773 proc .start ()
68- consumer_pool .append (proc )
6974 for _ in range (args .producer ):
7075 worker = Producer (** kwargs ).produce
71- proc = Process (name = 'producer-%d' % _ , target = worker )
76+ proc = Process (name = 'producer-%d' % _ , target = worker , args = (tspider_context ,))
77+ proc .daemon = True
7278 proc .start ()
73- producer_pool .append (proc )
7479
7580 if not args .keepon :
7681 redis_handle .flushdb ()
@@ -88,5 +93,4 @@ def cmdparse():
8893 producer .create_task_from_file (target )
8994
9095 redis_handle .close ()
91- map (lambda x : x .join (), consumer_pool )
92- map (lambda x : x .join (), producer_pool )
96+ tspider_context ['task_done' ].wait ()
0 commit comments