|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +# Scrapy settings for textdata project |
| 4 | +# |
| 5 | +# For simplicity, this file contains only settings considered important or |
| 6 | +# commonly used. You can find more settings by consulting the documentation: |
| 7 | +# |
| 8 | +# http://doc.scrapy.org/en/latest/topics/settings.html |
| 9 | +# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html |
| 10 | +# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html |
| 11 | + |
| 12 | +BOT_NAME = 'textdata' |
| 13 | + |
| 14 | +SPIDER_MODULES = ['textdata.spiders'] |
| 15 | +NEWSPIDER_MODULE = 'textdata.spiders' |
| 16 | + |
| 17 | + |
| 18 | +# Crawl responsibly by identifying yourself (and your website) in the User-Agent header |
| 19 | +#USER_AGENT = 'textdata (+http://www.yourdomain.com)' |
| 20 | + |
| 21 | +# Obey robots.txt rules |
| 22 | +ROBOTSTXT_OBEY = True |
| 23 | + |
| 24 | +# Configure maximum concurrent requests performed by Scrapy (default: 16) |
| 25 | +#CONCURRENT_REQUESTS = 32 |
| 26 | + |
| 27 | +# Configure a delay between consecutive requests to the same website (default: 0) |
| 28 | +# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay |
| 29 | +# See also autothrottle settings and docs |
| 30 | +#DOWNLOAD_DELAY = 3 |
| 31 | +# The download delay setting will honor only one of the following: |
| 32 | +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 |
| 33 | +#CONCURRENT_REQUESTS_PER_IP = 16 |
| 34 | + |
| 35 | +# Disable cookies (enabled by default) |
| 36 | +#COOKIES_ENABLED = False |
| 37 | + |
| 38 | +# Disable Telnet Console (enabled by default) |
| 39 | +#TELNETCONSOLE_ENABLED = False |
| 40 | + |
| 41 | +# Override the default request headers: |
| 42 | +#DEFAULT_REQUEST_HEADERS = { |
| 43 | +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', |
| 44 | +# 'Accept-Language': 'en', |
| 45 | +#} |
| 46 | + |
| 47 | +# Enable or disable spider middlewares |
| 48 | +# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html |
| 49 | +#SPIDER_MIDDLEWARES = { |
| 50 | +# 'textdata.middlewares.TextdataSpiderMiddleware': 543, |
| 51 | +#} |
| 52 | + |
| 53 | +# Enable or disable downloader middlewares |
| 54 | +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html |
| 55 | +#DOWNLOADER_MIDDLEWARES = { |
| 56 | +# 'textdata.middlewares.MyCustomDownloaderMiddleware': 543, |
| 57 | +#} |
| 58 | + |
| 59 | +# Enable or disable extensions |
| 60 | +# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html |
| 61 | +#EXTENSIONS = { |
| 62 | +# 'scrapy.extensions.telnet.TelnetConsole': None, |
| 63 | +#} |
| 64 | + |
| 65 | +# Configure item pipelines |
| 66 | +# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html |
| 67 | +#ITEM_PIPELINES = { |
| 68 | +# 'textdata.pipelines.TextdataPipeline': 300, |
| 69 | +#} |
| 70 | + |
| 71 | +# Enable and configure the AutoThrottle extension (disabled by default) |
| 72 | +# See http://doc.scrapy.org/en/latest/topics/autothrottle.html |
| 73 | +#AUTOTHROTTLE_ENABLED = True |
| 74 | +# The initial download delay |
| 75 | +#AUTOTHROTTLE_START_DELAY = 5 |
| 76 | +# The maximum download delay to be set in case of high latencies |
| 77 | +#AUTOTHROTTLE_MAX_DELAY = 60 |
| 78 | +# The average number of requests Scrapy should be sending in parallel to |
| 79 | +# each remote server |
| 80 | +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 |
| 81 | +# Enable showing throttling stats for every response received: |
| 82 | +#AUTOTHROTTLE_DEBUG = False |
| 83 | + |
| 84 | +# Enable and configure HTTP caching (disabled by default) |
| 85 | +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings |
| 86 | +#HTTPCACHE_ENABLED = True |
| 87 | +#HTTPCACHE_EXPIRATION_SECS = 0 |
| 88 | +#HTTPCACHE_DIR = 'httpcache' |
| 89 | +#HTTPCACHE_IGNORE_HTTP_CODES = [] |
| 90 | +#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' |
0 commit comments