爬虫scrapy之settings设置样本

# Name of the bot/project implemented by this Scrapy project.
BOT_NAME = 'cnblog'

# Modules Scrapy searches for spider classes, and the module in which
# `scrapy genspider` creates new spiders.
SPIDER_MODULES = ['cnblog.spiders']
NEWSPIDER_MODULE = 'cnblog.spiders'

# Do not consult robots.txt before issuing requests.
ROBOTSTXT_OBEY = False

# Default headers attached to every outgoing request unless the request
# already sets them. The User-Agent mimics a desktop Chrome browser.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
}

# Item pipelines to enable, mapped to their order value; pipelines with
# lower values run first (values are conventionally in the 0-1000 range).
ITEM_PIPELINES = {
    'cnblog.pipelines.CnblogPipeline': 300,
}

Author: bkdwei