爬虫之pipeline过滤数据样本代码

from scrapy.exceptions import DropItem

class TutorialPipeline(object):
    """Item pipeline that drops items whose description has a forbidden word.

    The check is a case-insensitive substring match of every entry in
    ``words_to_filter`` against the item's ``description`` field.
    """

    # Forbidden words; kept lowercase because the description is lowercased
    # before matching.
    words_to_filter = ['politics', 'religion']

    def process_item(self, item, spider):
        """Return ``item`` unchanged, or drop it if it has a forbidden word.

        Args:
            item: Mapping-like scraped item; must support
                ``item['description']``.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object when no forbidden word is found.

        Raises:
            DropItem: If the description contains any forbidden word.
            KeyError: If the item has no ``description`` entry.
        """
        # Normalise once, outside the loop; str() also covers non-string
        # values (it replaces the Python 2-only ``unicode`` builtin).
        description = str(item['description']).lower()
        for word in self.words_to_filter:
            if word in description:
                raise DropItem("Contains forbidden word: %s" % word)
        return item

Author: bkdwei