今天我们用爬虫框架 feapder 对小说进行一次简单的爬取。
话不多说,
下面是代码:
import os

import feapder
# Output location used by TaobaoSpider.parse when building chapter file paths.
path = r'D:\爬取文件'
# Lightweight spider built on feapder's AirSpider.
class TaobaoSpider(feapder.AirSpider):
    """Crawl one book's chapter index and save each chapter link to a file.

    For every chapter found on the index page, a UTF-8 text file named after
    the chapter title is written inside the module-level ``path`` directory.
    The file contains the chapter's URL (not the chapter text).
    """

    def start_callback(self):
        """Print a message when the spider starts."""
        print("爬虫开始")

    def end_callback(self):
        """Print a message when the spider finishes."""
        print("爬虫结束")

    def start_requests(self):
        """Yield the single seed request for the chapter index page.

        ``render=True`` asks feapder to load the page through its browser
        renderer before handing the response to :meth:`parse`.
        """
        yield feapder.Request(
            'http://book.zongheng.com/showchapter/1141504.html', render=True
        )

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` made usable as a Windows file name.

        Surrounding whitespace is stripped and each character that Windows
        forbids in file names is replaced with an underscore.
        """
        forbidden = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
        return name.strip().translate(forbidden)

    def parse(self, request, response):
        """Extract chapter titles and links from the index page and save them.

        :param request: the request that produced ``response`` (not used).
        :param response: the downloaded chapter index page.
        :return: None
        """
        # Ignore undecodable characters instead of failing on them.
        response.encoding_errors = 'ignore'

        # Make sure the output directory exists before writing into it.
        os.makedirs(path, exist_ok=True)

        # Each <ul> under the second child div of the volume list holds chapters.
        chapter_lists = response.xpath('//div[@class="volume-list"]/div[2]/ul')
        for chapter_list in chapter_lists:
            titles = chapter_list.xpath('li/a//text()').extract()
            hrefs = chapter_list.xpath('li/a/@href').extract()
            # Distinct loop names so the lists being zipped are not rebound.
            for title, href in zip(titles, hrefs):
                print(title, href)
                # os.path.join puts the file inside ``path``; plain string
                # concatenation would glue the directory name onto the file
                # name with no separator.
                file_path = os.path.join(
                    path, f'{self._safe_filename(title)}.txt'
                )
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(href)
                print(f'{title} 下载完成')
# Script entry point: run the spider with ten worker threads.
if __name__ == '__main__':
    spider = TaobaoSpider(thread_count=10)
    spider.start()
OK!到此结束。
|