# Crawler: batch-fetch joke posts (duanzi) from ishuo.cn
import requests
from lxml import etree
from urllib.parse import urljoin
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/52.0.2743.116 Safari/537.36',
    'Accept-Language': 'zh-CN,zh;q=0.8',
}
# Fetch the listing page and collect the links to the individual joke pages
url = 'https://ishuo.cn/special/qiushibaike'
response = requests.get(url, headers=headers).text
html = etree.HTML(response)
result = html.xpath('//div[@class="content"]/ul/li/a/@href')

for site in result:
    # Build an absolute URL for each detail page and fetch it
    xurl = urljoin('https://ishuo.cn/', site)
    response2 = requests.get(xurl, headers=headers).text
    html2 = etree.HTML(response2)
    # Each joke on a detail page sits in its own div with class "content"
    result2 = html2.xpath("//div[@class='content']")
    for node in result2:
        print(node.text)
        print('-------------------')
# Example of a detail-page URL collected above: https://ishuo.cn/subject/169
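
# A more defensive extraction helper (a sketch, assuming the joke text may be
# nested inside child tags of the content div): lxml's .text only returns the
# text that precedes the first child element, whereas itertext() walks the
# whole subtree in document order and picks up nested text as well.
def extract_text(node):
    return ''.join(node.itertext()).strip()

# In the inner loop above, print(extract_text(node)) could replace print(node.text).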