import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup as bs
from fake_useragent import UserAgent
from lxml import etree
def get_html(url):
    """Fetch *url* and return its HTML text, decoded with the apparent encoding.

    On any failure (network error, HTTP error status) returns a Chinese
    error string instead of raising, so callers always get printable text.
    """
    try:
        # 'User-Agent' is the real HTTP header name; the original key
        # 'UserAgent' is not a standard header, so the spoofed UA was ignored.
        headers = {'User-Agent': UserAgent().chrome}
        r = requests.get(url, headers=headers)
        # The original referenced the method without calling it
        # (r.raise_for_status), so 4xx/5xx responses slipped through.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception as e:
        return "爬取失败{}".format(e.args)
def ap_html(html):
    """Extract book-detail hrefs from a Qidian search-result page.

    Returns the list of href attributes on anchors tagged with
    data-algrid="0.0.0" (the result title links); empty list if none.
    """
    return etree.HTML(html).xpath('//div/a[@data-algrid="0.0.0"]/@href')
def contents(url_num):
    """Fetch and return the HTML of the first search result.

    url_num: list of protocol-relative hrefs (e.g. '//book.qidian.com/...')
             as produced by ap_html().
    Raises ValueError when the list is empty — the original died with a
    bare IndexError on url_num[0] when the search found nothing.
    """
    if not url_num:
        raise ValueError("no search results to fetch")
    con_html = get_html("https:{}".format(url_num[0]))
    return con_html
def main():
    """Prompt for a novel title, search Qidian, and print the first hit's page."""
    x = input("请输入搜索的小说名字:")
    # quote() percent-encodes the query for the URL; the original used
    # format(x.encode('utf-8')), which embedded the literal "b'...'" repr
    # of the bytes object into the query string.
    url = "https://www.qidian.com/search?kw={}".format(quote(x))
    html = get_html(url)
    url_num = ap_html(html)
    print(contents(url_num))


# The original nested this guard *inside* main() and then called main()
# unconditionally at import time; the guard belongs at module level.
if __name__ == '__main__':
    main()