图片路由: https://wallhaven.cc/random
1. 首先对图片页面进行分析,
2. 进行判断分页情况下的路由拼接
3. 可以添加 headers、Cookie 等请求头信息,用来应对网站的反爬机制
4.用xpath找到第一层小图片节点,
5.进行获取路由的判断和异常捕获
6.再次进行请求解析路由,获取当前路由下大图片的图片链接。
7. 将获取到的图片链接下载并保存到本地
8.代码编写如下:
from lxml import etree
import requests
# Download wallpapers from the first 10 pages of a seeded wallhaven.cc
# "random" listing. For each thumbnail on a listing page, the script follows
# the link to the wallpaper's detail page, extracts the full-size image URL
# and saves the image into the current directory as "壁纸大图<n>.jpg".

# Running count of successfully saved images; also used to number the files.
n = 0

# Request headers are the same for every request, so build them once.
# A browser-like User-Agent is sent so the site does not reject the request.
# NOTE(review): the original also sent a hard-coded "Cookie" header that held
# a logged-in session (including a login phone number) for an unrelated site
# (zongheng.com). It is meaningless to wallhaven.cc and leaked credentials to
# a third party, so it has been removed; that session should be considered
# compromised and invalidated.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
}

# Seconds to wait for any single HTTP request before giving up, so one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

for page in range(1, 11):
    url = f'https://wallhaven.cc/random?seed=zNGmWy&page={page}'

    # Fetch and parse the listing page; a failure here skips only this page.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        root = etree.HTML(response.content.decode())
        li_node_list = root.xpath('//section[@class="thumb-listing-page"]/ul/li')
    except Exception as e:
        print("当前出现异常,错误为:", e)
        continue

    # Report how many thumbnails were found on this page.
    print(len(li_node_list))

    for li_node in li_node_list:
        # Link to the detail page that hosts the full-size image. Some list
        # items may not contain one; skip those instead of crashing.
        detail_links = li_node.xpath('.//figure/a/@href')
        if not detail_links or not detail_links[0]:
            continue
        url2 = detail_links[0]

        # Best-effort per image: report the error and move on to the next.
        try:
            response2 = requests.get(url2, headers=headers, timeout=REQUEST_TIMEOUT)
            response2.raise_for_status()
            root2 = etree.HTML(response2.content.decode())
            # URL of the full-size image on the detail page.
            src_node = root2.xpath('.//section[@id="showcase"]//img/@src')[0]

            # Download BEFORE opening the output file so that a failed
            # download does not leave an empty .jpg behind.
            response3 = requests.get(src_node, headers=headers, timeout=REQUEST_TIMEOUT)
            response3.raise_for_status()
            photo = response3.content

            # Only consume a file number once the image bytes are in hand.
            n += 1
            with open(f"./壁纸大图{n}.jpg", 'wb') as f:
                f.write(photo)
            print(src_node, f"./壁纸大图{n}.jpg 下载成功")
        except Exception as e:
            print("当前出现异常,错误为:", e)
            continue
|