Python爬虫爬取王者荣耀英雄人物高清图片
实现效果:
网页分析
从第一个网页中,获取每个英雄头像点击后进入的新网页地址,即a标签的 href 属性值:
划线部分的网址是需要拼接的 在每个英雄的具体网页内,爬取英雄皮肤图片: Tip: 网页编码要去控制台查一下,不要习惯性写 “utf-8”,不然会出现中文乱码。
源码粘贴
"""
@File :getSkins.py
@Author :
@Date :2021/7/22
@Desc :
"""
import codecs
import os
import urllib
import urllib.request  # 'import urllib' alone does not guarantee the 'request' submodule is loaded

import requests
from bs4 import BeautifulSoup
def getSkin(save_dir="D:\\MyWeb\\Heroes\\"):
    """Download the cover skin image of every Honor of Kings hero.

    Scrapes the hero list page, follows each hero's detail page link,
    extracts the background-image URL from the cover <div>'s inline
    style, and saves the image as "<hero name>.png" under *save_dir*.

    Args:
        save_dir: Directory the .png files are written to (created if
            missing). Defaults to the original hard-coded Windows path.

    Returns:
        None. Side effects only (HTTP requests and file writes).
    """
    list_url = "https://pvp.qq.com/web201605/herolist.shtml"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'
    }

    resp = requests.get(list_url, headers=headers, timeout=10)
    # The site serves GBK, not utf-8 — decoding wrong mojibakes hero names.
    resp.encoding = 'GBK'
    soup = BeautifulSoup(resp.text, 'lxml')

    hero_list = soup.find("div", class_="herolist-content")
    if hero_list is None:
        print("hero list container not found; page layout may have changed")
        return

    os.makedirs(save_dir, exist_ok=True)

    for li in hero_list.findAll("li"):
        # Read the relative link straight off the <a> tag instead of
        # string-splitting the raw HTML (the original triple-split was
        # fragile and broke on any attribute-order change).
        anchor = li.find("a")
        if anchor is None or not anchor.get("href"):
            continue
        hero_url = "https://pvp.qq.com/web201605/" + anchor["href"]

        hero_resp = requests.get(hero_url, headers=headers, timeout=10)
        hero_resp.encoding = 'GBK'
        hero_soup = BeautifulSoup(hero_resp.text, 'lxml')

        cover_div = hero_soup.find("div", class_="zk-con1 zk-con")
        name_tag = hero_soup.find("h2", class_="cover-name")
        if cover_div is None or name_tag is None:
            continue  # skip heroes whose detail page deviates from the template

        # Inline style looks like: background:url('//game.gtimg.cn/.../foo.jpg') ...
        style = cover_div.get("style") or ""
        if "//" not in style or "')" not in style:
            continue
        img_url = "http://" + style.split("//", 1)[1].split("')", 1)[0]
        img_name = name_tag.text

        # Download via the already-used requests session style; the original
        # called urllib.request.urlopen without importing urllib.request.
        img_resp = requests.get(img_url, headers=headers, timeout=10)
        with open(os.path.join(save_dir, img_name + ".png"), "wb") as f:
            f.write(img_resp.content)

    print("End======")


if __name__ == "__main__":
    getSkin()