作为一名刚入门Python的新手,今天我来给大家安利一个爬取王者荣耀官方网站皮肤壁纸的小项目,请大家多多关照!
一、登录官网
首先我们登录王者荣耀官网,在菜单栏点击进入皮肤壁纸高清图可以看到以下画面:
按F12键打开开发者工具查看网页元素,我们可以看到网页对应的URL地址:
所以接下来我们初始化保存路径和接口URL,并定义一个请求函数,此函数用来请求给定的URL(例如某一页的列表地址)并返回响应:
#初始化路径
def __init__(self, save_path='./英雄'):
    '''
    Set up the download directory and the listing URL template.

    :param save_path: root directory under which the images are stored
    '''
    self.save_path = save_path
    # time.time() rendered as text and split at the decimal point:
    # [whole seconds, fraction].
    self.time = str(time.time()).split('.')
    # ``page={}`` is left as a str.format placeholder that is filled in for
    # every request; ``%s`` is filled in once, here, with the whole-second
    # timestamp.
    self.url = (
        'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi'
        '?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0'
        '&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true'
        '&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
    ) % self.time[0]
def request(self, url):
    '''
    Send a GET request through the module-level ``session``.

    :param url: the url of request
    :return: the response object; its status code is always 200
    :raises requests.HTTPError: when the server answers with any other
        status code
    '''
    # Give up after 10 seconds instead of waiting forever on a stalled server.
    response = session.get(url, timeout=10)
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let error responses through.
    if response.status_code != 200:
        raise requests.HTTPError(
            'unexpected status {} for {}'.format(response.status_code, url),
            response=response,
        )
    return response
二、选择分辨率
由于一共有多页,每页大概有二十张皮肤壁纸,由上面的图片我们可以看到每张皮肤壁纸下面又有8种分辨率,因此,为了能够自行选择想要下载的分辨率,我们在接口返回的数据中找到相应的字段,如下:
sProdImgNo_1: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154186%5F1265602313%5F35509%5FsProdImgNo%5F1%2Ejpg%2F200"
sProdImgNo_2: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154186%5F1265602313%5F35509%5FsProdImgNo%5F2%2Ejpg%2F200"
sProdImgNo_3: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154187%5F1265602313%5F35509%5FsProdImgNo%5F3%2Ejpg%2F200"
sProdImgNo_4: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154187%5F1265602313%5F35509%5FsProdImgNo%5F4%2Ejpg%2F200"
sProdImgNo_5: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154187%5F1265602313%5F35509%5FsProdImgNo%5F5%2Ejpg%2F200"
sProdImgNo_6: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154187%5F1265602313%5F35509%5FsProdImgNo%5F6%2Ejpg%2F200"
sProdImgNo_7: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154187%5F1265602313%5F35509%5FsProdImgNo%5F7%2Ejpg%2F200"
sProdImgNo_8: "https%3A%2F%2Fshp%2Eqpic%2Ecn%2Fishow%2F2735122214%2F1640154188%5F1265602313%5F35509%5FsProdImgNo%5F8%2Ejpg%2F200"
sProdName: "%E9%95%9C%2D%E5%8C%BF%E5%85%89%E8%BF%BD%E5%BD%B1%E8%80%85%E5%A3%81%E7%BA%B8"
以上字段就是各个分辨率图片地址的百分号编码(URL编码)形式,为了将图片地址和相应的英雄名称解码,我们从urllib库引入parse模块,并实现按分辨率选择下载的功能:
import os
import requests
import json
import re
from retrying import retry
from urllib import parse
def run(self):
'''
The program entry (excerpt; the article cuts it off inside the download loop).

Prompts for a resolution index, reads ``iTotalPages`` from the first
listing page, asks whether every skin should be downloaded and, for the
answer "是", walks the listing pages entry by entry.
'''
print('↓' * 20 + ' 格式选择: ' + '↓' * 20)
print("\n")
# Menu of the eight choices; the number typed below is substituted into the 'sProdImgNo_{}' key
print('1.缩略图 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
print("\n")
# NOTE(review): the answer is used as typed; nothing here checks that it is 1-8
size = input('请输入想要下载的图片分辨率序号:')
print("\n")
print('开始下载英雄皮肤...')
print("\n")
page = 0
offset = 0#start downloading at page 0
# Fetch listing page `page` through self.request and keep the response text
total_response = self.request(self.url.format(page)).text
# Parse the JSON text
total_res = json.loads(total_response)
# NOTE(review): the two unary minus signs cancel out, so this is just int(...)
total_page = --int(total_res['iTotalPages'])
a=input("请问是否下载全皮肤?:")
print("\n")
if str(a) == "是":
print('--总共 {} 页...' . format(total_page))
while True:
# NOTE(review): pages 0..total_page inclusive are requested - confirm whether the endpoint's page index is 0-based
if offset > total_page:
break
url = self.url.format(offset)
response = self.request(url).text
result = json.loads(response)
# 1-based position of the current entry within this page, used in the progress line
now = 0
for item in result["List"]:
now += 1
hero_name = parse.unquote(item['sProdName']).split('-')[0]#percent-decode the name and keep the text before the first '-'
hero_name = re.sub(r'[【】:.<>|·@#$%^&() ]', '', hero_name)#remove the punctuation and space characters listed in the pattern
print('---正在下载第 {} 页 {} 英雄皮肤 进度{}/{}...' . format(offset, hero_name, now, len(result["List"])))
hero_url = parse.unquote(item['sProdImgNo_{}'.format(str(size))])#percent-decode the image URL stored under the chosen resolution index
以上是选择分辨率后下载每一页所有皮肤的代码。接下来,如果我们要下载指定英雄的对应皮肤,就要对英雄名称进行判断,代码段如下:
# Only handle entries whose cleaned hero name equals the name the user typed (b)
if hero_name == str(b):
print('---正在下载{}的皮肤...' . format(str(b)))#announce which hero's skin is being downloaded
hero_url = parse.unquote(item['sProdImgNo_{}'.format(str(size))])#percent-decode the image URL stored under the chosen resolution index
三、创建路径:
我们以对应的英雄名作为文件夹名称创建保存路径,并把下载的图片写入其中:
# Create the target directory when it is missing, and download only when the target file does not exist yet
if not os.path.exists(save_path):
os.makedirs(save_path)
if not os.path.exists(save_name):
# NOTE(review): the file is opened before the request is sent, so a failed request leaves an empty file behind
with open(save_name, 'wb') as f:
# Every "/200" in the listed URL is rewritten to "/0" before the request
# NOTE(review): presumably "/0" selects the full-size image - confirm against the image host
response_content = self.request(hero_url.replace("/200", "/0")).content
f.write(response_content)
四、运行效果:
全部代码:
import time
import datetime
import os
import requests
import json
import re
from retrying import retry
from urllib import parse
from bs4 import BeautifulSoup
# One module-level requests session; HonorOfKings.request sends every HTTP call through it
session=requests.session()
#定义王者荣耀类
class HonorOfKings:
    '''
    Interactive downloader for Honor of Kings hero-skin wallpapers.

    The listing endpoint stored in ``self.url`` is fetched page by page.
    Every listing entry carries a percent-encoded product name
    (``sProdName``) and percent-encoded image URLs keyed
    ``sProdImgNo_<n>``, where ``<n>`` is the resolution index chosen by the
    user. Images are stored as ``<save_path>/<hero name>/<file name>``.
    '''

    # Characters removed from a decoded hero name before it is used as a
    # directory name.
    _NAME_NOISE = re.compile(r'[【】:.<>|·@#$%^&() ]')

    # Answers accepted by the resolution prompt (see the menu in ``run``).
    _SIZE_CHOICES = ('1', '2', '3', '4', '5', '6', '7', '8')

    # Trailing size segment of a listed image URL and the segment that is
    # requested instead.
    _LISTED_SUFFIX = '/200'
    _DOWNLOAD_SUFFIX = '/0'

    def __init__(self, save_path='./英雄'):
        '''
        Set up the download directory and the listing URL template.

        :param save_path: root directory under which the images are stored
        '''
        self.save_path = save_path
        # time.time() rendered as text and split at the decimal point:
        # [whole seconds, fraction].
        self.time = str(time.time()).split('.')
        # ``page={}`` is left as a str.format placeholder that is filled in
        # for every request; ``%s`` is filled in once, here, with the
        # whole-second timestamp.
        self.url = (
            'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi'
            '?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0'
            '&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true'
            '&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
        ) % self.time[0]

    def request(self, url):
        '''
        Send a GET request through the module-level ``session``.

        :param url: the url of request
        :return: the response object; its status code is always 200
        :raises requests.HTTPError: when the server answers with any other
            status code
        '''
        # Give up after 10 seconds instead of waiting forever.
        response = session.get(url, timeout=10)
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let error responses through.
        if response.status_code != 200:
            raise requests.HTTPError(
                'unexpected status {} for {}'.format(response.status_code, url),
                response=response,
            )
        return response

    def hello(self):
        '''
        Print the welcome banner.

        :return: self, so that calls can be chained
        '''
        print("*" * 50)
        print(' ' * 18 + '王者荣耀壁纸下载')
        print(' ' * 5 + '作者: 廖思轲 Date: {}'.format(datetime.datetime.now()))
        print("*" * 50)
        return self

    def run(self):
        '''
        The program entry.

        Asks for a resolution index, reads the page count from the first
        listing page, then downloads either every skin or only the skins of
        one hero, depending on what the user answers.
        '''
        print('↓' * 20 + ' 格式选择: ' + '↓' * 20)
        print("\n")
        print('1.缩略图 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        print("\n")
        size = input('请输入想要下载的图片分辨率序号:').strip()
        print("\n")
        if size not in self._SIZE_CHOICES:
            # Reject a bad index up front instead of failing with a KeyError
            # in the middle of the download loop.
            print('无效的分辨率序号: {} (请输入 1-8)'.format(size))
            return
        print('开始下载英雄皮肤...')
        print("\n")
        first_page = json.loads(self.request(self.url.format(0)).text)
        total_page = int(first_page['iTotalPages'])
        answer = input("请问是否下载全皮肤?:").strip()
        print("\n")
        if answer == "是":
            print('--总共 {} 页...'.format(total_page))
            self._download_pages(size, total_page)
            print('--下载完成...')
        elif answer == "否":
            wanted = input("请输入你想下载的英雄名:").strip()
            print("\n")
            self._download_pages(size, total_page, wanted)
            print("下载完成...")
        else:
            print("Request with Error!")

    def _download_pages(self, size, total_page, wanted=None):
        '''
        Walk the listing pages and save the image of every matching entry.

        :param size: resolution index as typed by the user
        :param total_page: page count reported by the endpoint
        :param wanted: hero name to restrict the download to, or None to
            download every entry
        '''
        image_key = 'sProdImgNo_{}'.format(size)
        # Pages 0..total_page inclusive, exactly as before.
        # NOTE(review): if the endpoint's page index is 0-based the last
        # request is one past the end; a page without entries is simply
        # skipped below, so the extra request is harmless.
        for offset in range(total_page + 1):
            listing = json.loads(self.request(self.url.format(offset)).text)
            entries = listing.get("List") or []
            for now, item in enumerate(entries, 1):
                hero_name = self._hero_name(item['sProdName'])
                if wanted is None:
                    print('---正在下载第 {} 页 {} 英雄皮肤 进度{}/{}...'.format(
                        offset, hero_name, now, len(entries)))
                elif hero_name == wanted:
                    print('---正在下载{}的皮肤...'.format(wanted))
                else:
                    continue
                self._save_image(hero_name, parse.unquote(item[image_key]))

    @classmethod
    def _hero_name(cls, encoded_name):
        '''Percent-decode a product name and return its cleaned hero part (the text before the first '-').'''
        hero_part = parse.unquote(encoded_name).split('-')[0]
        return cls._NAME_NOISE.sub('', hero_part)

    @classmethod
    def _full_size_url(cls, hero_url):
        '''Swap the trailing "/200" segment of a listed image URL for "/0"; other URLs are returned unchanged.'''
        # Only the suffix is touched, so a "/200" that happens to occur
        # earlier in the path is left alone.
        if hero_url.endswith(cls._LISTED_SUFFIX):
            return hero_url[:-len(cls._LISTED_SUFFIX)] + cls._DOWNLOAD_SUFFIX
        return hero_url

    def _save_image(self, hero_name, hero_url):
        '''Download one image into ``<save_path>/<hero_name>/`` unless a non-empty copy is already there.'''
        save_path = os.path.join(self.save_path, hero_name)
        # The file name is the second-to-last path segment of the listed URL
        # (the last segment is the size suffix).
        save_name = os.path.join(save_path, hero_url.split('/')[-2])
        os.makedirs(save_path, exist_ok=True)
        # A zero-byte file is treated as missing so that leftovers of an
        # earlier failed run get downloaded again.
        if os.path.exists(save_name) and os.path.getsize(save_name) > 0:
            return
        # Download first and write afterwards: a failed request must not
        # leave an empty file that later runs would take for a finished one.
        response_content = self.request(self._full_size_url(hero_url)).content
        partial_name = save_name + '.part'
        with open(partial_name, 'wb') as f:
            f.write(response_content)
        os.replace(partial_name, save_name)
if __name__ == "__main__":
    # Show the banner, then start the interactive download session.
    HonorOfKings().hello().run()
|