aiohttp模块
参考aiohttp库简单教程 - 简书
什么是aiohttp
aiohttp是一个基于asyncio的异步库,为Python提供异步HTTP客户端/服务端编程能力。asyncio可以实现单线程并发IO操作,并实现了TCP、UDP、SSL等协议;aiohttp就是基于asyncio实现的HTTP框架。
安装
pip3 install aiohttp
使用
在网络请求中,一个请求就是一个会话,然后aiohttp使用的是ClientSession来管理会话, 客户端会话(ClientSession)支持使用上下文管理器在结束时自动关闭。
import aiohttp
import asyncio
async def main():
    """Fetch one fixed gallery page and print its decoded HTML.

    A ClientSession is opened for the single GET request; the session and
    the response are both used as async context managers.
    """
    page_url = "https://www.3gbizhi.com/meinv/xgmn_2.html"
    async with aiohttp.ClientSession() as client, client.get(page_url) as reply:
        html = await reply.text()
        print(html)
# Start an event loop and run main() until it completes.
asyncio.run(main())
读取响应内容
# 读取文本内容
await resp.text()
# 读取非文本内容
await resp.read()
爬美女图片
目标网站:3G壁纸
代码讲解
通过创建三个ClientSession来分别请求不同的内容
首先获取每个页面里每个图片的url
async def main(url):
    """Crawl one listing page and process every picture linked from it.

    Args:
        url: Address of the listing page.

    Raises:
        IndexError: If a list item has no ``a`` child carrying an ``href``.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        doc = etree.HTML(await reply.text())
        for item in doc.xpath("/html/body/div[4]/ul/li"):
            hrefs = item.xpath("./a/@href")
            # Detail pages are awaited one at a time, in document order.
            await get_iamge_url(hrefs[0])
然后获取图片
async def get_iamge_url(url):
    """Fetch a picture detail page and download the image it shows.

    The element with id ``showpicnow`` supplies the image address (``src``)
    and its title (``alt``). The target file is ``<title>.jpg`` under the
    module-level ``desktop`` directory.

    Args:
        url: Address of the detail page.

    Raises:
        IndexError: If the page yields no ``src`` or ``alt`` value.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        doc = etree.HTML(await reply.text())
        src_values = doc.xpath("//*[@id='showpicnow']/@src")
        picture_src = src_values[0]
        alt_values = doc.xpath("//*[@id='showpicnow']/@alt")
        title = alt_values[0]
        target = desktop + "//" + title + ".jpg"
        await download(picture_src, target, title)
最后写入文件
async def download(url, path, name):
    """Download ``url`` and write the raw response body to ``path``.

    Args:
        url: Address of the file to fetch.
        path: Destination file path, opened in binary write mode.
        name: Label printed in the completion message.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        async with aiofiles.open(path, mode="wb") as out_file:
            # The body is read only after the output file has been opened.
            payload = await reply.read()
            await out_file.write(payload)
            print(name + " 下载完成")
创建任务列表,每个任务分别获取不同的页面
async def multiple_main():
    """Crawl listing pages 1 through 14 concurrently.

    One ``main`` task is created per listing page and all of them are
    awaited together. ``asyncio.wait`` does not re-raise exceptions from
    the tasks, so a failing page does not interrupt the other pages.
    """
    # asyncio.wait() no longer accepts bare coroutine objects (deprecated in
    # Python 3.8, TypeError since 3.11), so wrap each coroutine in a Task.
    tasks = [
        asyncio.create_task(main(f"https://www.3gbizhi.com/meinv/xgmn_{i}.html"))
        for i in range(1, 15)
    ]
    await asyncio.wait(tasks)
if __name__ == '__main__':
    # Script entry point: run the whole crawl inside a fresh event loop.
    asyncio.run(multiple_main())
效果展示
完整代码
import asyncio
import winreg
import aiofiles
import aiohttp
from lxml import etree
# Look up the current user's Desktop directory in the Windows registry.
# The key handle is used as a context manager so it is closed after the read.
with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders') as key:
    desktop = winreg.QueryValueEx(key, "Desktop")[0]
async def download(url, path, name):
    """Download ``url`` and write the raw response body to ``path``.

    Args:
        url: Address of the file to fetch.
        path: Destination file path, opened in binary write mode.
        name: Label printed in the completion message.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        async with aiofiles.open(path, mode="wb") as out_file:
            # The body is read only after the output file has been opened.
            payload = await reply.read()
            await out_file.write(payload)
            print(name + " 下载完成")
async def get_iamge_url(url):
    """Fetch a picture detail page and download the image it shows.

    The element with id ``showpicnow`` supplies the image address (``src``)
    and its title (``alt``). The target file is ``<title>.jpg`` under the
    module-level ``desktop`` directory.

    Args:
        url: Address of the detail page.

    Raises:
        IndexError: If the page yields no ``src`` or ``alt`` value.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        doc = etree.HTML(await reply.text())
        src_values = doc.xpath("//*[@id='showpicnow']/@src")
        picture_src = src_values[0]
        alt_values = doc.xpath("//*[@id='showpicnow']/@alt")
        title = alt_values[0]
        target = desktop + "//" + title + ".jpg"
        await download(picture_src, target, title)
async def main(url):
    """Crawl one listing page and process every picture linked from it.

    Args:
        url: Address of the listing page.

    Raises:
        IndexError: If a list item has no ``a`` child carrying an ``href``.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as reply:
        doc = etree.HTML(await reply.text())
        for item in doc.xpath("/html/body/div[4]/ul/li"):
            hrefs = item.xpath("./a/@href")
            # Detail pages are awaited one at a time, in document order.
            await get_iamge_url(hrefs[0])
async def multiple_main():
    """Crawl listing pages 1 through 14 concurrently.

    One ``main`` task is created per listing page and all of them are
    awaited together. ``asyncio.wait`` does not re-raise exceptions from
    the tasks, so a failing page does not interrupt the other pages.
    """
    # asyncio.wait() no longer accepts bare coroutine objects (deprecated in
    # Python 3.8, TypeError since 3.11), so wrap each coroutine in a Task.
    tasks = [
        asyncio.create_task(main(f"https://www.3gbizhi.com/meinv/xgmn_{i}.html"))
        for i in range(1, 15)
    ]
    await asyncio.wait(tasks)
if __name__ == '__main__':
    # Script entry point: run the whole crawl inside a fresh event loop.
    asyncio.run(multiple_main())
|