以下分别用线程池、进程池、协程三种方法演示了异步请求的方式。
套路就是如此，剩下的就是和自己的代码匹配了。
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import asyncio
import httpx
# Listing URLs for pages 1 through 49 of the free-photos AJAX endpoint.
urls = [
    f'https://www.1001freedownloads.com/free-photos/?page={page}&ajax=1'
    for page in range(1, 50)
]
def get_req(url):
    """Send a blocking GET request to *url* and return the response.

    Args:
        url: The URL to fetch.

    Returns:
        The response object returned by ``requests.get``, or ``None`` when
        the request raises a ``requests.RequestException``.
    """
    # NOTE(review): `requests` and the module-level `headers` are not defined
    # in the visible part of this file -- confirm they exist elsewhere.
    try:
        # verify=False turns off TLS certificate verification for this call.
        return requests.get(url, headers=headers, verify=False)
    except requests.RequestException:
        # Best effort: a failed request is reported to the caller as None.
        # Only request failures are absorbed; KeyboardInterrupt, SystemExit
        # and programming errors (e.g. NameError) are no longer hidden.
        return None
# Thread pool
def get_thread():
    """Fetch every URL in ``urls`` through a five-worker thread pool.

    Each URL is handed to ``get_req``; ``do()`` is called once for every
    response whose status code is 200. Returns ``None``.
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        # executor.map returns a lazy iterator (not a list) that yields the
        # results in the same order as ``urls``.
        responses = executor.map(get_req, urls)
        for response in responses:
            # get_req returns None on failure, otherwise a single response
            # object (not a sequence), so status_code is read directly.
            if response is not None and response.status_code == 200:
                # NOTE(review): `do` is not defined in the visible source and
                # is called without the response -- presumably a placeholder
                # for the per-response handling.
                do()
# Process pool
def get_process():
    """Fetch every URL in ``urls`` through a five-worker process pool.

    Each URL is handed to ``get_req``; ``do()`` is called once for every
    response whose status code is 200. Returns ``None``.
    """
    with ProcessPoolExecutor(max_workers=5) as executor:
        # executor.map returns a lazy iterator that yields the results in
        # the same order as ``urls``.
        responses = executor.map(get_req, urls)
        for response in responses:
            # get_req returns None on failure, otherwise a single response
            # object (not a sequence), so status_code is read directly.
            if response is not None and response.status_code == 200:
                # NOTE(review): `do` is not defined in the visible source and
                # is called without the response -- presumably a placeholder
                # for the per-response handling.
                do()
# Coroutine (asyncio + httpx) variant starts here.
# Timeout settings handed to the httpx client created in get_html:
# a general limit of 10 with the connect limit also set to 10.
timeout = httpx.Timeout(timeout=10, connect=10)
async def get_html(url):
    """Fetch *url* with a fresh ``httpx.AsyncClient`` and return the response.

    The client is created for this single call, configured with a browser
    User-Agent header, TLS verification disabled, and the module-level
    ``timeout`` settings; it is closed when the ``async with`` block exits.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36',
    }
    session = httpx.AsyncClient(
        headers=request_headers,
        verify=False,
        timeout=timeout,
    )
    async with session:
        return await session.get(url)
async def main(tasks):
    """Await all *tasks* concurrently and return their results.

    Args:
        tasks: Awaitables to run together via ``asyncio.gather``.

    Returns:
        A list with one result per task, in the same order as *tasks*.
        If ``asyncio.TimeoutError`` is raised while awaiting them, a message
        is printed and an empty list is returned.
    """
    try:
        # return_exceptions=False: the first exception raised by any task
        # propagates out of gather instead of being collected as a result.
        return await asyncio.gather(*tasks, return_exceptions=False)
    except asyncio.TimeoutError:
        print('超时')
        # Return an empty list instead of falling through to an implicit
        # None, so callers can always iterate over the result.
        # NOTE(review): only asyncio.TimeoutError is handled here; confirm
        # whether the HTTP client's own timeout exceptions should be too.
        return []
def run():
    """Fetch every URL in ``urls`` with coroutines and print each result.

    Builds one ``get_html`` coroutine per URL, runs them all through
    ``main`` on a new event loop, and prints every returned item.
    Returns ``None``.
    """
    coroutines = [get_html(url) for url in urls]
    # Guard against a None result so the loop below never tries to iterate
    # over None (which would raise TypeError).
    results = asyncio.run(main(coroutines)) or []
    for result in results:
        print(result)
|