- Suppose we search Weibo for the keyword: emo表情包
- Chinese characters in the query must be percent-encoded with urllib.parse.quote (see the short sketch after this list)
- The downloaded images are saved to a local folder of your choosing
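To see what the encoding step does, here is a minimal sketch: parse.quote leaves the ASCII letters alone and turns each Chinese character into the percent-encoded form of its UTF-8 bytes.

```python
from urllib import parse

searchKey = 'emo表情包'
print(parse.quote(searchKey))
# emo%E8%A1%A8%E6%83%85%E5%8C%85
print('http://s.weibo.com/weibo?q=' + parse.quote(searchKey))
```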
The local run log: (screenshot omitted)
The files generated locally: (screenshot omitted)
The code is as follows:
```python
from selenium import webdriver
from urllib import parse
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import time
import re
import os

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
    'Connection': 'close'
}

# Silence the InsecureRequestWarning raised by verify=False below.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
requests.adapters.DEFAULT_RETRIES = 5
s = requests.Session()
s.keep_alive = False  # note: requests ignores this attribute; the 'Connection: close' header does the work


def makedir(filePath):
    # Create the target folder on first run; reuse it afterwards.
    if not os.path.exists(filePath):
        os.makedirs(filePath)
        os.chdir(filePath)
        print('Folder <' + filePath + '> created!')
    else:
        print('Folder already exists!')


start_time = time.time()
searchKey = 'emo表情包'
filePath = r'F:\python\pic' + '\\' + searchKey  # adjust to a folder on your machine
makedir(filePath)
# Chinese characters in the query string must be percent-encoded.
url = 'http://s.weibo.com/weibo?q=' + parse.quote(searchKey)
print('URL to process for keyword [' + searchKey + ']: ' + str(url))


def getDownloadUrl():
    # Render the search page in Chrome so the dynamically loaded images appear.
    options = webdriver.ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    web = webdriver.Chrome(options=options)
    web.get(url)
    time.sleep(6)  # give the page time to finish loading
    wb_resp = web.page_source
    web.close()
    # Collect the src attribute of every <img> tag in the rendered HTML.
    return re.findall('img src="(?P<src>.*?)"', wb_resp)


if __name__ == '__main__':
    download_link = getDownloadUrl()
    print('Images to download: ' + str(len(download_link)))
    if len(download_link) > 0:
        for link in download_link:
            try:
                # Weibo image links are protocol-relative (//wx1.sinaimg.cn/...),
                # so prepend a scheme before requesting them.
                if str(link).find('http') < 0:
                    link = 'http:' + link
                image_resp = s.get(link, headers=header, verify=False)
                # File name = last path segment, with any query string stripped.
                image_name = link.split('?')[0].split('/')[-1]
                with open(filePath + '\\' + image_name, mode='wb') as f:
                    f.write(image_resp.content)
                image_resp.close()
            except Exception as e:
                print(e)
    print('All images downloaded, total time: ' + str(time.time() - start_time))
```
The script above only fetches images from the current results page. To get more, the DevTools Network tab (F12) reveals another request URL, https://s.weibo.com/ajax/topsuggest.php, which returns the ids of bloggers related to the search term; you can then scrape emoji images from those bloggers' pages directly. A rough sketch of that call follows.
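This is only a sketch of the idea, not a verified client: the query parameter name (`key`) and the JSON layout assumed below are guesses, so inspect the actual request and response in DevTools and adjust both before relying on it.

```python
import requests

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}

def get_related_uids(keyword):
    # Hypothetical call: 'key' as the parameter name and the 'data'/'user'/'uid'
    # JSON layout are assumptions, not confirmed against the live endpoint.
    resp = requests.get(
        'https://s.weibo.com/ajax/topsuggest.php',
        params={'key': keyword},
        headers=header,
        verify=False,
    )
    data = resp.json()
    return [user.get('uid') for user in data.get('data', {}).get('user', [])]

print(get_related_uids('emo表情包'))
```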