首先解析了B站的动态加载接口,找到了它存放BV号的位置,然后使用 you-get 与 ffmpeg 的组合来下载视频。效果如下:
代码如下:
import requests
import json
import os
from tqdm import tqdm
from bs4 import BeautifulSoup
# Browser-like request headers; get_resp_text sends these with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/94.0.4606.54 Safari/537.36"
    ),
}
def get_resp_text(url, timeout=10):
    """Fetch *url* with the module-level ``headers`` and return the body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    rsp = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 instead of relying on the encoding requests would guess.
    rsp.encoding = 'utf-8'
    return rsp.text
def get_upname(mid):
    """Return the uploader's display name scraped from their space page title.

    The page ``https://space.bilibili.com/<mid>`` is fetched through
    ``get_resp_text`` so the request carries the same User-Agent header as
    the other requests in this script.

    Args:
        mid: Uploader id, as a string or an integer.

    Returns:
        The ``<title>`` text with the trailing
        '的个人空间_哔哩哔哩_Bilibili' removed when present; the unmodified
        title when that suffix is absent; an empty string when the page has
        no ``<title>`` element.
    """
    suffix = '的个人空间_哔哩哔哩_Bilibili'
    # str() lets callers pass an int id, matching what get_bvid accepts.
    html = get_resp_text('https://space.bilibili.com/' + str(mid))
    title_tag = BeautifulSoup(html, 'lxml').find('title')
    if title_tag is None:
        return ''
    title = title_tag.text
    # Only strip the suffix when it is really there; blindly slicing off
    # len(suffix) characters would mangle any other title.
    if title.endswith(suffix):
        return title[:-len(suffix)]
    return title
def get_bvid(mid):
    """Collect the BV ids of every video listed for an uploader.

    Pages through the ``x/space/arc/search`` endpoint (30 entries per page,
    ordered by publish date) until a page comes back with an empty video
    list.

    Args:
        mid: Uploader id, as a string or an integer.

    Returns:
        A ``(bvids, upname)`` tuple. ``bvids`` holds the ids in the reverse
        of the order the API returned them; ``upname`` is the result of
        ``get_upname(mid)``.

    Raises:
        RuntimeError: If a response carries no ``data`` object.
    """
    bvids = []
    page = 1
    while True:
        url = ('https://api.bilibili.com/x/space/arc/search?mid=' + str(mid)
               + '&ps=30&tid=0&pn=' + str(page)
               + '&keyword=&order=pubdate&jsonp=jsonp')
        payload = json.loads(get_resp_text(url).replace('\n', ''))
        data = payload.get('data')
        if not data:
            # Fail with the API's own code/message rather than an opaque
            # TypeError from subscripting None.
            raise RuntimeError(
                'video list request failed on page {}: code={} message={}'.format(
                    page, payload.get('code'), payload.get('message')))
        vlist = data['list']['vlist']
        if not vlist:
            break
        bvids.extend(item['bvid'] for item in vlist)
        page += 1
    # Appending and reversing once yields the same order as inserting every
    # id at the front, without the quadratic cost of list.insert(0, ...).
    bvids.reverse()
    return bvids, get_upname(mid)
def download_vedio(bvid):
    """Download one video by invoking the ``you-get`` command-line tool.

    Runs ``you-get --format=dash-flv360`` against
    ``https://www.bilibili.com/video/<bvid>``. The exit status is not
    checked, so a failed download does not stop the caller's loop.

    Args:
        bvid: BV id of the video to download.

    Raises:
        FileNotFoundError: If the ``you-get`` executable cannot be found.
    """
    # Local import keeps this block self-contained.
    import subprocess

    # An argument list (no shell) means the id is never interpreted by a
    # shell, unlike a command string handed to os.system.
    subprocess.run([
        'you-get',
        '--format=dash-flv360',
        'https://www.bilibili.com/video/' + str(bvid),
    ])
def get_xml(bvid):
    """Build the player page-list API URL for *bvid*; returns None.

    NOTE(review): the URL is assembled but never requested or returned, so
    this looks like an unfinished stub -- confirm the intended behaviour.
    """
    pagelist_url = ''.join((
        'https://api.bilibili.com/x/player/pagelist?bvid=',
        bvid,
        '&jsonp=jsonp',
    ))
if __name__ == '__main__':
    # Script entry: prompt for an uploader id, report how many videos were
    # found, then download them one by one behind a progress bar.
    mid = input("输入你想要爬取的up主的id号: ")
    bvid_list, up_name = get_bvid(mid)
    summary = "".join(
        ["up主:", str(up_name), "共有", str(len(bvid_list)), "条视频"]
    )
    print(summary)
    for video_id in tqdm(bvid_list):
        download_vedio(video_id)
|