Python爬虫之实例—网易云评论爬取
学习视频资源:2021年全新Python教学资源(路飞学城IT,B站)
一、网易云评论提取步骤
1、找到未加密的参数
2、参考网易自身的加密逻辑,对参数进行加密
3、携带加密后的参数请求网易,拿到评论信息
二、网易云评论提取代码
完整代码,输出用户名和内容
代码如下:
from Crypto.Cipher import AES
from base64 import b64encode
import requests,json
# Comments endpoint; the request below sends it a POST.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="

# f and e are not referenced anywhere else in this section.
# NOTE(review): presumably the RSA modulus / public exponent taken from
# NetEase's client script -- confirm before relying on them.
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
e = "010001"

# AES keys consumed by get_params(): g for the first pass, i for the second.
g = "0CoJUm6Qyw8W8jud"
i = "1hmkfQkNHit7gIpB"

# The real (unencrypted) request parameters.
data = {
    "csrf_token": "",
    "cursor": "-1",
    "offset": "0",
    "orderType": "1",
    "pageNo": "1",
    "pageSize": "20",
    "rid": "R_SO_4_1325905146",
    "threadId": "R_SO_4_1325905146",
}
def get_encSecKey():
    """Return the fixed ``encSecKey`` form value sent with the request.

    The value is a hard-coded constant; nothing is computed here.
    NOTE(review): presumably it was captured for the fixed key ``i`` used in
    get_params() -- confirm before changing either one.
    """
    enc_sec_key = "5ea9ed3d2c3925c76ef9ef69ffe2231e4dfc132862c34d84d76ea900d8a768089ca322a55cb9398bf8f4d7ee34fff9fa2de4e35e497971c59fea44ae33a3ac0235c7f007608d2a4db6430bb7d208f6a4162d789472bbe4ea7296b14396de165ccfdc62dcddb0b3de6839499bf1b32560f98ad24b40ee8b5e4764fd158f7a0ed9"
    return enc_sec_key
def get_params(data):  # data is expected to already be a string
    """Build the ``params`` form value by encrypting *data* twice.

    The first pass uses the module-level key ``g``; its Base64 output is then
    encrypted again with the module-level key ``i``.

    Returns the Base64 text produced by the second pass.
    """
    return enc_params(enc_params(data, g), i)
def to_16(data):
    """Pad *data* so that its UTF-8 encoding is a multiple of 16 bytes long.

    ``pad`` copies of ``chr(pad)`` are appended, where ``pad`` is the number
    of bytes (1-16) still missing to reach the next multiple of 16. Input
    that is already aligned receives a full extra block of 16.

    Args:
        data: Text that will later be UTF-8 encoded and AES-encrypted.

    Returns:
        The padded text.
    """
    # Measure the encoded length, not the character count: the cipher works
    # on UTF-8 bytes, and a non-ASCII character occupies more than one byte.
    pad = 16 - len(data.encode("utf-8")) % 16
    return data + chr(pad) * pad
def enc_params(data, key):
    """Encrypt *data* with AES in CBC mode and return it as Base64 text.

    Args:
        data: Text to encrypt; it is padded with to_16() first.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The ciphertext, Base64-encoded and decoded to ``str``.
    """
    padded_text = to_16(data)
    cipher = AES.new(
        key=key.encode("utf-8"),
        IV="0102030405060708".encode("utf-8"),
        mode=AES.MODE_CBC,
    )
    # encrypt() requires input whose length is a multiple of 16.
    ciphertext = cipher.encrypt(padded_text.encode("utf-8"))
    return b64encode(ciphertext).decode("utf-8")
# POST the encrypted form fields to the comments endpoint. The timeout keeps
# the script from hanging forever on a stalled connection.
resp = requests.post(
    url,
    data={
        "params": get_params(json.dumps(data)),  # json.dumps turns the dict into a string
        "encSecKey": get_encSecKey(),
    },
    timeout=10,
)
resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page

# Named `result` rather than `dict` so the builtin is not shadowed.
result = json.loads(resp.text)

# The comment payload is taken as the second top-level value of the response.
# NOTE(review): this relies on key order; presumably the key is "data" --
# confirm against a live response and switch to a lookup by name.
top_level_values = list(result.values())
payload = top_level_values[1] if len(top_level_values) > 1 else None
if not isinstance(payload, dict) or "comments" not in payload:
    raise SystemExit("unexpected response: " + resp.text[:200])

# Print nickname and text of every returned comment.
for comment in payload['comments']:
    nickname = comment['user']['nickname']
    text = comment['content']
    print("user:", nickname)
    print("content:", text)
    print("-----------------------------------------------------------------")
三、结果显示
user: 眉间心上ING
content: 好想成为有钱人啊,这样会不会少很多遗憾
#-----------------------------------------------------------------#
user: 八爷-_
#-----------------------------------------------------------------#
user: 洋洋洋大人-
content: 十年了,我大概已经记不起她的样子,留下的是初见她时,从舞台侧面打在她脸上的光。这么多年没联系她应该早已经结婚生子,但那份淡淡的爱,一直没有消散过。
#-----------------------------------------------------------------#
…
|