目标网页:网易招聘
抓取目标:1. 根据输入的职业关键词,抓取该职业的全部职位数据;2. 将结果存入 Excel 文件
用到的库:
import requests
import pandas
全部代码:
import requests
import pandas
# Prompt the user for the job keyword; it is used both as the search term
# sent to the API and as part of the output Excel file name.
key = input('输入你要搜索的职业:')
def size(key, timeout=10):
    """Return the total number of postings matching *key*.

    Sends a one-item query to the recruitment search endpoint and reads the
    ``total`` field of the response, so the caller can request every result
    in a single follow-up page.

    Args:
        key: Search keyword, sent as the ``keyword`` field of the JSON body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value of ``data.total`` from the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = 'https://hr.163.com/api/hr163/position/queryPage'
    # pageSize=1 keeps the probe response small; only the total is needed.
    query = {"currentPage": 1, "pageSize": 1, "keyword": key}
    res = requests.post(url, json=query, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of hitting a confusing KeyError below.
    res.raise_for_status()
    return res.json()['data']['total']
# Ask the API how many postings match, then fetch all of them in one page.
page_size = size(key)
data = {"currentPage": 1, "pageSize": page_size, "keyword": key}
url = 'https://hr.163.com/api/hr163/position/queryPage'
res = requests.post(url, json=data, timeout=10)
# Fail loudly on 4xx/5xx instead of hitting a confusing KeyError below.
res.raise_for_status()
payload = res.json()
total = payload['data']['total']
# Treat a null/absent result list as "no postings" rather than crashing the loop.
json_list = payload['data']['list'] or []
print(total)

# One dict per posting; the keys become the Excel column headers.
wangyi_list = []
for job in json_list:
    name = job['name']
    print(name)
    # A posting may carry no workplace; fall back to an empty cell
    # instead of raising IndexError on [0].
    places = job['workPlaceNameList']
    address = places[0] if places else ''
    # NOTE(review): 'postStatus' is exported under the "需求人数" column, and
    # 'requirement' / 'en_requirement' under "职位描述" / "职业要求" — confirm
    # these are the intended API fields for those columns.
    wangyi_list.append(
        {
            '职位名称': name,
            '工作地址': address,
            '工作部门': job['firstDepName'],
            '职业类型': job['firstPostTypeName'],
            '需求人数': job['postStatus'],
            '学历要求': job['reqEducationName'],
            '经验需求': job['reqWorkYearsName'],
            '职位描述': job['requirement'],
            '职业要求': job['en_requirement'],
        }
    )

# Write every collected posting to an Excel file named after the keyword.
ex = pandas.DataFrame(wangyi_list)
ex.to_excel("网易招聘-%s.xlsx" % (key,), index=False)
效果:
|