# 仅供学习使用：51job 招聘信息爬虫程序
# (This spider program is provided for learning purposes only.)
import requests
from lxml import etree
from bs4 import BeautifulSoup
import json
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import ChromeOptions
from time import sleep
from selenium.webdriver import Chrome
import pandas as pd
# --- Browser setup -------------------------------------------------------
# Headless Chrome. The original passed '--disable--gpu' (double dash typo),
# which Chrome silently ignores; the correct flag is '--disable-gpu'.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Hide the "controlled by automated software" banner. The original created
# a second, unused ChromeOptions for this; the experimental option must be
# set on the options object actually passed to Chrome.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# NOTE(review): executable_path= is the old Selenium 3 API — matches the
# rest of this script (find_elements_by_id etc.); kept for compatibility.
chrome = Chrome(executable_path=r'E:\浏览器下载\chromedriver', options=chrome_options)

# --- Open 51job and submit the keyword search ----------------------------
url = 'https://search.51job.com/list/000000,000000,0000,32,9,99,+,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99°reefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
chrome.get(url)
# Renamed from `input` — that shadowed the builtin input().
keyword_box = chrome.find_elements_by_id('keywordInput')[0]
keyword_box.send_keys('java 成都')
search_btn = keyword_box.find_element_by_xpath('//*[@id="search_btn"]')
search_btn.click()

# --- Discover the page count --------------------------------------------
# The search results are embedded as JSON in the 4th
# <script type="text/javascript"> element; the slice [28:] strips the
# "window.__SEARCH_RESULT__ = "-style assignment prefix before the JSON.
text = chrome.page_source
tree = etree.HTML(text)
script_node = tree.xpath('//script[@type="text/javascript"]')[3]
payload = script_node.xpath('./text()')
num = json.loads(payload[0][28:])['total_page']

# --- Scrape every result page -------------------------------------------
# Each row: job name, city, update date, experience, education, salary,
# headcount (the last entry of attribute_text).
data = []
for _ in range(int(num) - 1):
    text = chrome.page_source
    tree = etree.HTML(text)
    script_node = tree.xpath('//script[@type="text/javascript"]')[3]
    payload = script_node.xpath('./text()')
    jobs = json.loads(payload[0][28:])['engine_jds']
    for job in jobs:
        # Materialize attribute_text once instead of three times per row.
        attrs = list(job['attribute_text'])
        data.append([
            job['job_name'],
            job['workarea_text'],
            job['updatedate'],
            attrs[1],
            attrs[2],
            job['providesalary_text'],
            attrs[-1],
        ])
    try:
        sleep(1)  # be polite to the server and let the page settle
        next_button = chrome.find_elements_by_xpath('//li[@class="next"]')[0]
        next_button.click()
        chrome.refresh()
    except Exception:
        # No "next" button (last page) or click failed — stop paging.
        # Narrowed from a bare `except:` so Ctrl-C still interrupts.
        break
chrome.quit()  # release the browser/driver (the original leaked it)

# --- Persist to Excel ----------------------------------------------------
# The context manager saves and closes the writer, replacing the
# deprecated writer.save()/writer.close() pair.
data = pd.DataFrame(data, columns=['招聘岗位', '招聘城市', '招聘年月', '工作经验', '学历要求', '招聘薪资', '招聘数量'])
with pd.ExcelWriter(r'C:\Users\24132\Desktop\data_spyder.xlsx') as writer:
    data.to_excel(writer, 'page_1', float_format='%.10f')
# 总结：老老实实地迈好每一步，相信成功就在你身边；甘愿为理想“头破血流”。
# (Summary: take every step honestly — success is within reach; be willing
# to fight hard for your ideals.)