selenium的简单操作
# Script: search lagou.com for "python" jobs and print each listing's
# company text and summary text from the results page.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

web = Chrome()
web.get("https://lagou.com")

# Click the first entry inside the element with id "changeCityBox"
# (presumably the city-selection popup shown on first visit -- verify
# against the live page).
# NOTE: the find_element_by_* helpers were removed in Selenium 4.3; the
# find_element(By.XPATH, ...) form works on both Selenium 3 and 4.
se = web.find_element(By.XPATH, '//*[@id="changeCityBox"]/ul/li[1]/a')
se.click()
time.sleep(1)

# Type the keyword into the search box and press ENTER to submit it.
web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys('python', Keys.ENTER)
# Pause before querying the results: find_elements() does not wait and
# returns an empty list when nothing matches yet.
time.sleep(1)

# Every direct <div> child of the first <div> under #jobList is treated as
# one job entry.
lis = web.find_elements(By.XPATH, '//*[@id="jobList"]/div[1]/div')
for li in lis:
    # XPaths starting with "./" are resolved relative to the current entry.
    company = li.find_element(By.XPATH, './div/div[2]/div/a').text
    info = li.find_element(By.XPATH, './div[1]/div[1]/div[2]').text
    print(company, info)
# Script: search lagou.com for "python", open the first result (which is
# handled here as opening in a new window/tab), print the job description,
# then close that window and return to the original one.
from selenium.webdriver import Chrome
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

web = Chrome()
web.get("http://www.lagou.com")
time.sleep(1)

# Click the element with id "cboxClose" (presumably the close button of the
# popup shown on first visit -- verify against the live page).
# NOTE: the find_element_by_* helpers were removed in Selenium 4.3; the
# find_element(By.XPATH, ...) form works on both Selenium 3 and 4.
web.find_element(By.XPATH, '//*[@id="cboxClose"]').click()
time.sleep(1)

# Submit the search by typing the keyword followed by ENTER.
web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys('python', Keys.ENTER)
time.sleep(1)

# Click the link of the first entry in the job list.
web.find_element(By.XPATH, '//*[@id="jobList"]/div[1]/div[1]/div[1]/div[1]/div[1]/a').click()
time.sleep(1)

# Selenium keeps driving the old window until told otherwise; move to the
# last window handle, i.e. the window that was just opened.
web.switch_to.window(web.window_handles[-1])
time.sleep(1)

info = web.find_element(By.XPATH, '//*[@id="job_detail"]/dd[2]').text
time.sleep(1)
print(info)

# Close only the current (detail) window, then switch back to the first
# handle so the driver points at a window that still exists.
web.close()
web.switch_to.window(web.window_handles[0])
无头浏览器及下拉列表
获取艺恩历年排行榜
# Script: open the endata yearly box-office page in headless Chrome and wrap
# the year drop-down (#OptionDate) in a Select helper.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
import time
from selenium.webdriver.chrome.options import Options

# Run Chrome without a visible window.
opt = Options()
opt.add_argument('--headless')
opt.add_argument('--disable-gpu')

web = Chrome(options=opt)
web.get('https://www.endata.com.cn/BoxOffice/BO/Year/index.html')

# NOTE: the find_element_by_* helpers were removed in Selenium 4.3; the
# find_element(By.XPATH, ...) form works on both Selenium 3 and 4.
ss = web.find_element(By.XPATH, '//*[@id="OptionDate"]')
# Select wraps a <select> element and exposes its options for selection.
sels = Select(ss)
通过工具超级鹰来进行验证码的处理
chaojiying.py
此文件为下面各个文件所导入的 chaojiying 模块的源码，可以通过超级鹰获得。不同类型的验证码用不同的数字代表验证码类型。
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service.

    Stores the account credentials once and attaches them to every request.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials and build the shared request parameters.

        username: account name, sent as 'user'.
        password: plain-text password; only its MD5 hex digest is kept and
            sent (as 'pass2').
        soft_id: software ID, sent as 'softid'.
        timeout: seconds passed to ``requests.post`` for each call, so a
            stalled connection raises instead of blocking forever. Pass
            ``None`` to wait indefinitely (the previous behaviour).
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Parameters included in every API call.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the service's decoded JSON reply.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        # base_params is merged last, so it wins on any key collision --
        # same precedence as the original dict.update() call.
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the decoded JSON reply.

        im_id: image ID of the captcha being reported.
        """
        params = {'id': im_id, **self.base_params}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
if __name__ == '__main__':
    # Demo: send the local file a.jpg for recognition as captcha type 1902
    # and print the service's reply. The three constructor arguments are
    # placeholders for a real username, password and software ID.
    chaojiying = Chaojiying_Client('用户名', '密码', '软件ID')
    # Read the image inside a with-block so the file handle is closed
    # promptly instead of being left to the garbage collector.
    with open('a.jpg', 'rb') as f:
        im = f.read()
    print(chaojiying.PostPic(im, 1902))
超级鹰干掉超级鹰
# Script: log in to chaojiying.com itself, using the Chaojiying service to
# recognise the captcha shown on its own login form.
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from chaojiying import Chaojiying_Client

web = Chrome()
web.get('https://www.chaojiying.com/user/login/')

# Screenshot just the captcha <img> element as PNG bytes.
# NOTE: the find_element_by_* helpers were removed in Selenium 4.3; the
# find_element(By.XPATH, ...) form works on both Selenium 3 and 4.
img = web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/div/img').screenshot_as_png

# The three constructor arguments are placeholders for real credentials.
chaojiying = Chaojiying_Client('用户名', '密码', '软件ID')
dic = chaojiying.PostPic(img, 1902)
# 'pic_str' is read as the recognised captcha text.
verify_code = dic['pic_str']

# Fill the form inputs in order (p[1]..p[3]: presumably username, password
# and captcha -- verify against the live page), then press ENTER on the
# p[4] input to submit.
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[1]/input').send_keys('用户名')
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[2]/input').send_keys('密码')
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[3]/input').send_keys(verify_code)
web.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/p[4]/input').send_keys(Keys.ENTER)
网站检测到使用自动化工具打开
使用自动化工具打开 未使用自动化工具打开
解决方法
对于chrome来说
1.chrome的版本号如果小于88 在你启动浏览器的时候(此时没有加载任何网页内容), 向页面嵌入js代码. 去掉webdriver
# Snippet (Chrome < 88): hide navigator.webdriver from the page.
# Assumes Chrome has already been imported from selenium.webdriver.
web = Chrome()
# Register the JavaScript below through the DevTools command
# Page.addScriptToEvaluateOnNewDocument, before any page has been loaded.
# The script sets navigator.webdriver to undefined and redefines its getter
# to keep returning undefined.
web.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
navigator.webdriver = undefined
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
# NOTE(review): xxxxxxx is a placeholder from the notes, not a defined name;
# replace it with the target URL string before running.
web.get(xxxxxxx)
2.chrome的版本大于等于88
# Snippet (Chrome >= 88): start Chrome with the AutomationControlled blink
# feature disabled. Assumes Chrome and Options have already been imported.
option = Options()
option.add_argument('--disable-blink-features=AutomationControlled')
# The options object must be passed to Chrome() for the flag to apply.
web = Chrome(options=option)
# The URL below is a placeholder from the notes; substitute the real target.
web.get("xxxxxxxxxxxxxxxxxxxxx")
# Script: fill in the 12306 login form, submit it, and drag the slider that
# appears afterwards.
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from chaojiying import Chaojiying_Client
import time

option = Options()
option.add_argument('--disable-blink-features=AutomationControlled')
# Pass the options to the driver: the original called Chrome() with no
# arguments, so the flag configured above was never applied.
web = Chrome(options=option)
web.get('https://kyfw.12306.cn/otn/resources/login.html')

# The strings typed below are placeholders for a real username and password.
# NOTE: the find_element_by_* helpers were removed in Selenium 4.3; the
# find_element(By.XPATH, ...) form works on both Selenium 3 and 4.
web.find_element(By.XPATH, '//*[@id="J-userName"]').send_keys('用户名')
time.sleep(1)
web.find_element(By.XPATH, '//*[@id="J-password"]').send_keys('密码')
time.sleep(1)
web.find_element(By.XPATH, '//*[@id="J-login"]').click()
time.sleep(1)

# Drag the element with id "nc_1_n1z" (presumably the slider-captcha
# handle -- verify against the live page) 300 px to the right.
btn = web.find_element(By.XPATH, '//*[@id="nc_1_n1z"]')
ActionChains(web).drag_and_drop_by_offset(btn, 300, 0).perform()
|