selenium基本用法
import time
from selenium import webdriver
class Tjgb:
    """Chrome session (driven through Selenium) that lists entries on one page.

    The constructor starts the browser immediately.  ``load_webpage`` opens
    ``url``, switches into the frame named ``'ml'`` and prints the ``title``
    and date text of every ``<li>`` under ``<ul id="fanye">``.  Call
    ``close_driver`` when finished to shut the browser down.

    The code targets the Selenium 3 API (``executable_path=``,
    ``find_elements_by_xpath``).
    """

    def __init__(self, url, if_headless=False):
        """Store the configuration and start the Chrome driver.

        Args:
            url: Address of the page that ``load_webpage`` will open.
            if_headless: When true, Chrome is started with ``--headless``.
        """
        # Machine-specific locations of chromedriver and the Chrome binary.
        self.chrome_driver_path = r'D:\Python3.6\Scripts\chromedriver.exe'
        self.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
        self.url = url
        # Candidate User-Agent strings; only the first entry is used.
        self.ua_pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
        ]
        self.if_headless = if_headless
        self.driver = self.base_driver()

    def opt_config(self):
        """Build the ``ChromeOptions`` used to launch the browser.

        Returns:
            A ``webdriver.ChromeOptions`` instance carrying the headless
            switches (if requested), the automation-hiding options, the
            User-Agent override and the Chrome binary location.
        """
        user_agent = self.ua_pool[0]
        opt = webdriver.ChromeOptions()
        if self.if_headless:
            opt.add_argument('--headless')
            # Spelled with the documented double-dash prefix, consistent
            # with every other switch added here.
            opt.add_argument('--disable-gpu')
        # NOTE(review): the original nesting of the switches around the
        # headless check was not recoverable; confirm which of them are
        # meant to be headless-only.
        opt.add_argument('--no-sandbox')
        # Reduce the signals pages commonly use to detect automation.
        opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        opt.add_experimental_option('useAutomationExtension', False)
        opt.add_argument('--disable-blink-features=AutomationControlled')
        opt.add_argument(f'user-agent={user_agent}')
        opt.binary_location = self.binary_location
        return opt

    def base_driver(self):
        """Start Chrome and register a script that hides ``navigator.webdriver``.

        Returns:
            The started ``webdriver.Chrome`` instance.
        """
        driver = webdriver.Chrome(
            options=self.opt_config(),
            executable_path=self.chrome_driver_path,
        )
        # Evaluated on every new document before page scripts run, so that
        # ``navigator.webdriver`` reads as ``undefined``.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
Object.defineProperty(navigator,'webdriver',{
get:()=>undefined})"""
        })
        return driver

    def load_webpage(self, wait_seconds=15):
        """Open ``self.url`` and print the title/date of each list entry.

        Args:
            wait_seconds: Fixed pause (in seconds) after the page is
                requested, before the frame is inspected.  Defaults to the
                previously hard-coded 15 seconds.
        """
        self.driver.get(self.url)
        time.sleep(wait_seconds)
        self.driver.implicitly_wait(10)
        self.driver.switch_to.frame('ml')
        try:
            li_list = self.driver.find_elements_by_xpath('//ul[@id="fanye"]/li')
            print(li_list)
            for li in li_list:
                title = li.find_element_by_xpath('./a').get_attribute('title')
                pub_date = li.find_element_by_xpath('./span').get_attribute('textContent')
                print(title, pub_date)
        finally:
            # Always leave the frame, even when an element lookup fails, so
            # later calls start from the top-level document.
            self.driver.switch_to.default_content()

    def close_driver(self):
        """Close every open window, then end the driver session."""
        handles = self.driver.window_handles
        print(handles)
        try:
            for handle in handles:
                # ``switch_to.window`` is the supported accessor; the old
                # ``switch_to_window`` method is deprecated.
                self.driver.switch_to.window(handle)
                self.driver.close()
        finally:
            # Guarantee the session is terminated even if closing a window
            # raised; otherwise the browser process is left running.
            self.driver.quit()
        print('close all')
if __name__ == '__main__':
    # Demo run: open the page, print its list entries, then shut Chrome down.
    url = 'http://tjj.ezhou.gov.cn/zwgk/fdzdgknr/?itemid=2392'
    tjgb = Tjgb(url=url, if_headless=False)
    try:
        tjgb.load_webpage()
    finally:
        # Close the browser even when loading or scraping fails, so no
        # Chrome/chromedriver process is left behind.
        tjgb.close_driver()
selenium对iframe操作
<html>
<iframe id="frame1" src="https://www.sogou.com/" name="slider"></iframe>
</html>
# Select the iframe by its id attribute ("frame1" in the markup above).
driver.switch_to.frame("frame1")
<html>
<iframe id="frame1" src="https://www.sogou.com/" name="slider"></iframe>
</html>
# Select the same iframe by its name attribute ("slider" in the markup above).
driver.switch_to.frame("slider")
<iframe tab-id="12c8792" frameborder="0" src="xx/xx/xxx.html" scrolling="yes" class="x-iframe" cd_frame_id_="d5234f1"></iframe>
# This iframe has neither an id nor a name, so locate the element and pass
# the WebElement itself to switch_to.frame.  The <iframe> tag carries the
# "x-iframe" class directly; searching for an <iframe> *inside* it would
# look among its descendants and fail.
iframe_elem = driver.find_element_by_class_name('x-iframe')
driver.switch_to.frame(iframe_elem)
多个iframe嵌套
<html>
<iframe id="frame1">
<iframe id="frame2"></iframe>
</iframe>
</html>
# Nested frames must be entered one level at a time, outermost first.
driver.switch_to.frame("frame1")
driver.switch_to.frame("frame2")
# Jump straight back to the top-level document ...
driver.switch_to.default_content()
# ... or step up a single level to the parent frame (this leaves the context
# unchanged when already at the top level).
driver.switch_to.parent_frame()
|