Python中selenium的使用
requests与selenium的区别: requests目前只能请求静态页面; selenium既可以请求静态页面,也可以请求动态页面。
创建浏览器对象并加载页面
import ast
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Page opened by this first walkthrough.
URL = 'https://www.baidu.com/'
# Start a Chrome session with default settings and navigate it to URL.
b = webdriver.Chrome()
b.get(URL)
设置浏览器窗口
# Give the window an explicit size of 1366x768 ...
b.set_window_size(1366, 768)
# ... then maximize it. Both calls are shown for demonstration; because
# maximize_window() runs last, it decides the final window size.
b.maximize_window()
前进后退
# Step back in the browser history and then forward again, pausing after
# each step (2 s, then 1 s) so the navigation can be observed.
for navigate, pause_seconds in ((b.back, 2), (b.forward, 1)):
    navigate()
    time.sleep(pause_seconds)
打印网页源码、滚动滚动条
selenium使用execute_script方法执行JavaScript操作。window.scrollTo()方法可把内容滚动到指定的坐标。
# Dump the HTML source of the page that is currently loaded.
print(b.page_source)

# Scroll down the page in 1000-pixel steps (0, 1000, ..., max_y), pausing one
# second after each step. execute_script() runs the JavaScript in the page.
max_y = 4000
for y in range(0, max_y + 1, 1000):
    b.execute_script(f'window.scrollTo(0, {y})')
    time.sleep(1)

# Close the current window, then shut down the whole WebDriver session.
b.close()
b.quit()
设置配置项
from selenium import webdriver
URL = 'https://www.taobao.com/'
URL1 = 'https://www.baidu.com/'

# Build Chrome options that switch off image loading.
# NOTE(review): these options are not passed to webdriver.Chrome() below, so
# they have no effect in this snippet (the later Taobao example passes
# options=options). Confirm whether leaving them out here is intentional.
options = webdriver.ChromeOptions()
options.add_argument('blink-settings=imagesEnabled=false')

b = webdriver.Chrome('resources/chromedriver.exe')
b.set_window_size(1366, 768)

# Make navigator.webdriver read as undefined for page scripts. The script is
# registered BEFORE the first navigation: addScriptToEvaluateOnNewDocument
# only runs in documents created after the call, so registering it after
# b.get(URL) would leave the first page load unpatched.
b.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {
        "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    }
)
b.get(URL)
选项卡
# Open a blank tab through JavaScript; the driver keeps pointing at the
# current tab until it is switched explicitly.
b.execute_script('window.open()')

# Switch to the second window handle and load URL1 there.
second_tab = b.window_handles[1]
b.switch_to.window(second_tab)
b.get(URL1)
time.sleep(2)

# Switch back to the first window handle.
first_tab = b.window_handles[0]
b.switch_to.window(first_tab)
获取元素信息
# Locate elements by CSS class name and click them. find_element(By..., ...)
# is used instead of find_element_by_class_name(), which was removed in
# Selenium 4.3; this form works on both Selenium 3 and 4.
# Presumably 'h' is the login link and 'icon-qrcode' the QR-code login
# switch on the Taobao page; verify against the live markup.
b.find_element(By.CLASS_NAME, 'h').click()
b.find_element(By.CLASS_NAME, 'icon-qrcode').click()
隐式等待
使用隐式等待后,如果WebDriver没有在DOM中立即找到元素,会在设定的时间内持续等待并重新查找,超出设定时间后才抛出找不到元素的异常。换句话说,当要查找的元素没有立即出现时,隐式等待会等待一段时间再查找DOM。默认的等待时间是0。
# Set the implicit wait to 10 seconds for the element lookups that follow.
b.implicitly_wait(10)
# find_elements(By..., ...) replaces find_elements_by_class_name(), which was
# removed in Selenium 4.3. The result is not used here; the call only
# demonstrates the wait.
# NOTE(review): the class-name literal ends with a space, possibly a paste
# artifact. It is kept unchanged; confirm.
b.find_elements(By.CLASS_NAME, 'site-nav-login-info-nick ')
cookies
# Read all cookies of the current browser session and show them.
cookie = b.get_cookies()
print(cookie)

# Save the cookies as their str() text (a Python literal), so a later script
# can parse the file back into the same list and restore the session.
with open('resources/cookie.txt', 'w', encoding='utf-8') as file:
    file.write(str(cookie))

b.quit()
实例-爬淘宝
"""
example03 - 实例
Author: Asus
Date: 2021/8/18
"""
import time
from Tools.i18n.pygettext import safe_eval
from selenium.webdriver.common import keys
from selenium import webdriver
URL = 'https://www.taobao.com/'

# Switch off image loading for this crawl.
options = webdriver.ChromeOptions()
options.add_argument('blink-settings=imagesEnabled=false')

# Load the cookie dump saved by an earlier, logged-in session. The path is a
# placeholder for the file your own crawler wrote. It is read as UTF-8 to
# match how the dump is written.
with open('自己爬虫爬好的cookie', 'r', encoding='utf-8') as file:
    cookie = file.read()
print(cookie)
# The dump is expected to be the str() of a list of cookie dicts, i.e. a
# Python literal. ast.literal_eval parses literals only and never executes
# code, unlike the eval-based pygettext.safe_eval used before (that import
# also resolves only where CPython's Tools/ directory is on sys.path).
new_cookie = ast.literal_eval(cookie)

b = webdriver.Chrome('resources/chromedriver.exe', options=options)
# Make navigator.webdriver read as undefined for page scripts; registered
# before the first navigation so it runs in every new document.
b.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {
        "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    }
)

# A cookie can only be added for the domain that is currently loaded, so
# open the site first, add the cookies, then reload to apply them.
b.get(URL)
for saved_cookie in new_cookie:
    # Only cookies flagged as secure are restored.
    if saved_cookie['secure']:
        b.add_cookie(saved_cookie)
b.get(URL)

# Type the query into the search box (id 'q'), then press ENTER on the search
# button. send_keys() returns None, so its result is not stored.
b.find_element(By.ID, 'q').send_keys('三只松鼠大礼包')
time.sleep(2)
b.find_element(By.CLASS_NAME, 'btn-search').send_keys(keys.Keys.ENTER)
time.sleep(5)

# Scroll the result page down in 1000-pixel steps (0, 1000, ..., max_y).
max_y = 5000
for y in range(0, max_y + 1, 1000):
    b.execute_script(f'window.scrollTo(0,{y})')
|