代理和selenium
1.代理ip的使用
1.获取蘑菇代理中的代理ip
import requests
def get_ip():
    """Fetch the proxy list from the provider endpoint.

    Returns:
        A list of non-empty lines from the response body (one proxy
        address per line), or ``None`` when the body starts with ``'{'``,
        which this code treats as a failed extraction (presumably a JSON
        error object -- confirm against the provider's API).
    """
    respond = requests.get('代理URL')
    # startswith() is safe on an empty body, unlike indexing text[0].
    if respond.text.startswith('{'):
        print('提取ip失败')
        return None
    # splitlines() + strip() also drops a trailing '\r' from CRLF responses.
    return [line.strip() for line in respond.text.splitlines() if line.strip()]
ips = get_ip()
# Sample addresses; this assignment overrides the list fetched above.
ips = ['114.106.151.5:36293', '115.209.126.217:21170', '59.62.120.157:30400', '114.230.120.126:57022']
# Use the actual list elements -- quoting them would send the literal text
# 'ips[0]' to requests instead of a proxy address.
proxies = {'http': ips[0], 'https': ips[1]}
respond = requests.get('url', proxies=proxies)
2.使用代理的优化流程
import requests
import time
def get_ip():
    """Fetch proxy addresses, retrying recursively until extraction succeeds.

    A response body whose first character is ``'{'`` is treated as a
    failure: a message is printed, the function sleeps 5 seconds and calls
    itself again.  Otherwise the first four newline-separated entries of
    the body are returned.
    """
    reply = requests.get('代理url')
    # Success path first: anything that does not start with '{'.
    if reply.text[0] != '{':
        return reply.text.split('\n')[0:4]
    print('提取ip失败')
    time.sleep(5)
    return get_ip()
def get_ip():
    """Fetch proxy addresses, retrying in a loop until extraction succeeds.

    Returns:
        The first four newline-separated entries of the response body.
        A body whose first character is ``'{'`` is treated as a failed
        request; the function then waits 5 seconds and requests again.
    """
    while True:
        # The request must be inside the loop; otherwise a failed response
        # would be re-checked forever without ever being refreshed.
        respond = requests.get('代理url')
        if respond.text[0] == '{':
            print('请求失败')
            time.sleep(5)
            continue
        return respond.text.split("\n")[0:4]
2.selenium的基本功能
from selenium.webdriver import Chrome
# Create a browser instance (the class must be called, not just referenced).
b = Chrome()
# Load the page.
b.get('URL')
# Print the HTML of the loaded page.
print(b.page_source)
# Close the current browser window.
b.close()
3.selenium的常规操作
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
import time
b = Chrome()
b.get('https://www.51job.com/')

# Locate the search box, type the keyword and submit with Enter.
# find_element(by, value) is used instead of the find_element_by_* helpers,
# which were removed in Selenium 4; 'id' and 'class name' are the string
# values of By.ID and By.CLASS_NAME.
search_input = b.find_element('id', 'kwdselectid')
search_input.send_keys('数据分析')
search_input.send_keys(Keys.ENTER)

# Short pause before reading the page source of the first result page.
time.sleep(1)
info = b.page_source

# Click the element with class "next", then read the page source again.
# Named next_button so the builtin next() is not shadowed.
next_button = b.find_element('class name', 'next')
next_button.click()

time.sleep(1)
info = b.page_source
4.selenium常用配置
from selenium.webdriver import Chrome, ChromeOptions
import requests
import time

options = ChromeOptions()
# Exclude Chrome's 'enable-automation' switch.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Chrome content-setting preference; the value 2 blocks image loading.
options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
# Route browser traffic through the first proxy; `ips` is the proxy list
# built in the proxy section earlier in this file.
options.add_argument(f'--proxy-server=http://{ips[0]}')

b = Chrome(options=options)
b.get('https://sec.douban.com/')
info = b.page_source
|