环境
- windows10
- python3.7.3
- selenium
- 谷歌浏览器(Chrome)
- 谷歌驱动(ChromeDriver)
谷歌驱动下载
http://chromedriver.storage.googleapis.com/index.html
下载与自己电脑上谷歌浏览器版本相对应的谷歌驱动(ChromeDriver)。
解析
在这里主要爬取商品的基本信息:
结果
代码
from selenium import webdriver
import time
from selenium.webdriver import ActionChains
import csv
# Module-level CSV sink shared by the scraping code below: rows are appended
# to ./taobao.csv. newline='' is what the csv module expects so that it can
# control line endings itself.
f = open('./taobao.csv', mode='a', newline='')
writer = csv.writer(f)
def _retry_forever(action, delay=5):
    """Call ``action`` until it completes without raising.

    Sleeps ``delay`` seconds after each failed attempt. Only ``Exception``
    subclasses are retried, so Ctrl+C (KeyboardInterrupt) and SystemExit
    still break out of the loop instead of being swallowed.
    """
    while True:
        try:
            action()
            return
        except Exception:
            # Best-effort retry, as in the original code: the element is
            # presumably not available yet, so wait and try again.
            time.sleep(delay)


def _scrape_current_page(driver):
    """Write one CSV row for every item slot (1..48) readable on this page.

    Each slot's text is split on newlines, printed, and written through the
    module-level ``writer``.
    """
    for slot in range(1, 49):
        try:
            fields = driver.find_element_by_xpath(
                '/html/body/div[1]/div[2]/div[3]/div[1]/div[21]/div/div/div[1]/div[{}]/div[2]'.format(slot)
            ).text.split('\n')
            print(fields)
            writer.writerow(fields)
        except Exception:
            # Best-effort: a slot that cannot be located or written is
            # skipped so the remaining slots are still processed.
            continue


def _click_next_page(driver):
    """Click the first pager link among li[8]..li[12] that can be clicked.

    Presumably this is the "next page" link, whose position shifts with the
    pager layout -- TODO confirm against the live page.

    Returns:
        True if one of the candidates was clicked, False if all of them failed.
    """
    for position in range(8, 13):
        try:
            driver.find_element_by_xpath(
                '/html/body/div[1]/div[2]/div[3]/div[1]/div[26]/div/div/div/ul/li[{}]/a'.format(position)
            ).click()
            return True
        except Exception:
            continue
    return False


def _solve_slider_if_present(driver):
    """Drag the slider widget when the page shows a prompt containing '滑块'."""
    try:
        prompt = driver.find_element_by_xpath(
            '/html/body/div/div[2]/div/div[1]/div[2]/center/div[1]/div/div[1]/div[2]/span'
        ).text
    except Exception:
        # No prompt element found: keep the original empty-list placeholder
        # so the printed output stays the same.
        prompt = []
    print(prompt)
    if '滑块' in prompt:
        button = driver.find_element_by_xpath(
            '/html/body/div/div[2]/div/div[1]/div[2]/center/div[1]/div/div[1]/span'
        )
        ActionChains(driver).click_and_hold(button).perform()
        ActionChains(driver).move_by_offset(xoffset=258, yoffset=0).perform()
        ActionChains(driver).release().perform()
        time.sleep(10)


def login(driver):
    """Log in to taobao.com, search for '红酒' and dump every result page to CSV.

    The function drives the browser through the login form, submits the
    search, reads the total page count from the pager text ("x/N") and then
    scrapes each page through the module-level ``writer``.

    The browser window and the module-level CSV file ``f`` are always closed
    when the function exits, including when a step raises.

    Args:
        driver: Selenium WebDriver instance used for the whole session.
    """
    try:
        driver.delete_all_cookies()
        driver.get("https://www.taobao.com/")
        time.sleep(10)

        # Open the login form; retried until the link can be clicked.
        _retry_forever(
            lambda: driver.find_element_by_xpath(
                '/html/body/div[1]/div[1]/div/ul[2]/li[2]/div[1]/a'
            ).click()
        )

        def fill_credentials():
            # '账号' / '密码' are the placeholder account and password typed
            # into the two form inputs.
            driver.find_element_by_xpath(
                '/html/body/div/div[2]/div[3]/div/div/div/div[2]/div/form/div[1]/div[2]/input'
            ).send_keys('账号')
            driver.find_element_by_xpath(
                '/html/body/div/div[2]/div[3]/div/div/div/div[2]/div/form/div[2]/div[2]/input'
            ).send_keys('密码')
            time.sleep(5)

        _retry_forever(fill_credentials)

        # Submit the login form.
        driver.find_element_by_xpath(
            '/html/body/div/div[2]/div[3]/div/div/div/div[2]/div/form/div[4]/button'
        ).click()
        # Long pause after submitting -- presumably to leave time for manual
        # verification; TODO confirm.
        time.sleep(50)
        driver.find_element_by_xpath('/html/body/div[1]/div/ul[2]/li[1]/div/a').click()
        time.sleep(20)

        def submit_search():
            driver.find_element_by_xpath(
                '/html/body/div[2]/div/div/div[2]/div/div[1]/div[2]/form/div[2]/div[3]/div/input'
            ).send_keys('红酒')
            driver.find_element_by_xpath(
                '/html/body/div[2]/div/div/div[2]/div/div[1]/div[2]/form/div[1]/button'
            ).click()

        _retry_forever(submit_search)
        time.sleep(10)

        # Pager text is split on '/', and the second part is the page count.
        page = driver.find_element_by_xpath(
            '/html/body/div[1]/div[2]/div[3]/div[1]/div[16]/div/div[1]/div/div[2]/ul/li[2]'
        ).text.split('/')[1]
        print(page)
        time.sleep(10)

        total_pages = int(float(page))
        # Scrape all N pages but advance only N-1 times. The previous loop
        # ran N-1 scrape+advance rounds, so the last page was never scraped
        # and a single-page result produced no rows at all.
        for page_no in range(1, total_pages + 1):
            _scrape_current_page(driver)
            if page_no == total_pages:
                break
            time.sleep(3)
            _click_next_page(driver)
            time.sleep(10)
            _solve_slider_if_present(driver)
    finally:
        # Release the browser window and the CSV file even if a step failed;
        # the nested finally closes the file even when closing the window raises.
        try:
            driver.close()
        finally:
            f.close()
def main(driver_path=r'/home/zc/桌面/chromedriver'):
    """Configure Chrome, start it through Selenium and run ``login``.

    Args:
        driver_path: Filesystem path of the chromedriver executable. Defaults
            to the location that used to be hard-coded, so calling ``main()``
            without arguments behaves as before.
    """
    # ChromeOptions collects the start-up configuration for Chrome.
    option = webdriver.ChromeOptions()
    # Experimental option: exclude the 'enable-automation' switch.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Extra command-line arguments passed to Chrome at launch.
    option.add_argument("--disable-blink-features=AutomationControlled")
    option.add_argument("--no-sandbox")
    # NOTE(review): "--disable-dev-usage" does not look like a documented
    # Chrome switch; possibly "--disable-dev-shm-usage" was intended. Left
    # unchanged here -- confirm before altering.
    option.add_argument("--disable-dev-usage")
    # Create the Chrome driver from the configured options.
    driver = webdriver.Chrome(executable_path=driver_path, options=option)
    driver.set_page_load_timeout(15)
    login(driver)
# Script entry point: run the scraper only when executed directly.
if __name__ == "__main__":
    main()
希望这篇文章对你有用! 谢谢点赞评论!
|