import random  # used for the randomized waits between browser actions
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Search keyword typed into the Taobao search box.
word = '游戏本'
# Parallel result columns filled by parse_data(); index k across all five
# lists describes the same product (title, price, buyers, shop, location).
infos, prices, deals, names, locations = [], [], [], [], []
#登录模块
def search_product(keyword, username='账号', password='密码'):
    """Submit a search for *keyword*, then fill in and submit the login form.

    Operates on the module-level ``driver``. The search box and search
    button are located first; afterwards the login fields are filled in,
    since the login form is expected to be shown once the search is
    submitted.

    Args:
        keyword: Text typed into the search box.
        username: Account name typed into the login-id field. The default
            is the placeholder used by the original script.
        password: Password typed into the login-password field. The
            default is the placeholder used by the original script.
    """
    def pause():
        # Random 1-3 second delay between actions to imitate a human user
        # and reduce the chance of being flagged as automation.
        time.sleep(random.randint(1, 3))

    # find_element(By.XPATH, ...) is used instead of find_element_by_xpath,
    # which no longer exists in Selenium 4.3+.
    driver.find_element(By.XPATH, '//*[@id="q"]').send_keys(keyword)
    pause()
    driver.find_element(
        By.XPATH, '//*[@id="J_TSearchForm"]/div[1]/button').click()
    pause()

    # Handle the login form.
    driver.find_element(By.XPATH, '//*[@id="fm-login-id"]').send_keys(username)
    pause()
    driver.find_element(
        By.XPATH, '//*[@id="fm-login-password"]').send_keys(password)
    pause()
    driver.find_element(
        By.XPATH, '//*[@id="login-form"]/div[4]/button').click()
    pause()
def fanye(total_pages=15):
    """Scrape result pages one after another, paging forward after each.

    For every page number from 1 to *total_pages* this calls
    ``parse_data()``, waits a random 1-3 seconds, prints a progress line,
    clicks a link in the pager to move on, and waits again. The pager link
    is clicked after the final page as well.

    Args:
        total_pages: Number of result pages to scrape (default 15).
    """
    for page in range(1, total_pages + 1):
        parse_data()
        time.sleep(random.randint(1, 3))
        print(f'第{page}页爬取成功')

        # Position of the pager <li> to click depends on the current page:
        # li[8] on pages 1-3, li[9] on page 4, li[10] on page 5, and li[11]
        # from page 6 onwards.
        link_index = 8 if page < 4 else min(page + 5, 11)
        driver.find_element(
            By.XPATH,
            f'//*[@id="mainsrp-pager"]/div/div/div/ul/li[{link_index}]/a',
        ).click()

        # Pause after the click before the next page is parsed.
        time.sleep(random.randint(1, 3))
#下载模块
def parse_data():
    """Read every product card on the current page into the result lists.

    Locates the product cards through the module-level ``driver``; for each
    card it extracts the title, price, buyer count, shop name and location
    text, appends them to the module-level lists ``infos``, ``prices``,
    ``deals``, ``names`` and ``locations``, and prints the title, price and
    buyer count.
    """
    # find_elements (plural) returns a list with one entry per product card.
    cards = driver.find_elements(
        By.XPATH, '//div[@class="grid g-clearfix"]/div/div')
    for card in cards:
        # The leading './/' keeps each lookup relative to the current card.
        info = card.find_element(
            By.XPATH, './/div[@class="row row-2 title"]/a').text  # product title
        price = card.find_element(By.XPATH, './/strong').text + '元'  # price
        deal = card.find_element(
            By.XPATH, './/div[@class="deal-cnt"]').text  # number of buyers
        name = card.find_element(
            By.XPATH, './/div[@class="shop"]/a/span[2]').text  # shop name
        location = card.find_element(
            By.XPATH, './/div[@class="location"]').text  # shop location

        infos.append(info)
        prices.append(price)
        deals.append(deal)
        names.append(name)
        locations.append(location)

        print(info)
        print(price)
        print(deal)
# Create the browser object used by search_product(), fanye() and parse_data().
driver = webdriver.Chrome()
try:
    # Open the Taobao home page.
    driver.get('https://www.taobao.com/')
    # Implicit wait: element lookups keep retrying for up to 10 seconds.
    driver.implicitly_wait(10)
    # driver.maximize_window()  # optional: maximize the browser window

    # Run the search (and login), then walk through the result pages.
    search_product(word)
    fanye()
    # The original script called workbook.close() here, but no workbook is
    # ever created in this file, so the call raised NameError before the
    # browser was closed. The scraped data stays in the module-level lists.
    time.sleep(1)
finally:
    # Always shut the browser down, even when scraping fails part-way.
    driver.quit()
print("爬取结束")
# 可以的话,请点赞关注