import os
import re
import time
import zipfile

import requests
import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
def get_photos(start_pos):
    """Scrape one batch of ibaotu.com: log in via QQ, lazy-load the whole
    gallery, queue downloads for the 20 items preceding *start_pos*, and
    extract every zip archive found in D:/download/.

    Parameters
    ----------
    start_pos : int
        Exclusive upper bound of the slice of collected download links to
        fetch; list indices ``start_pos-1`` down to ``start_pos-20`` are used.

    Side effects
    ------------
    Opens a Chrome window, triggers downloads through the browser, extracts
    each archive into ``D:/<count>/`` and increments the module-level
    ``count`` global.
    """
    option = ChromeOptions()
    # Strip the automation fingerprints so the site does not flag the bot.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    option.add_argument("--disable-blink-features")
    option.add_argument("--disable-blink-features=AutomationControlled")
    # Open Chrome and the gallery page.
    driver = webdriver.Chrome(options=option)
    driver.get('https://ibaotu.com/sy/17-0-0-0-0-112.html')
    # The login pop-up is a plain div, so it is located like any other
    # element.  Click the "log in" entry in the header.
    driver.find_element(
        By.CSS_SELECTOR,
        'body > header.b-header.b-header-float-fixed.b-header-classify.header-have-boxshodow.b-header-fixed--no > div > div.b-header-right.clearfix > div:nth-child(4) > div > p').click()
    # Remember the window handles so we can come back to the main window.
    current_window = driver.window_handles
    driver.implicitly_wait(5)
    driver.switch_to.window(current_window[0])
    time.sleep(2)
    # Choose QQ as the social-login provider.
    driver.find_element(
        By.CSS_SELECTOR,
        'body > div.re-popbox.reg-pop.login-New > div > div.login-in-way.clearfix > a.in-way-WX.ibaotu-md-click.auth-type-QQ.btn-social-login-item').click()
    # The QQ login form opens in a second window and lives inside an iframe.
    current_window1 = driver.window_handles
    driver.switch_to.window(current_window1[1])
    driver.switch_to.frame(0)
    # Switch the QQ widget from QR-code mode to account/password mode.
    driver.find_element(By.CSS_SELECTOR, '#switcher_plogin').click()
    # NOTE(security): hard-coded credentials — move these to environment
    # variables or a config file before sharing/committing this script.
    username = '351916740'
    password = '920825ZlyTC10'
    time.sleep(3)
    # Re-enter the iframe: the input fields are only reachable inside it.
    driver.switch_to.window(current_window1[1])
    driver.switch_to.frame(0)
    driver.find_element(By.CSS_SELECTOR, '#u').send_keys(username)
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, '#p').send_keys(password)
    time.sleep(3)
    driver.find_element(By.ID, "login_button").click()
    time.sleep(3)
    # (A captcha slider may appear here; it is not handled automatically.)
    # Switch back to the ibaotu window.
    driver.switch_to.window(current_window[0])
    # Keep scrolling until the document height stops growing, i.e. every
    # lazy-loaded gallery item has been added to the DOM.
    all_window_height = [driver.execute_script("return document.body.scrollHeight;")]
    while True:
        driver.execute_script("scroll(0,15000)")
        time.sleep(3)
        check_height = driver.execute_script("return document.body.scrollHeight;")
        if check_height == all_window_height[-1]:
            # Height unchanged after a scroll: we reached the bottom.
            break
        all_window_height.append(check_height)
    # Parse the fully-loaded page and build one download URL per item.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    body = soup.find('div', attrs={'class': 'skin-wrap body-background-gradient'})
    # find() returns only the first matching container.
    body = body.find('div',
                     attrs={'class': 'search-list box-bg-search box-bottom-gradient clearfix'})
    items = body.find_all('div', attrs={'class': 'hover-pop'})
    pattern = re.compile(r'\d+')  # compiled once, reused for every item
    download_url_list = []
    for hover in items:
        href = hover.find('a')['href']
        # The first number in the item link is the id the download
        # endpoint expects.
        photo_id = pattern.findall(href)[0]
        download_url = ('https:' + '//ibaotu.com/?m=downloadopen&a=open&id='
                        + str(photo_id) + '&down_type=1&&attachment_id=&zt_size_type=0')
        download_url_list.append(download_url)
    # Trigger the 20 downloads ending just before start_pos; navigating to
    # the URL makes Chrome save the archive into its download directory.
    for i in range(start_pos - 1, start_pos - 21, -1):
        time.sleep(2)
        driver.get(download_url_list[i])
        time.sleep(5)
    # Extract every zip archive Chrome saved into D:/download/.
    global count
    for filename in os.listdir('D:/download/'):
        path = 'D:/download/' + filename
        r = zipfile.is_zipfile(path)
        print(r)
        if r:
            # `with` guarantees the archive handle is closed (the original
            # leaked it).
            with zipfile.ZipFile(path, 'r') as fz:
                dst_dir = 'D:/' + str(count)
                for file in fz.namelist():
                    fz.extract(file, dst_dir)
            count += 1
        else:
            print('This is not zip')
def lenth(length):
    """Return *length* reduced by one page of items (20)."""
    return length - 20
def calulate_day(day):
    """Run scraping rounds until *day* reaches 4, stepping the module-level
    ``length`` down by one page (20 items) before each round and pausing
    5 seconds between rounds."""
    global length
    while day < 4:
        length = lenth(length)
        print(length)
        get_photos(length)
        day += 1
        time.sleep(5)
if __name__ == "__main__":
    # Module-level state shared with the worker functions via ``global``.
    length = 100  # list position the first download batch ends at
    day = 0       # round counter consumed by calulate_day
    count = 0     # running id used to name extraction folders (D:/<count>/)
    calulate_day(day)
    # (Removed a stray trailing '|' that made the file a syntax error.)