Learning goals:
Hint: simple data extraction
Learning content:
import threading as td
from selenium import webdriver


class MyThread(td.Thread):
    """Thread subclass that keeps the target's return value so it can be read after join()."""

    def __init__(self, target, args=()):
        super(MyThread, self).__init__()
        self.target = target
        self.args = args

    def run(self):
        # Store the return value instead of discarding it, as threading.Thread normally does
        self.result = self.target(*self.args)

    def get_result(self):
        try:
            return self.result
        except AttributeError:
            # run() has not finished (or failed), so there is no result yet
            return None
def get_url_list(dr):
    dr.implicitly_wait(5)
    try:
        # XPath left empty in the original notes; fill in the locator for the target list
        list_data = dr.find_elements_by_xpath('')
        return list_data
    except Exception:
        return "failed"
    finally:
        dr.implicitly_wait(30)
def search(name, delay):
    driver_path = "F:\\个人学习资料\\学习\\爬虫\\chromedriver_win32+(2)\\chromedriver.exe"
    driver = webdriver.Chrome(executable_path=driver_path)
    driver.implicitly_wait(30)
    driver.get('')  # URL left empty in the original notes
    driver.implicitly_wait(30)
    list_data = get_url_list(driver)
    # Process the content pages
    check_page_num(driver)
def check_page_num(dr):
    dr.implicitly_wait(20)
    try:
        # When three windows are open, close the second one
        if len(dr.window_handles) == 3:
            dr.switch_to.window(dr.window_handles[1])
            dr.close()
        for handle in dr.window_handles:
            dr.switch_to.window(handle)
            # Read the value from the first window that contains the target cell
            data_value = dr.find_element_by_xpath("//table[@id='taskList']/tbody/tr/td[contains(text(),'APF')]/following-sibling::td[3]").text
            return data_value
    except Exception:
        return 0
    finally:
        dr.implicitly_wait(5)
        dr.switch_to.window(dr.window_handles[0])
        dr.implicitly_wait(5)
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    t1 = MyThread(search, args=('thread-1', 1))
    t1.start()
    t1.join()
    print(t1.get_result())
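
The key idea of the MyThread wrapper is that threading.Thread.run() normally discards the target's return value, so the subclass stores it in self.result and exposes it through get_result() after join(). Below is a minimal, Selenium-free sketch of the same pattern; the add() function and its arguments are illustrative placeholders, not part of the original notes.

import threading


class ResultThread(threading.Thread):
    """Same pattern as MyThread above: keep the target's return value."""

    def __init__(self, target, args=()):
        super().__init__()
        self._fn = target
        self._args = args
        self.result = None

    def run(self):
        # Store the return value instead of discarding it
        self.result = self._fn(*self._args)


def add(a, b):
    # Placeholder for search(): any function whose result we want back
    return a + b


if __name__ == '__main__':
    t = ResultThread(add, args=(2, 3))
    t.start()
    t.join()         # wait for the worker to finish before reading the result
    print(t.result)  # -> 5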
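
check_page_num relies on driver.window_handles and switch_to.window to move between browser tabs. Here is a small standalone sketch of that switching pattern, assuming a Selenium 3 style chromedriver setup as in the notes; the chromedriver path and example.com URL are placeholders, not from the original.

from selenium import webdriver

# Placeholder driver path; Selenium 3 style setup as used in the notes above
driver = webdriver.Chrome(executable_path="chromedriver.exe")
driver.get("https://example.com")

# Open a second tab via JavaScript, the same situation a target=_blank link would create
driver.execute_script("window.open('https://example.com');")

# window_handles lists every open tab in the order it was created
print(len(driver.window_handles))                   # -> 2

driver.switch_to.window(driver.window_handles[1])   # focus the new tab
driver.close()                                      # close it
driver.switch_to.window(driver.window_handles[0])   # return to the first tab

driver.quit()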