静态页面爬取示例:使用 Selenium 抓取网页评论,并包含翻页(点击“下一页”)操作。具体代码如下:
from selenium import webdriver
import requests
import time
# Scrape the LiveRe comments under the blog post page by page: print the
# comments shown inside the comment iframe, click the numbered pagination
# button to load the next page, and repeat.
#
# NOTE(review): this script was reconstructed from an unindented paste; the
# nesting below is the reading implied by the statements themselves.
# NOTE(review): `executable_path=` and the `find_element(s)_by_*` helpers are
# the Selenium 3 API and are kept as found -- confirm the installed Selenium
# version still provides them.
driver_url = r"D:\anconda\msedgedriver.exe"
browser = webdriver.Edge(executable_path=driver_url)
s = 0  # running number of comments printed so far, across all pages

# Pagination buttons 2..5 are clicked, so pages 1..5 are scraped.
# NOTE(review): an earlier version looped over the tuple (2, 6), which visits
# only the values 2 and 6; range(2, 6) is presumed to be the intent -- confirm.
last_page = 5

try:
    browser.get("http://www.santostang.com/2018/07/04/hello-world/")
    time.sleep(10)  # fixed wait for the page and its comment widget to load

    for page in range(1, last_page + 1):
        # Label the page that is actually being scraped in this iteration.
        print("这是第{}页".format(page),'\n')
        time.sleep(5)  # fixed wait for the current comment page to render

        # The comments live inside an iframe; element lookups only see them
        # after switching the driver into that frame.
        comment_frame = browser.find_element_by_css_selector("iframe[title='livere-comment']")
        browser.switch_to.frame(comment_frame)
        try:
            browser.execute_script("window.scrollTo(0,document.body.scrollHeight);")
            comments = browser.find_elements_by_css_selector("div.reply-content")
            for eachonecomment in comments:
                content = eachonecomment.find_element_by_tag_name('p')
                s += 1
                print("第{}条评论".format(s),content.text)

            if page < last_page:
                # The plural lookup returns an empty list instead of raising
                # when the button for the next page is not present.
                next_buttons = browser.find_elements_by_css_selector(
                    "button.page-btn[data-page='{}']".format(page + 1)
                )
                if not next_buttons:
                    break  # no further page to load; stop paging
                next_buttons[0].click()
        finally:
            # Always leave the iframe so the next iteration (or any later
            # code) starts from the top-level document.
            browser.switch_to.default_content()

    print("完成")
finally:
    # Close the browser and stop the driver process even if scraping failed.
    browser.quit()
以上代码没有使用 class 封装,只是一个简单的 for 循环,后续可以继续优化。
|