# 个人笔记 (personal notes)
import requests
import re
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def hebej():
    """Fetch a fixed hebei.net.cn article page and return its read counter.

    Downloads the page, joins the text nodes selected by the XPath below and
    extracts the digits from the first "该页已被阅读<N>次" marker in that text.

    Returns:
        str: The digits captured from the first read-count marker.

    Raises:
        IndexError: If the marker is not present in the selected text. This
            is the same exception type the earlier ``bat[0]`` lookup raised,
            now with an explanatory message.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = "http://www.hebei.net.cn/web/main/jjjxx_jyzf/2c940d846564b37b017a1cb158c058f2.htm"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36"
    }
    # requests never times out by default; bound the wait so an unresponsive
    # server cannot hang the caller forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Let requests guess the charset from the body rather than trusting the
    # HTTP header, so the Chinese marker text decodes correctly.
    response.encoding = response.apparent_encoding
    tree = etree.HTML(response.text)
    page = "".join(tree.xpath('//*[@id="ej_main"]/div[2]/div[1]/text()'))
    # Raw string: "\d" in a normal string literal is an invalid escape.
    match = re.search(r"该页已被阅读(\d+)次", page)
    if match is None:
        raise IndexError("read-count marker not found in page text")
    return match.group(1)
def shouhu():
    """Return the read count displayed on a fixed sohu.com article.

    Opens the article in headless Chrome, reads the text of the element
    matched by the XPath below and strips every '万' character from it.

    Returns:
        str: The element text with '万' removed. The value is NOT scaled;
            callers get the number exactly as displayed, minus the unit.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # NOTE(review): `chrome_options=`, `executable_path=` and
    # `find_element_by_xpath` are Selenium 3 style calls; they are kept as-is
    # for compatibility with the installed version - confirm before upgrading.
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path="chromedriver.exe")
    try:
        driver.get("https://www.sohu.com/a/472717661_100011043")
        text = driver.find_element_by_xpath('//div[@class="read-wrap"]/span/em').text
    finally:
        # Always shut the browser down, even when navigation or the element
        # lookup raises; otherwise each call leaks a Chrome process.
        driver.quit()
    return text.replace('万', '')
|