import requests
from requests.exceptions import HTTPError,ConnectionError
from lxml import etree
# Fetch one article page and print the text found at a fixed XPath location.
url = 'http://www.zpdj.cn/gbgz/gsrm/content_214626'
try:
    r = requests.get(url, timeout=3)
    print('----------------')
    print(url)
except (ConnectionError, requests.exceptions.Timeout) as err:
    # When the request itself fails there is no response object, so there is
    # no status code to report; show the error and stop instead of falling
    # through to code that needs `r`.
    raise SystemExit('Failed to fetch {}: {}'.format(url, err))

dom_tree = etree.HTML(r.text)
# The trailing /text() makes XPath return the text nodes themselves; without
# it the query yields element objects and the loop prints their reprs.
text = dom_tree.xpath(
    '//*[@id="content"]/div[1]/div[2]/article/div[1]/div[2]/p[4]/span[1]/span/text()'
)
for fragment in text:
    print(fragment)
使用网上说的 r=requests.get(url, timeout=3).content.decode('utf-8'),text=dom_tree.xpath('//*[@id="content"]/div[1]/div[2]/article/div[1]/div[2]/p[4]/span[1]/span') 这句会出现 AttributeError: 'str' object has no attribute 'text' 错误。后来通过多方查找发现,在 XPath 末尾加上 '/text()' 即可,也就是写成 text=dom_tree.xpath('//*[@id="content"]/div[1]/div[2]/article/div[1]/div[2]/p[4]/span[1]/span/text()') 就可以得到想要的结果。
|