import requests
from bs4 import BeautifulSoup
# Scrape the first-level brand list from the Autohome electric-vehicle index
# page and collect it into ``brand_data`` as {brand name: absolute brand URL}.
base_url = 'https://car.autohome.com.cn'
url = 'https://car.autohome.com.cn/diandongche/index.html'
headers = {
    'Referer': 'https://car.autohome.com.cn/',
    'Sec-Fetch-Mode': 'no-cors',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
}
brand_data = {}
try:
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    if r.status_code != 200:
        # Report unexpected statuses instead of silently producing no data.
        print("爬取失败!", "HTTP", r.status_code)
    else:
        # Use the encoding detected from the page content so the Chinese
        # text is not decoded into garbage.
        r.encoding = r.apparent_encoding
        soup = BeautifulSoup(r.text, "lxml")
        car_tree = soup.find('div', id='cartree')
        if car_tree is None:
            # find() returns None when the element is absent (e.g. the page
            # layout changed or an anti-bot page was served).
            print("爬取失败!", "div#cartree not found")
        else:
            for li in car_tree.find_all('li'):
                for anchor in li.find_all('a'):
                    href = anchor.get('href')
                    if not href:
                        # An anchor without a target must not abort the
                        # whole crawl; skip just this one.
                        continue
                    name = anchor.text.strip()
                    link = base_url + href
                    print("一级品牌数据:", name, "=", link)
                    brand_data[name] = link
except Exception as exc:
    # Top-level boundary of the script: stay best-effort, but surface the
    # cause and let KeyboardInterrupt / SystemExit propagate.
    print("爬取失败!", repr(exc))