import os

import openpyxl
import requests
from bs4 import BeautifulSoup
# Fetch the page that lists the book's attributes.
url = 'https://wp.forchange.cn/psychology/11069/'
res = requests.get(url)
# Fail fast on HTTP errors instead of silently parsing an error page.
res.raise_for_status()
print(res.status_code)
# Force UTF-8 decoding so the Chinese text is not garbled.
res.encoding = 'utf-8'

# Parse the response HTML into a BeautifulSoup tree.
bs = BeautifulSoup(res.text, 'html.parser')

# Workbook (one Excel file) and its active worksheet that will
# receive the scraped data.
wb = openpyxl.Workbook()
ws = wb.active

# <div class="res-attrs"> wraps one <dl> per book attribute.
info_tag = bs.find('div', class_='res-attrs')
info_list = info_tag.find_all('dl')

# Collect each attribute as {label: value}; the <dt> text carries a
# two-character suffix (label colon), which [:-2] strips off.
info_dict = {}
for info in info_list:
    key = info.find('dt').text[:-2]
    value = info.find('dd').text
    info_dict[key] = value

# Demo data set: five references to the same record (the original
# script appended the identical dict five times by hand).
info_dict_list = [info_dict] * 5

# Show what was scraped.
print(info_dict)
print(info_dict_list)

# Header row: the attribute labels, in insertion order.
ws.append(list(info_dict.keys()))
# One row per record; dict insertion order keeps values aligned with
# the header (and, unlike chr(65 + n) addressing, works past column Z).
for item in info_dict_list:
    ws.append(list(item.values()))

# Make sure the target directory exists before saving the workbook.
os.makedirs('./material', exist_ok=True)
wb.save('./material/爬网_result.xlsx')