# Complete code:
from lxml.html import fromstring
from bs4 import BeautifulSoup
import xlwt
import json
# Parse the saved fund page and collect one record per fund container.
with open('rr.html', 'r', encoding='utf-8') as f:
    html = BeautifulSoup(f, 'html.parser')

# Select every <div class="box_cont_0 cb">; presumably one per fund.
# Kept in a plain variable rather than as an ad-hoc attribute on the soup
# object, where attribute access normally means "find a child tag".
fund_boxes = html.find_all('div', class_='box_cont_0 cb')

fund = []
for item in fund_boxes:
    # strip() with no argument trims surrounding whitespace; the previous
    # strip('') removed nothing at all.
    fundName = item.find('strong', class_='fund_name fl').a.text.strip()
    # Both figures are read from the second <tr>; look it up once per box.
    value_row = item.find_all('tr')[1]
    fundSum = value_row.contents[1].string  # second child node of the row
    fundProfit = value_row.contents[4].string  # fifth child node of the row
    fund.append({
        'fundName': fundName,
        'fundSum': fundSum,
        'fundProfit': fundProfit,
    })

# Persist the records as JSON; ensure_ascii=False keeps non-ASCII text as-is.
with open('.fund.json', 'w', encoding='utf-8') as f:
    json.dump(fund, f, indent=1, ensure_ascii=False)
# Load the fund records back from the JSON file.
with open(".fund.json", 'r', encoding='utf-8') as f:
    data = json.load(f)

# Create a UTF-8 workbook containing a single worksheet.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('Worksheet')

# Header row: each title goes into its own column. The previous code passed
# the whole list as the cell value instead of the individual title.
title = ["基金名称", "总市值", "利润"]
for col, heading in enumerate(title):
    worksheet.write(0, col, heading)

# Data rows start at row 1, directly below the header.
# worksheet.write takes (row, column, value); the `label` keyword is optional.
for row, record in enumerate(data, start=1):
    worksheet.write(row, 0, label=record['fundName'])
    worksheet.write(row, 1, label=record["fundSum"])
    worksheet.write(row, 2, label=record["fundProfit"])

workbook.save('Excel.xls')
|