# 获取想要查询的基金数据:通过基金代码、开始日期和结束日期等进行查询
import io
import re

import pandas as pd
import requests
def get_html(code, start_date, end_date, page=1, per=40, timeout=10):
    """Fetch a single page of fund data and return the raw response text.

    Sends a GET request to the Eastmoney ``F10DataApi.aspx`` endpoint with
    ``type=lsjz`` and the given query values.

    Args:
        code: Fund code, sent as the ``code`` query parameter.
        start_date: Start of the date range, sent as ``sdate``.
        end_date: End of the date range, sent as ``edate``.
        page: Page number to request, sent as ``page`` (defaults to 1).
        per: Sent as ``per``; presumably the number of records per page
            (TODO confirm against the API).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    url = 'http://fund.eastmoney.com/f10/F10DataApi.aspx'
    # Let requests build and escape the query string instead of formatting
    # it by hand; the key order matches the original URL.
    params = {
        'type': 'lsjz',
        'code': code,
        'page': page,
        'sdate': start_date,
        'edate': end_date,
        'per': per,
    }
    rsp = requests.get(url, params=params, timeout=timeout)
    # Fail here with a clear HTTP error rather than handing an error page
    # to the parser.
    rsp.raise_for_status()
    return rsp.text
def parses_table(html):
    """Extract the data table from an API response and parse it.

    The response text is expected to contain a fragment of the form
    ``content:"<table ...>...</table>",``; that table is parsed with
    ``pandas.read_html``.

    Args:
        html: Raw response text as returned by the API.

    Returns:
        A ``pandas.DataFrame`` built from the first table in the fragment.

    Raises:
        ValueError: If the response text contains no table fragment.
    """
    match = re.search('content:"<table(.*)</table>",', html)
    if match is None:
        raise ValueError('no <table> found in the response content')
    # The pattern consumes the opening and closing tags, so put them back.
    table = '<table' + match.group(1) + '</table>'
    # Recent pandas versions deprecate passing literal HTML to read_html;
    # a file-like wrapper works on both old and new versions.
    return pd.read_html(io.StringIO(table))[0]
def get_fund_data(code, start_date, end_date):
    """Fetch fund data (cumulative net value etc.) for a date range.

    Requests the first page, reads the total page count from it, fetches
    the remaining pages, and concatenates every page's table into a single
    DataFrame. The result is also printed.

    Args:
        code: Fund code passed through to ``get_html``.
        start_date: Start of the date range passed through to ``get_html``.
        end_date: End of the date range passed through to ``get_html``.

    Returns:
        A ``pandas.DataFrame`` with the rows of all pages and a continuous
        0..n-1 index; an empty DataFrame when the response reports zero
        pages. If the page count cannot be found in the response, an error
        message string starting with ``【错误信息】`` is returned instead
        (existing convention of this function, kept for callers).
    """
    first_page = get_html(code, start_date, end_date)

    # Total number of pages as reported by the first response. Matching
    # digits only keeps the capture from running on to a later comma.
    match = re.search(r'pages:(\d+)', first_page)
    if match is None:
        return '【错误信息】pages not found in response'
    pages = int(match.group(1))

    if pages == 0:
        # Nothing to concatenate; pd.concat would raise on an empty list.
        fund_df = pd.DataFrame()
    else:
        # The first page is already downloaded, so parse it directly and
        # only request pages 2..pages.
        fund_df_list = [parses_table(first_page)]
        fund_df_list.extend(
            parses_table(get_html(code, start_date, end_date, page=page))
            for page in range(2, pages + 1)
        )
        # ignore_index avoids repeated index labels, since each page's
        # table starts again at 0.
        fund_df = pd.concat(fund_df_list, ignore_index=True)

    print(fund_df)
    return fund_df
if __name__ == '__main__':
    # Example run; guarded so that importing this module does not trigger
    # a network request.
    get_fund_data('000001', '2022-02-02', '2022-04-05')
?
|