问题
预测模型执行代码过慢,想要快速预测。 每次都是将历史数据作为训练数据喂给模型,然后让模型输出预测数据。这个模型的训练过程太慢。
解决
(1)将训练好的模型存储起来,每次预测时直接加载已存储的模型进行预测,跳过训练环节; (2)有新数据时,可以在不紧急的时候重新训练模型并存储起来;可以根据训练的执行速度设定更新频率,比如每2个小时更新一次模型,或每天更新一次模型。
实现
新建文件 quikly_predict.py,内容如下:
import os
from fbprophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt
import math
import pickle
class data_quikly_predict:
    """Train Prophet models once, pickle them to disk and reuse them to predict.

    Each model is stored as a single pickle file inside ``data_dir``; the file
    name is the model name supplied by the caller. ``model_list`` caches the
    paths of the files found in that folder.
    """

    # Directory that contains this source file.
    pwd = os.path.dirname(os.path.abspath(__file__))
    # Folder in which the pickled models are kept.
    data_dir = os.path.join(pwd, "data")

    def __init__(self):
        """Create the model folder if it is missing and index its files."""
        if not os.path.exists(self.data_dir):
            # exist_ok: another process may create the folder between the
            # existence check above and this call.
            os.makedirs(self.data_dir, exist_ok=True)
        self.model_list = self.find_all_file(self.data_dir)

    def update_model_list(self):
        """Rescan ``data_dir`` and refresh the cached ``model_list``."""
        self.model_list = self.find_all_file(self.data_dir)

    def find_all_file(self, dirname):
        """Return the paths of all files below *dirname* (recursive walk).

        :param dirname: directory to scan
        :return: list of paths, each built by joining the walked folder and
            the file name
        """
        return [
            os.path.join(folder, filename)
            for folder, _subdirs, filenames in os.walk(dirname)
            for filename in filenames
        ]

    def _load_model(self, model_name: str):
        """Unpickle and return the stored model called *model_name*.

        :param model_name: file name of the model inside ``data_dir``
        :return: the unpickled model object
        :raises FileNotFoundError: if no such model file is found
        """
        model_file = os.path.join(self.data_dir, model_name)
        if model_file not in self.model_list:
            # The cached listing can be stale when the model was saved by
            # another instance or process; rescan once before giving up.
            self.update_model_list()
        if model_file not in self.model_list:
            raise FileNotFoundError(
                "没有在data里找到这个模型文件,请确保用数据训练出了可使用的模型\r\n"
            )
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # model files that this application wrote itself.
        with open(model_file, 'rb') as f:
            return pickle.load(f)

    def create_new_model(self, df: pd.DataFrame, new_model_name: str):
        """Fit a new Prophet model on *df* and store it under *new_model_name*.

        An existing model with the same name is overwritten (a notice is
        printed first). The caller's DataFrame is left unmodified.

        :param df: training data; must contain the columns ``ds`` and ``y``
        :param new_model_name: name of the model (used as the file name)
        :return: None
        """
        assert ("ds" in df.columns) and ("y" in df.columns), \
            "df must contain the columns 'ds' and 'y'"
        # assign() returns a new frame, so the caller's 'ds' column keeps its
        # original dtype instead of being overwritten in place.
        training_df = df.assign(ds=df['ds'].astype('datetime64[ns]'))
        model = Prophet()
        model.fit(training_df)
        model_file = os.path.join(self.data_dir, new_model_name)
        if model_file in self.model_list:
            print("模型名称已经被使用,但即将被置换\r\n")
        with open(model_file, 'wb') as f:
            pickle.dump(model, f)
        self.update_model_list()

    def predict(self, future_time: pd.DataFrame, model_name: str):
        """Predict with a stored model for the given time points.

        :param future_time: DataFrame with a ``ds`` column holding the future
            time points to predict
        :param model_name: name of the stored model to use
        :return: DataFrame with the columns ``ds`` and ``yhat``
        :raises FileNotFoundError: if the model file does not exist
        """
        model = self._load_model(model_name)
        forecast = model.predict(future_time)
        return forecast[['ds', 'yhat']]

    def dummy_predict(self, periods, freq: str, model_name: str):
        """Predict without building the future DataFrame by hand.

        The future time points are produced by the model's own
        ``make_future_dataframe`` with ``include_history=False``.

        :param periods: how many data points to predict
        :param freq: frequency string passed on to ``make_future_dataframe``
        :param model_name: name of the stored model to use
        :return: DataFrame with the columns ``ds`` and ``yhat``
        :raises FileNotFoundError: if the model file does not exist
        """
        model = self._load_model(model_name)
        future_time = model.make_future_dataframe(periods=periods, freq=freq, include_history=False)
        forecast = model.predict(future_time)
        return forecast[['ds', 'yhat']]

    def quikly_plot_forecast(self, forecast: pd.DataFrame):
        """Plot ``yhat`` against ``ds`` as a blue line and show the figure.

        :param forecast: DataFrame with the columns ``ds`` and ``yhat``
        """
        plt.plot(list(forecast['ds']), list(forecast['yhat']), color='b')
        plt.show()
if __name__ == '__main__':
    # Demo: fit a model on hourly synthetic data, store it as "camera1",
    # then forecast a later date range with it and plot the result.
    predictor = data_quikly_predict()

    # Training set: y is sin() of the hour-of-day of every timestamp.
    train_times = list(pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H'))
    train_values = [math.sin(stamp.hour) for stamp in train_times]
    train_df = pd.DataFrame({'ds': train_times, 'y': train_values})
    predictor.create_new_model(train_df, "camera1")

    # Time points to forecast.
    forecast_times = list(pd.date_range(start='2022-05-01 00:00:00',
                                        end='2022-05-05 01:00:00',
                                        freq='H'))
    future = pd.DataFrame({'ds': forecast_times})

    forecast_res = predictor.predict(future, model_name="camera1")
    predictor.quikly_plot_forecast(forecast_res)
如何使用
下面代码是正常过程,先执行训练,然后预测数据。
import math
import pandas as pd
from fbprophet_persistent_prodict.quikly_predict import data_quikly_predict
# Regular flow: train and store the model first, then predict with it.
predictor = data_quikly_predict()

# Training set: y is sin() of the hour-of-day of every timestamp.
train_times = list(pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H'))
train_values = [math.sin(stamp.hour) for stamp in train_times]
train_df = pd.DataFrame({'ds': train_times, 'y': train_values})
predictor.create_new_model(train_df, "camera6")

# Time points to forecast.
forecast_times = list(pd.date_range(start='2022-05-01 00:00:00',
                                    end='2022-05-05 01:00:00',
                                    freq='H'))
future = pd.DataFrame({'ds': forecast_times})

forecast_res = predictor.predict(future, model_name="camera6")
predictor.quikly_plot_forecast(forecast_res)
先用已存储的模型进行预测,之后再训练并更新模型:
import math
import pandas as pd
from fbprophet_persistent_prodict.quikly_predict import data_quikly_predict
# Fast path: predict with the model that is already stored, and only
# afterwards retrain and overwrite it.
predictor = data_quikly_predict()

# Prediction inputs only; nothing has been predicted yet at this point.
forecast_times = list(pd.date_range(start='2022-05-01 00:00:00',
                                    end='2022-05-05 01:00:00',
                                    freq='H'))
future = pd.DataFrame({'ds': forecast_times})
forecast_res = predictor.predict(future, model_name="camera6")

# Retrain on the historical data and replace the stored "camera6" model.
train_times = list(pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H'))
train_values = [math.sin(stamp.hour) for stamp in train_times]
train_df = pd.DataFrame({'ds': train_times, 'y': train_values})
predictor.create_new_model(train_df, "camera6")
|