杀虫市场分析(1)
获取数据
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.style as style
import seaborn as sns
import warnings
import datetime
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# Apply the base theme first; the rcParams overrides below are set after it.
style.use("fivethirtyeight")
plt.rcParams.update(
    {
        "font.sans-serif": ["SimHei"],  # font used for the Chinese labels
        "axes.unicode_minus": False,
        "figure.figsize": (14, 8),
    }
)

# Print numbers in fixed-point form: no scientific notation in NumPy output,
# three decimals for floats shown by pandas.
np.set_printoptions(suppress=True)
pd.set_option("display.float_format", "{:.3f}".format)
加载数据
# Work from the folder that holds the per-market workbooks.
os.chdir("./data/杀虫/驱虫剂市场")
filenames = glob.glob("*市场近三年交易额.xlsx")

# Read each workbook into a one-column frame indexed by time.
dfs = []
for f in filenames:
    market = f.split("市场")[0]
    df = pd.read_excel(f)
    stamps = df["时间"]
    if stamps.dtype == "int64":
        # Integer timestamps are day counts measured from 1899-12-30.
        df["时间"] = pd.to_datetime(
            stamps, unit="D", origin=pd.Timestamp("1899-12-30")
        )
    df = df.set_index("时间")
    # Name the single value column after the market the file belongs to.
    df.columns = [market + "交易金额"]
    dfs.append(df)

# Place the markets side by side, aligned on the time index.
df = pd.concat(dfs, axis=1)
观察数据
# Column dtypes of the merged table.
df.dtypes
# Number of missing values in each column.
df.isna().sum()
# Latest and earliest timestamps present in the index.
df.index.max(),df.index.min()
df.plot() # plot every column against the time index
# Yearly totals for each column.
df.resample("Y").sum().plot()
数据量太少,需要补充缺失月份的数据(2018年11、12月以及2015年1–10月),用线性回归按月份逐列进行预测补值
from sklearn.linear_model import LinearRegression
def fill(df=None, year=None, month=None):
    """Return ``df`` with one predicted row for ``year``/``month`` prepended.

    For every value column a separate linear regression is fitted on the
    existing rows that fall in the same calendar month (features: year and
    month) and is then evaluated at the requested year and month.

    Args:
        df: DataFrame whose index is named "时间" and holds datetime values.
            Every column is treated as a series to extrapolate.
        year: Year of the row to create.
        month: Month of the row to create.

    Returns:
        A new DataFrame indexed by "时间": the predicted row first, followed
        by the original rows. The frame passed in is not modified.

    Raises:
        ValueError: If ``df`` has no rows for the requested month, so there
            is nothing to fit the regression on.
    """
    frame = df.reset_index()
    frame["year"] = frame["时间"].dt.year
    frame["month"] = frame["时间"].dt.month

    same_month = frame.query(f"month=={month}")
    if same_month.empty:
        raise ValueError(f"no existing rows for month {month} to fit on")

    X_train = same_month.loc[:, ["year", "month"]]
    # Columns are [时间, <value columns...>, year, month]; keep only the values.
    y_trains = same_month.iloc[:, 1:-2]
    # Same column names as the training features.
    X_pred = pd.DataFrame({"year": [year], "month": [month]})

    # One-row frame: the new timestamp plus one prediction per value column.
    predicted = {"时间": [datetime.datetime(year, month, 1)]}
    for name, y_train in y_trains.items():
        model = LinearRegression().fit(X_train, y_train)
        predicted[name] = model.predict(X_pred)
    newrow = pd.DataFrame(predicted)

    original = frame.drop(columns=["year", "month"])
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([newrow, original]).set_index(["时间"])
# Months to synthesize, in processing order: November and December of 2018,
# then January through October of 2015.
missing_periods = [(2018, 11), (2018, 12)]
missing_periods += [(2015, m) for m in range(1, 11)]
for fill_year, fill_month in missing_periods:
    df = fill(df, fill_year, fill_month)

# Order rows from newest to oldest, then plot every column.
df = df.sort_index(ascending=False)
df.plot()
~~~ python
for i in range(1,11):
    df = fill(df,2015,i)
df=df.sort_index(ascending=False)
df.plot()
~~~