# Python pandas 数据分析入门
## 求和、求平均 统计引导
import pandas as pd
import matplotlib.pyplot as plt
# Load the score sheet, using its 'ID' column as the row index.
df = pd.read_excel('文件目录', index_col='ID')

# Per-row statistics across the three subject columns.
subjects = ['Chinese', 'English', 'Math']
temp = df[subjects]
df['Total'] = temp.sum(axis=1)
df['Average'] = temp.mean(axis=1)

# Column-wise means of every numeric column, including the two derived ones,
# labelled so the extra row is recognisable.
# NOTE(review): assumes the sheet has a 'Name' column -- confirm; if it does
# not, the concat below adds one.
col_mean = df[subjects + ['Total', 'Average']].mean()
col_mean['Name'] = 'Summary'

# DataFrame.append was removed in pandas 2.0; concatenate a one-row frame
# instead. infer_objects() restores numeric dtypes on that row, because the
# mixed float/str Series is object-typed. ignore_index=True renumbers the rows
# from 0, so the 'ID' index is dropped here (as the original append call did).
summary_row = col_mean.to_frame().T.infer_objects()
df = pd.concat([df, summary_row], ignore_index=True)
## 相关性分析
import pandas as pd
import matplotlib.pyplot as plt
# Let pandas print up to 20 columns before it truncates the output.
pd.options.display.max_columns = 20

df = pd.read_excel('文件目录')

# Compute the pairwise column correlation matrix once and print it.
# NOTE(review): on pandas >= 2.0, corr() raises if the sheet contains
# non-numeric columns -- confirm the input is all numeric.
corr_matrix = df.corr()
print(corr_matrix)
## 线性回归与预测
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
# Read 'Month' as text; it is only used below as the x-axis tick labels.
df = pd.read_excel('文件目录', dtype={'Month': str})

# Fit a straight line to Revenue against the row index.
slope, intercept, r, p, std_err = linregress(df.index, df.Revenue)

# Fitted value of the regression line at each row index.
exp = df.index * slope + intercept

# Observed revenue as points, with the fitted line drawn over them.
plt.scatter(df.index, df.Revenue)
plt.plot(df.index, exp, color='orange')
plt.title("Sales")

# Label each index position with its month, rotated so the labels do not overlap.
plt.xticks(df.index, df.Month, rotation=90)
plt.tight_layout()
plt.show()
|