pd.cut()的用法
pd.cut
- 取值是连续的字段, 切成几段,变成类别型字段
- bins 怎么切成几段 定义分段的区间
- labels 切成几段每一段
ABC-XYZ库存管理
- 清楚 ABC XYZ 在库存管理中的含义
- 把商品/用户 按照某些字段 做分类这种思想
- ABC 按照商品销量划分三个类别 划分的依据 二八法则 , 这种思路很常用
- XYZ 和 ABC交叉 从不同维度对商品/用户 进行细分
- 考虑的维度更多, 分群粒度可以更小, 运营可以更加精细
- XYZ 标签 使用变异系数进行划分的
变异系数
- cov = 标准差/平均值 反应了数据的波动情况 / 离散情况
- COV 变异系数越小 说明 数据波动越小, 大多数数据与平均值的差较小
- COV 变异系数越小 说明 数据波动越大, 离散程度越强
- XYZ 库存管理 就是统计了一年12个月的销量,计算每个SKU 销量的变异系数
- 变异系数越大 需求越不稳定, 库存管理越困难
- 变异系数越小, 需求越稳定, 库存管理越容易
ABC-XYZ
- X 结尾的
- Y 结尾的
- Z结尾的
- 可以采用下单采购
- 有明显季节性, 节日性商品,根据以往经验提前备货
代码实现过程
from datetime import timedelta
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("dark")
sns.set()
data1 = pd.read_excel('data2/online_retail_II.xlsx',sheet_name='Year 2009-2010')
data2 = pd.read_excel('data2/online_retail_II.xlsx',sheet_name='Year 2010-2011')
data = pd.concat([data1,data2],ignore_index=True)
data.head()
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])
df_12m = data[(data['InvoiceDate'] > '2010-01-01') &
(data['InvoiceDate'] <= '2010-12-31')]
df_12m.columns = ['OrderNo', 'sku', 'Description', 'quantity', 'date_created',
'UnitPrice', 'CustomerID', 'Country']
df_12m = df_12m.assign(month = df_12m['date_created'].dt.month)
df_12m = df_12m.query('quantity>0 & UnitPrice>0')
f, ax = plt.subplots(figsize=(20,8))
ax = sns.barplot(x='month',
y='quantity',
data=df_12m,
palette="Blues_d").set_title("Quantity by month",fontsize=15)
df_12m = df_12m.assign(revenue = df_12m['quantity'] * df_12m['UnitPrice'])
f,ax = plt.subplots(figsize=(20,8))
ax = sns.barplot(x='month',
y='revenue',
data=df_12m,
palette='Blues_d').set_title('Revenue by month",fontsize=15')
构建xyz模型
df_12m_units = df_12m.groupby(['sku', 'month'])\
[['quantity']].sum().reset_index()
df_12m_units = df_12m_units.pivot\
(index='sku', columns='month', values='quantity')\
.add_prefix('m').reset_index().fillna(0)
df_12m_units['std_demand'] = df_12m_units\
[['m1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12']].std(axis=1)
df_12m_units = df_12m_units.assign(totall_demand = df_12m_units[['m1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12']]\
.sum(axis=1))
df_12m_units = df_12m_units.assign\
(avg_demand = df_12m_units['totall_demand']/12)
df_12m_units = df_12m_units.assign(cov_demand =
df_12m_units['std_demand'] / df_12m_units['avg_demand'])
df_12m_units.sort_values('cov_demand', ascending=True).head()
f, ax = plt.subplots(figsize=(15, 6))
ax = sns.distplot(df_12m_units['cov_demand']).set_title("Coefficient ofVariation",fontsize=15)
def xyz_class_product(cov):
if cov <= 0.5:
return 'X'
elif cov > 0.5 and cov <=1.0:
return 'Y'
else:
return 'Z'
df_12m_units['xyz_class'] = df_12m_units['cov_demand'].apply(xyz_class_product)
df_12m_units.xyz_class.value_counts()
df_12m_units.groupby('xyz_class').agg(
total_skus = ('sku', 'nunique'),
total_demand = ('totall_demand', 'sum'),
std_demand = ('std_demand', 'mean'),
avg_demand = ('avg_demand', 'mean'),
avg_cov_demand = ('cov_demand', 'mean'))
df_monthly = df_12m_units.groupby('xyz_class').agg(
m1=('m1', 'sum'),
m2=('m2', 'sum'),
m3=('m3', 'sum'),
m4=('m4', 'sum'),
m5=('m5', 'sum'),
m6=('m6', 'sum'),
m7=('m7', 'sum'),
m8=('m8', 'sum'),
m9=('m9', 'sum'),
m10=('m10', 'sum'),
m11=('m11', 'sum'),
m12=('m12', 'sum'),)
df_monthly_unstacked = df_monthly.unstack('xyz_class').to_frame()
df_monthly_unstacked = df_monthly_unstacked.reset_index().rename(columns={'level_0':'month', 0: 'demand'})
df_monthly_unstacked.head()
f, ax = plt.subplots(figsize=(15, 6))
ax = sns.barplot(x="month",y="demand",\
data=df_monthly_unstacked[df_monthly_unstacked['xyz_class']=='X'],palette="Blues_d")\
.set_title("X class demand by month",fontsize=15)
对ABC库存分析
df_skus = df_12m.groupby('sku').agg(
unique_purchases = ('sku', 'nunique'),
total_units = ('quantity', 'sum'),
total_revenue = ('revenue', 'sum')).\
sort_values('total_revenue', ascending=False).reset_index()
df_skus['revenue_cumsum'] = df_skus['total_revenue'].cumsum()
df_skus['revenue_total'] = df_skus['total_revenue'].sum()
df_skus['revenue_running_percentage'] = df_skus['revenue_cumsum']/df_skus['revenue_total'] * 100
def abc_classfy_product(x):
if x > 0 and x <= 80:
return 'A'
elif x > 80 and x <= 90:
return 'B'
else:
return 'C'
df_skus['abc_class'] = df_skus['revenue_running_percentage'].apply(abc_classfy_product)
df_skus['abc_rank'] = df_skus['revenue_running_percentage'].rank().astype(int)
df_abc = df_skus.groupby('abc_class').agg(
total_skus=('sku', 'nunique'),
total_units=('total_units', sum),
total_revenue=('total_revenue', sum)).reset_index()
plt.rcParams['font.sans-serif'] = 'SimHei'
import warnings
warnings.filterwarnings('ignore')
f, ax = plt.subplots(figsize=(30,10))
ax = sns.barplot(x='abc_class',
y='total_revenue',
data=df_abc,
palette='Blues_d').set_title('ABC三类的贡献总额占比',fontsize=30)
ABC-XYZ库存分析
df_abc = df_skus[['sku','abc_class','abc_rank','total_revenue']]
df_xyz = df_12m_units.copy()
df_abc_xyz = df_abc.merge(df_xyz, on='sku', how='left')
df_abc_xyz['abc_xyz_class'] = df_abc_xyz['abc_class'].astype(str) + \
df_abc_xyz['xyz_class'].astype(str)
df_abc_xyz_summary = df_abc_xyz.groupby('abc_xyz_class').agg(
total_skus=('sku', 'nunique'),
total_demand=('totall_demand', sum),
avg_demand=('avg_demand', 'mean'),
total_revenue=('total_revenue', sum)).reset_index()
f, ax = plt.subplots(figsize=(15, 6))
ax = sns.barplot(x="abc_xyz_class",
y="total_skus",
data=df_abc_xyz_summary,
palette="Blues_d").set_title("SKUs by ABC-XYZ class",fontsize=15)
pivot / pivot_table / crosstab的区别
- pivot_table 和 cross_tab 可以互相替换
- pivot_table 是pandas 和 dataframe的方法
- cross_tab 只是 pandas的方法
pivot 不能聚合 pivot_table 和 cross_tab 都可以聚合
|