导入相关的包
import pandas as pd
import numpy as np
读取数据集
# Load the two input tables from the current working directory.
# ft_zodiac supplies the zodiac / chinese_zodiac columns and zodiac_label the
# label column; the script later joins them on order_id.
ft_zodiac = pd.read_csv('ft_zodiac.txt', sep=',')      # sep=',' is the pandas default, spelled out
zodiac_label = pd.read_csv('zodiac_label.txt', sep=',')
查看数据集信息
# Quick inspection of the raw tables. These are bare expressions: they only
# show output in an interactive session (notebook / REPL) and have no visible
# effect when the file is run as a plain script.
ft_zodiac.head()                          # first rows of the feature table
len(set(ft_zodiac["zodiac"]))             # number of distinct zodiac values
len(set(ft_zodiac["chinese_zodiac"]))     # number of distinct chinese_zodiac values
ft_zodiac.shape                           # (rows, columns)
zodiac_label.head()                       # first rows of the label table
set(zodiac_label["label"])                # distinct label values present
定义好人和坏人的标签
以 pd15 作为好坏的分割节点:15天以上的人为坏人,标签为1;5天以内的人为好人,标签为0;5至15天的人介于两者之间,标签为2。下面的分析会剔除标签为2的样本。
# Drop every row whose label equals 2 and keep the rest for the analysis.
not_label_two = zodiac_label["label"].ne(2)
ft_label = zodiac_label.loc[not_label_two]
ft_label.head()  # preview only; no effect outside an interactive session
两张表进行拼接
# Inner join on order_id: only orders present in both the filtered label table
# and the feature table survive.
data = ft_label.merge(ft_zodiac, how="inner", on='order_id')
计算不同星座对应的坏账率
# Distinct category values found in the merged data. Despite the "_list"
# suffix these are sets, so their iteration order is not defined.
zodiac_list = set(data["zodiac"])
chinese_zodiac_list = set(data["chinese_zodiac"])
创建字典,来表示星座及对应的坏账率
# Bad rate per zodiac value: bad / (bad + good), where label 1 counts as bad
# and label 0 as good. Rows carrying any other label are left out of the ratio.
zodiac_badrate = {}
for sign in zodiac_list:
    # Labels of all rows whose zodiac equals the current value (filtered once).
    labels = data.loc[data["zodiac"] == sign, "label"]
    bad = (labels == 1).sum()
    good = (labels == 0).sum()
    # NOTE: a value with neither good nor bad rows gives NaN (0 / 0 on NumPy
    # integers, with a RuntimeWarning) rather than raising.
    zodiac_badrate[sign] = bad / (bad + good)
zodiac_badrate  # display the mapping in an interactive session
按照坏账率从高到低进行排序
# Rank the (zodiac, bad rate) pairs from highest to lowest bad rate and turn
# the ranking into a two-column table.
f = sorted(zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
zodiac_badrate = pd.DataFrame(f)  # rebinds the name: dict -> DataFrame
zodiac_badrate.columns = ["星座", "badrate"]
zodiac_badrate  # display the table in an interactive session
将不同星座坏账率用图表表示
import matplotlib.pyplot as plt

# Select a sans-serif font by name so the Chinese axis labels can render.
# NOTE(review): "Heiti TC" is presumably a macOS font; confirm it exists on the
# machine running this, otherwise the labels may show as empty boxes.
plt.rcParams["font.sans-serif"] = ["Heiti TC"]

# Line chart of bad rate per zodiac value, in the table's (descending) order.
fig, ax = plt.subplots(figsize=(9, 6), dpi=80)
ax.set_xlabel("星座")
ax.set_ylabel("badrate")
ax.set_ylim(0, 0.16)  # fixed y-range, same as the chinese-zodiac chart
ax.plot(zodiac_badrate["星座"], zodiac_badrate["badrate"])
计算不同生肖对应的坏账率
创建字典,来表示生肖及对应的坏账率
# Bad rate per chinese_zodiac value: bad / (bad + good), where label 1 counts
# as bad and label 0 as good. Rows carrying any other label are left out.
chinese_zodiac_badrate = {}
for animal in chinese_zodiac_list:
    # Labels of all rows whose chinese_zodiac equals the current value.
    labels = data.loc[data["chinese_zodiac"] == animal, "label"]
    bad = (labels == 1).sum()
    good = (labels == 0).sum()
    # NOTE: a value with neither good nor bad rows gives NaN (0 / 0 on NumPy
    # integers, with a RuntimeWarning) rather than raising.
    chinese_zodiac_badrate[animal] = bad / (bad + good)
chinese_zodiac_badrate  # display the mapping in an interactive session
按照坏账率从高到低进行排序
# Rank the (chinese zodiac, bad rate) pairs from highest to lowest bad rate and
# turn the ranking into a two-column table.
f = sorted(
    chinese_zodiac_badrate.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
chinese_zodiac_badrate = pd.DataFrame(f)  # rebinds the name: dict -> DataFrame
chinese_zodiac_badrate.columns = ["生肖", "badrate"]
chinese_zodiac_badrate.head()  # preview of the highest bad rates
将不同生肖坏账率用图表表示
# Line chart of bad rate per chinese_zodiac value, in the table's (descending)
# order. Relies on matplotlib.pyplot already being imported as plt.
fig, ax = plt.subplots(figsize=(9, 6), dpi=80)
ax.plot(chinese_zodiac_badrate["生肖"], chinese_zodiac_badrate["badrate"])
ax.set_xlabel("生肖")
ax.set_ylabel("badrate")
ax.set_ylim(0, 0.16)  # fixed y-range, same as the zodiac chart
|