本文仅为方便自己日后查阅代码而整理。
绘制特征分布图
# Plot one histogram + KDE per feature column of df_graph on a 4x2 grid.

# The bare 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer
# matplotlib releases; pick whichever name this installation provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

# Every column except the edge endpoints and the class label is a feature.
features_set = list(df_graph.columns.difference(['source', 'target', 'class']))

fig, axes = plt.subplots(ncols=2, nrows=4, figsize=[15, 20])

# zip stops at the shorter of the two sequences, so at most 8 features
# (one per axis of the 4x2 grid) are drawn.
for ax, fea in zip(axes.flat, features_set):
    ax.set_title(fea)
    ax.set_xlabel('values')
    ax.set_ylabel('nums')
    # NOTE: sns.distplot is deprecated in recent seaborn releases
    # (histplot/displot are the replacements); it is kept here so the
    # rendering stays the same as before.
    sns.distplot(df_graph[fea].values,
                 hist=True, kde=True, ax=ax, color='royalblue')
plt.show()
绘制热力图
%matplotlib inline
# Draw an annotated correlation heatmap of the feature columns of df_graph.
# Index.difference silently ignores names that are not columns, so the
# column to exclude must be spelled exactly 'target' for it to be dropped.
complete_features = df_graph.loc[:, df_graph.columns.difference(['source', 'target', 'class'])]
plt.figure(figsize=(6, 6))
sns.heatmap(complete_features.corr(), annot=True)
PCA降维可视化
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
def plot_pca(num,data,label):
    """Reduce ``data`` with PCA and scatter-plot the two label classes.

    The fitted component matrix is printed, then the projected samples are
    split into the ``label == 1`` group and the ``label == 0`` group and
    drawn as two scatter series (first group first).

    Args:
        num: Number of principal components to keep. A 3D scatter is drawn
            for 3, a 2D scatter for 2; any other value only prints a message
            (the PCA is still fitted and its components printed).
        data: Feature matrix passed to ``PCA.fit_transform``.
        label: One label per row of ``data``; rows are matched to labels by
            position. Values other than 0 and 1 are not plotted.
    """
    pca = PCA(n_components=num)
    X_pca = pca.fit_transform(data)
    print(pca.components_)

    # Select rows with positional boolean masks. Indexing ``label[i]`` is
    # label-based on a pandas Series and breaks for a non-default index;
    # masks also keep the (0, num) shape when a class is empty, so the
    # column slices below stay valid.
    labels = np.asarray(label).ravel()
    X_failure = X_pca[labels == 1]
    X_healthy = X_pca[labels == 0]

    if num == 3:
        fig = plt.figure(figsize=[10, 15])
        # Create the 3D axes through the figure: a bare Axes3D(fig) is not
        # attached to the figure on newer matplotlib and renders nothing.
        ax = fig.add_subplot(111, projection='3d')
        ax.set_zlabel('Z', fontdict={'size': 15, 'color': 'red'})
        ax.set_ylabel('Y', fontdict={'size': 15, 'color': 'red'})
        ax.set_xlabel('X', fontdict={'size': 15, 'color': 'red'})
        ax.scatter(X_failure[:, 0], X_failure[:, 1], X_failure[:, 2])
        ax.scatter(X_healthy[:, 0], X_healthy[:, 1], X_healthy[:, 2])
    elif num == 2:
        plt.figure(figsize=[10, 10])
        plt.scatter(X_failure[:, 0], X_failure[:, 1])
        plt.scatter(X_healthy[:, 0], X_healthy[:, 1])
    else:
        # Only 2D and 3D projections can be drawn.
        print('i do not want to work.....')
# Build the feature matrix and labels with sift_features (defined elsewhere),
# passing 'node_path' as dislike_fea.
# NOTE(review): presumably dislike_fea lists columns to leave out and X_norm
# is already normalized — confirm against sift_features.
X_norm,y=sift_features(df_graph,dislike_fea=['node_path'])
# Project onto 3 principal components and draw the 3D scatter plot.
plot_pca(num=3,data=X_norm,label=y)
ROC曲线图(图例中标注AUC值)
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score as AUC
def plot_roc(df):
    """Draw the ROC curve for the scores in ``df`` and show it.

    Args:
        df: Table with a ``'label'`` column (ground truth) and a ``'score'``
            column (predicted score); both are passed to ``roc_curve`` and
            to the AUC scorer.

    The legend entry reports the area under the curve to two decimals.
    """
    y_true = df['label']
    y_score = df['score']

    # The thresholds returned by roc_curve are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_score)
    auc_value = AUC(y_true, y_score)

    legend_text = 'ROC curve (area = %0.2f)' % auc_value
    plt.figure()
    plt.plot(false_pos_rate, true_pos_rate, label=legend_text)
    plt.legend(loc='lower right')
    plt.show()
# Plot the ROC curve for Jaccard_df; plot_roc reads its 'label' and 'score' columns.
plot_roc(Jaccard_df)
有空再更新。