python中孤立森林算法实例
使用 Python 中 sklearn 库自带的 IsolationForest 构建孤立森林,对数据进行训练和预测,并使用 matplotlib.pyplot(plt)绘图展示结果。
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
import csv
def loadData(filename):
    """Read the price/amount columns from a UTF-8 CSV file.

    The first line is treated as a header and discarded. For every remaining
    non-empty row, column index 3 is parsed as the price and column index 4
    as the amount.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(dataset, price, amount)`` where ``dataset`` is a list of
        ``[price, amount]`` pairs and ``price`` / ``amount`` are the same
        values as two flat lists, all as floats in file order. All three
        lists are empty when the file has no data rows.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than five columns.
        ValueError: If a price or amount cell is not a valid float.
    """
    dataset = []
    price = []
    amount = []
    # The context manager guarantees the handle is closed even when a row
    # fails to parse; newline='' is what the csv module expects so quoted
    # fields containing line breaks are handled correctly.
    with open(filename, 'r', encoding='utf-8', newline='') as data:
        reader = csv.reader(data)
        # Drop the header line; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no data.
                continue
            row_price = float(row[3])
            row_amount = float(row[4])
            dataset.append([row_price, row_amount])
            price.append(row_price)
            amount.append(row_amount)
    return dataset, price, amount
def iForest(dataset, price, amount):
    """Fit an IsolationForest on ``dataset`` and show a scatter plot of the result.

    Each row is labelled by ``IsolationForest.fit_predict``; rows labelled
    ``-1`` are drawn in red as abnormal points, all others in white as normal
    points. The list of normal prices is printed before plotting.

    Args:
        dataset: Sequence of rows whose first element is the price and whose
            second element is the amount.
        price: Unused; kept so existing callers keep working.
        amount: Unused; kept so existing callers keep working.

    Returns:
        None. The function prints to stdout and opens a matplotlib window.
    """
    # NOTE(review): the model is built with default settings and no fixed
    # random_state, so the labelling presumably varies between runs — confirm
    # whether reproducibility matters here.
    clf = IsolationForest()
    ans = clf.fit_predict(dataset)

    price_abnormal = []
    amount_abnormal = []
    price_normal = []
    amount_normal = []
    # Walk rows and labels in lockstep instead of indexing both by position.
    for row, label in zip(dataset, ans):
        if label == -1:
            price_abnormal.append(row[0])
            amount_abnormal.append(row[1])
        else:
            price_normal.append(row[0])
            amount_normal.append(row[1])
    print(price_normal)

    plt.title("IsolationForest")
    b1 = plt.scatter(price_normal, amount_normal, c='white',
                     s=20, edgecolor='k')
    c = plt.scatter(price_abnormal, amount_abnormal, c='red',
                    s=20, edgecolor='k')
    plt.axis('tight')
    # Fixed axis limits are set after the 'tight' request above; points
    # outside them are not shown.
    plt.xlim((0, 3000))
    plt.ylim(0, 3500)
    plt.xlabel('Price')
    plt.ylabel('Amount')
    plt.legend([b1, c], ['normal points', 'abnormal points'],
               loc="upper left")
    plt.show()
# Script entry: read the sample CSV and plot the isolation-forest labelling.
# The path is relative, so the file must be reachable from the current
# working directory.
filename = 'lv3测试集.csv'
dataset,price,amount=loadData(filename)
# price and amount are passed along to match iForest's signature.
iForest(dataset,price,amount)
效果如图:
|