# -*- coding: utf-8 -*-
"""
Created on Wed Apr 8 09:46:04 2020

@author: advantech
"""
import joblib  # sklearn.externals.joblib is deprecated in recent scikit-learn
import numpy as np
import pandas as pd
import pymysql as mdb
import matplotlib.pyplot as plt
#%matplotlib inline
from sklearn import linear_model, metrics, tree
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from xgboost import XGBClassifier
def main():
    file_path = ''  # path to the input CSV (value not preserved in the original)
    # names=['
    # https://wenku.baidu.com/view/0cdf0a66f524ccbff0218482.html
    data_df = pd.read_csv(file_path)  # pd.read_csv(file_path, names=names)
    data_df = data_df.fillna(0)
    """Add derived / transformed variables here"""
    #data_df['Q']=ln
    """Add handling of anomalous data here"""
    print(data_df.shape)
    print(data_df.columns)
    print(data_df.head())
    corr = data_df.corr()
    cor_sort = corr['d'].abs().sort_values()
    print(cor_sort)
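    # cor_sort ranks every column by the absolute value of its correlation with
    # the target column 'd' (ascending); the last entry is 'd' itself, which is
    # dropped when the feature columns are selected below.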
    # =============================================================================
    # plt.plot(np.arange(0, 6), cor_sort, 'x')
    # plt.plot(0, cor_sort[0], 'xr')
    # plt.show()
    # =============================================================================
    """Select variables"""
    selected_columns = pd.DataFrame(cor_sort).index[:-1]
    print(selected_columns)
    """X variables : Y variable"""
    # =============================================================================
    # left_pd = data_df[selected_columns]
    # print(left_pd[:][:10])
    # columns_list = list(left_pd.columns)
    # print(columns_list)
    # =============================================================================
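    # Each block below selects a different feature set (X_label ... X_label5)
    # and target (Y_label / Y_label2), then trains and evaluates the regressors
    # in model(). The column-name lists themselves are not defined in this script.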
    X_Input = data_df[X_label].values
    label = data_df[Y_label].values
    model(X_Input, label)
    X_Input = data_df[X_label2].values
    label = data_df[Y_label].values
    model(X_Input, label)
    X_Input = data_df[X_label3].values
    label = data_df[Y_label].values
    model(X_Input, label)
    X_Input = data_df[X_label4].values
    label = data_df[Y_label2].values
    model(X_Input, label)
    X_Input = data_df[X_label5].values
    label = data_df[Y_label2].values
    model(X_Input, label)


# Standardize the features, then fit and evaluate several regression models.
def model(X_Input, label):
    scaler = StandardScaler()
    new_X = scaler.fit_transform(X_Input)  # fit and scale in one step
    print(new_X)
    X_train, X_test, y_train, y_test = train_test_split(new_X,
                                                        label,
                                                        test_size=0.2)
    print("X_train:", X_train.shape)
    print("X_test :", X_test.shape)
    print("y_train:", y_train.shape)
    print("y_test :", y_test.shape)
"""Joint feature selection with multi-task Lasso"""
#https://scikit-learn.org/stable/auto_examples/linear_model/plot_multi_task_lasso_support.html#sphx-glr-auto-examples-linear-model-plot-multi-task-lasso-support-py
"""Lasso"""
=============================================================================
from sklearn import linear_model
import numpy as np
reg = linear_model.Lasso(alpha=0.1)
reg.fit(X_train,y_train)
predict_test = reg.predict(X_test)
predict_train = reg.predict(X_train)
corr_test=metrics.explained_variance_score(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
mean_error_test=metrics.mean_absolute_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
squared_error_test=metrics.mean_squared_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
corr=metrics.explained_variance_score(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
mean_error=metrics.mean_absolute_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
squared_error=metrics.mean_squared_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
print( "Lasso[corr,mean,squared] Test:%f;%f;%f" % (corr_test, mean_error_test,squared_error_test) )
print( "Lasso[[corr,mean,squared] Train:%f;%f;%f" % (corr, mean_error,squared_error) )
"""SVR"""
#print(__doc__)
=============================================================================
import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
=============================================================================
# Fit regression model
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, gamma='auto')
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=3, epsilon=.1,
coef0=1)
svrs = [svr_rbf, svr_lin, svr_poly]
for ix, svr in enumerate(svrs):
svr.fit(X_train,y_train)
predict_test = svr.predict(X_test)#.fit(X_train,y_train).predict(X_test)
predict_train = svr.predict(X_train)#.fit((X_train,y_train).predict(X_train)
corr_test=metrics.explained_variance_score(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
mean_error_test=metrics.mean_absolute_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
squared_error_test=metrics.mean_squared_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
corr=metrics.explained_variance_score(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
mean_error=metrics.mean_absolute_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
squared_error=metrics.mean_squared_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
print( "SVR:%f[corr,mean,squared] Test:%f;%f;%f" % (ix,corr_test, mean_error_test,squared_error_test) )
print( "SVR:%f[[corr,mean,squared] Train:%f;%f;%f" % (ix,corr, mean_error,squared_error) )
"""Bayesian Ridge Regression is used for regression:"""
=============================================================================
from sklearn import linear_model
=============================================================================
reg = linear_model.BayesianRidge()
reg.fit(X_train,y_train)
predict_test = reg.predict(X_test)
predict_train = reg.predict(X_train)
corr_test=metrics.explained_variance_score(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
mean_error_test=metrics.mean_absolute_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
squared_error_test=metrics.mean_squared_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
corr=metrics.explained_variance_score(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
mean_error=metrics.mean_absolute_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
squared_error=metrics.mean_squared_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
print( "BayesianRidge[corr,mean,squared] Test:%f;%f;%f" % (corr_test, mean_error_test,squared_error_test) )
print( "BayesianRidge[corr,mean,squared] Train:%f;%f;%f" % (corr, mean_error,squared_error) )
"""tree"""
#from sklearn import tree
clf = tree.DecisionTreeRegressor()
clf = clf.fit(X_train,y_train)
joblib.dump(clf, 'kqll.pkl')
predict_test = clf.predict(X_test)
predict_train = clf.predict(X_train)
corr_test=metrics.explained_variance_score(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
mean_error_test=metrics.mean_absolute_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
squared_error_test=metrics.mean_squared_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
corr=metrics.explained_variance_score(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
mean_error=metrics.mean_absolute_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
squared_error=metrics.mean_squared_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
print( "tree[corr,mean,squared] Test:%f;%f;%f" % (corr_test, mean_error_test,squared_error_test) )
print( "tree[corr,mean,squared] Train:%f;%f;%f" % (corr, mean_error,squared_error) )
"""RandomForestClassifier"""
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.ensemble import RandomForestRegressor
clf = RandomForestRegressor()
#clf = RandomForestClassifier(n_estimators=10)
clf = clf.fit(X_train,y_train)
predict_test = clf.predict(X_test)
predict_train = clf.predict(X_train)
corr_test=metrics.explained_variance_score(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
mean_error_test=metrics.mean_absolute_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
squared_error_test=metrics.mean_squared_error(y_test, predict_test, sample_weight=None, multioutput='uniform_average')
corr=metrics.explained_variance_score(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
mean_error=metrics.mean_absolute_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
squared_error=metrics.mean_squared_error(y_train, predict_train, sample_weight=None, multioutput='uniform_average')
print( "RandomForestRegressor[corr,mean,squared] Test:%f;%f;%f" % (corr_test, mean_error_test,squared_error_test) )
print( "RandomForestRegressor[corr,mean,squared] Train:%f;%f;%f" % (corr, mean_error,squared_error) )


if __name__ == '__main__':
    main()
# =============================================================================
# Leftover scratch / experimentation. It references variables that only exist
# inside model() (new_X, label, X_test), so it is left commented out here.
# from sklearn import svm
# X = new_X
# y = label
# clf = svm.SVR()
# reg = linear_model.Lasso(alpha=0.1)
# reg.fit(X, y)
# clf.fit(X, y)
# reg.predict(X_test)
# clf.predict([[1, 1]])
# array([1.5])
# =============================================================================
# =============================================================================
# Experimental PCA + SVC classification block. It also references X_train,
# X_test, y_train and y_test from inside model(), so it is left commented out.
# """PCA"""
# pca = PCA()
# pca.fit(X_train)
# X_train_reduced = pca.transform(X_train)
# X_test_reduced = pca.transform(X_test)
# classifier = SVC()
# y_train = y_train.astype('int')
# classifier.fit(X_train_reduced, y_train)
# predict_train = classifier.predict(X_train_reduced)
# accuracy_train = metrics.accuracy_score(y_train, predict_train)
# predict_test = classifier.predict(X_test_reduced)
# accuracy_test = metrics.accuracy_score(y_test, predict_test)
# print("[Accuracy] Train:%f Test:%f" % (accuracy_train, accuracy_test))
# =============================================================================