代码主要参考《Python机器学习及实践》一书
分类学习
Logistic 回归和 SGD 分类器模型
import pandas as pd
import numpy as np
# Column labels are supplied explicitly through names=, so the first line of
# the downloaded file is read as data rather than as a header.
column_names = [
    'Sample code number', 'Clump Thickness', 'Uniformity of Cell Size',
    'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size',
    'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class',
]
# Missing entries are written as '?' in the raw file. Declaring that marker via
# na_values makes pandas parse it as NaN up front, so every column is numeric
# instead of ending up as an object column full of digit strings.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data',
    names=column_names,
    na_values='?',
)
# Discard every row that has at least one missing value.
data = data.dropna(how='any')
# A column that contained NaN was parsed as float; with the NaN rows gone the
# values can be stored as integers again.
data = data.astype('int64')
data.shape
(683, 11)
# Preview the first rows of the cleaned table.
data.head()
| Sample code number | Clump Thickness | Uniformity of Cell Size | Uniformity of Cell Shape | Marginal Adhesion | Single Epithelial Cell Size | Bare Nuclei | Bland Chromatin | Normal Nucleoli | Mitoses | Class |
---|
0 | 1000025 | 5 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
---|
1 | 1002945 | 5 | 4 | 4 | 5 | 7 | 10 | 3 | 2 | 1 | 2 |
---|
2 | 1015425 | 3 | 1 | 1 | 1 | 2 | 2 | 3 | 1 | 1 | 2 |
---|
3 | 1016277 | 6 | 8 | 8 | 1 | 3 | 4 | 3 | 7 | 1 | 2 |
---|
4 | 1017023 | 4 | 1 | 1 | 3 | 2 | 1 | 3 | 1 | 1 | 2 |
---|
from sklearn.model_selection import train_test_split
# Columns 1..9 are used as predictors and column 10 ('Class') as the label;
# column 0 (the sample code number) is left out of the model inputs.
feature_columns = column_names[1:10]
label_column = column_names[10]
# Hold out a quarter of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data[feature_columns],
    data[label_column],
    test_size=0.25,
    random_state=33,
)
# Class frequencies among the training labels.
y_train.value_counts()
2 344
4 168
Name: Class, dtype: int64
# Class frequencies among the held-out test labels.
y_test.value_counts()
2 100
4 71
Name: Class, dtype: int64
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, SGDClassifier
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to the test split.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)
lr = LogisticRegression()
# SGD training is randomized (the data is shuffled by default), so pin the seed
# to the same value used for the split; otherwise every rerun yields a
# slightly different model and different scores.
sgdc = SGDClassifier(random_state=33)
# Fit each model on the scaled training data and predict the test split.
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)
sgdc.fit(X_train, y_train)
sgdc_y_predict = sgdc.predict(X_test)
from sklearn.metrics import classification_report
# Display names for the two label values, in the order the report lists them.
class_labels = ['benign', 'malignant']
# Mean accuracy of the logistic-regression model on the test split.
lr_accuracy = lr.score(X_test, y_test)
print('Accuracy of LR Classification:', lr_accuracy)
# Per-class precision / recall / f1 for the same predictions.
lr_report = classification_report(y_test, lr_y_predict, target_names=class_labels)
print(lr_report)
Accuracy of LR Classification: 0.9883040935672515
precision recall f1-score support
benign 0.99 0.99 0.99 100
malignant 0.99 0.99 0.99 71
accuracy 0.99 171
macro avg 0.99 0.99 0.99 171
weighted avg 0.99 0.99 0.99 171
# Mean accuracy of the SGD classifier on the test split.
sgdc_accuracy = sgdc.score(X_test, y_test)
print('Accuracy of SGDClassifier:', sgdc_accuracy)
# Per-class precision / recall / f1 for the SGD predictions.
sgdc_report = classification_report(
    y_test,
    sgdc_y_predict,
    target_names=['benign', 'malignant'],
)
print(sgdc_report)
Accuracy of SGDClassifier: 0.9883040935672515
precision recall f1-score support
benign 1.00 0.98 0.99 100
malignant 0.97 1.00 0.99 71
accuracy 0.99 171
macro avg 0.99 0.99 0.99 171
weighted avg 0.99 0.99 0.99 171
支持向量机(SVM)
from sklearn.datasets import load_digits
# Load scikit-learn's bundled handwritten-digits dataset.
digits = load_digits()
# (number of samples, number of features per sample)
digits.data.shape
(1797, 64)
from sklearn.model_selection import train_test_split
# Keep 25% of the digit samples aside for testing; seed fixed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=33,
)
# Sizes of the training and test label vectors.
(y_train.shape, y_test.shape)
((1347,), (450,))
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only ...
ss = StandardScaler().fit(X_train)
# ... then apply the same transformation to both splits.
X_train, X_test = ss.transform(X_train), ss.transform(X_test)
from sklearn.svm import LinearSVC
# Train a linear support-vector classifier on the scaled digits (fit returns the estimator).
lsvc = LinearSVC().fit(X_train, y_train)
# Predicted digit for every test sample.
y_predict = lsvc.predict(X_test)
# Mean accuracy on the held-out split.
lsvc_accuracy = lsvc.score(X_test, y_test)
print('The Accuracy of Linear SVC is', lsvc_accuracy)
The Accuracy of Linear SVC is 0.9533333333333334
from sklearn.metrics import classification_report
# The report expects string class names, so the numeric digit labels are cast to str.
digit_labels = digits.target_names.astype(str)
lsvc_report = classification_report(y_test, y_predict, target_names=digit_labels)
print(lsvc_report)
precision recall f1-score support
0 0.92 1.00 0.96 35
1 0.96 0.98 0.97 54
2 0.98 1.00 0.99 44
3 0.93 0.93 0.93 46
4 0.97 1.00 0.99 35
5 0.94 0.94 0.94 48
6 0.96 0.98 0.97 51
7 0.92 1.00 0.96 35
8 0.98 0.84 0.91 58
9 0.95 0.91 0.93 44
accuracy 0.95 450
macro avg 0.95 0.96 0.95 450
weighted avg 0.95 0.95 0.95 450
朴素贝叶斯法
from sklearn.datasets import fetch_20newsgroups
# Fetch the 20-newsgroups corpus, requesting the 'all' subset.
news = fetch_20newsgroups(subset='all')
documents = news.data
# Number of posts, followed by the raw text of the first one.
print(len(documents))
print(documents[0])
18846
From: Mamatha Devineni Ratnam <mr47+@andrew.cmu.edu>
Subject: Pens fans reactions
Organization: Post Office, Carnegie Mellon, Pittsburgh, PA
Lines: 12
NNTP-Posting-Host: po4.andrew.cmu.edu
? ? I am sure some bashers of Pens fans are pretty confused about the lack of any kind of posts about the recent Pens massacre of the Devils. Actually, I am bit puzzled too and a bit relieved. However, I am going to put an end to non-PIttsburghers’ relief with a bit of praise for the Pens. Man, they are killing those Devils worse than I thought. Jagr just showed you why he is much better than his regular season stats. He is also a lot fo fun to watch in the playoffs. Bowman should let JAgr have a lot of fun in the next couple of games since the Pens are going to beat the pulp out of Jersey anyway. I was very disappointed not to see the Islanders lose the final regular season game. PENS RULE!!!
?
from sklearn.model_selection import train_test_split
# Reserve 25% of the posts for testing; seed fixed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    news.data,
    news.target,
    test_size=0.25,
    random_state=33,
)
# Number of documents in each split.
(len(X_train), len(X_test))
(14134, 4712)
# Number of labels in each split; should match the document counts.
len(y_train), len(y_test)
(14134, 4712)
from sklearn.feature_extraction.text import CountVectorizer
# Convert each raw document into a vector of token counts.
vec = CountVectorizer()
# The vocabulary is built from the training documents only ...
X_train = vec.fit_transform(X_train)
# ... and reused for the test documents, so both matrices share the same columns.
X_test = vec.transform(X_test)
(14134, 150725)
# (documents, vocabulary size) for the vectorized training and test matrices.
X_train.shape, X_test.shape
((14134, 150725), (4712, 150725))
from sklearn.naive_bayes import MultinomialNB
# Train multinomial naive Bayes on the count features (fit returns the estimator).
mnb = MultinomialNB().fit(X_train, y_train)
# Predicted newsgroup index for every test document.
y_predict = mnb.predict(X_test)
from sklearn.metrics import classification_report
# Mean accuracy on the held-out documents.
mnb_accuracy = mnb.score(X_test, y_test)
print('The accuracy of Naive Bayes Classifier is', mnb_accuracy)
# Per-newsgroup precision / recall / f1.
mnb_report = classification_report(y_test, y_predict, target_names=news.target_names)
print(mnb_report)
The accuracy of Naive Bayes Classifier is 0.8397707979626485
precision recall f1-score support
alt.atheism 0.86 0.86 0.86 201
comp.graphics 0.59 0.86 0.70 250
comp.os.ms-windows.misc 0.89 0.10 0.17 248
comp.sys.ibm.pc.hardware 0.60 0.88 0.72 240
comp.sys.mac.hardware 0.93 0.78 0.85 242
comp.windows.x 0.82 0.84 0.83 263
misc.forsale 0.91 0.70 0.79 257
rec.autos 0.89 0.89 0.89 238
rec.motorcycles 0.98 0.92 0.95 276
rec.sport.baseball 0.98 0.91 0.95 251
rec.sport.hockey 0.93 0.99 0.96 233
sci.crypt 0.86 0.98 0.91 238
sci.electronics 0.85 0.88 0.86 249
sci.med 0.92 0.94 0.93 245
sci.space 0.89 0.96 0.92 221
soc.religion.christian 0.78 0.96 0.86 232
talk.politics.guns 0.88 0.96 0.92 251
talk.politics.mideast 0.90 0.98 0.94 231
talk.politics.misc 0.79 0.89 0.84 188
talk.religion.misc 0.93 0.44 0.60 158
accuracy 0.84 4712
macro avg 0.86 0.84 0.82 4712
weighted avg 0.86 0.84 0.82 4712
K近邻
from sklearn.datasets import load_iris
# Load scikit-learn's bundled iris dataset.
iris = load_iris()
# (number of samples, number of features per sample)
iris.data.shape
(150, 4)
# Print the dataset's built-in description text.
print(iris.DESCR)
.. _iris_dataset:
Iris plants dataset
--------------------
**Data Set Characteristics:**
:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
- sepal length in cm
- sepal width in cm
- petal length in cm
- petal width in cm
- class:
- Iris-Setosa
- Iris-Versicolour
- Iris-Virginica
:Summary Statistics:
============== ==== ==== ======= ===== ====================
Min Max Mean SD Class Correlation
============== ==== ==== ======= ===== ====================
sepal length: 4.3 7.9 5.84 0.83 0.7826
sepal width: 2.0 4.4 3.05 0.43 -0.4194
petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
============== ==== ==== ======= ===== ====================
:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988
The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.
This is perhaps the best known database to be found in the
pattern recognition literature. Fisher's paper is a classic in the field and
is referenced frequently to this day. (See Duda & Hart, for example.) The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant. One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.
.. topic:: References
- Fisher, R.A. "The use of multiple measurements in taxonomic problems"
Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
Mathematical Statistics" (John Wiley, NY, 1950).
- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
- Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
Structure and Classification Rule for Recognition in Partially Exposed
Environments". IEEE Transactions on Pattern Analysis and Machine
Intelligence, Vol. PAMI-2, No. 1, 67-71.
- Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions
on Information Theory, May 1972, 431-433.
- See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II
conceptual clustering system finds 3 classes in the data.
- Many, many more ...
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Reserve 25% of the iris samples for testing; seed fixed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=33,
)
# Learn the scaling statistics from the training split only, then apply them to both splits.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier
# Fit a k-nearest-neighbours classifier with its default settings (fit returns the estimator).
knc = KNeighborsClassifier().fit(X_train, y_train)
# Predicted species index for every test sample.
y_predict = knc.predict(X_test)
# Mean accuracy on the held-out split.
knc_accuracy = knc.score(X_test, y_test)
print('The accuracy of K-Nearest Neighbor Classifier is', knc_accuracy)
The accuracy of K-Nearest Neighbor Classifier is 0.8947368421052632
from sklearn.metrics import classification_report
# Per-species precision / recall / f1 for the KNN predictions.
knc_report = classification_report(
    y_test,
    y_predict,
    target_names=iris.target_names,
)
print(knc_report)
precision recall f1-score support
setosa 1.00 1.00 1.00 8
versicolor 0.73 1.00 0.85 11
virginica 1.00 0.79 0.88 19
accuracy 0.89 38
macro avg 0.91 0.93 0.91 38
weighted avg 0.92 0.89 0.90 38
决策树
import pandas as pd
# Tab-separated passenger table, read from a file next to the notebook.
titanic_path = './Titanic.txt'
titanic = pd.read_csv(titanic_path, sep='\t')
# Preview the first rows.
titanic.head()
| PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
---|
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
---|
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
---|
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
---|
# Summarize column dtypes and non-null counts to spot missing data.
titanic.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
# Three passenger attributes serve as model inputs; 'Survived' is the target.
feature_names = ['Pclass', 'Age', 'Sex']
X = titanic[feature_names]
Y = titanic['Survived']
# Check dtypes and non-null counts of the selected inputs.
X.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Pclass 891 non-null int64
1 Age 714 non-null float64
2 Sex 891 non-null object
dtypes: float64(1), int64(1), object(1)
memory usage: 21.0+ KB
# Replace missing ages with the mean age. The filled column is assigned back
# into a new frame instead of calling fillna(inplace=True) on X['Age']: that
# chained in-place form writes to a temporary when X is a slice of another
# frame, which triggers SettingWithCopyWarning and leaves X unchanged under
# pandas Copy-on-Write.
X = X.assign(Age=X['Age'].fillna(X['Age'].mean()))
# Confirm that 'Age' no longer has missing entries.
X.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Pclass 891 non-null int64
1 Age 891 non-null float64
2 Sex 891 non-null object
dtypes: float64(1), int64(1), object(1)
memory usage: 21.0+ KB
from sklearn.model_selection import train_test_split
# Reserve 25% of the passengers for testing; seed fixed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=33,
)
# Show the training rows as a list of {column: value} dicts, one per passenger.
X_train.to_dict(orient='records')
[{'Pclass': 1, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 23.5, 'Sex': 'male'},
{'Pclass': 2, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 45.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 52.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 62.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 43.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 50.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 10.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 1.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 46.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 3.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 36.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 13.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 61.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 2.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 46.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 19.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 4.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 51.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 37.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 9.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 26.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 50.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 8.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 16.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 44.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 4.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 48.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 45.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 37.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 17.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 11.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 19.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 17.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 51.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 9.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 54.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 11.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 33.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 19.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 48.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 2.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 50.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 41.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 47.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 15.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 36.5, 'Sex': 'male'},
{'Pclass': 2, 'Age': 50.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 74.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 45.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 4.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 41.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 39.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 57.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 38.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 50.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 1.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 65.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 38.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 41.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 54.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 33.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 37.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 38.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 41.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 43.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 58.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 2.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 26.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 19.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 50.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 38.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 49.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 55.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 52.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 15.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 5.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 11.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.5, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 49.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 1.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 61.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 51.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 60.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 15.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 0.75, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 55.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 47.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 45.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 54.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 1.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 3.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 0.83, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 14.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 21.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 41.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 4.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 38.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 32.5, 'Sex': 'female'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 43.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 23.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 62.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 37.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 49.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 14.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 41.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 16.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 64.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 51.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 60.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 56.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 61.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 48.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 44.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 71.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 63.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 38.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 55.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 6.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 27.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 14.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 4.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 5.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 3.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 16.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 40.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 46.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 27.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 18.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 1.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 60.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 42.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 51.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 44.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 66.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 8.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 38.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 21.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 14.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 40.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 70.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 54.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 0.67, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 45.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 3.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 56.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 45.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 80.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 30.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 45.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 52.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 38.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 7.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 42.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 36.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 54.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 40.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 37.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 23.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 58.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 5.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 1, 'Age': 18.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 17.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 42.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 21.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 4.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 2.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 33.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 70.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 0.83, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 14.5, 'Sex': 'female'},
{'Pclass': 1, 'Age': 25.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 24.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 27.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 4.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 3.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 32.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 8.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 0.75, 'Sex': 'female'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 11.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 26.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 26.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 54.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 58.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 4.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 43.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 20.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 23.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 30.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 19.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 1, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 40.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 32.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 45.5, 'Sex': 'male'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 29.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 44.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 70.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 51.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 16.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 38.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 3, 'Age': 2.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 34.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 7.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 31.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 36.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 17.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 39.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 25.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 35.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 28.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 47.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 22.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 24.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 1.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 32.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 42.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 64.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 0.42, 'Sex': 'male'},
{'Pclass': 3, 'Age': 6.0, 'Sex': 'female'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 50.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 38.0, 'Sex': 'female'},
{'Pclass': 1, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 1, 'Age': 58.0, 'Sex': 'female'},
{'Pclass': 3, 'Age': 28.5, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'male'},
{'Pclass': 2, 'Age': 23.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 29.69911764705882, 'Sex': 'female'},
{'Pclass': 2, 'Age': 25.0, 'Sex': 'male'},
{'Pclass': 3, 'Age': 21.0, 'Sex': 'male'},
{'Pclass': 2, 'Age': 35.0, 'Sex': 'male'}]
# Turn each training row into a dict and let DictVectorizer build the feature
# matrix; string-valued columns are expanded into one indicator per category.
from sklearn.feature_extraction import DictVectorizer

vec = DictVectorizer()
train_records = X_train.to_dict(orient='records')
X_train = vec.fit_transform(train_records)
# Show the feature names produced by the fitted vectorizer.
print(vec.feature_names_)
['Age', 'Pclass', 'Sex=female', 'Sex=male']
# Encode the test rows with the vectorizer that was fitted on the training rows.
test_records = X_test.to_dict(orient='records')
X_test = vec.transform(test_records)

# Train a single decision tree with default settings and predict the test set.
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_predict = dtc.predict(X_test)

# Report overall accuracy, then per-class precision / recall / F1.
from sklearn.metrics import classification_report
dtc_accuracy = dtc.score(X_test, y_test)
dtc_report = classification_report(y_test, y_predict, target_names=['died', 'survived'])
print('The accuracy of decision tree model is', dtc_accuracy)
print(dtc_report)
The accuracy of decision tree model is 0.8340807174887892
precision recall f1-score support
died 0.84 0.90 0.87 134
survived 0.82 0.74 0.78 89
accuracy 0.83 223
macro avg 0.83 0.82 0.82 223
weighted avg 0.83 0.83 0.83 223
集成模型
比较单决策树、随机森林和梯度提升决策树
# Compare a single decision tree, a random forest and gradient-boosted trees
# on the Titanic survival data.
import pandas as pd
titanic = pd.read_csv('./Titanic.txt', sep='\t')

# Take an explicit copy of the feature columns so that filling Age modifies X
# itself rather than a temporary slice of `titanic`.  The previous chained
# in-place fillna on a slice raises SettingWithCopyWarning and has no effect
# when pandas copy-on-write is enabled.
X = titanic[['Pclass', 'Age', 'Sex']].copy()
Y = titanic['Survived']
# Replace missing ages with the mean age.
X['Age'] = X['Age'].fillna(X['Age'].mean())

# Hold out 25% of the passengers as a test set (fixed seed for a repeatable split).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=33)

# Vectorize the row dicts into a dense feature matrix (sparse=False).
from sklearn.feature_extraction import DictVectorizer
vec = DictVectorizer(sparse=False)
X_train = vec.fit_transform(X_train.to_dict(orient='records'))
X_test = vec.transform(X_test.to_dict(orient='records'))

# Single decision tree.
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
dtc_y_predict = dtc.predict(X_test)

# Random forest.
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
rfc_y_predict = rfc.predict(X_test)

# Gradient tree boosting.
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)
gbc_y_predict = gbc.predict(X_test)

# Accuracy and per-class report for each of the three models.
from sklearn.metrics import classification_report
print('The accuracy of decision tree is', dtc.score(X_test, y_test))
print(classification_report(y_test, dtc_y_predict))
print('The accuracy of random forest classifier is', rfc.score(X_test, y_test))
print(classification_report(y_test, rfc_y_predict))
print('The accuracy of gradient tree boosting is', gbc.score(X_test, y_test))
print(classification_report(y_test, gbc_y_predict))
The accuracy of decision tree is 0.8340807174887892
precision recall f1-score support
0 0.84 0.90 0.87 134
1 0.82 0.74 0.78 89
accuracy 0.83 223
macro avg 0.83 0.82 0.82 223
weighted avg 0.83 0.83 0.83 223
The accuracy of random forest classifier is 0.8340807174887892
precision recall f1-score support
0 0.84 0.90 0.87 134
1 0.82 0.74 0.78 89
accuracy 0.83 223
macro avg 0.83 0.82 0.82 223
weighted avg 0.83 0.83 0.83 223
The accuracy of gradient tree boosting is 0.8430493273542601
precision recall f1-score support
0 0.84 0.91 0.87 134
1 0.85 0.74 0.79 89
accuracy 0.84 223
macro avg 0.84 0.83 0.83 223
weighted avg 0.84 0.84 0.84 223
回归预测
线性回归器
# Load the Boston house-price dataset shipped with scikit-learn and print its description.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell presumably only runs on an older release — confirm the pinned version.
from sklearn.datasets import load_boston
boston = load_boston()
print(boston.DESCR)
.. _boston_dataset:
Boston house prices dataset
---------------------------
**Data Set Characteristics:**
:Number of Instances: 506
:Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.
:Attribute Information (in order):
- CRIM per capita crime rate by town
- ZN proportion of residential land zoned for lots over 25,000 sq.ft.
- INDUS proportion of non-retail business acres per town
- CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- NOX nitric oxides concentration (parts per 10 million)
- RM average number of rooms per dwelling
- AGE proportion of owner-occupied units built prior to 1940
- DIS weighted distances to five Boston employment centres
- RAD index of accessibility to radial highways
- TAX full-value property-tax rate per $10,000
- PTRATIO pupil-teacher ratio by town
- B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- LSTAT % lower status of the population
- MEDV Median value of owner-occupied homes in $1000's
:Missing Attribute Values: None
:Creator: Harrison, D. and Rubinfeld, D.L.
This is a copy of UCI ML housing dataset.
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/
This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.
The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978. Used in Belsley, Kuh & Welsch, 'Regression diagnostics
...', Wiley, 1980. N.B. Various transformations are used in the table on
pages 244-261 of the latter.
The Boston house-price data has been used in many machine learning papers that address regression
problems.
.. topic:: References
- Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.
- Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.
# Hold out 25% of the samples as a test set (fixed seed for a repeatable split).
from sklearn.model_selection import train_test_split
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

# Summarise the regression target before any scaling is applied.
import numpy as np
target = boston.target
print('The max target value is', np.max(target))
print('The min target value is', np.min(target))
print('The average target value is', np.mean(target))
The max target value is 50.0
The min target value is 5.0
The average target value is 22.532806324110677
# Standardise features and target.  Both scalers are fitted on the training
# split only and then applied unchanged to the test split.
from sklearn.preprocessing import StandardScaler
ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
# The targets are reshaped to a single column because the scaler works on 2-D input.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

# Ordinary least-squares fit on the standardised data.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)

# Linear model trained by stochastic gradient descent.  SGDRegressor expects a
# 1-D target, so the column vector is flattened here; passing the column
# directly makes scikit-learn emit a DataConversionWarning.
from sklearn.linear_model import SGDRegressor
sgdr = SGDRegressor()
sgdr.fit(X_train, y_train.ravel())
sgdr_y_predict = sgdr.predict(X_test)

# The regressor's built-in score() is its default evaluation measure.
print('The value of default measurement of LinearRegression is', lr.score(X_test, y_test))
The value of default measurement of LinearRegression is 0.675795501452948
# Evaluate the linear regression: R-squared on the standardised scale, and
# MSE / MAE after mapping targets and predictions back to the original scale.
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
lr_true_prices = ss_y.inverse_transform(y_test)
lr_pred_prices = ss_y.inverse_transform(lr_y_predict)
print('The value of R-squared of LinearRegression is', r2_score(y_test, lr_y_predict))
print('The mean squared error of LinearRegression is', mean_squared_error(lr_true_prices, lr_pred_prices))
print('The mean absolute error of LinearRegression is', mean_absolute_error(lr_true_prices, lr_pred_prices))
The value of R-squared of LinearRegression is 0.675795501452948
The mean squared error of LinearRegression is 25.139236520353457
The mean absolute error of LinearRegression is 3.5325325437053983
# Evaluate the SGD regressor: built-in score, R-squared, and MSE / MAE on the
# original target scale.
# The prediction is reshaped to one column before inverse_transform so the
# scaler receives the same 2-D layout it was fitted on, instead of relying on
# 1-D input being tolerated.
sgdr_pred_column = sgdr_y_predict.reshape(-1, 1)
print('The value of default measurement of SGDRegression is', sgdr.score(X_test, y_test))
print('The R-squared value of SGDRegressor is', r2_score(y_test, sgdr_y_predict))
print('The mean squared error of SGDRegressor is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_pred_column)))
print('The mean absolute error of SGDRegressor is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_pred_column)))
The value of default measurement of SGDRegression is 0.6676793685682791
The R-squared value of SGDRegressor is 0.6676793685682791
The mean squared error of SGDRegressor is 25.768571971072685
The mean absolute error of SGDRegressor is 3.522268742618558
支持向量机(回归)
# Support vector regression with three different kernels.
from sklearn.svm import SVR

# SVR expects a 1-D target; flattening the scaled column vector avoids the
# DataConversionWarning raised when a column vector is passed to fit().
svr_y_train = y_train.ravel()

linear_svr = SVR(kernel='linear')
linear_svr.fit(X_train, svr_y_train)
linear_svr_y_predict = linear_svr.predict(X_test)

poly_svr = SVR(kernel='poly')
poly_svr.fit(X_train, svr_y_train)
poly_svr_y_predict = poly_svr.predict(X_test)

rbf_svr = SVR(kernel='rbf')
rbf_svr.fit(X_train, svr_y_train)
rbf_svr_y_predict = rbf_svr.predict(X_test)

# R-squared is computed on the standardised scale; MAE / MSE are computed after
# mapping back to the original target scale.  Predictions are reshaped to one
# column so inverse_transform receives the 2-D layout the scaler was fitted on.
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
print('The R-squared value of linear SVR is', linear_svr.score(X_test, y_test))
print('The MAE of linear SVR is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict.reshape(-1, 1))))
print('The MSE of linear SVR is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict.reshape(-1, 1))))
print('The R-squared value of Poly SVR is', poly_svr.score(X_test, y_test))
print('The MAE of Poly SVR is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict.reshape(-1, 1))))
print('The MSE of Poly SVR is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict.reshape(-1, 1))))
print('The R-squared value of RBF SVR is', rbf_svr.score(X_test, y_test))
print('The MAE of RBF SVR is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict.reshape(-1, 1))))
print('The MSE of RBF SVR is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict.reshape(-1, 1))))
The R-squared value of linear SVR is 0.6506595464215357
The MAE of linear SVR is 3.432801387759994
The MSE of linear SVR is 27.088311013556197
The R-squared value of Poly SVR is 0.4036506510255131
The MAE of Poly SVR is 3.738407371046495
The MSE of Poly SVR is 46.24170053103894
The R-squared value of RBF SVR is 0.7559887416340946
The MAE of RBF SVR is 2.6067819999501114
The MSE of RBF SVR is 18.920948861538722
K近邻(回归)
# K-nearest-neighbours regression with two neighbour-weighting schemes.
from sklearn.neighbors import KNeighborsRegressor

# Neighbours contribute equally.
uni_knr = KNeighborsRegressor(weights='uniform')
uni_knr.fit(X_train, y_train)
uni_knr_y_predict = uni_knr.predict(X_test)

# Neighbours are weighted by distance.
dis_knr = KNeighborsRegressor(weights='distance')
dis_knr.fit(X_train, y_train)
dis_knr_y_predict = dis_knr.predict(X_test)

# MAE / MSE are reported on the original target scale, so both the true values
# and the predictions are mapped back through the target scaler first.
knr_true_prices = ss_y.inverse_transform(y_test)
uni_knr_prices = ss_y.inverse_transform(uni_knr_y_predict)
dis_knr_prices = ss_y.inverse_transform(dis_knr_y_predict)

print('The R-squared value of uniform-weighted KNeighborsRegressor is', uni_knr.score(X_test, y_test))
print('The MAE of uniform-weighted KNeighborsRegressor is', mean_absolute_error(knr_true_prices, uni_knr_prices))
print('The MSE of uniform-weighted KNeighborsRegressor is', mean_squared_error(knr_true_prices, uni_knr_prices))
print('The R-squared value of distance-weighted KNeighborsRegressor is', dis_knr.score(X_test, y_test))
print('The MAE of distance-weighted KNeighborsRegressor is', mean_absolute_error(knr_true_prices, dis_knr_prices))
print('The MSE of distance-weighted KNeighborsRegressor is', mean_squared_error(knr_true_prices, dis_knr_prices))
The R-squared value of uniform-weighted KNeighborsRegressor is 0.6907212176346006
The MAE of uniform-weighted KNeighborsRegressor is 2.9650393700787396
The MSE of uniform-weighted KNeighborsRegressor is 23.981877165354337
The R-squared value of distance-weighted KNeighborsRegressor is 0.7201094821421603
The MAE of distance-weighted KNeighborsRegressor is 2.801125502210876
The MSE of distance-weighted KNeighborsRegressor is 21.703073090490353
回归树
# Regression tree with default settings.
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor()
dtr.fit(X_train, y_train)
dtr_y_predict = dtr.predict(X_test)

# R-squared on the standardised scale; MAE / MSE on the original target scale.
# The prediction is reshaped to one column so inverse_transform receives the
# 2-D layout the scaler was fitted on.
dtr_pred_column = dtr_y_predict.reshape(-1, 1)
print('The R-squared value of DecisionTreeRegressor is', dtr.score(X_test, y_test))
print('The MAE of DecisionTreeRegressor is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_pred_column)))
print('The MSE of DecisionTreeRegressor is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_pred_column)))
The R-squared value of DecisionTreeRegressor is 0.7018637082840042
The MAE of DecisionTreeRegressor is 3.1346456692913383
The MSE of DecisionTreeRegressor is 23.117874015748036
集成模型(回归)
# Ensemble regressors: random forest, extremely randomised trees, gradient boosting.
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor

# These estimators expect a 1-D target; flattening the scaled column vector
# avoids the DataConversionWarning raised when a column vector is passed to fit().
ensemble_y_train = y_train.ravel()

rfr = RandomForestRegressor()
rfr.fit(X_train, ensemble_y_train)
rfr_y_predict = rfr.predict(X_test)

etr = ExtraTreesRegressor()
etr.fit(X_train, ensemble_y_train)
etr_y_predict = etr.predict(X_test)

gbr = GradientBoostingRegressor()
gbr.fit(X_train, ensemble_y_train)
gbr_y_predict = gbr.predict(X_test)

# R-squared on the standardised scale; MAE / MSE on the original target scale.
# The prediction is reshaped to one column so inverse_transform receives the
# 2-D layout the scaler was fitted on.
rfr_pred_column = rfr_y_predict.reshape(-1, 1)
print('The R-squared value of RandomForestRegressor is', rfr.score(X_test, y_test))
print('The MAE of RandomForestRegressor is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_pred_column)))
print('The MSE of RandomForestRegressor is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_pred_column)))

# Rank features by importance, lowest first.  Each (importance, name) pair is
# sorted as a unit on the numeric importance.  The previous
# np.sort(pairs, axis=0) sorted the two columns independently (and as strings),
# which detached every importance from its feature name.
rfr_ranking = sorted(zip(rfr.feature_importances_, boston.feature_names), key=lambda pair: pair[0])
print(np.array(rfr_ranking))
The R-squared value of RandomForestRegressor is 0.8440895024116558
The MAE of RandomForestRegressor is 2.2735354330708666
The MSE of RandomForestRegressor is 12.089501818897643
[['0.0006952186678271629' 'AGE']
['0.0011740925648898665' 'B']
['0.0028736628406999766' 'CHAS']
['0.010579683068382618' 'CRIM']
['0.010648458290071508' 'DIS']
['0.010993814577649055' 'INDUS']
['0.011370471510572628' 'LSTAT']
['0.015470830834121943' 'NOX']
['0.018271274527914837' 'PTRATIO']
['0.03688007979838428' 'RAD']
['0.05912370039150086' 'RM']
['0.3438362196168467' 'TAX']
['0.47808249331113856' 'ZN']]
# Evaluate the extra-trees regressor: R-squared on the standardised scale,
# MAE / MSE on the original target scale.  The prediction is reshaped to one
# column so inverse_transform receives the 2-D layout the scaler was fitted on.
etr_pred_column = etr_y_predict.reshape(-1, 1)
print('The R-squared value of ExtraTreesRegressor is', etr.score(X_test, y_test))
print('The MAE of ExtraTreesRegressor is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_pred_column)))
print('The MSE of ExtraTreesRegressor is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_pred_column)))

# Rank features by importance, lowest first.  Each (importance, name) pair is
# sorted as a unit on the numeric importance.  The previous
# np.sort(pairs, axis=0) sorted the two columns independently (and as strings),
# which detached every importance from its feature name.
etr_ranking = sorted(zip(etr.feature_importances_, boston.feature_names), key=lambda pair: pair[0])
print(np.array(etr_ranking))
The R-squared value of ExtraTreesRegressor is 0.7888586858524715
The MAE of ExtraTreesRegressor is 2.3978267716535435
The MSE of ExtraTreesRegressor is 16.372170834645676
[['0.006161438928324989' 'AGE']
['0.01562649275993193' 'B']
['0.015793280691105457' 'CHAS']
['0.015911262651877756' 'CRIM']
['0.019522257475124195' 'DIS']
['0.024248474046779066' 'INDUS']
['0.02675902636449222' 'LSTAT']
['0.027354845633028434' 'NOX']
['0.029778329553770785' 'PTRATIO']
['0.037235762917298534' 'RAD']
['0.05366976186193688' 'RM']
['0.36340298961376505' 'TAX']
['0.3645360775025648' 'ZN']]
# Evaluate the gradient-boosting regressor: R-squared on the standardised
# scale, MAE / MSE on the original target scale.  The prediction is reshaped to
# one column so inverse_transform receives the 2-D layout the scaler was fitted on.
gbr_pred_column = gbr_y_predict.reshape(-1, 1)
print('The R-squared value of GradientBoostingRegressor is', gbr.score(X_test, y_test))
print('The MAE of GradientBoostingRegressor is', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_pred_column)))
print('The MSE of GradientBoostingRegressor is', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_pred_column)))

# Rank features by importance, lowest first.  Each (importance, name) pair is
# sorted as a unit on the numeric importance.  The previous
# np.sort(pairs, axis=0) sorted the two columns independently (and as strings),
# which detached every importance from its feature name.
gbr_ranking = sorted(zip(gbr.feature_importances_, boston.feature_names), key=lambda pair: pair[0])
print(np.array(gbr_ranking))
The R-squared value of GradientBoostingRegressor is 0.8352282904422077
The MAE of GradientBoostingRegressor is 2.2967895563692675
The MSE of GradientBoostingRegressor is 12.776611666402166
[['0.000813577859578548' 'AGE']
['0.0009379773085437085' 'B']
['0.002470984513191071' 'CHAS']
['0.0026554787648684175' 'CRIM']
['0.006102069186157214' 'DIS']
['0.009097588958970589' 'INDUS']
['0.011010707897691053' 'LSTAT']
['0.015176077585162848' 'NOX']
['0.027349301643875182' 'PTRATIO']
['0.0277504007587151' 'RAD']
['0.08757621162790671' 'RM']
['0.3493765408039815' 'TAX']
['0.459683083091358' 'ZN']]
|