注:本案例为黑马的课堂案例,上传仅为方便查看
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier, export_graphviz
# Download the Titanic passenger list and parse it into a DataFrame.
titanic_url = "http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt"
data = pd.read_csv(titanic_url)
data
| row.names | pclass | survived | name | age | embarked | home.dest | room | ticket | boat | sex |
---|
0 | 1 | 1st | 1 | Allen, Miss Elisabeth Walton | 29.0000 | Southampton | St Louis, MO | B-5 | 24160 L221 | 2 | female |
---|
1 | 2 | 1st | 0 | Allison, Miss Helen Loraine | 2.0000 | Southampton | Montreal, PQ / Chesterville, ON | C26 | NaN | NaN | female |
---|
2 | 3 | 1st | 0 | Allison, Mr Hudson Joshua Creighton | 30.0000 | Southampton | Montreal, PQ / Chesterville, ON | C26 | NaN | (135) | male |
---|
3 | 4 | 1st | 0 | Allison, Mrs Hudson J.C. (Bessie Waldo Daniels) | 25.0000 | Southampton | Montreal, PQ / Chesterville, ON | C26 | NaN | NaN | female |
---|
4 | 5 | 1st | 1 | Allison, Master Hudson Trevor | 0.9167 | Southampton | Montreal, PQ / Chesterville, ON | C22 | NaN | 11 | male |
---|
5 | 6 | 1st | 1 | Anderson, Mr Harry | 47.0000 | Southampton | New York, NY | E-12 | NaN | 3 | male |
---|
6 | 7 | 1st | 1 | Andrews, Miss Kornelia Theodosia | 63.0000 | Southampton | Hudson, NY | D-7 | 13502 L77 | 10 | female |
---|
7 | 8 | 1st | 0 | Andrews, Mr Thomas, jr | 39.0000 | Southampton | Belfast, NI | A-36 | NaN | NaN | male |
---|
8 | 9 | 1st | 1 | Appleton, Mrs Edward Dale (Charlotte Lamson) | 58.0000 | Southampton | Bayside, Queens, NY | C-101 | NaN | 2 | female |
---|
9 | 10 | 1st | 0 | Artagaveytia, Mr Ramon | 71.0000 | Cherbourg | Montevideo, Uruguay | NaN | NaN | (22) | male |
---|
10 | 11 | 1st | 0 | Astor, Colonel John Jacob | 47.0000 | Cherbourg | New York, NY | NaN | 17754 L224 10s 6d | (124) | male |
---|
11 | 12 | 1st | 1 | Astor, Mrs John Jacob (Madeleine Talmadge Force) | 19.0000 | Cherbourg | New York, NY | NaN | 17754 L224 10s 6d | 4 | female |
---|
12 | 13 | 1st | 1 | Aubert, Mrs Leontine Pauline | NaN | Cherbourg | Paris, France | B-35 | 17477 L69 6s | 9 | female |
---|
13 | 14 | 1st | 1 | Barkworth, Mr Algernon H. | NaN | Southampton | Hessle, Yorks | A-23 | NaN | B | male |
---|
14 | 15 | 1st | 0 | Baumann, Mr John D. | NaN | Southampton | New York, NY | NaN | NaN | NaN | male |
---|
15 | 16 | 1st | 1 | Baxter, Mrs James (Helene DeLaudeniere Chaput) | 50.0000 | Cherbourg | Montreal, PQ | B-58/60 | NaN | 6 | female |
---|
16 | 17 | 1st | 0 | Baxter, Mr Quigg Edmond | 24.0000 | Cherbourg | Montreal, PQ | B-58/60 | NaN | NaN | male |
---|
17 | 18 | 1st | 0 | Beattie, Mr Thomson | 36.0000 | Cherbourg | Winnipeg, MN | C-6 | NaN | NaN | male |
---|
18 | 19 | 1st | 1 | Beckwith, Mr Richard Leonard | 37.0000 | Southampton | New York, NY | D-35 | NaN | 5 | male |
---|
19 | 20 | 1st | 1 | Beckwith, Mrs Richard Leonard (Sallie Monypeny) | 47.0000 | Southampton | New York, NY | D-35 | NaN | 5 | female |
---|
20 | 21 | 1st | 1 | Behr, Mr Karl Howell | 26.0000 | Cherbourg | New York, NY | C-148 | NaN | 5 | male |
---|
21 | 22 | 1st | 0 | Birnbaum, Mr Jakob | 25.0000 | Cherbourg | San Francisco, CA | NaN | NaN | (148) | male |
---|
22 | 23 | 1st | 1 | Bishop, Mr Dickinson H. | 25.0000 | Cherbourg | Dowagiac, MI | B-49 | NaN | 7 | male |
---|
23 | 24 | 1st | 1 | Bishop, Mrs Dickinson H. (Helen Walton) | 19.0000 | Cherbourg | Dowagiac, MI | B-49 | NaN | 7 | female |
---|
24 | 25 | 1st | 1 | Bjornstrm-Steffansson, Mr Mauritz Hakan | 28.0000 | Southampton | Stockholm, Sweden / Washington, DC | NaN | | D | male |
---|
25 | 26 | 1st | 0 | Blackwell, Mr Stephen Weart | 45.0000 | Southampton | Trenton, NJ | NaN | NaN | (241) | male |
---|
26 | 27 | 1st | 1 | Blank, Mr Henry | 39.0000 | Cherbourg | Glen Ridge, NJ | A-31 | NaN | 7 | male |
---|
27 | 28 | 1st | 1 | Bonnell, Miss Caroline | 30.0000 | Southampton | Youngstown, OH | C-7 | NaN | 8 | female |
---|
28 | 29 | 1st | 1 | Bonnell, Miss Elizabeth | 58.0000 | Southampton | Birkdale, England Cleveland, Ohio | C-103 | NaN | 8 | female |
---|
29 | 30 | 1st | 0 | Borebank, Mr John James | NaN | Southampton | London / Winnipeg, MB | D-21/2 | NaN | NaN | male |
---|
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
---|
1283 | 1284 | 3rd | 0 | Vestrom, Miss Hulda Amanda Adolfina | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1284 | 1285 | 3rd | 0 | Vonk, Mr Jenko | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1285 | 1286 | 3rd | 0 | Ware, Mr Frederick | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1286 | 1287 | 3rd | 0 | Warren, Mr Charles William | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1287 | 1288 | 3rd | 0 | Wazli, Mr Yousif | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1288 | 1289 | 3rd | 0 | Webber, Mr James | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1289 | 1290 | 3rd | 1 | Wennerstrom, Mr August Edvard | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1290 | 1291 | 3rd | 0 | Wenzel, Mr Linhart | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1291 | 1292 | 3rd | 0 | Widegren, Mr Charles Peter | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1292 | 1293 | 3rd | 0 | Wiklund, Mr Jacob Alfred | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1293 | 1294 | 3rd | 1 | Wilkes, Mrs Ellen | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1294 | 1295 | 3rd | 0 | Willer, Mr Aaron | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1295 | 1296 | 3rd | 0 | Willey, Mr Edward | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1296 | 1297 | 3rd | 0 | Williams, Mr Howard Hugh | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1297 | 1298 | 3rd | 0 | Williams, Mr Leslie | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1298 | 1299 | 3rd | 0 | Windelov, Mr Einar | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1299 | 1300 | 3rd | 0 | Wirz, Mr Albert | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1300 | 1301 | 3rd | 0 | Wiseman, Mr Phillippe | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1301 | 1302 | 3rd | 0 | Wittevrongel, Mr Camiel | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1302 | 1303 | 3rd | 1 | Yalsevac, Mr Ivan | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1303 | 1304 | 3rd | 0 | Yasbeck, Mr Antoni | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1304 | 1305 | 3rd | 1 | Yasbeck, Mrs Antoni | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1305 | 1306 | 3rd | 0 | Youssef, Mr Gerios | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1306 | 1307 | 3rd | 0 | Zabour, Miss Hileni | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1307 | 1308 | 3rd | 0 | Zabour, Miss Tamini | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1308 | 1309 | 3rd | 0 | Zakarian, Mr Artun | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1309 | 1310 | 3rd | 0 | Zakarian, Mr Maprieder | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1310 | 1311 | 3rd | 0 | Zenn, Mr Philip | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1311 | 1312 | 3rd | 0 | Zievens, Rene | NaN | NaN | NaN | NaN | NaN | NaN | female |
---|
1312 | 1313 | 3rd | 0 | Zimmerman, Leo | NaN | NaN | NaN | NaN | NaN | NaN | male |
---|
1313 rows × 11 columns
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()
| row.names | survived | age |
---|
count | 1313.000000 | 1313.000000 | 633.000000 |
---|
mean | 657.000000 | 0.341965 | 31.194181 |
---|
std | 379.174762 | 0.474549 | 14.747525 |
---|
min | 1.000000 | 0.000000 | 0.166700 |
---|
25% | 329.000000 | 0.000000 | 21.000000 |
---|
50% | 657.000000 | 0.000000 | 30.000000 |
---|
75% | 985.000000 | 1.000000 | 41.000000 |
---|
max | 1313.000000 | 1.000000 | 71.000000 |
---|
# Keep only the three columns that are used as model features.
feature_columns = ["pclass", "age", "sex"]
x = data[feature_columns]
x.head()
| pclass | age | sex |
---|
0 | 1st | 29.0000 | female |
---|
1 | 1st | 2.0000 | female |
---|
2 | 1st | 30.0000 | male |
---|
3 | 1st | 25.0000 | female |
---|
4 | 1st | 0.9167 | male |
---|
# Target vector: the integer "survived" column (0 or 1 per passenger).
y = data["survived"]
y.head()
0 1
1 0
2 0
3 0
4 1
Name: survived, dtype: int64
# Impute missing ages with the mean age of the full dataset.
# The filled column is assigned back rather than calling fillna(inplace=True)
# on x["age"]: x is a column subset of `data`, so the in-place form is a
# chained mutation that raises SettingWithCopyWarning on older pandas and
# silently leaves x unchanged when Copy-on-Write is enabled.
x = x.assign(age=x["age"].fillna(data["age"].mean()))
x.head()
| pclass | age | sex |
---|
0 | 1st | 29.0000 | female |
---|
1 | 1st | 2.0000 | female |
---|
2 | 1st | 30.0000 | male |
---|
3 | 1st | 25.0000 | female |
---|
4 | 1st | 0.9167 | male |
---|
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=22,
)
x.head()
| pclass | age | sex |
---|
0 | 1st | 29.0000 | female |
---|
1 | 1st | 2.0000 | female |
---|
2 | 1st | 30.0000 | male |
---|
3 | 1st | 25.0000 | female |
---|
4 | 1st | 0.9167 | male |
---|
# Convert both splits to lists of {column: value} dicts, one dict per row,
# which is the input format passed to DictVectorizer below.
x_train, x_test = (
    frame.to_dict(orient="records") for frame in (x_train, x_test)
)
x_train
[{'pclass': '3rd', 'age': 45.0, 'sex': 'female'},
{'pclass': '3rd', 'age': 31.19418104265403, 'sex': 'male'},
{'pclass': '1st', 'age': 31.19418104265403, 'sex': 'female'},
.....
...]
# Vectorize the row dicts: string-valued features become one-hot columns,
# numeric features keep their value.
transfer = DictVectorizer()
# Learn the feature vocabulary from the training set only.
x_train = transfer.fit_transform(x_train)
# Encode the test set with the vocabulary learned above. Refitting here
# (fit_transform) would rebuild the columns from the test data, which can
# yield a different column set/order than the model was trained on.
x_test = transfer.transform(x_test)
x_train
<1050x6 sparse matrix of type '<class 'numpy.float64'>'
with 3150 stored elements in Compressed Sparse Row format>
# Decision tree limited to depth 5, trained on the vectorized training split.
estimator = DecisionTreeClassifier(max_depth=5)
estimator.fit(x_train, y_train)
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=None,
splitter='best')
# Predicted survival label for every row of the test split.
y_pre = estimator.predict(x_test)
print(y_pre)
[0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 0 1 0 0 0
0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0
1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1
0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1
1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0
0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0
0 0 1 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 1 0 1]
# Score the fitted tree on the held-out test split
# (for scikit-learn classifiers, score() is the mean accuracy).
ret = estimator.score(x_test, y_test)
ret
0.7984790874524715
# Labels for the six vectorized columns, used to annotate the tree nodes.
# NOTE(review): the order must match the DictVectorizer column order
# (presumably age, pclass=*, sex=female, sex=male) — confirm against
# the fitted vectorizer. The last two labels are Chinese for female / male.
tree_feature_labels = [
    'age',
    'pclass=1st',
    'pclass=2nd',
    'pclass=3rd',
    '女性',
    '男性',
]
# Write the fitted tree in Graphviz DOT format.
export_graphviz(
    estimator,
    out_file="./data/tree.dot",
    feature_names=tree_feature_labels,
)
|