声明:内容非原创,是学习内容的总结,版权所属姜老师
逻辑回归
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_blobs
# Toy classification dataset: 150 samples, 2 features, 3 Gaussian clusters.
# NOTE(review): no random_state, so a different dataset is drawn each run.
X, y = make_blobs(n_samples=150, n_features=2, centers=3)
X.shape
(150, 2)
y.shape
(150,)
y
array([0, 1, 0, 1, 2, 1, 0, 1, 1, 2, 2, 2, 1, 2, 0, 1, 2, 0, 2, 2, 1, 2,
0, 1, 1, 0, 1, 2, 0, 0, 2, 0, 0, 2, 1, 0, 1, 2, 1, 1, 2, 2, 0, 2,
2, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 2, 0, 2,
2, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 2, 2, 2,
2, 2, 2, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 1, 1, 0, 1, 1, 2, 2, 0, 1,
1, 2, 2, 1, 2, 1, 0, 2, 0, 1, 0, 0, 2, 2, 2, 0, 1, 2, 1, 0, 1, 0,
2, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 0, 2, 1, 1])
import seaborn as sns
# Bar chart of samples per class. Pass the data as the keyword argument x=
# (seaborn >= 0.12 forbids it positionally); this silences the FutureWarning
# that the original positional call produced.
sns.countplot(x=y)
D:\software\anaconda\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:ylabel='count'>
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython magic: valid only inside a notebook cell,
# a SyntaxError in a plain .py file, so it is commented out here.
sns.set(palette='cubehelix', style='dark')
from matplotlib.colors import ListedColormap

# Build a discrete colormap from a seaborn palette so the scatter colors
# match the seaborn theme; one color per class label.
colors = sns.color_palette('husl')
cmp = ListedColormap(colors)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmp)
<matplotlib.collections.PathCollection at 0x253b8d81430>
# Fit a logistic-regression classifier (default settings) on the blob data.
lr = LogisticRegression()
lr.fit(X,y)
LogisticRegression()
import numpy as np

# Bounding box of the training data in feature space.
xmin, xmax = X[:, 0].min(), X[:, 0].max()
ymin, ymax = X[:, 1].min(), X[:, 1].max()

# 200 evenly spaced coordinates along each axis -> a 200x200 mesh.
a = np.linspace(xmin, xmax, 200)
b = np.linspace(ymin, ymax, 200)
xx, yy = np.meshgrid(a, b)

# Flatten the mesh into a (40000, 2) matrix: one row per grid point,
# suitable as input to the classifier.
X_test = np.column_stack((xx.ravel(), yy.ravel()))
X_test.shape
(40000, 2)
# Predict a class for every grid point, then draw the predicted regions with
# the original training points on top — visualizes the decision boundaries.
y_ = lr.predict(X_test)
plt.scatter(X_test[:,0],X_test[:,1],c=y_)
plt.scatter(X[:,0],X[:,1],c=y,cmap=cmp)
<matplotlib.collections.PathCollection at 0x253b9f5c430>
逻辑斯蒂回归的概率预测
# 10 samples x 5 uniform-random features in [0, 1) for the probability demo.
X = np.random.random(size=(10,5))
X
array([[0.76319326, 0.80687777, 0.03787185, 0.04125638, 0.24176662],
[0.49108586, 0.86212156, 0.58924449, 0.63014179, 0.58616281],
[0.47409458, 0.07739679, 0.57434414, 0.87367993, 0.23624037],
[0.83504226, 0.15709413, 0.70954111, 0.66084017, 0.94787926],
[0.51833235, 0.0205479 , 0.36886036, 0.68425598, 0.89572644],
[0.21072669, 0.75949519, 0.28850424, 0.05028508, 0.26778145],
[0.62106076, 0.31755045, 0.09641944, 0.83162441, 0.64964526],
[0.77423254, 0.49471209, 0.18916518, 0.9511086 , 0.30883323],
[0.15912683, 0.47671285, 0.54351492, 0.63198269, 0.0259659 ],
[0.22599968, 0.70621492, 0.95491154, 0.9921246 , 0.76667328]])
# Random binary labels (0 or 1), one per sample.
y = np.random.randint(0,2,size=10)
y
array([1, 1, 0, 1, 0, 0, 0, 1, 1, 0])
# Fit a binary logistic regression on the random data (labels are random,
# so accuracy is expected to be mediocre — this demo is about predict_proba).
lr = LogisticRegression()
lr.fit(X,y)
LogisticRegression()
lr.predict(X)
array([1, 1, 0, 0, 0, 1, 0, 1, 0, 0])
lr.coef_
array([[ 0.45474797, 0.36116252, -0.03061392, -0.15813758, -0.27354745]])
lr.score(X,y)
0.7
lr.predict_proba(X)
array([[0.39799317, 0.60200683],
[0.47356658, 0.52643342],
[0.53186764, 0.46813236],
[0.52494617, 0.47505383],
[0.56763617, 0.43236383],
[0.46772714, 0.53227286],
[0.5164642 , 0.4835358 ],
[0.46524313, 0.53475687],
[0.50749225, 0.49250775],
[0.54706982, 0.45293018]])
lr.predict(X)
array([1, 1, 0, 0, 0, 1, 0, 1, 0, 0])
# Custom decision threshold: flag a sample only when P(class 0) > 0.6;
# multiplying the boolean mask by 1 converts it to a 0/1 integer array.
(lr.predict_proba(X)[:,0] > 0.6)*1
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# A looser custom threshold: flag a sample when P(class 0) < 0.52
# (i.e. roughly "class 1 is at least about as likely as class 0").
(lr.predict_proba(X)[:,0] < 0.52)*1
array([1, 1, 0, 0, 0, 1, 1, 1, 1, 0])
lr.predict_proba(X)[:,1]
array([0.60200683, 0.52643342, 0.46813236, 0.47505383, 0.43236383,
0.53227286, 0.4835358 , 0.53475687, 0.49250775, 0.45293018])
梯度下降
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
线性回归的损失函数 Loss = ((y-y_)**2).sum()
def f(x):
    """Quadratic "loss" curve f(w) = 3*w**2 + 2*w + 3.2 used in the descent demo.

    Works element-wise on numpy arrays as well as on scalars.
    """
    # def instead of a lambda assigned to a name (PEP 8 E731) — same behavior.
    return 3*x**2 + 2*x + 3.2

# Sample the curve at 20 evenly spaced points on [-5, 5].
x = np.linspace(-5, 5, 20)
y = f(x)
# Plot the loss curve over the parameter w (x-axis: w, y-axis: loss value).
plt.plot(x,y)
plt.xlabel('w')
plt.ylabel('loss')
Text(0, 0.5, 'loss')
def g(x):
    """Gradient of the demo loss f(w) = 3*w**2 + 2*w + 3.2, i.e. f'(w) = 6*w + 2."""
    # def instead of a lambda assigned to a name (PEP 8 E731).
    return 6*x + 2

# NOTE(review): the transcript had two stray lines here that used w0/step
# before they were defined (a NameError if run top-to-bottom); they were
# redundant with the update inside the loop and have been removed.

# Random integer starting point in [-5, 5).
w0 = np.random.randint(-5, 5, 1)[0]
print('梯度下降的起始点:%d'%(w0))

# Offset w1 so the very first |w0 - w1| convergence check cannot trigger.
w1 = w0 + 1
step = 0.01        # learning rate
precision = 0.001  # stop once successive positions differ by <= this
max_count = 3000   # hard cap on iterations (safety stop)
current_count = 1
points = []        # every position visited by the descent

while True:
    if current_count > max_count:
        break  # safety stop: too many iterations
    if np.abs(w0 - w1) <= precision:
        break  # converged: the step size has become negligible
    w0 = w1
    w1 = w0 - step * g(w0)  # gradient-descent update
    points.append(w1)
    current_count += 1
    print('当前第%d次下降的位置:%.4f'%(current_count,w1))
梯度下降的起始点:-5
当前第2次下降的位置:-3.7800
当前第3次下降的位置:-3.5732
当前第4次下降的位置:-3.3788
。。。。。。
当前第87次下降的位置:-0.3513
当前第88次下降的位置:-0.3502
当前第89次下降的位置:-0.3492
当前第90次下降的位置:-0.3482
# Take every 10th visited position and overlay it (red dots) on the loss
# curve, showing the descent trajectory sliding toward the minimum.
w = np.array(points)[::10]
value = f(w)
x = np.linspace(-5,5,50)
y = f(x)
plt.figure(figsize=(12,3))
plt.plot(x,y)
plt.xlabel('w')
plt.ylabel('loss')
plt.scatter(w,value,color = 'red')
<matplotlib.collections.PathCollection at 0x154b5a16400>
|