一、使用python来实现，使用解析解求解多元线性回归

"""
创建 100行1列的 x,y数据
"""


# 解析解  求解模型的方法
# numpy是做数值计算的
import numpy as np
# matplotlib 是关于绘图的
import matplotlib.pyplot as plt


# Linear regression is supervised learning, so we need features X and targets y.
X = np.random.rand(100, 1)

# The simulated targets stand for observed data: the true line 2 + 4x plus an
# error term drawn from the standard normal distribution (mean 0, variance 1).
noise = np.random.randn(100, 1)
y = 2 + 4 * X + noise

# Prepend a column of ones (x0) so the intercept w0 is solved together with
# the other weights.
X_b = np.hstack((np.ones((100, 1)), X))

# Closed-form (normal equation) solution: theta = (X^T X)^-1 X^T y
gram_inverse = np.linalg.inv(X_b.T @ X_b)
θ = gram_inverse @ X_b.T @ y
print(θ)

# Use the fitted model to predict on two new inputs (x = 0 and x = 2).
X_new = np.array([[0], [2]])
X_new_b = np.hstack((np.ones((2, 1)), X_new))
print(X_new_b)

y_predict = X_new_b @ θ
print(y_predict)

# Plot the fitted line (red) on top of the simulated data points (blue).
plt.plot(X_new, y_predict, 'r-')
plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 10])
plt.show()

二、使用Scikit-learn封装的线性回归函数求解多元线性回归

import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt


# Two independent features, each drawn uniformly from [0, 2).
X1 = 2 * np.random.rand(100, 1)
X2 = 2 * np.random.rand(100, 1)
X = np.hstack((X1, X2))

# Targets follow 4 + 3*x1 + 5*x2 plus standard-normal noise.
y = 4 + 3 * X1 + 5 * X2 + np.random.randn(100, 1)

# fit_intercept=True asks the estimator to learn the intercept term as well.
reg = LinearRegression(fit_intercept=True)
reg.fit(X, y)
# intercept_ holds the intercept, coef_ the remaining weights.
print(reg.intercept_, reg.coef_)

# Predict for three new samples.
X_new = np.array([[0, 0], [2, 1], [2, 4]])
y_predict = reg.predict(X_new)

# Plot the predictions (red line) and the simulated data (blue dots),
# both against the first feature.
first_feature_new = X_new[:, 0]
plt.plot(first_feature_new, y_predict, 'r-')
plt.plot(X1, y, 'b.')
plt.axis([0, 2, 0, 25])
plt.show()

三、python 实现梯度下降法（全量梯度、随机梯度、小批量梯度）

1、全量梯度

import numpy as np

# Build the data set. Seeding the generator makes every run produce the
# same random values.
np.random.seed(1)
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)  # simulated observed targets
X_b = np.hstack((np.ones((100, 1)), X))  # column of ones for the intercept

# Hyperparameters.
learning_rate = 0.001
n_iteration = 10000

# Step 1: initialise theta (w0..wn) from the standard normal distribution.
theta = np.random.randn(2, 1)

# Step 4: no convergence threshold is checked; a relatively large, fixed
# number of iterations is used instead.
for _ in range(n_iteration):
    # Step 2: gradient over the whole data set, X^T (X theta - y).
    residual = X_b @ theta - y
    gradient = X_b.T @ residual
    # Step 3: gradient-descent update, theta <- theta - eta * gradient.
    theta = theta - learning_rate * gradient

print(theta)

2、小批量梯度下降

"""  
小批量梯度下降
"""
import numpy as np

# Build the data set: y = 4 + 3x plus standard-normal noise.
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.hstack((np.ones((100, 1)), X))  # column of ones for the intercept

# Hyperparameters.
learn_rating = 0.0001
n_epochs = 10000                   # number of passes over the data
m = 100                            # number of samples
batch_size = 10                    # samples used per update
num_batches = int(m / batch_size)  # updates needed to cover the data once

# Step 1: random initialisation of theta.
theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    # Reshuffle the rows at the start of every epoch, before the batches
    # are cut, so the batches differ from epoch to epoch.
    order = np.arange(len(X_b))
    np.random.shuffle(order)
    X_b, y = X_b[order], y[order]
    for i in range(num_batches):
        rows = slice(i * batch_size, (i + 1) * batch_size)
        x_batch, y_batch = X_b[rows], y[rows]
        # Step 2: gradient on the current batch, X^T (X theta - y).
        gradient = x_batch.T @ (x_batch @ theta - y_batch)
        # Step 3: gradient-descent update, theta <- theta - eta * gradient.
        theta = theta - learn_rating * gradient

print(theta)

3、随机梯度

"""  
Stochastic gradient descent: each parameter update uses a single sample, taken in random order
"""

import numpy as np

# Build the data set: y = 4 + 3x plus standard-normal noise.
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.hstack((np.ones((100, 1)), X))  # column of ones for the intercept

# Hyperparameters.
learning_rating = 0.001  # learning rate
n_epochs = 10000         # number of passes over the data
m = 100                  # number of samples

# Step 1: random initialisation of theta.
theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    # Reshuffle the rows at the start of every epoch so the samples are
    # visited in a different order each time.
    order = np.arange(len(X_b))
    np.random.shuffle(order)
    X_b, y = X_b[order], y[order]
    for i in range(m):
        # Slices (not plain indexing) keep xi and yi two-dimensional.
        row = slice(i, i + 1)
        xi, yi = X_b[row], y[row]
        # Step 2: gradient on this single sample, x^T (x theta - y).
        gradient = xi.T @ (xi @ theta - yi)
        # Step 3: gradient-descent update, theta <- theta - eta * gradient.
        theta = theta - learning_rating * gradient

print(theta)

四、归一化（最大最小值归一化、标准归一化）

1、最大最小值归一化

import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Min-max normalisation of a single feature with scikit-learn.
scaler = MinMaxScaler()
temp = np.array([1, 2, 3, 4, 5, 5])
# The scaler is given a 2-D array, so the 1-D data is reshaped into one column.
column = temp.reshape(-1, 1)
result = scaler.fit_transform(column)
print(result)

2、标准归一化

"""  
代码实现标准归一化
"""
from sklearn.preprocessing import StandardScaler
import numpy as np
# Standardisation of a single feature with scikit-learn.
temp = np.array([1, 2, 3, 5, 5])
# The scaler is given a 2-D array, so the 1-D data is reshaped into one column.
column = temp.reshape(-1, 1)

scaler = StandardScaler()
# fit() computes the per-column statistics used by transform().
scaler.fit(column)

# In a script a bare expression such as `scaler.mean_` is evaluated and thrown
# away, so the values are printed explicitly.
print(scaler.mean_)   # per-column mean
print(scaler.var_)    # per-column variance (not the standard deviation)
print(scaler.scale_)  # per-column scaling factor, i.e. the standard deviation
# Standardised data.
print(scaler.transform(column))

五、Lasso回归

""" 
Lasso regression: loss function + L1 regularization term
 """
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor   # 随机梯度下降
# Simulated data: y = 4 + 3x plus standard-normal noise.
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Alternative using the dedicated Lasso estimator (kept for reference):
#   lasso_reg = Lasso(alpha=0.04, max_iter=30000)
#   lasso_reg.fit(X, y)
#   print(lasso_reg.predict([[1.5]]))
#   print(lasso_reg.intercept_)  # intercept w0
#   print(lasso_reg.coef_)       # remaining weight w1

# Stochastic gradient descent with an L1 penalty.
sgd_reg = SGDRegressor(penalty='l1', max_iter=30000)
# The column-vector target is flattened to 1-D before fitting.
y_flat = y.ravel()
sgd_reg.fit(X, y_flat)
print(sgd_reg.predict([[1.5]]))
print(sgd_reg.intercept_)
print(sgd_reg.coef_)

六、ridge岭回归

"""  
ridge岭回归   损失函数+L2正则项   
Ridge(alpha=0.4, solver='sag')
    alpha  : L2 正则项系数 
    solver : 梯度训练方法
"""

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor  # 随机梯度下降

# Simulated data: y = 4 + 3x plus standard-normal noise.
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)  # simulated observed targets

# Alternative using the dedicated Ridge estimator (kept for reference):
#   ridge_reg = Ridge(alpha=0.4, solver='sag')
#   ridge_reg.fit(X, y)                # train on the data set
#   print(ridge_reg.predict([[1.5]]))  # prediction
#   print(ridge_reg.intercept_)        # intercept w0
#   print(ridge_reg.coef_)             # remaining weight w1

# Stochastic gradient descent with an L2 penalty.
sgd_reg = SGDRegressor(penalty='l2', max_iter=1000)
# SGDRegressor.fit documents y as a 1-D array of shape (n_samples,), so the
# (100, 1) column vector is flattened first, as the L1 example in this file
# already does.
sgd_reg.fit(X, y.ravel())
print(sgd_reg.predict([[1.5]]))
print(sgd_reg.intercept_)
print(sgd_reg.coef_)

七、弹性网络回归

"""  

损失函数 + L1 正则项+L2正则项
"""

import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import SGDRegressor  # 随机梯度下降
# Simulated data: y = 4 + 3x plus standard-normal noise.
X = np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Alternative using the dedicated ElasticNet estimator (kept for reference);
# l1_ratio balances the weight of the L1 term against the L2 term:
#   elastic_reg = ElasticNet(alpha=0.04, max_iter=100000, l1_ratio=0.15)
#   elastic_reg.fit(X, y)
#   print(elastic_reg.predict([[1.5]]))
#   print(elastic_reg.intercept_)
#   print(elastic_reg.coef_)

# Stochastic gradient descent with the combined L1 + L2 (elastic net) penalty.
sgd_reg = SGDRegressor(penalty='elasticnet', max_iter=30000)
# SGDRegressor.fit documents y as a 1-D array of shape (n_samples,), so the
# (100, 1) column vector is flattened first, as the L1 example in this file
# already does.
sgd_reg.fit(X, y.ravel())
print(sgd_reg.predict([[1.5]]))
print(sgd_reg.intercept_)
print(sgd_reg.coef_)

八、多项式回归升维（对特征的预处理）

"""  
多项式回归  升维
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures   # 对特征进行预处理
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error  # 回归评估指标 mse   ;评估升维的效果


# Seed the generator so repeated runs produce the same data.
np.random.seed(42)
m = 100  # number of samples
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

# Scatter plot of the raw data.
plt.plot(X, y, 'b.')

# First 80 rows for training, remaining 20 for testing.
X_train, y_train = X[:80], y[:80]
X_test, y_test = X[80:], y[80:]

# Polynomial degree -> matplotlib format string used to draw that model.
d = {1: 'g-', 2: 'r+', 10: 'y*'}
for degree, fmt in d.items():
    # Expand the single feature into polynomial features of this degree.
    # include_bias=True adds the constant column, so the linear model below
    # is created with fit_intercept=False.
    poly_features = PolynomialFeatures(degree=degree, include_bias=True)
    X_poly_train = poly_features.fit_transform(X_train)
    # The transformer is fitted on the training split only; the test split
    # is just transformed with it.
    X_poly_test = poly_features.transform(X_test)
    print(X_train[0])
    print(X_poly_train[0])
    print(X_train.shape)
    print(X_poly_train.shape)

    # Fit a plain linear model on the expanded features.
    lin_reg = LinearRegression(fit_intercept=False)
    lin_reg.fit(X_poly_train, y_train)
    print(lin_reg.intercept_, lin_reg.coef_)

    # Predict on both splits to see whether a higher degree starts to overfit.
    y_train_predict = lin_reg.predict(X_poly_train)
    y_test_predict = lin_reg.predict(X_poly_test)

    # Column 1 of the expanded matrix is the original feature x.
    plt.plot(X_poly_train[:, 1], y_train_predict, fmt)

    # Test MSE first, then training MSE.
    print(mean_squared_error(y_test, y_test_predict))
    print(mean_squared_error(y_train, y_train_predict))

# Display the figure, as the earlier plotting examples in this file do.
plt.show()