[人工智能] 深度学习实战——numpy手写梯度下降法对价格进行预测

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> 深度学习实战——numpy手写梯度下降法对价格进行预测 -> 正文阅读

[人工智能]深度学习实战——numpy手写梯度下降法对价格进行预测

作者:recommend-item-box type_download clearfix

导包准备

import numpy as np
import pandas as pd
import jdc
import matplotlib.pyplot as plt
import seaborn as sns #Visualization

算法

梯度求导公式

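对应的梯度计算，a（即代码中的 alpha）代表学习率。记 $\hat{Y} = X^{\top} W + w_0$，损失函数为 $J = \frac{1}{2m}\sum_{i=1}^{m}\left(y^{(i)} - \hat{y}^{(i)}\right)^2$，参数更新为 $W \leftarrow W + a \cdot \frac{1}{m} X \left(Y - \hat{Y}\right)$，$w_0 \leftarrow w_0 + a \cdot \frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)} - \hat{y}^{(i)}\right)$。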
在这里插入图片描述

class MultivariateNetwork():
    """
    Multivariate linear regression trained with batch gradient descent.

    The hypothesis is Y_hat = X.T @ W + w_0, where X stores one training
    example per column (shape (n, m)), W has shape (n, 1) and w_0 is the
    scalar bias.
    """

    def __init__(self, num_of_features=1, learning_rate=0.1):
        """
        Create a zero weight vector W of shape (num_of_features, 1) and a zero bias w_0.

        Arguments:
        num_of_features -- size of the W vector, i.e., the number of features, excluding the bias
        learning_rate -- step size (alpha) applied to every update in fit()
        """
        # n is the number of features
        self.n = num_of_features
        # alpha is the learning rate
        self.alpha = learning_rate

        self.W = np.zeros((self.n, 1))
        self.w_0 = 0

    def fit(self, X, Y, epochs=1000, print_loss=True):
        """
        Run batch gradient descent on the mean squared error loss.

        Arguments:
        X -- training data matrix of shape (n, m): each column is a training example
        Y -- true "label" vector with m entries (e.g. shape (1, m)); it is
             reshaped to a column vector internally
        epochs -- number of full-batch update steps to perform
        print_loss -- when True, print the loss every 100 epochs

        Return:
        params -- dictionary containing the learned "W" and "w_0"
        grads -- dictionary containing "dw" and "dw_0" from the last epoch
                 (all zeros when epochs == 0). These are the descent
                 directions that get added to the weights, i.e. the negative
                 gradient of the loss.
        losses -- loss values recorded every 100 epochs
        """
        losses = []

        # The number of examples and the label layout do not change between
        # epochs, so compute them once instead of on every iteration.
        m = X.shape[1]
        Y = np.asarray(Y).reshape(-1, 1)

        # Defined up front so that epochs == 0 still returns well-formed grads
        # instead of failing on an undefined name.
        dW = np.zeros_like(self.W)
        dw_0 = 0.0

        for i in range(epochs):
            # (m, n) @ (n, 1) -> (m, 1): one prediction per training example
            Y_hat = X.T @ self.W + self.w_0
            residual = Y - Y_hat

            # Loss: 1 / (2m) * sum of squared residuals
            loss = np.sum(residual ** 2) / (2 * m)

            # Descent directions for W and w_0:
            # (n, m) @ (m, 1) -> (n, 1)
            dW = (X @ residual) / m
            dw_0 = np.sum(residual) / m

            # Weight updates
            self.W = self.W + self.alpha * dW
            self.w_0 = self.w_0 + self.alpha * dw_0

            if i % 100 == 0:
                losses.append(loss)
                # Print the cost every 100 epochs
                if print_loss:
                    print("Cost after iteration %i: %f" % (i, loss))

        params = {
            "W": self.W,
            "w_0": self.w_0
        }

        grads = {
            "dw": dW,
            "dw_0": dw_0
        }

        return params, grads, losses

    def predict(self, X):
        '''
        Predict the actual values using learned parameters (self.W, self.w_0)

        Arguments:
        X -- data of size (n x m), one example per column

        Returns:
        Y_prediction -- a numpy array of shape (m, 1) containing one prediction per example in X
        '''
        # (m, n) @ (n, 1) + w_0 -> (m, 1)
        return X.T @ self.W + self.w_0

    def normalize(self, matrix):
        '''
        Standardize every feature to zero mean and unit standard deviation.

        Arguments:
        matrix -- either an (n, m) matrix whose columns are training examples
                  (each row, i.e. each feature, is standardized on its own),
                  or a 1-D array holding the values of a single feature

        Returns:
        A new float array with the same shape as the input.
        '''
        matrix = np.asarray(matrix, dtype=float)
        # Reduce over the last axis (the examples) so the statistics are
        # computed per feature; keepdims=True keeps them broadcastable.
        mean = np.mean(matrix, axis=-1, keepdims=True)
        std = np.std(matrix, axis=-1, keepdims=True)
        # A constant feature has zero spread; divide by 1 so it maps to 0
        # instead of producing NaN.
        std = np.where(std == 0, 1.0, std)
        return (matrix - mean) / std

训练代码

def _accuracy_percent(Y_prediction, Y_true):
    """Return |100 - mean absolute percentage error| between predictions and labels."""
    Y_true = np.asarray(Y_true)
    # predict() returns a column vector (m, 1) while the callers pass labels as
    # a row vector (1, m); align the shapes so the subtraction is element-wise
    # instead of broadcasting to an (m, m) matrix.
    Y_prediction = np.asarray(Y_prediction).reshape(Y_true.shape)
    return abs(100 - np.mean(np.abs(Y_prediction - Y_true) / Y_true) * 100)


def Run_Experiment(X_train, Y_train, X_test, Y_test, epochs=2000, learning_rate=0.5, print_loss=False):
    """
    Train a MultivariateNetwork on the training set, evaluate it on both sets
    and print the resulting errors and parameters.

    Arguments:
    X_train -- training set represented by a numpy array, one example per column
    Y_train -- training labels represented by a numpy array (vector)
    X_test -- test set represented by a numpy array, one example per column
    Y_test -- test labels represented by a numpy array (vector)
    epochs -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of fit()
    print_loss -- Set to true to print the cost every 100 iterations

    Returns:
    d -- dictionary containing information about the model: the recorded
         losses, the predictions for both sets (as returned by predict()),
         the learned W and w_0, and the hyperparameters used.
    """
    num_of_features = X_train.shape[0]
    model = MultivariateNetwork(num_of_features, learning_rate)

    # Forward print_loss so that the caller's choice is honoured; fit()
    # defaults to printing otherwise.
    parameters, grads, losses = model.fit(X_train, Y_train, epochs=epochs, print_loss=print_loss)

    # Predict test/train set examples
    Y_prediction_test = model.predict(X_test)
    Y_prediction_train = model.predict(X_train)

    # Print train/test Errors
    print("train accuracy: {:.2f} %".format(_accuracy_percent(Y_prediction_train, Y_train)))
    print("test accuracy: {:.2f} %".format(_accuracy_percent(Y_prediction_test, Y_test)))

    np.set_printoptions(precision=2)
    W = parameters['W']
    w_0 = parameters['w_0']
    print("W: \n")
    print(W)
    print("w_0: {:.2f}".format(w_0))
    print(w_0)

    d = {"losses": losses,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "W": W,
         "w_0": w_0,
         "learning_rate": learning_rate,
         "epochs": epochs}

    return d

实战，拿个训练集试一下

# Training split: columns 0 and 1 are used as features, column 2 as the label.
# Features are transposed so that every column is one example.
df = pd.read_csv('prj2data1.csv', header=None)
X_train = df[[0, 1]].values.T
Y_train = df[2].values.reshape(1, -1)

# Test split, read with the same column layout.
df_test = pd.read_csv('prj2data1_test.csv', header=None)
X_test = df_test[[0, 1]].values.T
Y_test = df_test[2].values.reshape(1, -1)

d = Run_Experiment(
    X_train, Y_train, X_test, Y_test,
    epochs=2000, learning_rate=0.01, print_loss=True,
)

# Plot learning curve (one recorded loss per hundred epochs)
losses = np.squeeze(d['losses'])
plt.plot(losses)
plt.xlabel('epochs (per hundreds)')
plt.ylabel('loss')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()

之后会得到损失的输出、对应参数的输出，以及损失曲线
在这里插入图片描述

在这里插入图片描述

不对数据进行标准化的结果

发现有些特征差异太大，在进行求导时，会导致梯度爆炸

# Prepare Train/Test data (the first line of the file is skipped)
df = pd.read_csv('encoded_insurance.csv', header=None, skiprows=1)

# The leading 70% of the rows are used for training, the rest for testing.
train_test_ratio = 0.7
range_train = int(len(df) * train_test_ratio)
train_rows = df.iloc[:range_train]
test_rows = df.iloc[range_train:]

# Features: every column except the last, transposed so each column is one example.
X_train = train_rows.iloc[:, :-1].values.T
X_test = test_rows.iloc[:, :-1].values.T
# Labels: the last column, as a (1, m) row vector.
Y_train = train_rows.iloc[:, -1].values.reshape(1, -1)
Y_test = test_rows.iloc[:, -1].values.reshape(1, -1)

d = Run_Experiment(
    X_train, Y_train, X_test, Y_test,
    epochs=1000, learning_rate=0.01, print_loss=True,
)

# Plot learning curve (one recorded loss per hundred epochs)
losses = np.squeeze(d['losses'])
plt.plot(losses)
plt.xlabel('epochs (per hundreds)')
plt.ylabel('loss')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()

在这里插入图片描述

对数据进行标准化

model2 = MultivariateNetwork()

# Work on float copies so that writing standardized values back into the
# arrays can never be truncated by an integer dtype.
X_train = X_train.astype(float)
X_test = X_test.astype(float)

# Standardize the first two feature rows. The test rows are scaled with the
# TRAINING mean/std so that both splits share one scale; using the test set's
# own statistics would feed the model differently scaled inputs.
for row in (0, 1):
    train_mean = X_train[row].mean()
    train_std = X_train[row].std()
    # Scale the test row first, while X_train[row] still holds the raw values.
    X_test[row] = (X_test[row] - train_mean) / train_std
    X_train[row] = model2.normalize(X_train[row])

d = Run_Experiment(X_train, Y_train, X_test, Y_test, epochs = 1000, learning_rate = 0.01, print_loss = True)
# Plot learning curve (one recorded loss per hundred epochs)
losses = np.squeeze(d['losses'])
plt.plot(losses)
plt.ylabel('loss')
plt.xlabel('epochs (per hundreds)')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()

在这里插入图片描述

发现效果不是很好，考虑对价格(因变量)进行分析

# Histogram of the last column of df (the dependent variable) on a single
# 12x4 inch axes.
fig, ax = plt.subplots(figsize=(12, 4))
sns.distplot(df.iloc[:, -1], bins=50, color='r', ax=ax)
ax.set_title('Distribution of insurance charges')

在这里插入图片描述

让我们分析因变量的特征。由此可见，因变量“charges”（保险费用）并不服从正态分布。然而，正态性在统计学和线性回归中非常重要。

fig = plt.figure(figsize=(12, 4))

ax = fig.add_subplot(111)
# Pay attention to the log: the histogram is drawn over the natural log of the
# last column of df.
sns.distplot(np.log(df.iloc[:, -1]), bins=40, color='b', ax=ax)
ax.set_title('Distribution of insurance charges in $log$ scale')
# NOTE(review): the values are already log-transformed above, so a log x-axis
# applies the logarithm a second time -- confirm this is intended.
ax.set_xscale('log')

在这里插入图片描述

因此对标签进行对数变换

# Normalize the dependent variable with a logarithm transformation:
# both label vectors are replaced by log(1 + y).
Y_train, Y_test = np.log(1 + Y_train), np.log(1 + Y_test)

d = Run_Experiment(
    X_train, Y_train, X_test, Y_test,
    epochs=1000, learning_rate=0.01, print_loss=True,
)

# Plot learning curve (one recorded loss per hundred epochs)
losses = np.squeeze(d['losses'])
plt.plot(losses)
plt.xlabel('epochs (per hundreds)')
plt.ylabel('loss')
plt.title("Learning rate =" + str(d["learning_rate"]))
plt.show()