Logistic Regression + Regularization
Plotting the decision boundary is the trickiest part and still needs more study 🤦‍♀️😭 (note: the intercept column must be added exactly once in get_X, or the boundary coefficients come out wrong).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
from sklearn.metrics import classification_report
data = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])
data.head()
data.describe()
sns.set(context="notebook", style="darkgrid", palette=sns.color_palette("RdBu", 2))
sns.lmplot(x='exam1', y='exam2', hue='admitted', data=data,
           height=6,  # `size=` was renamed to `height=` in seaborn 0.9
           fit_reg=False,
           scatter_kws={"s": 50}
           )
plt.show()
def get_X(df):
    # prepend the intercept column of ones (exactly once)
    ones = pd.DataFrame({'ones': np.ones(len(df))})
    data = pd.concat([ones, df], axis=1)
    return data.iloc[:, :-1].values  # everything except the label column

def get_y(df):
    # the label is the last column
    return np.array(df.iloc[:, -1])

def normalize_feature(df):
    # z-score normalization, column by column
    return df.apply(lambda column: (column - column.mean()) / column.std())
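For reference, normalize_feature applies the standard z-score to each column (defined here for completeness; the exercise below never actually calls it):

$$x_{\text{norm}} = \frac{x - \mu}{\sigma}$$

where $\mu$ is the column mean and $\sigma$ the column standard deviation.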
X = get_X(data)
print(X.shape)
y = get_y(data)
print(y.shape)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
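The sigmoid (logistic) function maps any real input into $(0, 1)$:

$$g(z) = \frac{1}{1 + e^{-z}}$$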
fig, ax = plt.subplots(figsize=(8, 6))
z = np.arange(-10, 10, step=0.01)
ax.plot(z, sigmoid(z))
ax.set_ylim((-0.1, 1.1))
ax.set_xlabel('z', fontsize=18)
ax.set_ylabel('g(z)', fontsize=18)
ax.set_title('sigmoid function', fontsize=18)
plt.show()
theta = np.zeros(3)  # one parameter per column of X: intercept, exam1, exam2
theta
def cost(theta, X, y):
    # average cross-entropy loss over all m examples
    return np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1 - sigmoid(X @ theta)))
cost(theta,X,y)
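This implements the (unregularized) logistic regression cost, with $h_\theta(x) = g(\theta^T x)$:

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log\left(h_\theta(x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$

With $\theta = \mathbf{0}$, every prediction is $0.5$, so the initial cost is $-\log(0.5) \approx 0.693$.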
def gradient(theta, X, y):
    # vectorized batch gradient (no loop over examples)
    return (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)
gradient(theta,X,y)
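The gradient has the same form as in linear regression, only with the sigmoid hypothesis:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}, \qquad \nabla J(\theta) = \frac{1}{m} X^T \left( g(X\theta) - y \right)$$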
import scipy.optimize as opt
res = opt.minimize(fun=cost, x0=theta, args=(X, y), method='Newton-CG', jac=gradient)
print(res)
def predict(x, theta):
    # classify as positive when the predicted probability reaches 0.5
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)
final_theta = res.x
y_pred = predict(X, final_theta)
print(classification_report(y, y_pred))
print(res.x)
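Since $g(\theta^T x) \ge 0.5$ exactly when $\theta^T x \ge 0$, the decision boundary is the line $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$. Solving for $x_2$:

$$x_2 = -\frac{\theta_0}{\theta_2} - \frac{\theta_1}{\theta_2} x_1$$

which is why the coefficients below are obtained by dividing $\theta$ by $-\theta_2$.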
coef = -(res.x / res.x[2])
print(coef)
x_plot = np.arange(130, step=0.1)
y_plot = coef[0] + coef[1] * x_plot  # the boundary line x2 = coef[0] + coef[1] * x1
sns.set(context="notebook", style="ticks", font_scale=1.5)
sns.lmplot(x='exam1', y='exam2', hue='admitted', data=data,
           height=6,
           fit_reg=False,
           scatter_kws={"s": 25}
           )
plt.plot(x_plot, y_plot, 'grey')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()
df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
df.head()
sns.set(context="notebook", style="ticks", font_scale=1.5)
sns.lmplot(x='test1', y='test2', hue='accepted', data=df,
           height=6,
           fit_reg=False,
           scatter_kws={"s": 50}
           )
plt.title('Regularized Logistic Regression')
plt.show()
def feature_mapping(x, y, power, as_ndarray=False):
    # build every polynomial term x^(i-p) * y^p for 0 <= p <= i <= power
    data = {"f{}{}".format(i - p, p): np.power(x, i - p) * np.power(y, p)
            for i in np.arange(power + 1)
            for p in np.arange(i + 1)
            }
    if as_ndarray:
        return pd.DataFrame(data).values
    else:
        return pd.DataFrame(data)
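With power=6 this maps the two raw scores into all monomials up to degree 6, 28 features in total:

$$\text{mapFeature}(x_1, x_2) = \left[\, 1,\; x_1,\; x_2,\; x_1^2,\; x_1 x_2,\; x_2^2,\; x_1^3,\; \ldots,\; x_1 x_2^5,\; x_2^6 \,\right]$$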
x1 = np.array(df.test1)
x2 = np.array(df.test2)
data = feature_mapping(x1, x2, power=6)
print(data.shape)
data.head()
theta = np.zeros(data.shape[1])
X = feature_mapping(x1, x2, power=6, as_ndarray=True)
print(X.shape)
y = get_y(df)
print(y.shape)
def regularized_cost(theta, X, y, l=1):
    # theta_0 (the intercept) is not regularized
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()
    return cost(theta, X, y) + regularized_term
regularized_cost(theta, X, y, l=1)
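This adds the standard L2 penalty to the cost, skipping $\theta_0$:

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log\left(h_\theta(x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2$$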
def regularized_gradient(theta, X, y, l=1):
    # pad with a leading zero so the intercept gradient is left unregularized
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n
    regularized_term = np.concatenate([np.array([0]), regularized_theta])
    return gradient(theta, X, y) + regularized_term
regularized_gradient(theta, X, y)
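Correspondingly, the regularized gradient leaves the intercept term untouched:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)} + \begin{cases} 0 & j = 0 \\ \dfrac{\lambda}{m} \theta_j & j \ge 1 \end{cases}$$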
print('init cost = {}'.format(regularized_cost(theta, X, y)))
res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y), method='Newton-CG', jac=regularized_gradient)
res
final_theta = res.x
y_pred = predict(X, final_theta)
print(classification_report(y, y_pred))
def draw_boundary(power, l):
    density = 1000
    threshold = 2 * 10**-3
    final_theta = feature_mapped_logistic_regression(power, l)
    x, y = find_decision_boundary(density, power, final_theta, threshold)
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    sns.lmplot(x='test1', y='test2', hue='accepted', data=df, height=6, fit_reg=False, scatter_kws={"s": 100})
    plt.scatter(x, y, c='r', s=10)
    plt.title('Decision boundary')
    plt.show()
def feature_mapped_logistic_regression(power, l):
    df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
    x1 = np.array(df.test1)
    x2 = np.array(df.test2)
    y = get_y(df)
    X = feature_mapping(x1, x2, power, as_ndarray=True)
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient)
    final_theta = res.x
    return final_theta
def find_decision_boundary(density, power, theta, threshold):
    # evaluate theta^T x on a dense grid and keep the points where it is nearly zero
    t1 = np.linspace(-1, 1.5, density)
    t2 = np.linspace(-1, 1.5, density)
    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    mapped_cord = feature_mapping(x_cord, y_cord, power)
    inner_product = mapped_cord.values @ theta
    decision = mapped_cord[np.abs(inner_product) < threshold]
    return decision.f10, decision.f01
draw_boundary(power=6, l=1)
draw_boundary(power=6,l=0)
draw_boundary(power=6, l=100)
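Thresholding $|\theta^T x|$ works but needs a very dense grid to look smooth. A cleaner alternative (a minimal sketch, not part of the original exercise, reusing feature_mapping and feature_mapped_logistic_regression from above) is to let matplotlib trace the zero level set of $\theta^T x$ with plt.contour:

final_theta = feature_mapped_logistic_regression(power=6, l=1)
t = np.linspace(-1, 1.5, 250)
xx, yy = np.meshgrid(t, t)
# evaluate theta^T x over the grid, then draw its zero contour
z = feature_mapping(xx.ravel(), yy.ravel(), power=6, as_ndarray=True) @ final_theta
sns.lmplot(x='test1', y='test2', hue='accepted', data=df, height=6, fit_reg=False, scatter_kws={"s": 50})
plt.contour(xx, yy, z.reshape(xx.shape), levels=[0], colors='grey')
plt.title('Decision boundary via contour (lambda=1)')
plt.show()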