IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 人工智能 -> Numpy Pandas Matplotlib 快速上手 -> 正文阅读

[人工智能]Numpy Pandas Matplotlib 快速上手

【莫烦Python】Numpy & Pandas: https://www.bilibili.com/video/BV1Ex411L7oT
【莫烦Python】Matplotlib Python 画图教程: https://www.bilibili.com/video/BV1Jx411L7LU
Numpy 官网文档:https://www.numpy.org.cn/user/setting-up.html
Pandas 官网文档:https://www.pypandas.cn/docs/
Matplotlib 官网文档:https://www.matplotlib.org.cn/tutorials/

Numpy 介绍

  • numpy 是基于C语言,对大量数据计算,快
  • pandas 基于 numpy 再封装

Numpy 基本属性: ndim, shape, size, dtype

import numpy as np

# Build a 2-D NumPy array from a nested Python list
array = np.array([[1, 2, 3],
                  [3, 4, 5]])

print(array)
# [[1 2 3]
#  [3 4 5]]

# ndim: number of dimensions (axes)
print(f'number of dim: {array.ndim}')  # number of dim: 2

# shape: size along each dimension
print(f'shape: {array.shape}')  # shape: (2, 3)

# size: total number of elements
print(f'size: {array.size}')  # size: 6

Numpy 创建矩阵: zeros, ones, empty, arange, linspace, random

# 从列表转化,可以指定类型
array = np.array([[1, 2, 3], [3, 4, 5]], dtype=np.int16)
print(f'type: {array.dtype}')  # type: int16

# 全0矩阵,参数为矩阵的shape
array = np.zeros([2, 3, 4])
print(array)
# [[[0. 0. 0. 0.]
#   [0. 0. 0. 0.]
#   [0. 0. 0. 0.]]
#  [[0. 0. 0. 0.]
#   [0. 0. 0. 0.]
#   [0. 0. 0. 0.]]]

# All-ones array. np.intp is the platform's pointer-sized integer type; the
# former alias np.int0 was deprecated in NumPy 1.24 and removed in NumPy 2.0.
array = np.ones([1, 2, 3], dtype=np.intp)
print(array, array.dtype)
# [[[1 1 1]
#   [1 1 1]]] int64

# 空矩阵: 只分配内存但不初始化
array = np.empty([1, 2, 3,4])
print(array)

# 生成有序的矩阵: 起始(默认为0)、终止、步长(默认为1)
array = np.arange(10, 20, 2)
print(array)  # [10 12 14 16 18]

array = np.arange(12).reshape((3, 4))
print(array)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# linspace: split [1, 10] into 2 equal intervals, i.e. 3 evenly spaced values
# (an arithmetic progression) with both endpoints included
array = np.linspace(1, 10, 3)
print(array)  # [ 1.   5.5 10. ]

# 随机矩阵
array = np.random.random((2, 4))
print(array)
# [[0.70309398 0.72261462 0.66680394 0.42831447]
#  [0.80402385 0.40738157 0.59900451 0.62351528]]

Numpy 计算: + - * / **, sin, dot, sum, max, min, mean, median, cumsum, diff, nonzero, sort, argmax, T, transpose, clip, flat

import numpy as np

a = np.array([[1, 1], [0, 1]])
b = np.arange(4).reshape((2, 2))

# 对应元素操作: + - * / **
c = a - b
print(c)
# [[ 1  0]
#  [-2 -2]]

# 三角函数
c = 10 * np.sin(a)
print(c)
# [[8.41470985 8.41470985]
#  [0.         8.41470985]]

# 对应元素: < 返回一个bool列表
print(b < 3)
# [[ True  True]
#  [ True False]]

# Matrix product (rows of a times columns of b) -- not element-wise
# multiplication and not a cross product
c = np.dot(a, b)  # function form
c = a.dot(b)  # equivalent method form; recomputes the same result
print(c)
# [[2 4]
#  [2 3]]

# 求和、最大最小值
print(np.sum(a))  # 3
print(np.max(a))  # 1
print(np.min(a))  # 0

# 求和、最大最小值,可以指定轴,axis 0表示列,1表示行
a = np.array([[1, 2], [3, 4]])
print(np.sum(a, axis=0))  # [4 6]
print(np.sum(a, axis=1))  # [3 7]

print(np.max(a, axis=0))  # [3 4]
print(np.max(a, axis=1))  # [2 4]

print(np.min(a, axis=0))  # [1 2]
print(np.min(a, axis=1))  # [1 3]

# 平均值
print(np.mean(a))  # 2.5
print(a.mean())  # 2.5

# 中位数
print(np.median(a))

# 逐一相加,并保留结果
print(np.cumsum(a))

# 相邻数据之差
print(np.diff(a))

# 查找非0数,返回索引
print(np.nonzero(a))

# 逐行排序
print(np.sort(a))


# 获取索引: 按一维数组的索引
print(np.argmax(a))  # 3

# 矩阵转置
print(np.transpose(a))
print(a.T)

# clip(a, a_min, a_max): values below a_min are raised to a_min and values
# above a_max are lowered to a_max; with both bounds set to 1, every element
# of the result is 1
print(np.clip(a, 1, 1))

# Numpy 同索引访问值,同多维list
a = np.arange(3, 15)
print(a)
print(a[2])
# [ 3  4  5  6  7  8  9 10 11 12 13 14]
# 5

a = a.reshape((3, 4))
print(a)
print(a[2])
# [[ 3  4  5  6]
#  [ 7  8  9 10]
#  [11 12 13 14]]
# [11 12 13 14]

print(a[0][2])  # 5
print(a[0, 2])  # 5
print(a[0, :])  # [3 4 5 6]

# 打印行
for row in a:
    print(row)

# 打印列
for col in a.T:
    print(col)

# 打印单独元素
for item in a.flat:
    print(item)

# a.flat is an iterator over the individual elements
# a.flatten() returns a flattened 1-D ndarray (not a Python list)
print(a.flatten())  # [ 3  4  5  6  7  8  9 10 11 12 13 14]

Numpy 合并与分割: vstack, hstack, concatenate, split, array_split, vsplit, hsplit

import numpy as np

# ### Numpy array合并
a = np.array([1, 1, 1])
b = np.array([2, 2, 2])

# 上下合并 vertical stack
print(np.vstack((a, b)))
# [[1 1 1]
#  [2 2 2]]

print(a.shape, np.vstack((a, b)).shape)  # (3,) (2, 3)
# (3,) is a 1-D array with 3 elements; it is neither "3 rows" nor "3 columns"
# (2, 3) is a 2-D array: 2 inner arrays (rows), each holding 3 elements

# 左右合并 horizontal stack
print(np.hstack((a, b)))  # [1 1 1 2 2 2]

# Turn the 1-D array into a column of shape (3, 1); three equivalent ways
print(a[:, np.newaxis])  # insert a new trailing axis while indexing
print(np.vstack(a))  # stack each element as its own row
print(a.reshape(a.size, 1))  # explicit reshape to (size, 1)
# each of the three prints:
# [[1]
#  [1]
#  [1]]

print(a[np.newaxis, :])  # [[1 1 1]]

a = a[np.newaxis, :]
b = b[np.newaxis, :]
print(a, b)  # [[1 1 1]] [[2 2 2]]

# 这个合并函数是通过,axis指定合并的方向
c = np.concatenate((a, b), axis=0)
print(c)
# [[1 1 1]
#  [2 2 2]]

c = np.concatenate((a, b), axis=1)  #
print(c)  # [[1 1 1 2 2 2]]

# ### array 分割
a = np.arange(12).reshape((3, 4))
print(a)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# 横向分割
# a 分成2块,按列分,竖着操作,均匀分
print(np.split(a, 2, axis=1))
# [array([[0, 1],
#        [4, 5],
#        [8, 9]]), array([[ 2,  3],
#        [ 6,  7],
#        [10, 11]])]

# 不均分: 默认,先均分,多的放到第一个里面去,也可以指定
print(np.array_split(a, 3, axis=1))
# [array([[0, 1],
#        [4, 5],
#        [8, 9]]), array([[ 2],
#        [ 6],
#        [10]]), array([[ 3],
#        [ 7],
#        [11]])]

print(np.array_split(a, (1, 2, 1), axis=1))
# [array([[0],
#        [4],
#        [8]]), array([[1],
#        [5],
#        [9]]), array([], shape=(3, 0), dtype=int64), array([[ 1,  2,  3],
#        [ 5,  6,  7],
#        [ 9, 10, 11]])]

# np.vsplit np.hsplit
print(np.vsplit(a, 3))
# [array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]

print(np.hsplit(a, 2))
# [array([[0, 1],
#        [4, 5],
#        [8, 9]]), array([[ 2,  3],
#        [ 6,  7],
#        [10, 11]])]

Numpy 拷贝: copy

import numpy as np

a = np.arange(4)
b = a  # 两者指向同一数据,改变a,b也会跟着变
print(b is a)  # True

b = a.copy()  # 拷贝,a, b没有关系,会有单独副本

Pandas 介绍

  • numpy 的 array 相当于多维 list
  • pandas 类似字典,每一行和每一列都可以自定义命名
  • NaN: not a number(非数字,用来表示缺失值)

Pandas 多种创建方式: Series, DataFrame, date_range

import numpy as np
import pandas as pd

# 从list,默认索引是0,1,2,3,类型是float64
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)
# 0     1.0
# 1     3.0
# 2     6.0
# 3     NaN
# 4    44.0
# 5     1.0
# dtype: float64

# 从numpy导入,矩阵这边叫DataFrame,可以设置索引
df = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(df)
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# 也可以设置索引,先创建索引,index
dates = pd.date_range('20211201', periods=6)
print(dates)
# DatetimeIndex(['2021-12-01', '2021-12-02', '2021-12-03', '2021-12-04',
#                '2021-12-05', '2021-12-06'],
#               dtype='datetime64[ns]', freq='D')

df = pd.DataFrame(np.random.rand(6, 4), index=dates,
                  columns=['a', 'b', 'c', 'd'])
print(df)
#                    a         b         c         d
# 2021-12-01  0.464341  0.785184  0.843978  0.683584
# 2021-12-02  0.090226  0.844951  0.882069  0.080591
# 2021-12-03  0.990635  0.975542  0.540292  0.199442
# 2021-12-04  0.628743  0.346208  0.559444  0.045485
# 2021-12-05  0.808089  0.799405  0.715815  0.133164
# 2021-12-06  0.814320  0.748949  0.460721  0.036809

# 自定义DataFrame,可以用字典来代替输入的值:字典的key代表列的索引,value代表这列的值
df = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20211201'),
    'C': pd.Series(1., index=list(range(4)), dtype='float64'),
    'D': np.array([3] * 4, dtype='int64'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo'
})
print(df)  # 行是样本,列是特征
#    A          B    C  D      E    F
# 0  1 2021-12-01  1.0  3   test  foo
# 1  1 2021-12-01  1.0  3  train  foo
# 2  1 2021-12-01  1.0  3   test  foo
# 3  1 2021-12-01  1.0  3  train  foo

Pandas 常用属性和访问操作: dtypes, index, columns, values, describe, T, sort_index, sort_values

import numpy as np
import pandas as pd

# Build a small mixed-dtype DataFrame from a dict: each key becomes a column
# name and each value supplies that column's data. The doubled "= =" in the
# earlier version was a SyntaxError.
df = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20211201'),
    'C': pd.Series(1., index=list(range(4)), dtype='float64'),
    'D': np.array([3] * 4, dtype='int64'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo'
})

# dtypes
print(df.dtypes)
# A             int64
# B    datetime64[ns]
# C           float64
# D             int64
# E          category
# F            object
# dtype: object

# index: the row labels
print(df.index)
# Int64Index([0, 1, 2, 3], dtype='int64')
# NOTE(review): the repr depends on the pandas version; newer releases may
# print Index([0, 1, 2, 3], dtype='int64') instead -- confirm locally

# columns: 所有列的名字
print(df.columns)
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

# values: 获取所有值
print(df.values, type(df.values))
# [[1 Timestamp('2021-12-01 00:00:00') 1.0 3 'test' 'foo']
#  [1 Timestamp('2021-12-01 00:00:00') 1.0 3 'train' 'foo']
#  [1 Timestamp('2021-12-01 00:00:00') 1.0 3 'test' 'foo']
#  [1 Timestamp('2021-12-01 00:00:00') 1.0 3 'train' 'foo']] <class 'numpy.ndarray'>

# describe(): summary statistics (numeric columns only)
print(df.describe())
#          A    C    D
# count  4.0  4.0  4.0
# mean   1.0  1.0  3.0
# std    0.0  0.0  0.0
# min    1.0  1.0  3.0
# 25%    1.0  1.0  3.0
# 50%    1.0  1.0  3.0
# 75%    1.0  1.0  3.0
# max    1.0  1.0  3.0

# 当做矩阵翻转
print(df.T)
#                      0                    1                    2                    3
# A                    1                    1                    1                    1
# B  2021-12-01 00:00:00  2021-12-01 00:00:00  2021-12-01 00:00:00  2021-12-01 00:00:00
# C                  1.0                  1.0                  1.0                  1.0
# D                    3                    3                    3                    3
# E                 test                train                 test                train
# F                  foo                  foo                  foo                  foo

# 排序,是针对索引进行排序
# axis=1 对列索引排序,False表示倒序
print(df.sort_index(axis=1, ascending=False))
#      F      E  D    C          B  A
# 0  foo   test  3  1.0 2021-12-01  1
# 1  foo  train  3  1.0 2021-12-01  1
# 2  foo   test  3  1.0 2021-12-01  1
# 3  foo  train  3  1.0 2021-12-01  1

# axis=0 对行索引排序,False表示倒序
print(df.sort_index(axis=0, ascending=False))
#    A          B    C  D      E    F
# 3  1 2021-12-01  1.0  3  train  foo
# 2  1 2021-12-01  1.0  3   test  foo
# 1  1 2021-12-01  1.0  3  train  foo
# 0  1 2021-12-01  1.0  3   test  foo

# 根据某一列的值进行排序
print(df.sort_values(by='E'))
#    A          B    C  D      E    F
# 0  1 2021-12-01  1.0  3   test  foo
# 2  1 2021-12-01  1.0  3   test  foo
# 1  1 2021-12-01  1.0  3  train  foo
# 3  1 2021-12-01  1.0  3  train  foo

Pandas 数据切片: loc, iloc

import numpy as np
import pandas as pd
dates = pd.date_range('20211201', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
#              A   B   C   D
# 2021-12-01   0   1   2   3
# 2021-12-02   4   5   6   7
# 2021-12-03   8   9  10  11
# 2021-12-04  12  13  14  15
# 2021-12-05  16  17  18  19
# 2021-12-06  20  21  22  23

# 选择某一列 这两种方式一样
print(df['A'])
print(df.A)
# 2021-12-01     0
# 2021-12-02     4
# 2021-12-03     8
# 2021-12-04    12
# 2021-12-05    16
# 2021-12-06    20
# Freq: D, Name: A, dtype: int64

# 选择某一行
print(df[0:3])  # 0 到 2行
#             A  B   C   D
# 2021-12-01  0  1   2   3
# 2021-12-02  4  5   6   7
# 2021-12-03  8  9  10  11

print(df['20211201':'20211203'])
#             A  B   C   D
# 2021-12-01  0  1   2   3
# 2021-12-02  4  5   6   7
# 2021-12-03  8  9  10  11

# loc: select by label 根据标签来选
print(df.loc['20211201'])
# A    0
# B    1
# C    2
# D    3
# Name: 2021-12-01 00:00:00, dtype: int64

# 纵向标签
print(df.loc[:, ['A', 'B']])
#              A   B
# 2021-12-01   0   1
# 2021-12-02   4   5
# 2021-12-03   8   9
# 2021-12-04  12  13
# 2021-12-05  16  17
# 2021-12-06  20  21

# 某一行的部分数据
print(df.loc['20211202':, ['A', 'B']])
#              A   B
# 2021-12-02   4   5
# 2021-12-03   8   9
# 2021-12-04  12  13
# 2021-12-05  16  17
# 2021-12-06  20  21

# iloc: select by position (根据下标来选)
print(df.iloc[3:5, 1:3])  # 第三行到第四行,第一列到第二列数据
#              B   C
# 2021-12-04  13  14
# 2021-12-05  17  18

print(df.iloc[[1, 3, 5], 1:3])
#              B   C
# 2021-12-02   5   6
# 2021-12-04  13  14
# 2021-12-06  21  22

# ix: mixed label/position selection. NOTE: .ix was deprecated in pandas 0.20
# and removed in pandas 1.0 (a pandas change, unrelated to Python 3.7);
# use .loc for labels and .iloc for positions instead.
# print(df.ix[:3, ['A', 'C']])

# 是或否筛选: Boolean indexing
print(df[df.A > 8])  # df.A > 8 返回的是 True or False,后面再把True的行打印出来
#              A   B   C   D
# 2021-12-04  12  13  14  15
# 2021-12-05  16  17  18  19
# 2021-12-06  20  21  22  23

Pandas 赋值,添加新列

import numpy as np
import pandas as pd
dates = pd.date_range('20211201', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
#              A   B   C   D
# 2021-12-01   0   1   2   3
# 2021-12-02   4   5   6   7
# 2021-12-03   8   9  10  11
# 2021-12-04  12  13  14  15
# 2021-12-05  16  17  18  19
# 2021-12-06  20  21  22  23

# Modify individual values

# iloc: address the cell by position
print(df.iloc[2, 2])  # 10
df.iloc[2, 2] = 111
print(df.iloc[2, 2])  # 111

# loc: address the same cell by label
print(df.loc['20211203', 'C'])  # 111
df.loc['20211203', 'C'] = 222
print(df.loc['20211203', 'C'])  # 222

# Set B to 0 on every row where A > 8. Do it in a single .loc call: the
# chained form df.B[df.A > 8] = 0 assigns through an intermediate object,
# raises SettingWithCopyWarning, and leaves df unchanged under copy-on-write.
df.loc[df.A > 8, 'B'] = 0
print(df)
#              A  B    C   D
# 2021-12-01   0  1    2   3
# 2021-12-02   4  5    6   7
# 2021-12-03   8  9  222  11
# 2021-12-04  12  0   14  15
# 2021-12-05  16  0   18  19
# 2021-12-06  20  0   22  23

# 将A这一列中小于8的这一行全赋值为0
df[df.A < 8] = 0
print(df)
#              A  B    C   D
# 2021-12-01   0  0    0   0
# 2021-12-02   0  0    0   0
# 2021-12-03   8  9  222  11
# 2021-12-04  12  0   14  15
# 2021-12-05  16  0   18  19
# 2021-12-06  20  0   22  23

# 加新的列
df['F'] = np.nan
print(df)
#              A  B    C   D   F
# 2021-12-01   0  0    0   0 NaN
# 2021-12-02   0  0    0   0 NaN
# 2021-12-03   8  9  222  11 NaN
# 2021-12-04  12  0   14  15 NaN
# 2021-12-05  16  0   18  19 NaN
# 2021-12-06  20  0   22  23 NaN

# 添加或修改原有的列,index要对应
df['D'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20211201', periods=6))
print(df)
#              A  B    C  D   F
# 2021-12-01   0  0    0  1 NaN
# 2021-12-02   0  0    0  2 NaN
# 2021-12-03   8  9  222  3 NaN
# 2021-12-04  12  0   14  4 NaN
# 2021-12-05  16  0   18  5 NaN
# 2021-12-06  20  0   22  6 NaN

# 这种直接赋值也可以
df['D'] = list(range(6, 12))
print(df)
#              A  B    C   D   F
# 2021-12-01   0  0    0   6 NaN
# 2021-12-02   0  0    0   7 NaN
# 2021-12-03   8  9  222   8 NaN
# 2021-12-04  12  0   14   9 NaN
# 2021-12-05  16  0   18  10 NaN
# 2021-12-06  20  0   22  11 NaN

Pandas 处理缺失数据 NaN: isna, isnull, fillna, dropna

import numpy as np
import pandas as pd
dates = pd.date_range('20211201', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates, columns=['A', 'B', 'C', 'D'])

# 制造NaN值
df.iloc[1, 1], df.iloc[2, 2] = np.nan, np.nan

print(df)
#              A     B     C   D
# 2021-12-01   0   1.0   2.0   3
# 2021-12-02   4   NaN   6.0   7
# 2021-12-03   8   9.0   NaN  11
# 2021-12-04  12  13.0  14.0  15
# 2021-12-05  16  17.0  18.0  19
# 2021-12-06  20  21.0  22.0  23

# isnull: 是否有缺失值 和 isna一样
print(df.isnull())
#                 A      B      C      D
# 2021-12-01  False  False  False  False
# 2021-12-02  False   True  False  False
# 2021-12-03  False  False   True  False
# 2021-12-04  False  False  False  False
# 2021-12-05  False  False  False  False
# 2021-12-06  False  False  False  False

# 可以这么判断 np.any 表至少一个元素等于 True
print(np.any(df.isnull() == True))  # True
print(np.any(df.isnull() is True))  # False
print(np.any(df.isnull()))  # True


# fillna: 替换NaN数据
print(df.fillna(value=99))
#              A     B     C   D
# 2021-12-01   0   1.0   2.0   3
# 2021-12-02   4  99.0   6.0   7
# 2021-12-03   8   9.0  99.0  11
# 2021-12-04  12  13.0  14.0  15
# 2021-12-05  16  17.0  18.0  19
# 2021-12-06  20  21.0  22.0  23


df.iloc[1, 1], df.iloc[2, 2] = np.nan, np.nan

# dropna: 丢弃含NaN的数据,axis=0 行,axis=1 列; any 有任何一个NaN就丢, all 所有数据都为NaN时才丢弃
print(df.dropna(axis=0, how='any'))
#              A     B     C   D
# 2021-12-01   0   1.0   2.0   3
# 2021-12-04  12  13.0  14.0  15
# 2021-12-05  16  17.0  18.0  19
# 2021-12-06  20  21.0  22.0  23

Pandas 处理文件

支持的文件: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

image-20211201205403450

import pandas as pd

# Create a small CSV file to read back with pandas
csv_file = './test.csv'

# The with-block guarantees the file is flushed and closed even if a write
# fails part-way through
with open(csv_file, 'w', encoding='utf-8') as writer:
    writer.write('id,name,num\n')  # header row
    for i in range(10):
        writer.write(f'{i},{i},{i}\n')

# 读取,会自动加上行索引
data = pd.read_csv(csv_file)
print(data)
#    id  name  num
# 0   0     0    0
# 1   1     1    1
# 2   2     2    2
# 3   3     3    3
# 4   4     4    4
# 5   5     5    5
# 6   6     6    6
# 7   7     7    7
# 8   8     8    8
# 9   9     9    9

# 存储
data.to_pickle('./test.pickel')

data_pickel = pd.read_pickle('./test.pickel')
print(data_pickel)
#    id  name  num
# 0   0     0    0
# 1   1     1    1
# 2   2     2    2
# 3   3     3    3
# 4   4     4    4
# 5   5     5    5
# 6   6     6    6
# 7   7     7    7
# 8   8     8    8
# 9   9     9    9

Pandas 合并: concat append

import numpy as np
import pandas as pd
from pandas.io.spss import read_spss

# 准备数据
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
print(df1)
print(df2)
print(df3)
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
#      a    b    c    d
# 0  1.0  1.0  1.0  1.0
# 1  1.0  1.0  1.0  1.0
# 2  1.0  1.0  1.0  1.0
#      a    b    c    d
# 0  2.0  2.0  2.0  2.0
# 1  2.0  2.0  2.0  2.0
# 2  2.0  2.0  2.0  2.0

# concat: axis=0 stacks the frames vertically; ignore_index=True discards the
# original row labels and renumbers the result from 0
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  1.0  1.0  1.0  1.0
# 4  1.0  1.0  1.0  1.0
# 5  1.0  1.0  1.0  1.0
# 6  2.0  2.0  2.0  2.0
# 7  2.0  2.0  2.0  2.0
# 8  2.0  2.0  2.0  2.0

# 部分重合的数据
df4 = pd.DataFrame(np.ones((3, 4))*0, index=[1, 2, 3], columns=['a', 'b', 'c', 'd'])
df5 = pd.DataFrame(np.ones((3, 4))*1, index=[2, 3, 4], columns=['b', 'c', 'd', 'e'])
print(df4)
print(df5)
#      a    b    c    d
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  0.0  0.0  0.0  0.0
#      b    c    d    e
# 2  1.0  1.0  1.0  1.0
# 3  1.0  1.0  1.0  1.0
# 4  1.0  1.0  1.0  1.0

# 直接合并,默认join='outer',默认axis=0
res = pd.concat([df4, df5])
print(res)
#      a    b    c    d    e
# 1  0.0  0.0  0.0  0.0  NaN
# 2  0.0  0.0  0.0  0.0  NaN
# 3  0.0  0.0  0.0  0.0  NaN
# 2  NaN  1.0  1.0  1.0  1.0
# 3  NaN  1.0  1.0  1.0  1.0
# 4  NaN  1.0  1.0  1.0  1.0

res = pd.concat([df4, df5], axis=1)
print(res)
#      a    b    c    d    b    c    d    e
# 1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
# 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
# 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
# 4  NaN  NaN  NaN  NaN  1.0  1.0  1.0  1.0

# 直接合并,使用join='inner',裁剪相同的部分,同样可以加ignore
res = pd.concat([df4, df5], join='inner')
print(res)
#      b    c    d
# 1  0.0  0.0  0.0
# 2  0.0  0.0  0.0
# 3  0.0  0.0  0.0
# 2  1.0  1.0  1.0
# 3  1.0  1.0  1.0
# 4  1.0  1.0  1.0

# Append rows at the end. DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0, so use pd.concat, which produces the same result.
res = pd.concat([df1, df2, df3], ignore_index=True)
print(res)
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  1.0  1.0  1.0  1.0
# 4  1.0  1.0  1.0  1.0
# 5  1.0  1.0  1.0  1.0
# 6  2.0  2.0  2.0  2.0
# 7  2.0  2.0  2.0  2.0
# 8  2.0  2.0  2.0  2.0

# Add a single new row of data
s1 = pd.Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(s1)
# a    0
# b    1
# c    2
# d    3
# dtype: int64

# Turn the Series into a one-row frame first (to_frame().T) so that its index
# labels line up with the column names of df1
print(pd.concat([df1, s1.to_frame().T], ignore_index=True))
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  0.0  1.0  2.0  3.0

Pandas 合并 merge

  • on: 有相同的属性(列名)
import pandas as pd

# 准备数据 有一列列名相同
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K3', 'K4'],
    'A': ['A0', 'A1', 'A3', 'A4'],
    'B': ['B0', 'B1', 'B3', 'B4']
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K3', 'K4'],
    'C': ['C0', 'C1', 'C3', 'C4'],
    'D': ['D0', 'D1', 'D3', 'D4']
})
print(left)
print(right)
#   key   A   B
# 0  K0  A0  B0
# 1  K1  A1  B1
# 2  K3  A3  B3
# 3  K4  A4  B4
#   key   C   D
# 0  K0  C0  D0
# 1  K1  C1  D1
# 2  K3  C3  D3
# 3  K4  C4  D4

# 在key上合并
res = pd.merge(left, right, on='key')
print(res)
#   key   A   B   C   D
# 0  K0  A0  B0  C0  D0
# 1  K1  A1  B1  C1  D1
# 2  K3  A3  B3  C3  D3
# 3  K4  A4  B4  C4  D4

# 准备数据: 有两列列名相同
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A3', 'A4'],
    'B': ['B0', 'B1', 'B3', 'B4']
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C3', 'C4'],
    'D': ['D0', 'D1', 'D3', 'D4']
})
print(left)
print(right)
#   key1 key2   A   B
# 0   K0   K0  A0  B0
# 1   K0   K1  A1  B1
# 2   K1   K0  A3  B3
# 3   K2   K1  A4  B4
#   key1 key2   C   D
# 0   K0   K0  C0  D0
# 1   K1   K0  C1  D1
# 2   K1   K0  C3  D3
# 3   K2   K0  C4  D4

res = pd.merge(left, right, on=['key1', 'key2'])  # 默认是how='inner'
print(res)  # key1 key2 都相同时,把其他的对应元素也拷贝下来
#   key1 key2   A   B   C   D
# 0   K0   K0  A0  B0  C0  D0
# 1   K1   K0  A3  B3  C1  D1
# 2   K1   K0  A3  B3  C3  D3

# how options shown in this section: 'left', 'right', 'outer', 'inner'
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res)  # key pairs from both sides are all kept; missing values become NaN
#   key1 key2    A    B    C    D
# 0   K0   K0   A0   B0   C0   D0
# 1   K0   K1   A1   B1  NaN  NaN
# 2   K1   K0   A3   B3   C1   D1
# 3   K1   K0   A3   B3   C3   D3
# 4   K2   K1   A4   B4  NaN  NaN
# 5   K2   K0  NaN  NaN   C4   D4

res = pd.merge(left, right, on=['key1', 'key2'], how='left')
print(res)  # 拿left的key去right中找,找到几个留几个,找不到用NaN填充
#   key1 key2   A   B    C    D
# 0   K0   K0  A0  B0   C0   D0
# 1   K0   K1  A1  B1  NaN  NaN
# 2   K1   K0  A3  B3   C1   D1
# 3   K1   K0  A3  B3   C3   D3
# 4   K2   K1  A4  B4  NaN  NaN

res = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res)
#   key1 key2    A    B   C   D
# 0   K0   K0   A0   B0  C0  D0
# 1   K1   K0   A3   B3  C1  D1
# 2   K1   K0   A3   B3  C3  D3
# 3   K2   K0  NaN  NaN  C4  D4

# indicator: 显示合并方式
res = pd.merge(left, right, on=['key1', 'key2'], how='right', indicator=True)
print(res)
#   key1 key2    A    B   C   D      _merge
# 0   K0   K0   A0   B0  C0  D0        both
# 1   K1   K0   A3   B3  C1  D1        both
# 2   K1   K0   A3   B3  C3  D3        both
# 3   K2   K0  NaN  NaN  C4  D4  right_only

# indicator: 指定列名
res = pd.merge(left, right, on=['key1', 'key2'], how='right', indicator='tset-name')
print(res)
#   key1 key2    A    B   C   D   tset-name
# 0   K0   K0   A0   B0  C0  D0        both
# 1   K1   K0   A3   B3  C1  D1        both
# 2   K1   K0   A3   B3  C3  D3        both
# 3   K2   K0  NaN  NaN  C4  D4  right_only
  • 通过index合并
# 准备数据
left = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2']
}, index=['K0', 'K1', 'K2'])
right = pd.DataFrame({
    'C': ['C0', 'C1', 'C3'],
    'D': ['D0', 'D1', 'D3']
}, index=['K0', 'K2', 'K3'])
print(left)
print(right)
#      A   B
# K0  A0  B0
# K1  A1  B1
# K2  A2  B2
#      C   D
# K0  C0  D0
# K2  C1  D1
# K3  C3  D3

# 通过index合并,left_index right_index 默认是 False,为True之后不在考虑列的索引
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
print(res)
#       A    B    C    D
# K0   A0   B0   C0   D0
# K1   A1   B1  NaN  NaN
# K2   A2   B2   C1   D1
# K3  NaN  NaN   C3   D3

res = pd.merge(left, right, left_index=True, right_index=True, how='inner')
print(res)
#     A   B   C   D
# K0  A0  B0  C0  D0
# K2  A2  B2  C1  D1
  • 相同列名,加后缀
import pandas as pd

# 准备数据 两个数据表的有属性名一样
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3], 'test_1': [1, 2, 2]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 3], 'test_2': [2, 2, 2]})
print(boys)
print(girls)
#     k  age  test_1
# 0  K0    1       1
# 1  K1    2       2
# 2  K2    3       2
#     k  age  test_2
# 0  K0    4       2
# 1  K0    5       2
# 2  K3    3       2

# 如果列名相同会加后缀,不同就不需要
res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girls'], how='outer')
print(res)
#     k  age_boy  test_1  age_girls  test_2
# 0  K0      1.0     1.0        4.0     2.0
# 1  K0      1.0     1.0        5.0     2.0
# 2  K1      2.0     2.0        NaN     NaN
# 3  K2      3.0     2.0        NaN     NaN
# 4  K3      NaN     NaN        3.0     2.0

Pandas 画图

image-20211202160513188

image-20211202160535453

image-20211202160444206

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Series
# 生成1000个随机数,下标是[0, 1000)
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()  # 逐步累加,并保留值
data.plot()  # 显示上去了,加载数据
plt.show()

# DataFrame
# 1000个数据,每个数据4个属性,分别为ABDC
data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list('ABDC'))
data = data.cumsum()

# 打印前5个数据
print(data.head(5))
#           A         B         D         C
# 0  1.137261 -0.151256 -1.768632 -0.553897
# 1  0.824339  0.570760 -2.483564  0.190463
# 2  0.488407 -0.699785 -1.906383  0.053245
# 3 -1.073358 -2.450119 -2.178400  0.119925
# 4 -0.588325 -2.945741 -4.263403 -1.111191

data.plot()  # plot 中可以设置图形参数
plt.show()

# Other plot methods: bar, hist, box, kde, area, scatter, hexbin, pie
# Draw two scatter series on the same axes: the first call returns its axes
# object, which is handed to the second call through ax=
pic = data.plot.scatter(x='A', y='B', color='DarkBlue', label='class 1')  # label typo 'calss 1' fixed
data.plot.scatter(x='A', y='C', color='DarkGreen', label='class 2', ax=pic)
data.plot()
plt.show()

Matplotlib 基本使用

image-20211202164933649

import matplotlib.pyplot as plt
import numpy as np

# [-1, 1] 分成50份
x = np.linspace(-1, 1, 50)
y = x * 2 + 1
plt.plot(x, y)  # 设置数据和属性
plt.show()      # 画图

Matplotlib Figure

  • 默认是显示在一个figure上,可以手动创建,设置大小、名字
  • 多个figure时,创建某个figure之后的属性设置代码都作用于这个figure,直到创建下一个figure为止

image-20211202170022103

image-20211202170003513

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y1 = x * 2 + 1
y2 = x ** 2

plt.figure()  # 创建一个figure,这个下面设置的数据和属性都是个画板的
plt.plot(x, y1)  

plt.figure('pic', figsize=(8, 5))  # 创建第二个画板,并给个名字,设置大小
plt.plot(x, y2)

# 在第二个画板加入y1,设置红色,虚线
plt.plot(x, y1, color='red', linewidth=3, linestyle='--')

plt.show()   

Matplotlib 坐标轴设置

image-20211203110204297

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y1 = x * 2 + 1
y2 = x ** 2

# 在图形中加入y1,设置红色,虚线
plt.plot(x, y1, color='red', linewidth=3, linestyle='--')
# 加入y2
plt.plot(x, y2)

# 设置坐标轴

# 设置: 取值范围
plt.xlim((-1, 2))
plt.ylim((-2, 3))

# 设置名称
plt.xlabel('x label')
plt.ylabel('y label')

# 设置分割范围,没有设置的刻度就不显示
new_ticks = np.linspace(-1, 2, 5)
plt.xticks(new_ticks)

plt.yticks(
    [-2, -1.8, -1, 1.22, 3],
    ['really bad', 'bad', 'normal', 'good', 'really good']
)
 
# Relabel the same ticks in math-text style: wrap each label in $...$, escape
# every space as '\ ', and use the r prefix (raw string, not a regex) so the
# backslashes are kept literally
plt.yticks(
    [-2, -1.8, -1, 1.22, 3],
    [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$', r'$good$', r'$really\ good$']
)

# 设置坐标轴的位置
# gca = get current axis 有四个轴,上下左右,常说的xy就是下和左
ax = plt.gca()

# 隐藏右边和上边的轴
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')

# 设置下边和左边的轴为x y
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# x轴 所在的位置是y轴的-1
ax.spines['bottom'].set_position(('data', -1))
ax.spines['left'].set_position(('data', 0))

plt.show()

Matplotlib 图例 legend

image-20211203112303603

from logging import Handler
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y1 = x * 2 + 1
y2 = x ** 2

# 设置坐标轴: 取值范围
plt.xlim((-1, 2))
plt.ylim((-2, 3))

# 设置名称
plt.xlabel('x label')
plt.ylabel('y label')

# 设置分割范围,没有设置的刻度就不显示
new_ticks = np.linspace(-1, 2, 5)
plt.xticks(new_ticks)

plt.yticks(
    [-2, -1.8, -1, 1.22, 3],
    ['really bad', 'bad', 'normal', 'good', 'really good']
)
 
# Relabel the same ticks in math-text style: wrap each label in $...$, escape
# every space as '\ ', and use the r prefix (raw string, not a regex) so the
# backslashes are kept literally
plt.yticks(
    [-2, -1.8, -1, 1.22, 3],
    [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$', r'$good$', r'$really\ good$']
)

# 在图形中加入y1,设置红色,虚线,名字
line1, = plt.plot(x, y1, color='red', linewidth=3, linestyle='--', label='up')
# 加入y2; 后面加个逗号,就解包的写法
line2, = plt.plot(x, y2, label='down')

# 图例 loc='best' 会自动选择一个地方
plt.legend(handles=[line1, line2, ], labels=['a', 'b'], loc='lower right')

# 只打印 a
plt.legend(handles=[line1,], labels=['a',], loc='lower right')

plt.show()

Matplotlib 添加注解

image-20211209165117162

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y = x * 2 + 1

plt.figure(num=1, figsize=(8, 5))
plt.plot(x, y)

ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

# 添加点(1, 3)的注释
a = 1
b = a * 2 + 1
plt.scatter(a, b, s=50, color='b')
# 画一条虚线: 黑色虚线,宽度为2.5
plt.plot([a, a], [b, 0], 'k--', lw=2.5)

# Add an annotation
# xy: the point being annotated; xycoords='data' means xy is given in data coordinates
# xytext: where the text is placed; textcoords='offset points' makes it an offset from xy
# arrowprops: the arrow linking the text and the point (arrowstyle = head style, connectionstyle = curvature)
plt.annotate(f'$2x+1={b}$', xy=(a, b), xycoords='data', xytext=(+30, -30), textcoords='offset points',
             fontsize=16, arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2'))

# 添加文本
plt.text(-3, 3, r'$this\ is\ the\ text\ \sigma_i$', fontdict={'size': 16, 'color': 'r'})

plt.show()

Matplotlib 坐标轴的标签防遮挡

image-20211209170345032

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-3, 3, 50)
y = x * 0.1

plt.figure(num=1, figsize=(8, 5))
plt.plot(x, y, lw=10)
plt.ylim(-2, 2)

ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

ax.xaxis.set_zorder(2)
ax.yaxis.set_zorder(2)

for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(12)
    label.set_bbox(dict(facecolor='white', edgecolor='None', alpha=0.7))
    label.set_zorder(1)
plt.show()

Matplotlib 散点图

image-20211209173628001

import matplotlib.pyplot as plt
import numpy as np

n = 1024
# 均值为0、方差为1、n个
x = np.random.normal(0, 1, n)
y = np.random.normal(0, 1, n)

# 颜色值
t = np.arctan2(y, x)

plt.scatter(x, y, s=75, c=t, alpha=0.5)
plt.xlim((-1.5, 1.5))
plt.ylim((-1.5, 1.5))

# 隐藏坐标轴标签
plt.xticks(())
plt.yticks(())

plt.show()

Matplotlib 柱状图

image-20211213131100100

import matplotlib.pyplot as plt
import numpy as np

n = 12
# 均匀分布
x = np.arange(n)
y = (1 - x/float(n)) * np.random.uniform(0.5, 1.0, n)

plt.bar(x, y, facecolor='#9999ff', edgecolor='white')
plt.bar(x, -y, facecolor='#ff9999', edgecolor='white')

# 设置标签
for xv, yv in zip(x, y):
    # ha: 水平对齐
    plt.text(xv, yv+0.05, '%.2f' % yv, ha='center', va='bottom')
    plt.text(xv, -yv-0.05, '%.2f' % -yv, ha='center', va='top')

plt.xlim(-0.5, n)
plt.ylim(-1.25, 1.25)

# 隐藏坐标轴标签
plt.xticks(())
plt.yticks(())

plt.show()

Matplotlib 等高线图

image-20211213155828770

import matplotlib.pyplot as plt
import numpy as np

def getHeight(x, y):
    """Return the surface height at (x, y) used by the contour demo.

    Computes (1 - x/2 + x**5 + y**3) * exp(-x**2 - y**2). The arithmetic is
    element-wise, so scalars and NumPy arrays are both accepted.
    """
    polynomial = 1 - x/2 + x**5 + y**3
    envelope = np.exp(-x**2 - y**2)
    return polynomial * envelope

n = 256
x = np.linspace(-3, 3, n)
y = np.linspace(-3, 3, n)

# 设置网格
X, Y = np.meshgrid(x, y)

# 添加颜色
plt.contourf(X, Y, getHeight(X, Y), 8, alpha=0.75, cmap=plt.cm.hot)

# 添加等高线
C = plt.contour(X, Y, getHeight(X, Y), 8, colors='black', linewidths=0.5)

# 在线旁边添加标签
plt.clabel(C, inline=True, fontsize=10)

# 隐藏坐标轴标签
plt.xticks(())
plt.yticks(())

plt.show()

Matplotlib 图片

image-20211213160830160

import matplotlib.pyplot as plt
import numpy as np

# image data
a = np.array([0.313660827978, 0.365348418405, 0.423733120134,
              0.365348418405, 0.439599930621, 0.525083754405,
              0.423733120134, 0.525083754405, 0.651536351379]).reshape(3, 3)

plt.imshow(a, interpolation='nearest', cmap='bone', origin='upper')

# 颜色条的长度为90%
plt.colorbar(shrink=0.9)

plt.xticks(())
plt.yticks(())
plt.show()

Matplotlib 3D图形

image-20211213163840309

import matplotlib.pyplot as plt
import numpy as np
# 导入3D
from mpl_toolkits.mplot3d import Axes3D

# Create the figure window
fig = plt.figure()

# Add 3-D axes to the figure. Axes3D(fig) no longer attaches itself to the
# figure in recent Matplotlib releases (the plot would come out empty), so
# request the '3d' projection through the figure instead.
ax = fig.add_subplot(111, projection='3d')

# X,Y的值
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(x, y)
Z = np.sin(np.sqrt(X**2 + Y**2))

# 添加数据: 三个轴的数据;cstride 是线和线的跨度;;edgecolor 显示黑线,默认不显示
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'), edgecolor='black')

# 设置轴
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
ax.set_zlim(-2, 2)

# 加上等高线
# zdir 设置从上压下去,offset 表示压到z=-2这个面上来
ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
ax.contourf(X, Y, Z, zdir='x', offset=-4, cmap='rainbow')
ax.contourf(X, Y, Z, zdir='y', offset=4, cmap='rainbow')

plt.show()

Matplotlib 一个窗口显示多张子图 Subplot

image-20211213170305330

import matplotlib.pyplot as plt
import numpy as np

plt.figure()

# 按2行1列分,这个占一行
plt.subplot(2, 1, 1)
plt.plot([0, 1], [0, 1])

# 按2行3列分,这个在第4个位置,后面依次
plt.subplot(234)
plt.plot([0, 1], [0, 2])
plt.subplot(235)
plt.plot([0, 1], [0, 3])
plt.subplot(236)
plt.plot([0, 1], [0, 4])

plt.show()

Matplotlib 多子图:subplot2grid、gridspec

image-20211213182006229

image-20211213184223079

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as girdspec

# figure 1
plt.figure()

# 分成3X3,从0,0开始,占1行3列
ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3, rowspan=1)
ax1.plot([1, 2], [1, 2])
ax1.set_title('subplot2grid')

ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2, rowspan=1)
ax3 = plt.subplot2grid((3, 3), (1, 2), colspan=1, rowspan=2)
ax4 = plt.subplot2grid((3, 3), (2, 0), colspan=1, rowspan=1)
ax5 = plt.subplot2grid((3, 3), (2, 1), colspan=1, rowspan=1)

# figure 2
plt.figure()
gs = girdspec.GridSpec(3, 3)

# 第一行,全部列;负数是倒着数
ax1 = plt.subplot(gs[0, :])
ax1.set_title('girdspec')
plt.subplot(gs[1, :2])
plt.subplot(gs[1:, 2])
plt.subplot(gs[-1, 0])
plt.subplot(gs[-1, -2])

plt.show()

Matplotlib 图中图

image-20211213190743279

import matplotlib.pyplot as plt
import numpy as np

# figure
fig = plt.figure()
x = np.arange(1, 8)
y = [1, 3, 4, 2, 5, 8, 6]

# 图的大小 百分比 起始位置+宽高
l, b, w, h = 0.1, 0.1, 0.8, 0.8
ax1 = fig.add_axes([l, b, w, h])

# 外面大图
ax1.plot(x, y, 'r')

# 小图 1
l, b, w, h = 0.2, 0.6, 0.25, 0.25
ax2 = fig.add_axes([l, b, w, h])
ax2.plot(x, y, 'b')

# 小图 2
plt.axes([0.6, 0.2, 0.25, 0.25])
# y[::-1] -1表示从后往前,间隔1
plt.plot(x, y[::-1], 'g')

plt.show()

Matplotlib 主次坐标轴

image-20211213191529032

import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0, 10, 0.1)
y1 = 0.05 * x**2
y2 = -1 * y1

fig, ax1 = plt.subplots()

# ax2 用 ax1 的镜像轴
ax2 = ax1.twinx()

ax1.plot(x, y1, 'g-')
ax2.plot(x, y2, 'r--')

ax1.set_xlabel('x')
ax1.set_ylabel('y1')
ax2.set_ylabel('y2')

plt.show()

Matplotlib 动画 Animation

image-20211213193020856

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation

x = np.arange(0, 2*np.pi, 0.01)

fig, ax = plt.subplots()
line, = ax.plot(x, np.sin(x))

def animationFunc(i):
    """Redraw the line for frame number i and return the updated artists."""
    # Shift the sine wave's phase by i/10 for this frame
    shifted = np.sin(x+i/10)
    line.set_ydata(shifted)
    return (line,)

def initAnimationFunc():
    """Reset the line to the unshifted sine wave and return the artists."""
    baseline = np.sin(x)
    line.set_ydata(baseline)
    return (line,)

# 创建动画: frames 表示多少帧,init_func 最开始什么样子; interval 频率,多少毫秒; blit 表示是否只更新变动的数据
ani = animation.FuncAnimation(fig=fig, func=animationFunc, frames=100, init_func=initAnimationFunc, interval=20, blit=False)

plt.show()
  人工智能 最新文章
2022吴恩达机器学习课程——第二课(神经网
第十五章 规则学习
FixMatch: Simplifying Semi-Supervised Le
数据挖掘Java——Kmeans算法的实现
大脑皮层的分割方法
【翻译】GPT-3是如何工作的
论文笔记:TEACHTEXT: CrossModal Generaliz
python从零学(六)
详解Python 3.x 导入(import)
【答读者问27】backtrader不支持最新版本的
上一篇文章      下一篇文章      查看所有文章
加:2021-12-14 15:56:46  更:2021-12-14 15:57:03 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/27 0:40:02-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码