Numpy 再来一遍
配合机器学习食用更佳。
import numpy as np
一、Numpy的属性
- ndim 维度
- shape 形状
- dtype 类型
- size 大小
# Build a 2-D array from a nested list (two rows of three values) and display it.
array = np.array([[1,2,3],
[4,5,6]])
print(array)
[[1 2 3]
[4 5 6]]
print(array.ndim)
2
print(array.shape)
(2, 3)
print(array.dtype)
int64
print(array.size)
6
二、创建array
- 指定数据属性
- 创建一维数据、二维数据
- zeros 全0、 ones 全1、 empty 未初始化(内容是内存中的任意值,不保证接近0)、 arange 等差一维数组
- reshape 改变矩阵形状
a = np.array([1,2,3],dtype=np.int32)
print(a)
[1 2 3]
# np.float was only an alias of the builtin float and was removed in NumPy 1.24,
# where it raises AttributeError. np.float64 names the same 64-bit float dtype explicitly.
b = np.array([1,2,3],dtype=np.float64)
print(b.dtype)
float64
c = np.array([1,2,3])
print(c)
[1 2 3]
d = np.array([[1,2,3],[4,5,6]])
print(d)
[[1 2 3]
[4 5 6]]
zero = np.zeros((2,3))
print(zero)
[[0. 0. 0.]
[0. 0. 0.]]
one = np.ones((3,4))
print(one)
[[1. 1. 1. 1.]
[1. 1. 1. 1.]
[1. 1. 1. 1.]]
empty = np.empty((3,3))
print(empty)
[[6.92430951e-310 4.67278860e-310 6.92414190e-310]
[6.92414190e-310 6.92414190e-310 6.92414190e-310]
[6.92414190e-310 6.92414190e-310 3.95252517e-322]]
e = np.arange(5)
print(e)
[0 1 2 3 4]
f = np.arange(4,12)
print(f)
[ 4 5 6 7 8 9 10 11]
g = np.arange(4,12,2)
print(g)
[ 4 6 8 10]
h = np.arange(8).reshape(4,2)
print(h)
[[0 1]
[2 3]
[4 5]
[6 7]]
三、Numpy的运算
- ‘+ - * / // ** % >’
- np.dot(arr1,arr2) == arr1.dot(arr2) 矩阵乘法
- arr1.T == np.transpose(arr1)
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])
print(arr1,arr2)
[1 2 3] [4 5 6]
print(arr1+arr2)
[5 7 9]
print(arr1-arr2)
[-3 -3 -3]
print(arr1*arr2)
[ 4 10 18]
print(arr1/arr2)
[0.25 0.4 0.5 ]
print(arr1**arr2)
[ 1 32 729]
print(arr1 // arr2)
[0 0 0]
print(arr1 % arr2)
[1 2 3]
print(arr1 + 1)
[2 3 4]
print(arr1 * 3)
[3 6 9]
print(arr1 > 3)
[False False False]
# arr3 is 3x2 and arr4 is 2x3, so their matrix product is a 3x3 array.
arr3 = np.arange(6).reshape(3,2)
arr4 = np.arange(6).reshape(2,3)
np.dot(arr3,arr4)
array([[ 3, 4, 5],
[ 9, 14, 19],
[15, 24, 33]])
arr3.dot(arr4)
array([[ 3, 4, 5],
[ 9, 14, 19],
[15, 24, 33]])
print(arr3.T)
[[0 2 4]
[1 3 5]]
print(arr3)
[[0 1]
[2 3]
[4 5]]
print(np.transpose(arr3))
[[0 2 4]
[1 3 5]]
四、随机数生成及矩阵的统计
- np.random.random 0-1的随机数
- np.random.normal 符合正态分布的随机数
- np.random.randint(0,10,size=()) 生成取值在 [0,10) 内(包含0、不包含10)、形状为size的随机整数
- np.sum() 求和、np.mean() 求平均值 np.max() 求最大值、 np.min() 求最小值
- np.sort() 排序、np.sqrt() 开方、 np.median() 求中位数、 axis=0 沿行方向聚合(得到每一列的结果)、axis=1 沿列方向聚合(得到每一行的结果)
- np.argmin() 求最小值的下标、np.argmax() 求最大值下标
arr1 = np.random.random((2,2))
print(arr1)
[[0.70322893 0.62660424]
[0.10822154 0.70288038]]
arr2 = np.random.normal(size=(3,3))
print(arr2)
[[ 0.27050106 0.66089326 0.05234379]
[ 0.81437662 -0.41467658 2.49832908]
[-0.17560416 0.42175065 -1.02400744]]
arr3 = np.random.randint(0,10,size=(4,3))
print(arr3)
[[4 1 9]
[8 7 9]
[9 1 9]
[5 0 9]]
np.sum(arr1)
2.1409350945823276
np.min(arr1)
0.10822154146948515
np.max(arr1)
0.7032289271352641
np.sum(arr1,axis=0)
array([0.81145047, 1.32948463])
np.sum(arr1,axis=1)
array([1.32983317, 0.81110192])
np.argmin(arr1)
2
np.argmax(arr1)
0
np.mean(arr1)
0.5352337736455819
np.median(arr1)
0.6647423129887893
np.sqrt(arr1)
array([[0.83858746, 0.79158338],
[0.32897043, 0.83837962]])
# np.sort returns a sorted copy (each row ascending) and leaves its argument alone.
# Rebind arr1 to that copy: the indexing examples that follow print the sorted rows.
arr1 = np.sort(arr1)
arr1
array([[0.62660424, 0.70322893],
[0.10822154, 0.70288038]])
np.clip(arr1,0.5,0.6)
array([[0.6, 0.6],
[0.5, 0.6]])
五、Numpy索引
- 索引arr1[1,2] == arr1[1][2]
- 切片 [:,:] 逗号前面代表行,后面代表列
- 遍历行、遍历列、遍历元素
print(arr1)
[[0.62660424 0.70322893]
[0.10822154 0.70288038]]
print(arr1[0])
[0.62660424 0.70322893]
# arr1[0][1] and arr1[0,1] address the same element. print() returns None, so
# comparing two print() calls would only compare None with None; print the value once.
print(arr1[0][1])
0.7032289271352641
print(arr1[:,1])
[0.70322893 0.70288038]
print(arr1[1,1])
0.7028803828361643
# Iterating a 2-D array yields one row per step.
for i in arr1:
    print(i)
[0.62660424 0.70322893]
[0.10822154 0.70288038]
# Rows of the transpose are the columns of arr1, so this walks arr1 column by column.
for i in arr1.T:
    print(i)
[0.62660424 0.10822154]
[0.70322893 0.70288038]
arr1.reshape(1,-1)
array([[0.62660424, 0.70322893, 0.10822154, 0.70288038]])
# .flat is an iterator over every element, visited row by row.
for i in arr1.flat:
    print(i)
0.6266042431414143
0.7032289271352641
0.10822154146948515
0.7028803828361643
六、合并
- 一维数据合并 np.vstack 垂直合并、 np.hstack 水平合并
- 多维数据合并 np.concatenate((arr1,arr2)) # 默认垂直合并、np.concatenate((arr1,arr2),axis=1) # 水平合并
- 增加一个新的维度np.newaxis
- 至少变成n维数据 np.atleast_1d、np.atleast_2d、np.atleast_3d(n只能为1、2、3)
arr1 = np.arange(3)
arr2 = np.arange(3)
print(arr1,arr2)
[0 1 2] [0 1 2]
np.vstack((arr1,arr2))
array([[0, 1, 2],
[0, 1, 2]])
np.hstack((arr1,arr2))
array([0, 1, 2, 0, 1, 2])
np.concatenate((arr1,arr2))
array([0, 1, 2, 0, 1, 2])
arr3 = np.arange(4).reshape(2,2)
np.concatenate((arr3,arr3))
array([[0, 1],
[2, 3],
[0, 1],
[2, 3]])
np.concatenate((arr3,arr3),axis=1)
array([[0, 1, 0, 1],
[2, 3, 2, 3]])
arr1.T
array([0, 1, 2])
arr1[np.newaxis,:]
array([[0, 1, 2]])
arr1[:,np.newaxis]
array([[0],
[1],
[2]])
np.atleast_2d(arr1)
array([[0, 1, 2]])
七、分割
- 等分 np.split 0行1列
- 不等分 np.array_split 0行1列
- 水平分割(按列切开)np.hsplit、垂直分割(按行切开)np.vsplit
arr1 = np.arange(12).reshape(3,4)
print(arr1)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
np.split(arr1,2,axis=1)
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2, 3],
[ 6, 7],
[10, 11]])]
np.split(arr1,3,axis=0)
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
np.split(arr1,3,axis=1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/numpy/lib/shape_base.py in split(ary, indices_or_sections, axis)
866 try:
--> 867 len(indices_or_sections)
868 except TypeError:
TypeError: object of type 'int' has no len()
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-112-2e738d9e1da7> in <module>
----> 1 np.split(arr1,3,axis=1) # 水平方向分割,平均为3份
<__array_function__ internals> in split(*args, **kwargs)
/opt/conda/lib/python3.6/site-packages/numpy/lib/shape_base.py in split(ary, indices_or_sections, axis)
871 if N % sections:
872 raise ValueError(
--> 873 'array split does not result in an equal division')
874 return array_split(ary, indices_or_sections, axis)
875
ValueError: array split does not result in an equal division
np.array_split(arr1,3,axis=1)
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2],
[ 6],
[10]]), array([[ 3],
[ 7],
[11]])]
np.vsplit(arr1,3)
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
np.hsplit(arr1,2)
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2, 3],
[ 6, 7],
[10, 11]])]
八、深浅拷贝
- 赋值(如 arr2 = arr1)不会复制数据,两个变量名指向同一个数组对象,通过其中一个修改,另一个也随之变化
- arr1.copy() 深拷贝 完全独立开
arr1
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
# Plain assignment does not copy: arr2 is a second name for the same array object,
# so the write through arr2 is also visible through arr1 (see the output below).
arr2 = arr1
arr2[0] = 0
arr2
array([[ 0, 0, 0, 0],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
arr1
array([[ 0, 0, 0, 0],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
# copy() allocates an independent array, so changing arr3 leaves arr1 untouched.
arr3 = arr1.copy()
arr3[0] = 1
arr3
array([[ 1, 1, 1, 1],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
arr1
array([[ 0, 0, 0, 0],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
|