1,pycharm中批量注释功能:ctrl+/
2,numpy数组的创建
# Build an ndarray from a plain Python list and inspect its type.
a = np.array([1, 2, 3])
print(a)
print(type(a))

# Two ways to get the integers 0..9: wrap a range, or call arange directly.
b = np.array(range(10))
c = np.arange(10)
print(b)
print(c)
结果为
[1 2 3]
<class 'numpy.ndarray'>
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
3,指定数据类型
# dtype can be fixed at construction time; here a float range 4, 6, 8.
a = np.arange(4, 10, 2, dtype=float)
print(a)
print(a.dtype)

# The dtype may also be given by its string name.
a = np.array(range(10), dtype='int8')
print(a)
print(a.dtype)

# astype() hands back a converted array; `a` keeps its int8 dtype.
b = a.astype('int32')
print(b)
print(b.dtype)
结果为:
[4. 6. 8.]
float64
[0 1 2 3 4 5 6 7 8 9]
int8
[0 1 2 3 4 5 6 7 8 9]
int32
4,numpy中的小数
# Ten floats from the standard-library generator, wrapped in an ndarray.
a = np.array([random.random() for _ in range(10)])
print(a.dtype)
print(a)

# Round every element to two decimal places; the dtype stays float64.
b = np.round(a, 2)
print(b)
print(b.dtype)
结果为:
float64
[0.43862789 0.17282301 0.62031606 0.81452307 0.01375042 0.15249559
0.7118712 0.80971245 0.90993671 0.98013062]
[0.44 0.17 0.62 0.81 0.01 0.15 0.71 0.81 0.91 0.98]
float64
5,numpy数据的形状与改变形状
# A flat array of 24 elements: shape is (24,).
a = np.arange(24)
print(a.shape)

# 24 = 2 * 3 * 4, so the same data can be viewed as a 3-D array.
b = a.reshape(2, 3, 4)
print(b)

# A nested list literal gives a (2, 2, 3) array directly.
c = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [1, 2, 3]],
])
print(c)
print(c.shape)

# reshape also accepts the target shape as a single tuple.
d = c.reshape((2, 6))
print(d)

# Any factorisation of the element count works, here 2 * 2 * 2 * 3.
e = a.reshape((2, 2, 2, 3))
print(e)
6,改变形状为一维数组
a = np.arange(24).reshape((2, 3, 4))

# 1) Write the total length out by hand.
b = a.reshape((24,))
print(b)

# 2) Derive the total length from the three axis sizes.
element_count = a.shape[0] * a.shape[1] * a.shape[2]
c = a.reshape((element_count,))
print(c)

# 3) flatten() needs no size argument at all.
d = a.flatten()
print(d)
结果为:
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
7,数组运算
a = np.arange(12).reshape((3, 4))

# A scalar operand is applied to every element.
shifted = a + 2
print(shifted)

# Division by zero does not raise here: 0/0 prints as nan and every other
# element as inf (NumPy reports this through a RuntimeWarning).
print(a / 0)
结果如下:
[[ 2 3 4 5]
[ 6 7 8 9]
[10 11 12 13]]
[[nan inf inf inf]
[inf inf inf inf]
[inf inf inf inf]]
8,数组与数组运算
a = np.arange(12).reshape((3, 4))
print(a)

# Identical shapes: the operation is applied element by element.
b = np.arange(30, 42).reshape((3, 4))
print(a * b)

# (3, 4) with (4,): c is added to every row of a.
c = np.array([1, 2, 3, 4])
print(a + c)

# (3, 4) with (3, 1): d is added to every column of a.
d = np.array([1, 2, 3]).reshape(3, 1)
e = a.reshape(2, 3, 2)
print(a + d)
print(e)

# (2, 3, 2) with (3, 1): the trailing axes line up as (3, 2) vs (3, 1),
# so d is stretched along the last axis and reused for both outer blocks.
print(e + d)

# (3, 3, 2) with (3, 3): the trailing axes are 2 and 3, neither of which is 1,
# so these shapes cannot be broadcast. Catch the error and print it so the
# demo shows the failure instead of aborting with a traceback.
f = np.arange(18).reshape((3, 3, 2))
g = np.arange(9).reshape((3, 3))
try:
    print(f + g)
except ValueError as err:
    print(err)
结果为
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[ 0 31 64 99]
[136 175 216 259]
[304 351 400 451]]
[[ 1 3 5 7]
[ 5 7 9 11]
[ 9 11 13 15]]
[[ 1 2 3 4]
[ 6 7 8 9]
[11 12 13 14]]
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]]
[[[ 1 2]
[ 4 5]
[ 7 8]]
[[ 7 8]
[10 11]
[13 14]]]
9,numpy读取数据
us_file_path = './youtube_video_data/US_video_data_numbers.csv'
gb_file_path = './youtube_video_data/GB_video_data_numbers.csv'

# Load the comma-separated US file as integers; unpack=False keeps one
# file row per array row.
t1 = np.loadtxt(us_file_path, delimiter=',', dtype='int', unpack=False)
print(t1)
print(type(t1))

# --- selecting rows ---
print(t1[2])              # a single row
print(t1[2:4])            # a contiguous run of rows
print(t1[[2, 8, 10]])     # an arbitrary list of rows

# --- selecting columns, and rows and columns together ---
print(t1[:, 0])           # one column
print(t1[0:2, 0])         # first two rows of that column
print(t1[[2, 10, 8], :])  # listed rows, in the listed order
print(t1[:, 2:])          # every column from index 2 onwards
print(t1[:, [0, 2]])      # an arbitrary list of columns

# A pair of integer indices yields a single element, not an array.
a = t1[2, 3]
print(a)
print(type(a))

# Slices on both axes yield a sub-array.
b = t1[2:5, 1:4]
print(b)
print(type(b))

# Two index lists are paired up: this picks (0, 0), (2, 1) and (2, 3).
c = t1[[0, 2, 2], [0, 1, 3]]
print(c)
10,二维数据的转置
a = np.arange(24).reshape(4, 6)
print(a)

# Three spellings of the same 2-D transpose.
print(a.transpose())
print(a.T)
print(a.swapaxes(1, 0))
11,numpy中数值的修改
a = np.arange(24).reshape(4, 6)
print(a)

# Assigning to a slice overwrites columns 2 and 3 in place.
a[:, 2:4] = 0
print(a)

# A comparison produces a boolean mask; indexing with it selects the
# elements that are overwritten.
below_ten = a < 10
print(below_ten)
a[below_ten] = 3
print(a)

# where(cond, x, y): x where the condition holds, y elsewhere.
b = np.where(a < 10, 1, 20)
print(b)

# nan and inf are floating-point values, so convert before storing them.
a = a.astype(float)
a[3, 3] = np.nan
a[3, 4] = np.inf
print(a)

# clip() limits values to [10, 20]; inf becomes 20 while nan stays nan.
c = a.clip(10, 20)
print(c)
结果如下:
[[ 0 1 2 3 4 5]
[ 6 7 8 9 10 11]
[12 13 14 15 16 17]
[18 19 20 21 22 23]]
[[ 0 1 0 0 4 5]
[ 6 7 0 0 10 11]
[12 13 0 0 16 17]
[18 19 0 0 22 23]]
[[ True True True True True True]
[ True True True True False False]
[False False True True False False]
[False False True True False False]]
[[ 3 3 3 3 3 3]
[ 3 3 3 3 10 11]
[12 13 3 3 16 17]
[18 19 3 3 22 23]]
[[ 1 1 1 1 1 1]
[ 1 1 1 1 20 20]
[20 20 1 1 20 20]
[20 20 1 1 20 20]]
[[ 3. 3. 3. 3. 3. 3.]
[ 3. 3. 3. 3. 10. 11.]
[12. 13. 3. 3. 16. 17.]
[18. 19. 3. nan inf 23.]]
[[10. 10. 10. 10. 10. 10.]
[10. 10. 10. 10. 10. 11.]
[12. 13. 10. 10. 16. 17.]
[18. 19. 10. nan 20. 20.]]
Process finished with exit code 0
12,nan与inf,常用统计函数
# nan never compares equal to anything, itself included.
print(np.nan == np.nan)
print(np.nan != np.nan)

a = np.arange(24).reshape(4, 6)
a = a.astype(float)
a[3, 3] = np.nan
print(a)

# count_nonzero counts every entry that is not zero (nan is not zero).
print(np.count_nonzero(a))
# Two ways to count the nan entries: self-inequality, or isnan().
print(np.count_nonzero(a != a))
nan_mask = np.isnan(a)
print(nan_mask)
print(np.count_nonzero(np.isnan(a)))

# Overwrite the nan entries with 0.
a[np.isnan(a)] = 0
print(a)

# Put the nan back: the total printed below then includes a nan entry.
a[3, 3] = np.nan
print(np.sum(a))

# axis=0 gives one result per column, axis=1 one result per row.
b = np.arange(12).reshape(3, 4)
print(np.sum(b, axis=0))
print(np.sum(b, axis=1))

# Column-wise statistics of the array that still holds one nan.
for stat in (np.mean, np.median, np.max, np.min, np.ptp, np.std):
    print(stat(a, axis=0))
13,实现nan类型的替换
def fill_nan(t):
    """Replace each NaN in a 2-D array with the mean of its column.

    The mean is taken over the non-NaN entries of the same column. The array
    is modified in place and also returned, so the call can be chained.

    Args:
        t: 2-D ndarray, indexed as ``t[row, column]``. It needs a float
            dtype to contain NaN at all.

    Returns:
        The same array object ``t``, with NaN entries filled in.

    A column that holds only NaN has no valid values to average and is left
    unchanged.
    """
    for i in range(t.shape[1]):
        # Basic slicing returns a view, so writes to temp_col land in t.
        temp_col = t[:, i]
        # NaN is the only value that is not equal to itself.
        nan_mask = temp_col != temp_col
        if not nan_mask.any():
            continue
        valid_values = temp_col[~nan_mask]
        if valid_values.size == 0:
            # Nothing to average; skipping avoids the mean of an empty slice.
            continue
        temp_col[nan_mask] = valid_values.mean()
    return t
14,总结
15,练习
import numpy as np
from matplotlib import pyplot as plt

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Read only the last column of the US file (presumably comment counts;
# confirm against the CSV layout).
t_us = np.loadtxt(us_file_path, delimiter=',', dtype='int', usecols=[-1])

# Keep values below 5000 so the large values do not stretch the histogram.
t_us = t_us[t_us < 5000]
largest, smallest = t_us.max(), t_us.min()
print(largest, smallest)

# Bin count = value range divided by the intended bin width d.
d = 250
bin_nums = (largest - smallest) // d

plt.hist(t_us, bin_nums)
plt.grid()
plt.show()
import numpy as np
from matplotlib import pyplot as plt

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_us = np.loadtxt(us_file_path, delimiter=',', dtype='int')

# Keep only the rows whose column 1 value is below 100000.
keep_rows = t_us[:, 1] < 100000
t_us = t_us[keep_rows]

# Column 1 goes on the x axis and the last column on the y axis.
t_us_like = t_us[:, 1]
t_us_comment = t_us[:, -1]

plt.scatter(t_us_like, t_us_comment)
plt.show()
16,数据拼接
t1 = np.arange(12).reshape(3, 4)
t2 = np.arange(12, 24).reshape(3, 4)

# vstack puts t2 underneath t1; hstack puts it to the right of t1.
print(np.vstack((t1, t2)))
print(np.hstack((t1, t2)))
print(t1)

# Swap rows 1 and 2. The index list on the right-hand side produces a copy,
# so the assignment does not read values it has already overwritten.
t1[[1, 2], :] = t1[[2, 1], :]
print(t1)

# Swap columns 2 and 3 the same way.
t1[:, [2, 3]] = t1[:, [3, 2]]
print(t1)
17,数据拼接练习:合并美国与英国的数据,并添加国家标记列(美国为0,英国为1)
import numpy as np

us_data = "./youtube_video_data/US_video_data_numbers.csv"
uk_data = "./youtube_video_data/GB_video_data_numbers.csv"

# Each name starts out as a file path and is then rebound to the loaded array.
us_data = np.loadtxt(us_data, delimiter=",", dtype=int)
uk_data = np.loadtxt(uk_data, delimiter=",", dtype=int)

# One marker column per data set: all zeros for US rows, all ones for GB rows.
us_row_count = us_data.shape[0]
uk_row_count = uk_data.shape[0]
zeros_data = np.zeros((us_row_count, 1)).astype(int)
ones_data = np.ones((uk_row_count, 1)).astype(int)

# Append the marker as a new last column of each data set.
us_data = np.hstack((us_data, zeros_data))
uk_data = np.hstack((uk_data, ones_data))

# Stack both data sets vertically, US rows first.
final_data = np.vstack((us_data, uk_data))
print(final_data)
18,numpy中的其他常用方法与随机数产生
# Ready-made arrays: identity matrix, all ones, all zeros.
print(np.eye(3))
print(np.ones((3, 4)))
print(np.zeros((3, 4)))

# Index of the largest and of the smallest element.
a = np.arange(12)
print(np.argmax(a, axis=0))
print(np.argmin(a, axis=0))

# Random arrays of shape (2, 3) from several generators.
print(np.random.rand(2, 3))               # uniform floats
print(np.random.randn(2, 3))              # standard normal floats
print(np.random.randint(0, 5, (2, 3)))    # integers from 0 up to 4
print(np.random.uniform(0, 5, (2, 3)))    # uniform floats between 0 and 5
print(np.random.normal(0, 1, (2, 3)))     # normal with mean 0, std dev 1

# With a fixed seed, the draws below are the same on every run.
np.random.seed(2)
t, t1, t2, t3 = (np.random.randint(0, 5, (2, 3)) for _ in range(4))
for sample in (t, t1, t2, t3):
    print(sample)
|