一:
import numpy as np
import pandas as pd
def rows_with_missing(frame):
    """Return the rows of *frame* that contain at least one missing value.

    Rows keep their original index labels and order. Every matching row is
    returned exactly once, including rows whose values are identical to
    another row's.
    """
    return frame[frame.isnull().any(axis=1)]


# Build a 10x6 frame of random normal values, then blank out a few regions so
# there is something to detect (the assigned None values print as NaN).
df = pd.DataFrame(np.random.randn(10, 6))
df.iloc[:4, 1] = None
df.iloc[:2, 4:6] = None
df.iloc[6, 3:5] = None
df.iloc[8, 0:2] = None
print(df)

# --- Detecting missing values ---

# Element level: a frame of the same shape holding True where the value is
# null/NA and False everywhere else.
result = df.isnull()
print(result)

# Column level: True for every column that contains at least one null/NA.
result = df.isnull().any()
print(result)

# Show only the rows that contain missing values, to pinpoint where they are.
# A row-level mask selects each such row once, so no de-duplication step is
# needed (de-duplicating would also merge distinct rows with equal values).
result = rows_with_missing(df)
print(result)

# Labels of the columns that contain a null/NA.
result = df.columns[df.isnull().any()]
print(result)

# Number of missing values in each column.
num = df.isnull().sum()
print(num)

# Number of missing values in each row.
num = df.isnull().sum(axis=1)
print(num)
          0         1         2         3         4         5
0 -0.007384       NaN -0.080993 -1.237227       NaN       NaN
1  1.714427       NaN  0.150938  0.911013       NaN       NaN
2 -0.277183       NaN -0.730419 -0.439043  0.392371  0.290128
3 -1.480452       NaN  3.166166 -0.514042  0.179483  0.020033
4  0.151948 -1.710320 -0.400083 -0.157654  1.094303 -0.343649
5  0.760926  0.029076  0.460996 -0.734838  0.887533  0.730887
6 -0.552344 -1.129041  1.132274       NaN       NaN  0.200584
7  0.369075  0.311318 -0.651598 -0.382182 -0.607464 -0.219942
8       NaN       NaN -0.502302  0.179766  0.461502 -1.395001
9 -0.696918 -1.230387  1.368480 -0.794495  0.838037 -0.314962
       0      1      2      3      4      5
0  False   True  False  False   True   True
1  False   True  False  False   True   True
2  False   True  False  False  False  False
3  False   True  False  False  False  False
4  False  False  False  False  False  False
5  False  False  False  False  False  False
6  False  False  False   True   True  False
7  False  False  False  False  False  False
8   True   True  False  False  False  False
9  False  False  False  False  False  False
0 True
1 True
2 False
3 True
4 True
5 True
dtype: bool
          0         1         2         3         4         5
0 -0.007384       NaN -0.080993 -1.237227       NaN       NaN
1  1.714427       NaN  0.150938  0.911013       NaN       NaN
2 -0.277183       NaN -0.730419 -0.439043  0.392371  0.290128
3 -1.480452       NaN  3.166166 -0.514042  0.179483  0.020033
6 -0.552344 -1.129041  1.132274       NaN       NaN  0.200584
8       NaN       NaN -0.502302  0.179766  0.461502 -1.395001
Int64Index([0, 1, 3, 4, 5], dtype='int64')
0 1
1 5
2 0
3 1
4 3
5 2
dtype: int64
0 3
1 3
2 1
3 1
4 0
5 0
6 2
7 0
8 2
9 0
dtype: int64
|