import pandas as pd
import numpy as np
# Sample table: every column contains exactly one missing value (NaN).
# Columns "a", "c" and "d" hold numbers, column "b" holds strings.
data = {
    "a": [2, 2, np.nan, 5, 6],
    "b": ["kl", "kl", "kl", np.nan, "kl"],
    "c": [4, 6, 5, np.nan, 6],
    "d": [7, 9, np.nan, 9, 8],
}
df = pd.DataFrame(data)
print(df)
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 NaN kl 5.0 NaN
3 5.0 NaN NaN 9.0
4 6.0 kl 6.0 8.0
# Show the three basic parts of the frame, separated by dashed dividers.
# Column labels:
print("数据框中列的名称 columns = ", df.columns, "-" * 40, sep="\n")
# Row index:
print("数据框中索引 index = ", df.index, "-" * 40, sep="\n")
# Cell values as a NumPy array:
print("数据框中的值 = ", df.values, sep="\n")
数据框中列的名称 columns =
Index(['a', 'b', 'c', 'd'], dtype='object')
----------------------------------------
数据框中索引 index =
RangeIndex(start=0, stop=5, step=1)
----------------------------------------
数据框中的值 =
[[2.0 'kl' 4.0 7.0]
[2.0 'kl' 6.0 9.0]
[nan 'kl' 5.0 nan]
[5.0 nan nan 9.0]
[6.0 'kl' 6.0 8.0]]
# Drop every row that contains at least one NaN. dropna() returns a new
# frame, so `df` is printed first to show that it is left unchanged.
df1 = df.dropna(axis=0, how="any")
print(df, "*" * 40, df1, sep="\n")
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 NaN kl 5.0 NaN
3 5.0 NaN NaN 9.0
4 6.0 kl 6.0 8.0
****************************************
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
4 6.0 kl 6.0 8.0
# Replace every NaN with the number 0, whatever the column holds
# (the string column "b" also receives 0).
df2 = df.fillna(value=0)
print(df2)
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 0.0 kl 5.0 0.0
3 5.0 0 0.0 9.0
4 6.0 kl 6.0 8.0
# Replace every NaN with the string "kl", including in the numeric columns.
df3 = df.fillna(value="kl")
print(df3)
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 kl kl 5.0 kl
3 5.0 kl kl 9.0
4 6.0 kl 6.0 8.0
# Fill NaN with a different replacement per column: the mapping keys are
# column labels and the values are the fill values for that column.
df4 = df.fillna(
    value={"a": 0, "b": "kl", "c": 0, "d": "lk"},
)
print(df4)
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 0.0 kl 5.0 lk
3 5.0 kl 0.0 9.0
4 6.0 kl 6.0 8.0
# Per-column fill for "a" and "b" only; columns not named in the mapping
# ("c" and "d") keep their NaN values.
df5 = df.fillna(
    value={"a": 8888, "b": "kkkk"},
)
print(df5)
a b c d
0 2.0 kl 4.0 7.0
1 2.0 kl 6.0 9.0
2 8888.0 kl 5.0 NaN
3 5.0 kkkk NaN 9.0
4 6.0 kl 6.0 8.0
# Second sample table, with no missing values, used for the sorting examples.
data1 = {
    "a": [5, 3, 4, 1, 6],
    "b": ["d", "c", "a", "e", "q"],
    "c": [4, 6, 5, 5, 6],
}
Df = pd.DataFrame(data1)
print(Df)
a b c
0 5 d 4
1 3 c 6
2 4 a 5
3 1 e 5
4 6 q 6
# Order the rows by column "a", largest value first; each row keeps its
# original index label.
Df1 = Df.sort_values(by="a", ascending=False)
print(Df1)
a b c
4 6 q 6
0 5 d 4
2 4 a 5
1 3 c 6
3 1 e 5
# Order the rows by the string column "b" in descending order.
Df2 = Df.sort_values(by="b", ascending=False)
print(Df2)
a b c
4 6 q 6
3 1 e 5
0 5 d 4
1 3 c 6
2 4 a 5
# Re-order the value-sorted frame by its row index labels, highest label first.
Df3 = Df1.sort_index(axis=0, ascending=False)
print(Df3)
a b c
4 6 q 6
3 1 e 5
2 4 a 5
1 3 c 6
0 5 d 4
# Keep only the first four rows of Df3 (in its current row order).
H4 = Df3.head(n=4)
print(H4)
a b c
4 6 q 6
3 1 e 5
2 4 a 5
1 3 c 6
# Remove column "b"; drop() returns a new frame and leaves H4 untouched.
H41 = H4.drop(columns="b")
print(H41)
a c
4 6 6
3 1 5
2 4 5
1 3 6
# Build a one-column frame "e" and attach it as an extra column.
# NOTE: the left operand is the lower-case `df4` (the NaN-filled frame),
# not the capitalised `Df4` created on the line above.
Df4 = pd.DataFrame({"e": [1, 3, 5, 7, 9]})
Df5 = df4.join(Df4, how="left")  # rows are matched on the index labels
print(Df5)
a b c d e
0 2.0 kl 4.0 7.0 1
1 2.0 kl 6.0 9.0 3
2 0.0 kl 5.0 lk 5
3 5.0 kl 0.0 9.0 7
4 6.0 kl 6.0 8.0 9
# Five equal-length lists of different element types; list5 deliberately
# mixes strings, ints and a float.
list1 = ["a", "b", "c", "d", "e", "f"]
list2 = [1, 2, 3, 4, 5, 6]
list3 = [1.4, 3.5, 2, 6, 7, 8]
list4 = [4, 5, 6, 7, 8, 9]
list5 = ["t", 5, 6, 7, "k", 9.6]

# Collect the lists in a plain dict keyed "m1".."m5" (insertion order kept).
D = dict(m1=list1, m2=list2, m3=list3, m4=list4, m5=list5)
print(D)
{'m1': ['a', 'b', 'c', 'd', 'e', 'f'], 'm2': [1, 2, 3, 4, 5, 6], 'm3': [1.4, 3.5, 2, 6, 7, 8], 'm4': [4, 5, 6, 7, 8, 9], 'm5': ['t', 5, 6, 7, 'k', 9.6]}
# Turn the dict of lists into a DataFrame: each key becomes a column label.
# The name D is rebound from the dict to the resulting frame.
D = pd.DataFrame(data=D)
print(D)
m1 m2 m3 m4 m5
0 a 1 1.4 4 t
1 b 2 3.5 5 5
2 c 3 2.0 6 6
3 d 4 6.0 7 7
4 e 5 7.0 8 k
5 f 6 8.0 9 9.6
# NOTE: DataFrame.as_matrix() is not available in the pandas version used
# here, so this call raises AttributeError (see the traceback that follows).
# The cell is kept to show the failure; a working alternative comes after
# the traceback.
D1 = D.as_matrix()
print(D1)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-32-b12b637b5581> in <module>
----> 1 D1 = D.as_matrix()
2 print(D1) # 新版本pandas不兼容此操作
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5463 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5464 return self[name]
-> 5465 return object.__getattribute__(self, name)
5466
5467 def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'as_matrix'
# Convert the whole frame to a NumPy array. to_numpy() is the documented
# replacement for the removed as_matrix(); the former `.iloc[:, :]` full
# slice selected everything and was therefore redundant.
# Because the columns have mixed types, the result is an object array.
D1 = D.to_numpy()
print(D1)
[['a' 1 1.4 4 't']
['b' 2 3.5 5 5]
['c' 3 2.0 6 6]
['d' 4 6.0 7 7]
['e' 5 7.0 8 'k']
['f' 6 8.0 9 9.6]]
# A frame built only from the numeric lists (ints and floats), with the
# columns labelled "m1".."m3".
G = pd.DataFrame({"m1": list2, "m2": list3, "m3": list4})
print(G)
m1 m2 m3
0 1 1.4 4
1 2 3.5 5
2 3 2.0 6
3 4 6.0 7
4 5 7.0 8
5 6 8.0 9
# Convert the all-numeric frame to a NumPy array with to_numpy(); the
# former `.iloc[:, :]` full slice selected everything and was redundant.
# With only numeric columns the values come back as one float array.
G1 = G.to_numpy()
print(G1)
[[1. 1.4 4. ]
[2. 3.5 5. ]
[3. 2. 6. ]
[4. 6. 7. ]
[5. 7. 8. ]
[6. 8. 9. ]]
|