?1?定义2个DataFrame对象,指定相同的‘key’进行merge合并
#定义2个DataFrame对象
In [6]:
left = pd.DataFrame({'A':['A1','A2','A3','A4'],'B':['B1','B2','B3','B4'],'key':['K1','K2','K3','K4']})
right = pd.DataFrame({'C':['C1','C2','C3','C4'],'D':['D1','D2','D3','D4'],'key':['K1','K2','K3','K4']})
print (left)
print (right)
A B key
0 A1 B1 K1
1 A2 B2 K2
2 A3 B3 K3
3 A4 B4 K4
C D key
0 C1 D1 K1
1 C2 D2 K2
2 C3 D3 K3
3 C4 D4 K4
#形参left,right也可以不写,直接传入实参left,right
In [7]:
pd.merge(left = left,right = right)
Out[7]:
A B key C D
0 A1 B1 K1 C1 D1
1 A2 B2 K2 C2 D2
2 A3 B3 K3 C3 D3
3 A4 B4 K4 C4 D4
In [11]:
主键进行合并,结果如下
pd.merge(left = left,right = right,on='key')
# on='key',表示以‘key’为主键进行合并,结果如下
Out[11]:
A B key C D
0 A1 B1 K1 C1 D1
1 A2 B2 K2 C2 D2
2 A3 B3 K3 C3 D3
3 A4 B4 K4 C4 D4
2?定义2个DataFrame,key1,key2相同
#定义2个DataFrame,key1,key2相同
In [13]:
left = pd.DataFrame({'A':['A1','A2','A3','A4'],'B':['B1','B2','B3','B4'],'key1':['K1','K2','K3','K4'],'key2':['K1','K2','K3','K4']})
right = pd.DataFrame({'C':['C1','C2','C3','C4'],'D':['D1','D2','D3','D4'],'key1':['K1','K2','K3','K4'],'key2':['K1','K2','K3','K4']})
print (left)
print (right)
A B key1 key2
0 A1 B1 K1 K1
1 A2 B2 K2 K2
2 A3 B3 K3 K3
3 A4 B4 K4 K4
C D key1 key2
0 C1 D1 K1 K1
1 C2 D2 K2 K2
2 C3 D3 K3 K3
3 C4 D4 K4 K4
#left和right中以key1进行合并,但是left和right中都用key2,此时会加_x,_y进行区分,如下
In [14]:
pd.merge(left,right,on='key1')
Out[14]:
A B key1 key2_x C D key2_y
0 A1 B1 K1 K1 C1 D1 K1
1 A2 B2 K2 K2 C2 D2 K2
2 A3 B3 K3 K3 C3 D3 K3
3 A4 B4 K4 K4 C4 D4 K4
3可以指定2个主键进行匹配,如on=['key1','key2']
#可以指定2个主键进行匹配,如on=['key1','key2']
In [15]:
pd.merge(left,right,on=['key1','key2'])
Out[15]:
A B key1 key2 C D
0 A1 B1 K1 K1 C1 D1
1 A2 B2 K2 K2 C2 D2
2 A3 B3 K3 K3 C3 D3
3 A4 B4 K4 K4 C4 D4
4?#此时left和right中的key1相同,但是key不同
In [16]:
#此时left和right中的key1相同,但是key不同
left = pd.DataFrame({'A':['A1','A2','A3','A4'],'B':['B1','B2','B3','B4'],'key1':['K1','K2','K3','K4'],'key2':['K1','K2','K3','K4']})
right = pd.DataFrame({'C':['C1','C2','C3','C4'],'D':['D1','D2','D3','D4'],'key1':['K1','K2','K3','K4'],'key2':['K1','K2','K3','K5']})
print (left)
print (right)
A B key1 key2
0 A1 B1 K1 K1
1 A2 B2 K2 K2
2 A3 B3 K3 K3
3 A4 B4 K4 K4
C D key1 key2
0 C1 D1 K1 K1
1 C2 D2 K2 K2
2 C3 D3 K3 K3
3 C4 D4 K4 K5
In [17]:
#此时left和right中的key1相同,但是key不同,如果merge函数以key1,和key2进行合并
#观察可知,相同的key1和key2组合被保留了下来,相当于做了交集
#此时left和right中的key1相同,但是key不同,如果merge函数以key1,和key2进行合并
#观察可知,相同的key1和key2组合被保留了下来,相当于做了交集
In [18]:
pd.merge(left,right,on=['key1','key2'])
pd.merge(left,right,on=['key1','key2'])
Out[18]:
A B key1 key2 C D
0 A1 B1 K1 K1 C1 D1
1 A2 B2 K2 K2 C2 D2
2 A3 B3 K3 K3 C3 D3
In [ ]:
#此时left和right中的key1相同,但是key不同,如果merge函数以key1,和key2进行合并
#如果做并集,merge()函数需要将how参数指定为'outer'
#how不仅可以传入'outer',还可以传入"lefg','right'
In [20]:
pd.merge(left,right,on=['key1','key2'],how='outer')#没有数据的则用nan进行填充
Out[20]:
A B key1 key2 C D
0 A1 B1 K1 K1 C1 D1
1 A2 B2 K2 K2 C2 D2
2 A3 B3 K3 K3 C3 D3
3 A4 B4 K4 K4 NaN NaN
4 NaN NaN K4 K5 C4 D4
In [23]:
pd.merge(left,right,on=['key1','key2'],how='left')#以左表为基准
Out[23]:
A B key1 key2 C D
0 A1 B1 K1 K1 C1 D1
1 A2 B2 K2 K2 C2 D2
2 A3 B3 K3 K3 C3 D3
3 A4 B4 K4 K4 NaN NaN
In [25]:
pd.merge(left,right,on=['key1','key2'],how='right')#以右表为基准
Out[25]:
A B key1 key2 C D
0 A1 B1 K1 K1 C1 D1
1 A2 B2 K2 K2 C2 D2
2 A3 B3 K3 K3 C3 D3
3 NaN NaN K4 K5 C4 D4
In [22]:
#指定指示器,显示到底是叫交集还是并集
pd.merge(left,right,on=['key1','key2'],how='outer',indicator=True)
Out[22]:
A B key1 key2 C D _merge
0 A1 B1 K1 K1 C1 D1 both
1 A2 B2 K2 K2 C2 D2 both
2 A3 B3 K3 K3 C3 D3 both
3 A4 B4 K4 K4 NaN NaN left_only
4 NaN NaN K4 K5 C4 D4 right_only
|