import numpy as np
import pandas as pd
# Load the four CSV pieces of the training data. The file names suggest
# left/right column halves and up/down row halves -- TODO confirm against
# the actual files. Bare expressions such as `x.head()` only display output
# when run in a notebook; in a plain script they have no visible effect.
data_1 = pd.read_csv('train-left-down.csv')
data_1.head()
data_2 = pd.read_csv('train-left-up.csv')
data_2.head()
df_1 = pd.read_csv('train-right-down.csv')
df_1.head(3)
df_2 = pd.read_csv('train-right-up.csv')
df_2.head(3)

# Approach 1: DataFrame.join places the "right" frame beside the "left" one
# (matching rows by index), then the "up" rows are stacked above the "down" rows.
result_up = data_2.join(df_2)
result_up.head(3)
result_down = data_1.join(df_1)
# DataFrame.append was removed in pandas 2.0; pd.concat with default
# arguments is the direct equivalent (row-wise, original index kept).
result = pd.concat([result_up, result_down])
result.head(3)

# Approach 2: the same combination using merge on the index of both frames.
# This rebinds result_down / result_up / result from approach 1.
result_down = data_1.merge(df_1, right_index=True, left_index=True)
result_down.head(3)
result_up = data_2.merge(df_2, right_index=True, left_index=True)
result = pd.concat([result_up, result_down])
result.head(3)

# stack() moves the column labels into the index, turning the frame into a Series.
s1 = df_1.stack()
s1
# groupby demo on a small frame with a two-level row index
arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
          ['Captive', 'Wild', 'Captive', 'Wild']]
# Pair the two label lists position-wise into (Animal, Type) index entries.
index = pd.MultiIndex.from_tuples(list(zip(*arrays)), names=['Animal', 'Type'])
df = pd.DataFrame([390., 350., 30., 20.], index=index, columns=['Max Speed'])
df
# Sum 'Max Speed' over all rows sharing the same value in the 'Type' level.
df.groupby(level='Type').sum()
# Mean fare paid by male and female Titanic passengers.
# The combined table is re-read from disk, replacing any earlier `result`.
result = pd.read_csv('result.csv')
result.head()
sex_fare = result.groupby(['Sex']).Fare.mean()
sex_fare

# Number of survivors by sex (sum of the Survived column per group).
sex_survived = result.groupby(['Sex']).Survived.sum()
sex_survived

# Number of survivors per cabin class.
result.groupby(['Pclass']).Survived.sum()

# Mean fare at each age, restricted to rows where Pclass == 1.
result[result['Pclass'] == 1].groupby(by=['Age']).Fare.mean()

# Put the two per-sex Series side by side, matched on the shared 'Sex' index.
df = pd.merge(sex_fare, sex_survived, left_index=True, right_index=True)
df

# Number of survivors at each age.
age_sur = result.groupby(['Age']).Survived.sum()
age_sur.head(3)

# Keep only the age(s) whose survivor count equals the maximum.
age_sured = age_sur[age_sur == age_sur.max()]
age_sured

# NOTE(review): the original divided by `age_sum`, which was never defined
# (NameError). It is assumed here to mean the number of passengers recorded
# at each age, so the result is a per-age survival rate. If the overall
# share was intended, use `result['Survived'].sum()` or `len(result)` instead.
age_sum = result.groupby(['Age']).Survived.count()

# Survival rate for the selected age(s). The denominator is restricted to
# those ages so index alignment does not introduce NaN rows for other ages.
sur_rate = age_sured / age_sum.loc[age_sured.index]
sur_rate