import os
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import linregress
import matplotlib.pyplot as plt
import seaborn as sns
# Read worksheet 'Sheet1' of e3.xlsx into the module-level frame `df`,
# parsing at most 3605 rows, then print the dtype of every column.
df = pd.read_excel(
    'e3.xlsx',
    sheet_name='Sheet1',
    nrows=3605,
)
print(df.dtypes)
'''
Batch-read the worksheets and merge them into one large DataFrame
sheet_name=['Sheet'+str(i) for i in range(1,3)]
data_all=pd.DataFrame()
print(sheet_name)
for i in sheet_name:
data=pd.read_excel('e1.xlsx',sheet_name=i)
data_all=pd.concat([data_all,data],axis=0,ignore_index=True)
'''
# # Save
# df.to_excel()
# Data filtering
# print(df[['sex','age']][:5])
# Usage of loc and iloc
# miss_rate=pd.DataFrame(df.apply(lambda x:sum(x.isnull()/len(x))))
# miss_rate.columns=['缺失率']
# miss_rate1=miss_rate[miss_rate['缺失率']>0]['缺失率'].apply(lambda x:format(x,'.3%'))
# print(miss_rate1) # missing rate
# data1=df[df['CYC'].isnull()].iloc[:10]
# print(data1)
# index_num=df[df['CYC']>0.8,df['sex']==1].index
# print(index_num)
# data=df.loc[df['sex']==1]
# print(data)
'''
# Conditional queries
df2=df[(df['sex']==1)&(df['age']>60)&(df['CREAT']>0.7&(df['CYC']<=0.8))]
print(df2)
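print(df['age'].between(60,70,inclusive=True)) # check whether each age lies between 60 and 70 and print the result
df['age'].isin(['60']) # check whether the age is 60
df['CREAT'].str.contains('N') # check whether the value contains the character 'N'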
'''
# Add, delete, update, and query
'''
# df['eGFR']=df['sex']*df['age'] # add a column
df['Scr']='肌酐'
print(df.columns) # print the columns
# df.drop(['Scr','eGFR'],axis=1,inplace=True) # axis=1 drops by column, axis=0 drops by row
del df['Scr']
mid=144*pow(((df['CREAT'])/0.7),-0.329)*pow(0.993,df['age'])
df.insert(5,'eGFR',mid) # arguments: 5 is the insert position (column index counted from 0), 'eGFR' is the column name, mid is the data
'''
'''
# Filtering
def sex_s(a):
return a==0
def level_a(s):
return s>0.7
dfs=df.loc[df['sex'].apply(lambda x:x==0)].loc[df['CREAT'].apply(lambda x:x>0.7)] # using lambda expressions
df['eGFR']=144*pow(((df['CREAT'])/0.7),-0.329)*pow(0.993,df['age'])
x=df['mGFR']
y=df['eGFR']
plt.scatter(x,y,marker='.',edgecolors='red')
plt.show()
'''
# df1=df.loc[df['sex'].apply(lambda x:x==0)].loc[df['CREAT'].apply(lambda x:x>0.7)]
# df1['eGFR']=144*pow(((df1['CREAT'])/0.7),-0.329)*pow(0.993,df1['age'])
# print(df1)
# df2=df.loc[df['sex'].apply(lambda x:x==1)].loc[df['CREAT'].apply(lambda x:x<=0.7)]
# df2['eGFR']=144*pow(((df2['CREAT'])/0.7),-1.209)*pow(0.993,df2['age'])
# df3=pd.concat([df1,df2],ignore_index=True)
# print(df3.isnull())