# 1. if else 简洁用法 — concise if/else (conditional expression)
# Two sample values to compare.
a, b = 10, 5
# Conditional expression: picks the first string when a > b, otherwise the
# second one (which therefore also covers the a == b case).
r = "a更大" if a > b else "b更大"
# 2. 循环用法 — loops (while / for)
# Counter-controlled while loop: the body runs for i = 1..5.
i = 1
while i <= 5:
    print("当前是第%d次循环" % (i))
    print("i=%d" % i)
    # Advance the counter inside the loop body; otherwise the condition
    # i <= 5 would stay true forever.
    i += 1
# Iterate over the characters of a string and stop at the first '在'.
name = "世界杯在召唤我"
for x in name:
    print('----')
    if x == '在':
        # break leaves the loop immediately, so '在' itself and everything
        # after it are never printed.
        break
    print(x)
# 3. 数据清洗笔记 — data-cleaning notes (pandas)
import numpy as np
import pandas as pd
# Load the order workbook into a DataFrame. The path is an absolute Windows
# path, so this only runs on a machine that has the file at this location.
excel_path = r"D:\学习\python学习\00 数据分析二期课程课件\WEEK9 Python数据清洗\code&data\data\user_orders.xlsx"
df = pd.read_excel(excel_path)

# --- First look at the data -------------------------------------------------
# Bare expressions like these only display a result in an interactive
# session / notebook; in a plain script their values are discarded.
df.head()        # first rows
df.shape         # (row count, column count)
df.index         # row labels
df.info()        # column dtypes and non-null counts (prints directly)
df.describe()    # summary statistics

# --- Missing values and duplicates -------------------------------------------
df.isnull().sum()            # missing-value count per column
dup_mask = df.duplicated()   # True for rows that repeat an earlier row
dup_mask
dup_mask.sum()               # number of duplicated rows

# --- Selecting rows and columns ----------------------------------------------
# .loc is label-based; the slice 5:10 includes both end labels.
df.loc[5:10, ["name", "age"]]

# Use the "name" column as the index so rows can be looked up by name.
df1 = df.set_index("name")
df1.head()
df1.loc["李小胆李l", :]

# .iloc is position-based; the end of a slice is excluded.
df.iloc[:, :5]   # first five columns
df.iloc[:5]      # first five rows

# --- Random sampling -----------------------------------------------------------
df1.sample(frac=0.01)             # random 1% of the rows
df1.sample(n=2, axis=1)           # two random columns
df1.sample(n=2, random_state=420) # two random rows, reproducible via the seed
# Work on a copy so the cleaning steps below do not modify df itself.
df3 = df.copy()

# --- Type conversion -----------------------------------------------------------
df3['id'] = df3['id'].astype('str')
df3.info()
# Strip the currency sign from both ends of each value, then convert to float.
df3['custom_amt'] = df3['custom_amt'].str.strip('¥').astype('float')
# Parse dates written like "2020年01月31日".
df3['order_date'] = pd.to_datetime(df3['order_date'], format="%Y年%m月%d日")

# --- Duplicates ----------------------------------------------------------------
# Without inplace=True this returns a new frame and leaves df3 unchanged
# (preview only).
df3.drop_duplicates()
# Actually remove duplicated rows and renumber the index 0..n-1.
df3.drop_duplicates(inplace=True, ignore_index=True)

# --- Outliers ------------------------------------------------------------------
df3.describe([0.99])              # summary statistics including the 99th percentile
df3.loc[df3["age"] >= 200, :]     # inspect rows with an implausible age
# NOTE(review): 118 is a hard-coded index label, presumably the outlier row
# shown by the filter above — confirm against the data before reusing.
df3 = df3.drop(index=118)
df3.loc[~(df3["age"] > 200), :]   # alternative: keep rows whose age is not above 200

# --- Missing values --------------------------------------------------------------
df3.isnull().mean()                        # share of missing values per column
df3.drop(columns='edu')                    # preview without 'edu'; df3 is not modified
df3.loc[:, df3.isnull().mean() <= 0.5]     # keep columns with at most 50% missing
df3["age"].mean()
# Both fills below return a new Series; assign the result back to df3["age"]
# to keep it.
df3["age"].fillna(df3["age"].mean())       # fill gaps with the column mean
# Forward fill. Series.ffill() replaces fillna(method='ffill'), whose
# `method` argument is deprecated in pandas 2.x.
df3["age"].ffill()
# 4. 读取excel中的多个sheet(通用函数) — read all sheets of an Excel workbook (generic helper)
def get_allsheets_data(excel_dir):
    """Read every sheet of an Excel workbook into one combined DataFrame.

    Args:
        excel_dir: Location of the workbook; passed straight to
            ``pandas.read_excel``.

    Returns:
        A DataFrame holding the rows of all sheets stacked in the order
        pandas returns them, with the index renumbered from 0. An empty
        DataFrame is returned if no sheets were read.
    """
    # sheet_name=None makes pandas return {sheet name: DataFrame} for all
    # sheets in a single read, so the workbook does not have to be opened
    # separately just to list its sheet names.
    sheets = pd.read_excel(excel_dir, sheet_name=None)
    if not sheets:
        # pd.concat raises on an empty list; keep the "empty result" contract.
        return pd.DataFrame()
    # One concat over all sheets instead of growing a frame sheet by sheet.
    return pd.concat(list(sheets.values()), ignore_index=True)
|