import numpy as np
import pandas as pd
path=r"C:\Users\Tsinghua-yincheng\Desktop\SZday74"
pop=pd.read_csv(path+"\\"+"state-population.csv")
areas=pd.read_csv(path+"\\"+"state-areas.csv")
abbrevs=pd.read_csv(path+"\\"+"state-abbrevs.csv")
pop.head(5)

areas.head(5)

abbrevs.head(5)

display(pop.shape,areas.shape,abbrevs.shape)

merged=pd.merge(pop,abbrevs,how="outer",
left_on="state/region",
right_on="abbreviation")
merged

merged=merged.drop("abbreviation",axis=1)
merged

merged.isnull().any()

merged[merged["population"].isnull()]

merged.loc[merged["state"].isnull(),"state/region"]

merged.loc[merged["state"].isnull(),"state/region"].unique()

merged.loc[merged["state/region"]=="PR","state"]="Puerto Rico"
merged.loc[merged["state/region"]=="USA","state"]=\
"United States"
merged.isnull().any()

merged

final=pd.merge(merged,areas,on="state",how="left")
final

final.shape

final.isnull().any()

final["state"][final["area (sq. mi)"].isnull()]

final["state"][final["area (sq. mi)"].isnull()].unique()

final.dropna(inplace=True)
final

final.isnull().any()

final

data2010=final.query("year==2010 & ages=='total'")
data2010

data2010.shape

data2010.set_index("state",inplace=True)
data2010

density=data2010["population"].div(data2010["area (sq. mi)"])
density

density.sort_values(ascending=False,inplace=True)
density.head(10)

density.tail(10)

|