Python数据预处理——将有关联的数据写入CSV文件,为画图作准备
目标格式如下:每行是一条关联记录,包含 id、name、follow_id、follow_name 四列。
测试代码:
from pandas import DataFrame
# Demo: build two hand-written relation rows and dump them to a CSV file.
# `authors_id` and `empty` are initialised here but not referenced again
# in this snippet.
authors_id = {}
empty = []

# One dict per row; key order determines the column order of the frame.
authors = [
    {
        'id': 123456,
        'name': 'xiaotang',
        'follow_id': 1234567,
        'follow_name': 'xiaotangtang',
    },
    {
        'id': 12345678,
        'name': 'xiaotang123',
        'follow_id': 123456789,
        'follow_name': 'xiaotangtang456',
    },
]
# Keep `author_name` bound to the most recently added row.
author_name = authors[-1]

print(authors)

# Write the rows to a.csv using pandas' default to_csv options.
data_frame = DataFrame(data=authors)
data_frame.to_csv("a.csv")
筛选数据集中的数据:
from pandas import DataFrame
import json
import numpy as np
def collect_author_fields(author_dic):
    """Return parallel lists of ids and names for one record's authors.

    Args:
        author_dic: Iterable of per-author entries, each expected to be a
            mapping with ``'_id'`` and ``'name'`` keys (assumed shape of
            clean_data.txt -- confirm against the data).

    Returns:
        A tuple ``(author_id_now, author_name_now)`` of two equally long
        lists. A missing ``'_id'`` or ``'name'`` (or an entry that cannot be
        indexed by key) is recorded as ``[]`` in that slot, so position ``k``
        in both lists always refers to the same author.
    """
    author_id_now = []
    author_name_now = []
    for author in author_dic:
        try:
            author_id_now.append(author['_id'])
        except (KeyError, TypeError):
            author_id_now.append([])
        try:
            author_name_now.append(author['name'])
        except (KeyError, TypeError):
            # The placeholder goes into the *name* list; putting it in the id
            # list would shift every later id/name pairing.
            author_name_now.append([])
    return author_id_now, author_name_now


def build_follow_rows(dic):
    """Expand each record's author list into ordered author-pair rows.

    For every record, each ordered pair of distinct authors ``(j, t)``
    produces one row with keys ``'id'``, ``'name'``, ``'follow_id'`` and
    ``'follow_name'``. All records and all authors are visited, including
    the last one of each.

    Args:
        dic: Sequence of record mappings, as parsed from clean_data.txt.

    Returns:
        A tuple ``(authors, empty)``: ``authors`` is the list of row dicts and
        ``empty`` holds the indices of records whose ``'authors'`` value is
        ``None``. Records without an ``'authors'`` key contribute no rows and
        are not listed in ``empty``.
    """
    authors = []
    empty = []
    for leng_dic, record in enumerate(dic):
        author_dic = record.get('authors', [])
        if author_dic is None:
            empty.append(leng_dic)
            continue
        author_id_now, author_name_now = collect_author_fields(author_dic)
        pairs = list(zip(author_id_now, author_name_now))
        for j, (author_id, name) in enumerate(pairs):
            for t, (follow_id, follow_name) in enumerate(pairs):
                if j == t:
                    # An author is never paired with themselves.
                    continue
                authors.append({
                    'id': author_id,
                    'name': name,
                    'follow_id': follow_id,
                    'follow_name': follow_name,
                })
    return authors, empty


if __name__ == "__main__":
    # The context manager closes the file even if JSON parsing fails.
    with open('clean_data.txt', encoding='UTF-8') as file:
        dic = json.load(file)
    authors, empty = build_follow_rows(dic)
    data_frame = DataFrame(data=authors)
    data_frame.to_csv("tu1.csv")
部分记录中的作者缺少 _id 或 name 关键字,这样的作者直接跳过,不写入结果:
from pandas import DataFrame
import json
import numpy as np
def collect_complete_authors(author_dic):
    """Return parallel id/name lists for authors that have both fields.

    Args:
        author_dic: Iterable of per-author entries, each expected to be a
            mapping with ``'_id'`` and ``'name'`` keys (assumed shape of
            clean_data.txt -- confirm against the data).

    Returns:
        A tuple ``(author_id_now, author_name_now)`` of two equally long
        lists. An entry missing either key (or that cannot be indexed by
        key) is skipped entirely.
    """
    author_id_now = []
    author_name_now = []
    for author in author_dic:
        try:
            # Read both fields before appending either one, so a missing
            # 'name' cannot leave an orphan id that shifts later pairings.
            author_id = author['_id']
            name = author['name']
        except (KeyError, TypeError):
            continue
        author_id_now.append(author_id)
        author_name_now.append(name)
    return author_id_now, author_name_now


def build_complete_follow_rows(dic):
    """Expand each record's complete authors into ordered author-pair rows.

    For every record, each ordered pair of distinct authors ``(j, t)`` that
    both carry ``'_id'`` and ``'name'`` produces one row with keys ``'id'``,
    ``'name'``, ``'follow_id'`` and ``'follow_name'``. All records and all
    authors are visited, including the last one of each.

    Args:
        dic: Sequence of record mappings, as parsed from clean_data.txt.

    Returns:
        A tuple ``(authors, empty)``: ``authors`` is the list of row dicts and
        ``empty`` holds the indices of records whose ``'authors'`` value is
        ``None``. Records without an ``'authors'`` key contribute no rows and
        are not listed in ``empty``.
    """
    authors = []
    empty = []
    for leng_dic, record in enumerate(dic):
        author_dic = record.get('authors', [])
        if author_dic is None:
            empty.append(leng_dic)
            continue
        author_id_now, author_name_now = collect_complete_authors(author_dic)
        pairs = list(zip(author_id_now, author_name_now))
        for j, (author_id, name) in enumerate(pairs):
            for t, (follow_id, follow_name) in enumerate(pairs):
                if j == t:
                    # An author is never paired with themselves.
                    continue
                authors.append({
                    'id': author_id,
                    'name': name,
                    'follow_id': follow_id,
                    'follow_name': follow_name,
                })
    return authors, empty


if __name__ == "__main__":
    # The context manager closes the file even if JSON parsing fails.
    with open('clean_data.txt', encoding='UTF-8') as file:
        dic = json.load(file)
    authors, empty = build_complete_follow_rows(dic)
    data_frame = DataFrame(data=authors)
    data_frame.to_csv("tu1.csv")
|