数据展示
import pandas as pd
import numpy as np
import json
from collections import Counter
# Load the spreadsheet, then report its column labels and dimensions.
data = pd.read_excel("work/Test.xlsx")
rows, col = data.shape
user_index = [label for label in data.columns.values]
summary = "\n 一共有:{}行,{}列".format(rows, col)
print(user_index, summary)
# head(-3) selects every row except the last three. As a bare expression its
# value is discarded in a plain script (an interactive notebook would show it).
data.head(-3)
部分数据如下所示:
代码处理
查找特定列,某一个值出现过多少次
def maturity_count(data_col, val):
    """Count how many entries of a column are equal to a given value.

    Args:
        data_col: A single column of the table (a pandas Series), e.g.
            ``data.pid`` for the ``pid`` column.
        val: The value to look for in ``data_col``.

    Returns:
        int: Number of entries equal to ``val``; 0 when ``val`` never occurs.
    """
    # Compare element-wise and count the matches. Looking ``val`` up in
    # ``value_counts()`` raises KeyError for a value that never occurs, so an
    # absent value is reported as 0 here instead.
    return int((data_col == val).sum())
# Report how often each pid of interest occurs; every pid keeps its own
# message template so the printed text is unchanged.
pid_queries = (
    (7, "查找pid这一列中7出现的次数为:{}次"),
    (8, "查找pid这一列中8出现的次数为:{}次"),
)
for pid_value, message in pid_queries:
    count = maturity_count(data.pid, pid_value)
    print(message.format(count))
输出为:
查找pid这一列中7出现的次数为:3次
查找pid这一列中8出现的次数为:7次
计算x,y,z坐标的平均值
def pd_to_list(data, line):
    """Return one row of a DataFrame as a flat Python list.

    Args:
        data: The pandas DataFrame to read from (with row and column labels).
        line: Zero-based positional index of the row to extract.

    Returns:
        list: The values of row ``line`` in column order, e.g.
        ``[1624865249825.0, 316.0, 351.0, 32.0, 107.0, 4.0, 0.9925, 0.3903,
        1.7187, 0.2296, 2970.0, -1.0]``.
    """
    # ``iloc[[line]]`` keeps the row as a one-row frame, so ``.values`` is a
    # 2-D array and ``tolist()`` yields ``[[...]]``. Taking element 0 unwraps
    # it without the ``_flatten`` helper, which is neither defined nor
    # imported in the visible part of this file.
    single_row = data.iloc[[line]]
    return single_row.values.tolist()[0]
def get_id_conf_xyz(data_col, val, *, frame=None, columns=slice(3, 6)):
    """Collect selected column values from every row whose key equals ``val``.

    Args:
        data_col: The column used as the lookup key (a pandas Series), e.g.
            ``data.pid`` for the ``pid`` column.
        val: The value to search for in ``data_col``.
        frame: The DataFrame the rows are read from. Defaults to the
            module-level ``data`` table, which is what this function always
            used before the parameter existed.
        columns: Positional column selection applied to each matching row
            (a slice or a sequence of integer positions). Defaults to
            positions 3, 4 and 5.

    Returns:
        list: One inner list per matching row, in row order, holding the
        selected column values. Empty when no row matches.

    NOTE(review): the earlier description of this function mentioned the
    confidence value plus x, y, z (four values per row), while the default
    selection yields three columns. Confirm the intended column positions
    against the spreadsheet layout.
    """
    if frame is None:
        # Backward-compatible default: read from the module-level table.
        frame = data
    # Positions of the rows whose key column equals ``val``.
    matching_rows = np.flatnonzero(data_col.to_numpy() == val)
    # Select all matching rows at once, then keep only the requested columns.
    selected = frame.iloc[matching_rows].to_numpy()
    return selected[:, columns].tolist()
def conf_xyz_average(conf_xyzList):
    """Average a list of coordinate rows column by column.

    Args:
        conf_xyzList: The value returned by ``get_id_conf_xyz``: a list of
            rows that share the same id, each holding x, y and z.

    Returns:
        tuple: ``(x, y, z)``, the mean of the first, second and third column.
    """
    # Averaging along axis 0 collapses the rows and leaves one mean per column.
    column_means = np.asarray(conf_xyzList).mean(axis=0)
    return column_means[0], column_means[1], column_means[2]
# Collect the coordinate rows recorded for pid 8 and report their mean.
conf_xyzList = get_id_conf_xyz(data.pid, 8)
averages = conf_xyz_average(conf_xyzList)
avg_x, avg_y, avg_z = averages
print("x,y,z的数值分别为 : \n ", conf_xyzList)
print("avg_x, avg_y, avg_z的平均值分别为 : ", *averages)
输出为:
x,y,z的数值分别为 :
[[0.4524, 2.0891, 0.2752],
[0.4704, 2.1208, 0.1704],
[0.4522, 1.9888, 0.3099],
[0.5596, 2.1749, 0.1986],
[0.4374, 2.0295, 0.3439],
[0.4973, 1.9068, 0.2064],
[0.4909, 2.0664, 0.2256]]
avg_x, avg_y, avg_z的平均值分别为 : 0.4800285714285714 2.053757142857143 0.24714285714285714
|