数据展示 
import pandas as pd
import numpy as np
import json
from collections import Counter
# Load the spreadsheet into a DataFrame; the path is relative to the
# current working directory.
data = pd.read_excel("work/Test.xlsx")
# ``shape`` is a (number of rows, number of columns) tuple.
rows = data.shape[0]
col = data.shape[1]
# Column labels in their on-sheet order.
user_index = list(data.columns.values)
print(user_index, "\n 一共有:{}行,{}列".format(rows, col))
# With a negative n, ``head`` selects every row except the last 3. As a bare
# expression the result is only displayed in an interactive/notebook session.
data.head(-3)
  
部分数据如下所示:  
   
代码处理 
查找特定列,某一个值出现过多少次 
def maturity_count(data_col, val):
    """Count how many times ``val`` occurs in one column.

    Args:
        data_col: A single column (pandas Series), e.g. ``data.pid``.
        val: The value to look for in that column.

    Returns:
        The number of entries equal to ``val``; 0 when the value does not
        occur at all.
    """
    occurrences = data_col.value_counts()
    # ``value_counts`` only has entries for values that are present, so a
    # plain ``occurrences[val]`` raises KeyError for an absent value.
    # ``get`` turns that case into a count of 0.
    return occurrences.get(val, 0)
  
# Report how often the values 7 and 8 occur in the ``pid`` column.
count = maturity_count(data.pid, 7)
print("查找pid这一列中7出现的次数为:{}次".format(count))
# ``count`` is reused for the second lookup.
count = maturity_count(data.pid, 8)
print("查找pid这一列中8出现的次数为:{}次".format(count))
  
输出为:
    查找pid这一列中7出现的次数为:3次
    查找pid这一列中8出现的次数为:7次
  
计算x,y,z坐标的平均值 
def pd_to_list(data, line):
    """Return one row of a DataFrame as a flat Python list.

    Args:
        data: pandas DataFrame to read from.
        line: Integer position of the row, as understood by ``iloc``.

    Returns:
        A list with the values of that row in column order, e.g.
        ``[1624865249825.0, 316.0, 351.0, 32.0, 107.0, 4.0, 0.9925,
        0.3903, 1.7187, 0.2296, 2970.0, -1.0]``.
    """
    # Selecting with a one-element list keeps the result two-dimensional, so
    # ``.values.tolist()`` yields ``[[...]]`` and the single inner list is the
    # row. This replaces the earlier ``_flatten`` call, a name that is neither
    # defined nor imported in this file.
    row_matrix = data.iloc[[line]].values.tolist()
    return row_matrix[0]
def get_id_conf_xyz(data_col, val, frame=None):
    """Collect three positional columns from every row whose key equals ``val``.

    Args:
        data_col: The column to match against (pandas Series), e.g.
            ``data.pid``. Matching is positional, so it is expected to have
            the same row order as the frame the values are read from.
        val: The value that selects rows in ``data_col``.
        frame: DataFrame to read the row values from. When omitted, the
            module-level ``data`` frame is used, as before.

    Returns:
        A list with one 3-element list per matching row, holding the values
        found at column positions 3, 4 and 5 of that row, e.g.
        ``[[0.4524, 2.0891, 0.2752], [0.4704, 2.1208, 0.1704]]``.
        An empty list when no row matches.

    NOTE(review): the calling script labels these values x, y, z, but the
    slice is purely positional. Confirm that the sheet really stores x, y, z
    at column positions 3..5.
    """
    source = data if frame is None else frame

    # Row positions (0-based) where the key column equals the requested value.
    positions = np.flatnonzero(data_col.to_numpy() == val)

    # One NumPy selection replaces the per-row round trip through pd_to_list;
    # rows are picked by position and columns 3..5 by slice.
    return source.to_numpy()[positions, 3:6].tolist()
def conf_xyz_average(conf_xyzList):
    """Average a list of coordinate rows column by column.

    Args:
        conf_xyzList: A list of equally long numeric rows, such as the value
            returned by ``get_id_conf_xyz``.

    Returns:
        A 3-tuple with the means of the first, second and third column.
    """
    # Averaging along axis 0 collapses the rows and leaves one mean per column.
    column_means = np.mean(np.array(conf_xyzList), axis=0)
    return tuple(column_means[i] for i in range(3))
  
# Gather the per-row values for every row whose ``pid`` is 8, then average
# them column by column.
conf_xyzList = get_id_conf_xyz(data.pid, 8)
avg_x, avg_y, avg_z = conf_xyz_average(conf_xyzList)
print("x,y,z的数值分别为 :  \n ", conf_xyzList)
print("avg_x, avg_y, avg_z的平均值分别为 : ", avg_x, avg_y, avg_z)
  
输出为:
	    x,y,z的数值分别为 :  
  					[[0.4524, 2.0891, 0.2752], 
  					[0.4704, 2.1208, 0.1704], 
  					[0.4522, 1.9888, 0.3099],
  					[0.5596, 2.1749, 0.1986], 
  					[0.4374, 2.0295, 0.3439],
  					[0.4973, 1.9068, 0.2064], 
  					[0.4909, 2.0664, 0.2256]]
  					
avg_x, avg_y, avg_z的平均值分别为 :  0.4800285714285714 2.053757142857143 0.24714285714285714
 
                
                
                
        
    
 
 |