一、背景
有一天朋友跟我说,工作上统计的Excel数据弄错了,需要重新计算,手填的工作量很大,向我求助,我一听,觉得是时候展现真正技术的时候了。 简单来说就是把数据的列置换为表头,如下图 需求总结:
1. 前置条件:原始需要拷贝到新Excel文件,不保留表头,第一行即为有用数据
2. 移除 " 汇总" 的行数据
3. 移除 "(空白)" 的行数据
4. 转换要求保留原来的行顺序结构
5. 拷贝后新文件第二列数据置换为表头(保持该项出现的顺序)
6. 填入原表格统计数据,如果原表格没有该项,填入默认值:0
7. 弹窗选择xlxs/xlx文件,输出转换后文件到选中文件目录下,文件名增加后缀 -转换结果.xlxs/xlx
8. 只要求转换第一个sheet即可
二、实现代码
import os
import tkinter as tk
from tkinter import filedialog
import tkinter.messagebox
import sys
from isodate import D_DEFAULT
import xlrd
import xlwt
file_dir = []
def get_file():
root = tk.Tk()
root.withdraw()
while True:
xlsx_path_name = filedialog.askopenfilename()
print("\n 文件路径", xlsx_path_name)
if xlsx_path_name.endswith('.xlsx') | xlsx_path_name.endswith('.xls'):
file_dir.append(xlsx_path_name)
break
else:
result = tkinter.messagebox.askretrycancel(
"進寶の无敌转换器", "正经选excel文件可以吗? 宝")
if False == result:
sys.exit()
def covert_file_header():
book = xlrd.open_workbook(file_dir[0])
output_workbook = xlwt.Workbook()
dir = os.path.dirname(file_dir[0])
file_name = os.path.basename(file_dir[0])
output_file_name = ""
if file_name.endswith(".xlsx"):
output_file_name = dir + "/" + file_name.replace(".xlsx", "-转换结果.xlsx")
else:
output_file_name = dir + "/" + file_name.replace(".xls", "-转换结果.xls")
sheet = book.sheets()[0]
nrows = sheet.nrows
ncols = sheet.ncols
print('表格总行数', nrows)
print('表格总列数', ncols)
row_list = []
row_dict = {}
col_list = []
col_dict = {}
for row in range(nrows):
row_values = sheet.row_values(row)
cell_value_0 = str(row_values[0])
if cell_value_0.endswith(' 汇总') or cell_value_0.endswith('(空白)') or cell_value_0.endswith('总计'):
continue
if cell_value_0 != '' and row_dict.__contains__(cell_value_0) == False:
row_list.append(cell_value_0)
row_dict[cell_value_0] = {}
cell_value_1 = str(row_values[1])
cell_value_0 = row_list[-1]
if cell_value_1 != '' and col_dict.__contains__(cell_value_1) == False:
col_list.append(cell_value_1)
col_dict[cell_value_1] = cell_value_1
value_dict = row_dict[cell_value_0]
value_dict[cell_value_1] = str(row_values[2])
worksheet = output_workbook.add_sheet(sheet.name)
for c in range(len(col_list)):
worksheet.write(0, c + 1, col_list[c])
for r in range(len(row_list)):
c_row = row_list[r]
c_col_dict = row_dict[c_row]
worksheet.write(r + 1, 0, c_row)
for c in range(len(col_list)):
if c_col_dict.__contains__(col_list[c]):
worksheet.write(r + 1, c + 1, c_col_dict[col_list[c]])
else:
worksheet.write(r + 1, c + 1, '0')
output_workbook.save(output_file_name)
if __name__ == '__main__':
get_file()
covert_file_header()
|