#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/4/11 下午1:51
# @Author : LiSi
# @File : test.py
# @Software: PyCharm
import os
import chardet
import logging
import argparse
import pandas as pd
import numpy as np
class UploadData:
    """Find ``.rb`` data files in a directory and parse each one into a DataFrame.

    Each file is read as bytes, decoded with the encoding reported by
    ``chardet`` and split into lines. The last non-blank line is treated as a
    closing tag (for example ``</report date='2018-08-11'>``); every line that
    contains the tag name is dropped, and the remaining tab-separated lines
    become the table (first line = column names).
    """

    def __init__(self, filePath, log_path=None):
        """Collect the candidate files and set up the logger.

        Args:
            filePath: Directory that is scanned for ``.rb`` files.
            log_path: Directory that receives ``log.txt``. When omitted, the
                module-level ``opt.log_path`` is used if the script entry
                point defined ``opt``; otherwise ``'./'``.
        """
        self.filePath = filePath
        self.filePathList = self.get_file_list()
        if log_path is None:
            # Stay compatible with the script entry point, which publishes the
            # parsed CLI options as the module-level name ``opt``.
            cli_options = globals().get('opt')
            log_path = getattr(cli_options, 'log_path', './')
        self.logger = Logger(
            log_file_name=os.path.join(log_path, "log.txt"),
            log_level=logging.DEBUG,
            logger_name="test",
        ).get_log()

    def get_file_list(self):
        """Return the sorted names of regular files in ``filePath`` ending in ``.rb``.

        The raw directory listing is kept in ``self.PathList`` and the filtered
        result in ``self.filePathList``.
        """
        self.PathList = os.listdir(self.filePath)
        # Skip directories that merely look like data files, and sort so that
        # the processing order does not depend on the operating system.
        self.filePathList = sorted(
            name
            for name in self.PathList
            if isinstance(name, str)
            and name.endswith(".rb")
            and os.path.isfile(os.path.join(self.filePath, name))
        )
        return self.filePathList

    @staticmethod
    def _parse_text(text):
        """Split decoded file content into ``(date_str, data_df)``.

        Args:
            text: Full decoded content of one data file.

        Returns:
            A tuple of the raw date fragment and a ``pandas.DataFrame`` whose
            cells are the tab-separated string fields.

        Raises:
            IndexError: If no tab-separated token mentions ``date``/``time``,
                or if no usable lines remain.
        """
        # NOTE(review): this takes everything after the first '=' of the first
        # tab-separated token mentioning 'date' or 'time', so the fragment may
        # carry quotes or trailing markup -- confirm against real files.
        date_tokens = [
            token for token in text.split('\t')
            if 'date' in token or 'time' in token
        ]
        date_str = date_tokens[0].split('=')[1]

        # Accept both CRLF and LF line endings, and ignore blank lines at the
        # end so that the closing tag really is the last line.
        lines = text.replace('\r\n', '\n').split('\n')
        while lines and not lines[-1].strip():
            lines.pop()

        # Tag name taken from the closing line, e.g. "</name date='...'>" -> "name".
        last_flag = lines[-1][:-2].split(' ')[0]
        if '/' in last_flag:
            last_flag = last_flag.split('/')[-1]
        else:
            last_flag = last_flag.split('<')[-1]

        # Table body: every line that does not mention the tag name, with the
        # '@' marker removed and fields split on tabs.
        rows = [
            line.replace('@', '').split('\t')
            for line in lines
            if last_flag not in line
        ]
        data_df = pd.DataFrame(rows[1:], columns=rows[0])
        return date_str, data_df

    def get_txt(self):
        """Read, decode and parse every file found by ``get_file_list``.

        The extracted date fragment and the head of each DataFrame are printed.

        Raises:
            Exception: If no ``.rb`` file was found.
        """
        if not self.filePathList:
            raise Exception('无有效文件,无法解析数据!')
        for i, filename in enumerate(self.filePathList):
            self.logger.info('正在解析读取第{}个文件...'.format(i+1))
            with open(os.path.join(self.filePath, filename), 'rb') as f:
                raw = f.read()
            # chardet reports encoding=None when it cannot guess (e.g. an
            # empty file); fall back to UTF-8 instead of failing in decode().
            encoding = chardet.detect(raw)['encoding'] or 'utf-8'
            date_str, data_df = self._parse_text(raw.decode(encoding))
            print(date_str)
            print(data_df.head())
class Logger(object):
    """Configure a named ``logging`` logger with a file and a console handler."""

    def __init__(self, log_file_name, log_level, logger_name):
        """Set the level of the named logger and attach its handlers.

        ``logging.getLogger`` returns the same object for the same name, so
        handlers are only added when an equivalent one is not already
        attached; otherwise every extra ``Logger(...)`` instance would make
        each message appear one more time.

        Args:
            log_file_name: Path of the log file to write to.
            log_level: Level passed to ``setLevel`` (e.g. ``logging.DEBUG``).
            logger_name: Name passed to ``logging.getLogger``.
        """
        # Firstly, fetch (or create) the logger.
        self.__logger = logging.getLogger(logger_name)
        self.__logger.setLevel(log_level)

        # Secondly, define the output format shared by both handlers.
        formatter = logging.Formatter(
            "[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s "
        )

        # Thirdly, find out which handlers are already present.
        # FileHandler stores its target as an absolute path in baseFilename.
        target_file = os.path.abspath(log_file_name)
        attached = self.__logger.handlers
        has_file_handler = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target_file
            for handler in attached
        )
        # FileHandler subclasses StreamHandler, so exclude it explicitly.
        has_console_handler = any(
            isinstance(handler, logging.StreamHandler)
            and not isinstance(handler, logging.FileHandler)
            for handler in attached
        )

        # Finally, add whichever handlers are missing (file first, then console).
        if not has_file_handler:
            file_handler = logging.FileHandler(log_file_name)
            file_handler.setFormatter(formatter)
            self.__logger.addHandler(file_handler)
        if not has_console_handler:
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            self.__logger.addHandler(console_handler)

    def get_log(self):
        """Return the configured ``logging.Logger`` instance."""
        return self.__logger
if __name__ == '__main__':
    # Command-line entry point: parse the input and log directories, then
    # parse every data file found in the input directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--file_path',
        type=str,
        default='./',
        help='files set path'
    )
    parser.add_argument(
        '--log_path',
        type=str,
        default='./',
        help='log files set path'
    )
    # NOTE: `opt` must remain a module-level name because UploadData reads
    # the log directory from it.
    opt = parser.parse_args()
    FilePath = opt.file_path
    # Run the parser over the selected directory.
    UploadData(FilePath).get_txt()
|