# -*- coding:utf-8 -*-
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor,wait,as_completed
def get_single_data_frame(cur_path):
    """Parse one CSV file into a pandas DataFrame.

    Args:
        cur_path: Path of the CSV file, passed straight to ``pd.read_csv``
            with default parsing options.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(cur_path)
    return frame
# Read the files of a directory concurrently on a thread pool (up to 10 workers).
def get_sample(file_dir):
    """Load every CSV file in ``file_dir`` into one concatenated DataFrame.

    Files are read concurrently on a thread pool of up to 10 workers.
    Entries whose file name contains ``_SUCCESS`` or ``_COPYING_`` are
    skipped. An entry that cannot be read is reported on stdout and left
    out of the result instead of aborting the whole load.

    Args:
        file_dir: Path of the directory whose entries are read.

    Returns:
        A DataFrame holding the rows of all successfully read files, in
        sorted file-name order, with a fresh 0-based index. An empty
        DataFrame is returned when no file could be read.

    Raises:
        OSError: If ``file_dir`` cannot be listed.
    """
    skip_markers = ("_SUCCESS", "_COPYING_")

    # Test the markers against the file name only: matching the joined path
    # would skip every file whenever the directory path contains a marker.
    # Sorting makes the row order reproducible, since os.listdir returns
    # names in arbitrary order.
    candidate_paths = [
        os.path.join(file_dir, file_name)
        for file_name in sorted(os.listdir(file_dir))
        if not any(marker in file_name for marker in skip_markers)
    ]

    data_frame_list = []
    # The context manager shuts the pool down, so no worker thread outlives
    # this call.
    with ThreadPoolExecutor(max_workers=10) as executor:
        all_task = [
            executor.submit(get_single_data_frame, cur_path)
            for cur_path in candidate_paths
        ]
        # Collect in submission order. result() blocks until the task is
        # done, and this is where a read error raised in the worker surfaces.
        for cur_path, task in zip(candidate_paths, all_task):
            try:
                data_frame_list.append(task.result())
            except Exception as err:  # best-effort: report and skip this file
                print("Error: 读取异常", cur_path, err)

    # pd.concat raises ValueError on an empty list.
    if not data_frame_list:
        return pd.DataFrame()
    return pd.concat(data_frame_list, ignore_index=True)
# Script entry: load all CSV files under ./test/ and print the combined frame.
print(get_sample(file_dir="./test/"))
|