import os
import cv2
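# Train split: filtering out targets whose width/height is <= 15 px removed,
# per class id: [65857, 15275, 38775, 1372, 15, 0]
# Val split: filtering out targets whose width/height is <= 15 px removed,
# per class id: [9664, 2755, 3705, 343, 15, 0]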
# Dataset locations: source images, original YOLO-format labels, and the
# directory that receives the filtered label files.
img_path = "/data/***/datasets/ktxx-auth/images/train/"
label_path = "/data/***/datasets/ktxx-auth/labels/train/"
label_path_new = "/data/***/datasets/ktxx-auth/labels/train-clean/"

# A box is removed when it is at most this wide OR at most this tall (pixels).
# Change these two values to adjust which targets are filtered out.
MIN_BOX_WIDTH = 30
MIN_BOX_HEIGHT = 15

# Number of removed boxes per class id (grown on demand for larger ids).
count = [0, 0, 0, 0, 0, 0]
# Alternative 6-class naming:
# labels = ['person', 'dogcart', 'car', 'tricycle', 'plate', 'face']
labels = ['person', 'car', 'plate', 'face']


def class_name(cls):
    """Return the display name for class id ``cls``.

    ``labels`` may hold fewer names than there are class ids in the data, so
    ids without a name fall back to ``"class <id>"`` instead of raising
    ``IndexError`` while reporting.
    """
    if 0 <= cls < len(labels):
        return labels[cls]
    return "class {}".format(cls)


def clean_label_lines(lines, img_w, img_h,
                      min_w=MIN_BOX_WIDTH, min_h=MIN_BOX_HEIGHT):
    """Split YOLO label lines into kept lines and removed boxes.

    Each non-blank line is ``<cls> <x_center> <y_center> <width> <height>``
    with the four box values normalized to the image size.

    Args:
        lines: Iterable of label lines (e.g. an open text file).
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        min_w: Boxes with pixel width <= this value are removed.
        min_h: Boxes with pixel height <= this value are removed.

    Returns:
        ``(kept, removed)`` where ``kept`` is the list of original lines that
        survive (unchanged, including their line endings) and ``removed`` is a
        list of ``(cls, [x_center, y_center, width, height])`` with the box
        fields as the strings read from the file.

    Raises:
        ValueError: If a non-blank line has fewer than five fields or a field
            is not numeric.
    """
    kept = []
    removed = []
    for line in lines:
        # split() tolerates tabs / repeated spaces and strips the newline.
        items = line.split()
        if not items:
            # Blank lines carry no annotation; drop them.
            continue
        if len(items) < 5:
            raise ValueError("malformed label line: {!r}".format(line))
        cls = int(items[0])
        x_center, y_center, box_w, box_h = (float(v) for v in items[1:5])
        xmin = (x_center - box_w / 2.) * img_w
        xmax = (x_center + box_w / 2.) * img_w
        ymin = (y_center - box_h / 2.) * img_h
        ymax = (y_center + box_h / 2.) * img_h
        if xmax - xmin <= min_w or ymax - ymin <= min_h:
            removed.append((cls, items[1:5]))
            continue
        kept.append(line)
    return kept, removed


def main():
    """Filter every label file in ``label_path`` into ``label_path_new``.

    For each ``.txt`` label file the matching ``.jpg`` in ``img_path`` is read
    to obtain the pixel size, boxes that are too small are removed, and the
    remaining lines are written to a file of the same name in
    ``label_path_new``. Removed boxes are reported and tallied in ``count``.
    """
    os.makedirs(label_path_new, exist_ok=True)
    for label_file in os.listdir(label_path):
        # Check the extension first so stray files never trigger an image read.
        if not label_file.endswith(".txt"):
            continue
        stem = os.path.splitext(label_file)[0]
        image_file = os.path.join(img_path, stem + ".jpg")
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals a missing/unreadable image by returning None.
            print("Skip {}: cannot read image {}.".format(label_file, image_file))
            continue
        img_h, img_w = image.shape[:2]

        with open(os.path.join(label_path, label_file), "r", encoding="utf-8") as f1:
            kept, removed = clean_label_lines(f1, img_w, img_h)
        with open(os.path.join(label_path_new, label_file), "w", encoding="utf-8") as f2:
            f2.writelines(kept)

        for cls, box in removed:
            # Grow the tally if the data contains a class id beyond its size.
            while cls >= len(count):
                count.append(0)
            count[cls] += 1
            print("In {} clean {}, it's location is {}.".format(label_file, class_name(cls), " ".join(box)))

    print("has cleaned instance:")
    print(count)


if __name__ == "__main__":
    main()
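# NOTE: adjust the size thresholds to the targets you want to filter out; this
# script removes targets whose width is <= 30 px or whose height is <= 15 px.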