Python取出xml文件中的部分内容
前言
最近在做目标检测方面的工作。有个项目需要 txt 格式的标签,而我之前用 labelimg 工具标注生成的是 xml 格式的文件。因为懒得手动转换,就想找个脚本自动提取出我想要的信息,但并没有找到合适的;后来参考了几位博主的文章,才逐渐弄懂如何取出自己想要的内容。
正文
我的xml示例如下(我命名为1.xml):
<annotation>
<folder>poses</folder>
<filename>1.jpg</filename>
<path>D:\pycharm\Graduate_design\poses\1.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>640</width>
<height>480</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>forward</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>241</xmin>
<ymin>85</ymin>
<xmax>436</xmax>
<ymax>404</ymax>
</bndbox>
</object>
</annotation>
假设我想要取出的是:
1.<size> 标签中 width、height 以及 depth 对应的值 640、480、3;
2.<object> 标签中的 name 字段;
3.<bndbox> 标签中 xmin、ymin、xmax、ymax 对应的内容。
代码:
import os
import xml.dom.minidom
# Build the absolute path of the sample annotation. '.' is resolved against
# the current working directory, so the script has to be run from the folder
# that contains 1.xml.
path = os.path.abspath('.')
data_path = os.path.join(path,'./1.xml')
# Parse the file once and keep its root element; get_data_vaule() reads this
# module-level `data` on every call.
DOMTree = xml.dom.minidom.parse(data_path)
data = DOMTree.documentElement
def get_data_vaule(style, typename, typevalue, valuename):
    """Return the value of the first ``valuename`` node under a ``style`` element.

    The search runs over the module-level ``data`` element (the parsed XML
    root). Every descendant named ``style`` whose ``typename`` attribute
    equals ``typevalue`` is a candidate; the first candidate that contains a
    non-empty ``valuename`` element supplies the result.

    Passing ``""`` for both ``typename`` and ``typevalue`` accepts every
    ``style`` element, because minidom's ``getAttribute`` returns ``""`` for
    an attribute that does not exist.

    Args:
        style: Tag name of the parent element to look for (e.g. ``'size'``).
        typename: Attribute name used to filter parent elements.
        typevalue: Required value of that attribute.
        valuename: Tag name of the child whose content is wanted.

    Returns:
        The ``nodeValue`` of the child's first node (a string for plain text
        content), or ``None`` when no candidate provides such a child.
    """
    for node in data.getElementsByTagName(style):
        if node.getAttribute(typename) != typevalue:
            continue
        matches = node.getElementsByTagName(valuename)
        # A missing tag or an empty one (<name></name>) has nothing to read;
        # indexing it would raise IndexError, so move on to the next candidate.
        if not matches or matches[0].firstChild is None:
            continue
        return matches[0].firstChild.nodeValue
    return None
# Image dimensions from the <size> element. Every value comes back as the
# raw text found in the XML, not as a number.
width = get_data_vaule('size', "", "", 'width')
print('width:', width)
height = get_data_vaule('size', "", "", 'height')
print('height:', height)
depth = get_data_vaule('size', "", "", 'depth')
# Label corrected: this line reports the depth value, not the width.
print('depth:', depth)

# Class label stored in <object>/<name>.
class_name = get_data_vaule('object', "", "", 'name')
print('class_name:', class_name)

# Bounding-box corners from <bndbox>.
xmin = get_data_vaule('bndbox', "", "", 'xmin')
print('xmin:', xmin)
ymin = get_data_vaule('bndbox', "", "", 'ymin')
print('ymin:', ymin)
xmax = get_data_vaule('bndbox', "", "", 'xmax')
print('xmax:', xmax)
ymax = get_data_vaule('bndbox', "", "", 'ymax')
print('ymax:', ymax)
结果:
可以看到成功取出了我想要的内容。
参考资料
1.https://blog.csdn.net/weixin_39008941/article/details/76037730?utm_medium=distribute.pc_relevant.none-task-blog-2defaultbaidujs_baidulandingword~default-0.topblog&spm=1001.2101.3001.4242.1&utm_relevant_index=3
下面这部分可以不用看了,只是我把前面提取出来的数据经过处理之后存入txt而已,我就想方便自己查找(狗头)
import os
import xml.dom.minidom
# Build the absolute path of the sample annotation. '.' is resolved against
# the current working directory, so the script has to be run from the folder
# that contains 1.xml.
path = os.path.abspath('.')
data_path = os.path.join(path,'./1.xml')
# Parse the file once and keep its root element; get_data_vaule() reads this
# module-level `data` on every call.
DOMTree = xml.dom.minidom.parse(data_path)
data = DOMTree.documentElement
def get_data_vaule(style, typename, typevalue, valuename):
    """Return the value of the first ``valuename`` node under a ``style`` element.

    The search runs over the module-level ``data`` element (the parsed XML
    root). Every descendant named ``style`` whose ``typename`` attribute
    equals ``typevalue`` is a candidate; the first candidate that contains a
    non-empty ``valuename`` element supplies the result.

    Passing ``""`` for both ``typename`` and ``typevalue`` accepts every
    ``style`` element, because minidom's ``getAttribute`` returns ``""`` for
    an attribute that does not exist.

    Args:
        style: Tag name of the parent element to look for (e.g. ``'size'``).
        typename: Attribute name used to filter parent elements.
        typevalue: Required value of that attribute.
        valuename: Tag name of the child whose content is wanted.

    Returns:
        The ``nodeValue`` of the child's first node (a string for plain text
        content), or ``None`` when no candidate provides such a child.
    """
    for node in data.getElementsByTagName(style):
        if node.getAttribute(typename) != typevalue:
            continue
        matches = node.getElementsByTagName(valuename)
        # A missing tag or an empty one (<name></name>) has nothing to read;
        # indexing it would raise IndexError, so move on to the next candidate.
        if not matches or matches[0].firstChild is None:
            continue
        return matches[0].firstChild.nodeValue
    return None
# Class label stored in <object>/<name>.
class_name = get_data_vaule('object', "", "", 'name')
print('class_name:', class_name)

# Image size: both values live under <size>, so fetch them together.
width, height = [
    get_data_vaule('size', "", "", tag) for tag in ('width', 'height')
]
print('width:', width)
print('height:', height)

# Bounding-box corners: all four live under <bndbox>.
xmin, ymin, xmax, ymax = [
    get_data_vaule('bndbox', "", "", tag)
    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
]
for _label, _value in (
    ('xmin:', xmin),
    ('ymin:', ymin),
    ('xmax:', xmax),
    ('ymax:', ymax),
):
    print(_label, _value)
# Table mapping an integer class index to its class name.
class_dict = {
    0: 'backward',
    1: 'forward',
    2: 'left',
    3: 'right',
    4: 'speed_down',
    5: 'speed_up',
    6: 'stop',
}
print(class_dict.values())
print(class_dict.keys())

# Reverse lookup (name -> index): find the position of the name among the
# values and read the key at the same position. list.index raises ValueError
# when class_name is not in the table.
_class_indices = list(class_dict.keys())
_class_names = list(class_dict.values())
label_index = _class_indices[_class_names.index(class_name)]
print(label_index)
# The values read from the XML are strings; convert them once before doing
# any arithmetic.
xmin = int(xmin)
xmax = int(xmax)
ymin = int(ymin)
ymax = int(ymax)
width = int(width)
height = int(height)

# Box centre, normalised by the image size. The centre is the midpoint of the
# two corners, (min + max) / 2; using (max - min) / 2 would give half of the
# box size instead of its position.
Cx = ((xmin + xmax) / 2) / width
Cy = ((ymin + ymax) / 2) / height
# Box width and height, normalised by the image size.
norm_w = (xmax - xmin) / width
norm_h = (ymax - ymin) / height

print('----------')
print('label_index:', label_index)
print('Cx:', Cx)
print('Cy:', Cy)
print('norm_w:', norm_w)
print('norm_h:', norm_h)
# Fields of one label line, in output order: class index, box centre x,
# box centre y, box width, box height.
write_data = [label_index,Cx,Cy,norm_w,norm_h]
txt_path = './txt/'
# Number used as the output file name (1 -> ./txt/1.txt).
start = 1
path_file_name = txt_path+str(start)+'.txt'

# open(..., "w") creates the file but not its parent directory, so make sure
# the output folder exists first.
os.makedirs(txt_path, exist_ok=True)

# Mode "w" already creates the file when it is missing and truncates it when
# it exists, so no separate "create if absent" step is needed.
with open(path_file_name, "w") as f:
    # The values are joined in a generator expression so that no module-level
    # loop variable is created; naming one `data` would overwrite the parsed
    # XML root that get_data_vaule() reads.
    f.write(' '.join(str(item) for item in write_data))
    f.write('\n')
print('finished!')
|