python:批量统计xml中各类目标的数量案例

作者:南石北岸生 时间:2021-11-17 05:22:44 

之前用 MATLAB 写过一个统计脚本,但越用越觉得麻烦:每换一个数据集都要手动修改类别数目,而且运行速度慢。因此用 Python 重写了一个版本,只需给出 xml 文件夹路径,无需预先知道类别,就能统计出所有类别的目标名称及其对应的数量。

分享出来给大家。

代码如下:


# -*- coding:utf-8 -*-
import os
import xml.etree.ElementTree as ET
import numpy as np
# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN"),
# so use a large finite limit to keep arrays from being summarised with "...".
np.set_printoptions(suppress=True, threshold=10000000)
import matplotlib
from PIL import Image

def parse_obj(xml_path, filename):
    """Read one annotation file and return the class names of its objects.

    Args:
        xml_path: Directory that contains the annotation file.
        filename: Name of the annotation file, including its extension.

    Returns:
        A list with one ``{'name': <text>}`` dict per ``<object>`` element
        that is a direct child of the document root, in document order.
        ``<object>`` elements that have no ``<name>`` child are skipped.
    """
    # os.path.join works whether or not xml_path ends with a separator.
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        name = obj.findtext('name')
        if name is None:
            # No <name> child: nothing to count for this element.
            continue
        objects.append({'name': name})
    return objects

def read_image(image_path, filename):
    """Return the pixel dimensions of an image file.

    Args:
        image_path: Directory that contains the image.
        filename: Name of the image file, including its extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` come from ``Image.size`` and
        ``area`` is ``W * H``.
    """
    # Use the image as a context manager so the underlying file is closed
    # as soon as the size has been read.
    with Image.open(os.path.join(image_path, filename)) as im:
        width, height = im.size
    return [width, height, width * height]

def count_objects(recs):
    """Count how many objects of each class appear in the parsed annotations.

    Args:
        recs: Mapping of file stem to the list of object dicts for that file;
            each object dict must have a ``'name'`` key.

    Returns:
        A dict mapping class name to its number of occurrences. Keys appear
        in the order in which each class is first encountered.
    """
    counts = {}
    for objects in recs.values():
        for obj in objects:
            counts[obj['name']] = counts.get(obj['name'], 0) + 1
    return counts


if __name__ == '__main__':
    xml_path = 'C:/Users/nansbas/Desktop/hebin/03/'
    # Keep only *.xml entries: anything else in the folder would otherwise be
    # given an extra ".xml" suffix below and fail to open.
    filenames = [os.path.splitext(entry)[0]
                 for entry in os.listdir(xml_path)
                 if entry.endswith('.xml')]
    recs = {name: parse_obj(xml_path, name + '.xml') for name in filenames}
    num_objs = count_objects(recs)
    for name, count in num_objs.items():
        print('{}:{}个'.format(name, count))
    print('信息统计算完毕。')

python:批量统计xml中各类目标的数量案例

补充知识:Python对目标检测数据集xml文件操作(统计目标种类、数量、面积、比例等&修改目标名字)

1. 根据xml文件统计目标种类以及数量


# -*- coding:utf-8 -*-
#根据xml文件统计目标种类以及数量
import os
import xml.etree.ElementTree as ET
import numpy as np
# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN"),
# so use a large finite limit to keep arrays from being summarised with "...".
np.set_printoptions(suppress=True, threshold=10000000)
import matplotlib
from PIL import Image

def parse_obj(xml_path, filename):
    """Collect the class name of every object in one annotation file.

    Args:
        xml_path: Directory that contains the annotation file.
        filename: Name of the annotation file, including its extension.

    Returns:
        A list of ``{'name': <text>}`` dicts, one per ``<object>`` element
        found directly under the document root, in document order.
        ``<object>`` elements without a ``<name>`` child are skipped.
    """
    # Join instead of concatenating so a missing trailing separator on
    # xml_path does not produce a wrong path.
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        name = obj.findtext('name')
        if name is None:
            # No <name> child: nothing to count for this element.
            continue
        objects.append({'name': name})
    return objects

def read_image(image_path, filename):
    """Return the width, height and pixel area of an image file.

    Args:
        image_path: Directory that contains the image.
        filename: Name of the image file, including its extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` come from ``Image.size`` and
        ``area`` is ``W * H``.
    """
    # The context manager closes the file handle that Image.open leaves open.
    with Image.open(os.path.join(image_path, filename)) as im:
        width, height = im.size
    return [width, height, width * height]

def count_objects(recs):
    """Count how many objects of each class appear in the parsed annotations.

    Args:
        recs: Mapping of file stem to the list of object dicts for that file;
            each object dict must have a ``'name'`` key.

    Returns:
        A dict mapping class name to its number of occurrences. Keys appear
        in the order in which each class is first encountered.
    """
    counts = {}
    for objects in recs.values():
        for obj in objects:
            counts[obj['name']] = counts.get(obj['name'], 0) + 1
    return counts


if __name__ == '__main__':
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'
    # Keep only *.xml entries: anything else in the folder would otherwise be
    # given an extra ".xml" suffix below and fail to open.
    filenames = [os.path.splitext(entry)[0]
                 for entry in os.listdir(xml_path)
                 if entry.endswith('.xml')]
    recs = {name: parse_obj(xml_path, name + '.xml') for name in filenames}
    num_objs = count_objects(recs)
    for name, count in num_objs.items():
        print('{}:{}个'.format(name, count))
    print('信息统计算完毕。')

python:批量统计xml中各类目标的数量案例

2. 根据xml文件统计目标的平均宽度、高度、面积以及每一个目标在原图中的占比


# -*- coding:utf-8 -*-
#统计
# 计算每一个目标在原图中的占比
# 计算目标的平均长度、
# 计算平均宽度,
# 计算平均面积、
# 计算目标平均占比

import os
import xml.etree.ElementTree as ET
import numpy as np

#np.set_printoptions(suppress=True, threshold=np.nan) #10,000,000
np.set_printoptions(suppress=True, threshold=10000000) #10,000,000
import matplotlib
from PIL import Image

def parse_obj(xml_path, filename):
    """Read the class name and bounding box of every object in one annotation.

    Args:
        xml_path: Directory that contains the annotation file.
        filename: Name of the annotation file, including its extension.

    Returns:
        A list with one dict per ``<object>`` element that is a direct child
        of the document root, in document order. Each dict has:
        ``'name'``: text of the ``<name>`` child;
        ``'bbox'``: ``[xmin, ymin, xmax, ymax]`` as ints, read from the
        ``<bndbox>`` child.

    Raises:
        AttributeError: If an object lacks ``<name>``, ``<bndbox>`` or one of
            the four coordinate elements.
        ValueError: If a coordinate is not numeric.
    """
    # os.path.join works whether or not xml_path ends with a separator.
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            # Go through float first: int('12.0') raises ValueError, while
            # int(float('12.0')) yields 12. Integer strings are unaffected.
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects

def read_image(image_path, filename):
    """Return the pixel dimensions of an image file.

    Args:
        image_path: Directory that contains the image.
        filename: Name of the image file, including its extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` come from ``Image.size`` and
        ``area`` is ``W * H``.
    """
    # Close the file as soon as the size is known; this matters when the
    # function is called once per image over a whole dataset.
    with Image.open(os.path.join(image_path, filename)) as im:
        width, height = im.size
    return [width, height, width * height]

def summarize_shapes(recs, ims_info):
    """Average the box size and box-to-image ratios of every class.

    Args:
        recs: Mapping of file stem to a list of object dicts, each with a
            ``'name'`` and a ``'bbox'`` of ``[xmin, ymin, xmax, ymax]``.
        ims_info: Mapping of file stem to ``[W, H, area]`` of that image.

    Returns:
        A dict mapping class name to a NumPy array
        ``[mean W, mean H, mean area, mean W ratio, mean H ratio,
        mean area ratio]``. Keys appear in the order in which each class is
        first encountered.
    """
    obs_shape = {}
    for name, objects in recs.items():
        im_w, im_h, im_area = ims_info[name]
        for obj in objects:
            xmin, ymin, xmax, ymax = obj['bbox']
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            # Every object registers its class here, inside the object loop,
            # so no class is missed and files without objects are harmless.
            obs_shape.setdefault(obj['name'], []).append(
                [ob_w, ob_h, ob_area,
                 ob_w / im_w, ob_h / im_h, ob_area / im_area])
    return {cls: np.array(rows).sum(axis=0) / len(rows)
            for cls, rows in obs_shape.items()}


if __name__ == '__main__':
    image_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/JPEGImages/'
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'
    # Keep only *.xml entries: anything else in the folder would otherwise be
    # given an extra ".xml" suffix below and fail to open.
    filenames = [os.path.splitext(entry)[0]
                 for entry in os.listdir(xml_path)
                 if entry.endswith('.xml')]
    print(filenames)
    recs = {}
    ims_info = {}
    for name in filenames:
        print('正在处理 {}.xml '.format(name))
        recs[name] = parse_obj(xml_path, name + '.xml')
        print('正在处理 {}.jpg '.format(name))
        ims_info[name] = read_image(image_path, name + '.jpg')
    print('所有信息收集完毕。')
    print('正在处理信息......')
    obj_avg = summarize_shapes(recs, ims_info)
    for name, avg in obj_avg.items():
        print('{}的情况如下:*******\n'.format(name))
        print(' 目标平均W={}'.format(avg[0]))
        print(' 目标平均H={}'.format(avg[1]))
        print(' 目标平均area={}'.format(avg[2]))
        print(' 目标平均与原图的W比例={}'.format(avg[3]))
        print(' 目标平均与原图的H比例={}'.format(avg[4]))
        print(' 目标平均原图面积占比={}\n'.format(avg[5]))
    print('信息统计计算完毕。')

python:批量统计xml中各类目标的数量案例

3. 将xml文件中某个目标的名字修改为另一个名字


#修改xml文件中的目标的名字,
import os, sys
import glob
from xml.etree import ElementTree as ET

# 批量读取Annotations下的xml文件
# per=ET.parse(r'C:\Users\rockhuang\Desktop\Annotations\000003.xml')
xml_dir = r'/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations'
xml_list = glob.glob(xml_dir + '/*.xml')

# Old class name -> new class name.
RENAMES = {
    'PinNormal': 'normal bolt',
    'PinDefect': 'defect bolt-1',
}


def rename_objects(xml_file, renames):
    """Rename object classes inside one annotation file, in place.

    Args:
        xml_file: Path of the annotation file to rewrite.
        renames: Mapping of old class name to new class name. Names that are
            not keys of this mapping are left untouched.

    Returns:
        The number of ``<name>`` elements whose text was changed. The file is
        rewritten only when this number is non-zero.
    """
    tree = ET.parse(xml_file)
    changed = 0
    # Address <name> explicitly rather than taking the first child of
    # <object>: Element.getchildren() no longer exists since Python 3.9, and
    # the relative path avoids the warning raised for '/object' on a tree.
    for name_node in tree.findall('./object/name'):
        new_name = renames.get(name_node.text)
        if new_name is not None:
            name_node.text = new_name
            changed += 1
        print(name_node.tag, ':', name_node.text)
    if changed:
        # Write UTF-8 so non-ASCII text is kept as characters rather than
        # being turned into numeric character references.
        tree.write(xml_file, encoding='utf-8')
    return changed


# Process every annotation file in the folder.
for xml in xml_list:
    print(xml)
    rename_objects(xml, RENAMES)

python:批量统计xml中各类目标的数量案例

来源:https://blog.csdn.net/gusui7202/article/details/86583444

标签:python,xml,目标,数量
0
投稿

猜你喜欢

  • Python中列表与元组的乘法操作示例

    2021-05-09 17:11:25
  • php文件下载后无法打开的处理方案及代码

    2023-06-13 19:12:03
  • Python使用lambda表达式对字典排序操作示例

    2022-12-26 06:27:46
  • PHP常用字符串操作函数实例总结(trim、nl2br、addcslashes、uudecode、md5等)

    2023-10-02 13:10:01
  • Python网络爬虫信息提取mooc代码实例

    2022-01-02 12:18:23
  • CSS兼容性(IE和Firefox)技巧大全

    2010-07-29 12:29:00
  • 请给PNG8一个机会

    2009-09-16 14:22:00
  • 他们是如何不让我的Teleport和Webzip工作的?

    2010-07-14 21:06:00
  • ASP-server.URLEncode反函数:urldecode

    2008-10-23 16:05:00
  • citespace数据处理:用python对Ref文档进行去重方式

    2022-07-06 06:01:13
  • PaddleNLP ppdiffusers 自动生成兔了个兔海报

    2023-07-20 01:56:39
  • 基于ASPJPEG 制作了一个梦寐已久的批量水印工具步骤

    2011-02-28 10:39:00
  • ASP实现上传图片到数据库

    2007-09-21 12:59:00
  • python+Word2Vec实现中文聊天机器人的示例代码

    2023-08-04 13:11:29
  • 使用 XML 模板 (MSSQL手册)

    2008-09-04 14:25:00
  • 基于Python实现视频的人脸融合功能

    2021-07-12 06:41:59
  • Python数值求解微分方程方法(欧拉法,隐式欧拉)

    2023-06-29 10:45:29
  • django中ImageField的使用详解

    2023-09-28 03:58:37
  • Go语言中的匿名结构体用法实例

    2023-07-07 11:10:21
  • MSSQL优化之探索MSSQL执行计划(转)

    2011-11-03 17:16:21
  • asp之家 网络编程 m.aspxhome.com