Python操作XML文件示例
时间:2022-06-17 22:38:28
def _first_child_text(parent, tag_name, default=''):
    """Return the value of the first child node of the first ``tag_name`` element.

    Looks up ``tag_name`` among the descendants of ``parent`` and returns the
    ``nodeValue`` of that element's first child node. ``default`` is returned
    when no such element exists or when the element has no children
    (e.g. ``<crawl_cycle></crawl_cycle>``), instead of raising ``IndexError``.
    """
    elements = parent.getElementsByTagName(tag_name)
    if not elements or not elements[0].childNodes:
        return default
    return elements[0].childNodes[0].nodeValue


def get_seed_data(filename, verbose=True):
    """Parse a seed XML file into a flat list of seed dictionaries.

    The document is walked as ``system`` -> ``section`` -> ``seed`` elements.
    One dictionary is produced per ``seed`` element, with these keys:

    * ``crawl_cycle``   - text of the section's ``crawl_cycle`` element
    * ``system_id``     - ``id`` attribute of the system, as ``int``
    * ``system_name``   - ``name`` attribute of the system
    * ``section_id``    - ``id`` attribute of the section, as ``int``
    * ``section_name``  - ``name`` attribute of the section
    * ``seed_id``       - ``id`` attribute of the seed, as ``int``
    * ``userblog_url``  - text of the seed's ``userblog_url`` element

    Args:
        filename: Path of the XML file (anything ``minidom.parse`` accepts).
        verbose: When true (the default, matching the original behaviour),
            dump every seed to stdout and run the console ``pause`` command
            after each one. Pass ``False`` for silent, non-blocking use.

    Returns:
        A list of seed dictionaries in document order.

    Raises:
        ValueError: If an ``id`` attribute of a system, section or seed that
            is being emitted is missing or not an integer.
    """
    # Local imports keep the snippet self-contained: the listing uses both
    # names but never imports them.
    import os
    from xml.dom import minidom

    dom = minidom.parse(filename)
    root = dom.documentElement

    seed_list = []
    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")

        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute('id')
            section_name = section_node.getAttribute('name')
            # An empty or missing <crawl_cycle> yields '' rather than IndexError.
            crawl_cycle = _first_child_text(section_node, "crawl_cycle")

            for seed_node in section_node.getElementsByTagName('seed'):
                seed = {
                    'crawl_cycle': crawl_cycle,
                    'system_id': int(system_id),
                    'system_name': system_name,
                    'section_id': int(section_id),
                    'section_name': section_name,
                    'seed_id': int(seed_node.getAttribute('id')),
                    'userblog_url': _first_child_text(seed_node, 'userblog_url'),
                }
                seed_list.append(seed)

                if verbose:
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print('-------------------------------------------')
                    print('system_id:%d' % seed['system_id'])
                    print('system_name:%s' % seed['system_name'])
                    print(' section_id:%d' % seed['section_id'])
                    print(' section_name:%s' % seed['section_name'])
                    print(' seed_id:%d' % seed['seed_id'])
                    print(' userblog_url:%s' % seed['userblog_url'])
                    print('=========================')
                    print(seed)
                    # NOTE(review): 'pause' is a Windows console command. The
                    # flattened listing does not show whether it ran per seed
                    # or once before returning; per-seed is assumed - confirm.
                    os.system('pause')
    return seed_list
<?xml version="1.0" encoding="utf-8" ?>
<seeds>
<system id="1" name="新浪">
<section id="1" name="娱乐">
<crawl_cycle> </crawl_cycle>
<seed id="1">
<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>
</seed>
<seed id="2">
<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>
</seed>
<seed id="3">
<userblog_url>http://aaa.com.cn/sanxiazaixian</userblog_url>
</seed>
</section>
<section id="2" name="读书">
<crawl_cycle> </crawl_cycle>
<seed id="11">
<userblog_url>http://aaa.com.cn/twocold</userblog_url>
</seed>
<seed id="12">
<userblog_url>http://aaa.com.cn/u/1233526741</userblog_url>
</seed>
</section>
</system>
</seeds>
标签:python,xml
0
投稿
猜你喜欢
vue基本使用--refs获取组件或元素的实例
2024-05-02 16:35:20
mysql 之通过配置文件链接数据库
2024-01-17 13:32:28
设计上的小细节
2010-06-24 21:44:00
Golang限流库与漏桶和令牌桶的使用介绍
2024-05-10 13:57:50
如何利用python创建、读取和修改CSV数据文件
2021-12-15 21:18:41
wxPython绘图模块wxPyPlot实现数据可视化
2023-10-03 14:58:22
对python中的控制条件、循环和跳出详解
2022-03-08 00:41:44
Numpy 多维数据数组的实现
2022-12-22 11:26:03
Pandas 筛选和删除目标值所在的行的实现
2021-11-16 03:11:31
MySql表、字段、库的字符集修改及查看方法
2024-01-19 19:39:38
Python基础语法之容器详解
2022-01-07 23:20:19
python继承threading.Thread实现有返回值的子类实例
2023-06-07 19:01:15
PHP之Mysql常用SQL语句示例的深入分析
2024-05-05 09:31:21
python 的 openpyxl模块 读取 Excel文件的方法
2023-02-23 14:41:56
Python脚本操作Excel实现批量替换功能
2023-06-15 16:37:57
10个简化PHP开发的工具
2023-07-14 18:02:05
c#使用FreeSql生产环境时自动升级备份数据库
2024-01-22 15:56:01
Python实现查找数组中任意第k大的数字算法示例
2022-04-26 22:10:46
使用Python的Tornado框架实现一个Web端图书展示页面
2022-04-23 07:23:48
MySQL一键安装Shell脚本的实现
2024-01-16 23:28:29