快速解析超大XML不占用太大内存
Posted by bongem
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了快速解析超大XML不占用太大内存相关的知识,希望对你有一定的参考价值。
import xml.etree.ElementTree as ET


def parse_res(xml_file):
    """Stream-parse a resource XML file into a dict of lookup tables.

    The file is read incrementally with ``ElementTree.iterparse`` and each
    element is cleared as soon as it has been handled, so the whole tree is
    never kept fully populated in memory.

    Recognised elements (handled when their closing tag is reached):

    * ``Item``   -- its attributes are buffered until the enclosing
      container closes.
    * ``Enum``   -- the buffered items are rendered as
      ``"[value=name][value=name]..."`` and buffered under the enum's ``id``.
    * ``EnumRes`` -- stored as ``{id.split('.')[1]: rendered_string}``.
    * ``MeasUnitRes`` / ``CounterNameRes`` / ``CounterUnitRes`` -- stored as
      ``{id.split('.')[1]: name}`` built from the buffered items.
    * ``DevTypeNameRes`` / ``VendorRes`` / ``MocRes`` -- stored as a dict with
      the two keys ``'id'`` and ``'name'`` taken from the buffered items
      (the last item wins).
    * ``param`` / ``alarm`` / ``alarms`` -- stored under ``'alarms'`` as
      ``{alarmId text: alarm name}``.

    Args:
        xml_file: A filename or a binary file object, as accepted by
            ``xml.etree.ElementTree.iterparse``.

    Returns:
        dict: Maps each container tag found in the file to its lookup dict.
        Empty if none of the recognised containers occur.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is not well formed.
        KeyError: If a required attribute (``id``, ``name``, ``value``) is
            missing on an ``Item``/``Enum``/``alarm`` element.
        IndexError: If an ``id`` attribute contains no ``'.'`` separator.
    """
    res_dic = {}
    pending_items = []   # Item attribute dicts, or [alarm_id, name] pairs
    pending_enums = []   # finished Enum entries waiting for </EnumRes>
    # Text of the most recent <param name="alarmId">; None until one is seen.
    alarm_id = None

    # iterparse() emits only "end" events by default, so every element seen
    # here is complete (attributes, text and children are all available).
    for _event, elem in ET.iterparse(xml_file):
        tag = elem.tag

        if tag == 'Item':
            # Copy the attributes: elem.clear() below would empty them.
            pending_items.append(dict(elem.attrib))

        elif tag == 'Enum':
            enum_str = ''.join(
                '[{}={}]'.format(item['value'], item['name'])
                for item in pending_items
            )
            pending_enums.append({'id': elem.attrib['id'], 'name': enum_str})
            pending_items = []

        elif tag == 'EnumRes':
            res_dic['EnumRes'] = {
                item['id'].split('.')[1]: item['name']
                for item in pending_enums
            }
            pending_enums = []

        elif tag in ('MeasUnitRes', 'CounterNameRes', 'CounterUnitRes'):
            res_dic[tag] = {
                item['id'].split('.')[1]: item['name']
                for item in pending_items
            }
            pending_items = []

        # Original comment marks the following tags: CommonInfo.Resource.xml
        elif tag in ('DevTypeNameRes', 'VendorRes', 'MocRes'):
            # NOTE(review): every item overwrites the same two keys, so only
            # the last Item is kept. Preserved as-is for compatibility;
            # confirm whether a per-item mapping was intended.
            section = {}
            for item in pending_items:
                section['id'] = item['id'].split('.')[1]
                section['name'] = item['name']
            res_dic[tag] = section
            pending_items = []

        # Original comment marks the following tags: StaticList.xml
        elif tag == 'param':
            # .get(): a <param> without a "name" attribute is simply ignored.
            if elem.attrib.get('name') == 'alarmId':
                alarm_id = elem.text

        elif tag == 'alarm':
            # Skip alarms for which no (non-empty) alarmId has been seen yet,
            # instead of failing on an unbound variable.
            if alarm_id is not None:
                pending_items.append([alarm_id, elem.attrib['name']])

        elif tag == 'alarms':
            res_dic[tag] = {item[0]: item[1] for item in pending_items}
            pending_items = []

        # This is the key line: release the element's children, text and
        # attributes as soon as it has been processed to keep memory low.
        elem.clear()

    return res_dic
以上是关于快速解析超大XML不占用太大内存的主要内容,如果未能解决你的问题,请参考以下文章: