python 解析XML文件

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 Python 解析 XML 文件的相关知识,希望对你有一定的参考价值。

一种比较高效的 Python 解析 XML 文件的方法

参考 http://codingpy.com/article/parsing-xml-using-python/

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import time


def parse_poi_by_elementTree(filepath):
    """Count POIs in an XML document by loading the whole tree first.

    Reads the ``Num`` attribute of the first ``Pois`` element and counts
    every ``Poi`` element in the document, then prints the elapsed time.

    Args:
        filepath: XML source handed to ``ET.ElementTree(file=...)``.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple. ``pois_element_num`` is the
        ``Num`` attribute of the first ``Pois`` element as returned by
        ``Element.get`` (a string, or ``None`` if the attribute is absent),
        or ``0`` when the document contains no ``Pois`` element.
        ``vde_poi`` is the number of ``Poi`` elements found.
    """
    t0 = time.time()

    tree = ET.ElementTree(file=filepath)

    # next() with a default runs on both Python 2 and 3 and keeps the
    # initial value of 0 when there is no Pois element at all.
    pois_element = next(tree.iter(tag="Pois"), None)
    pois_element_num = 0 if pois_element is None else pois_element.get("Num")

    # Count lazily instead of building a list of every Poi element.
    vde_poi = sum(1 for _ in tree.iter(tag="Poi"))

    cost_time = time.time() - t0
    print("parse_poi_by_elementTree time cost is %s" % cost_time)
    return pois_element_num, vde_poi


def parse_poi_by_iterparse(filepath):
    """Count POIs in an XML document incrementally with ``ET.iterparse``.

    Counts every ``Poi`` element and reads the ``Num`` attribute of the
    ``Pois`` element as an integer, clearing each element once it has been
    inspected, then prints the elapsed time.

    Args:
        filepath: XML source handed to ``ET.iterparse``.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple. ``pois_element_num`` is
        ``int(Num)`` of the last ``Pois`` element seen, or ``0`` if there is
        none; ``vde_poi`` is the number of ``Poi`` elements found.

    Raises:
        TypeError: If a ``Pois`` element has no ``Num`` attribute.
        ValueError: If ``Num`` is not a valid integer literal.
    """
    t0 = time.time()

    pois_element_num = 0
    vde_poi = 0
    # With no ``events`` argument iterparse reports only "end" events, so
    # every element is complete (attributes included) when it is seen here.
    for _event, elem in ET.iterparse(filepath):
        if elem.tag == "Poi":
            vde_poi += 1
        elif elem.tag == "Pois":
            pois_element_num = int(elem.get("Num"))

        # Drop the element's children, text and attributes once inspected.
        elem.clear()

    cost_time = time.time() - t0
    print("parse_poi_by_iterparse time cost is %s" % cost_time)
    return pois_element_num, vde_poi


from statistic import StatisticItem, XML_STREET, XML_POI
import os


def parse_street_xml_by_ET(street_file):
    """Build the street statistic for a street XML file.

    Counts every ``Street`` element and reads the ``Num`` attribute of the
    ``Streets`` element as an integer, parsing incrementally with
    ``ET.iterparse``.

    Args:
        street_file: Path of the street XML file.

    Returns:
        ``StatisticItem(XML_STREET, [vde_streets, street_num])`` where
        ``vde_streets`` is the number of ``Street`` elements found and
        ``street_num`` is the integer ``Num`` of the ``Streets`` element.
        Both are ``0`` when the file does not exist.

    Raises:
        TypeError: If a ``Streets`` element has no ``Num`` attribute.
        ValueError: If ``Num`` is not a valid integer literal.
    """
    if not os.path.exists(street_file):
        return StatisticItem(XML_STREET, [0, 0])

    street_num = 0
    vde_streets = 0  # actual count

    # With no ``events`` argument iterparse reports only "end" events, so
    # attributes are available when each element is inspected.
    for _event, elem in ET.iterparse(street_file):
        if elem.tag == "Street":
            vde_streets += 1
        elif elem.tag == "Streets":
            street_num = int(elem.get("Num"))

        # Drop the element's children, text and attributes once inspected.
        elem.clear()
    return StatisticItem(XML_STREET, [vde_streets, street_num])


def parse_poi_xml_by_ET(poi_file):
    """Build the POI statistic for a POI XML file.

    Counts every ``Poi`` element and reads the ``Num`` attribute of the
    ``Pois`` element as an integer, parsing incrementally with
    ``ET.iterparse``.

    Args:
        poi_file: Path of the POI XML file.

    Returns:
        ``StatisticItem(XML_POI, [vde_pois, poi_num])`` where ``vde_pois``
        is the number of ``Poi`` elements found and ``poi_num`` is the
        integer ``Num`` of the ``Pois`` element. Both are ``0`` when the
        file does not exist.

    Raises:
        TypeError: If a ``Pois`` element has no ``Num`` attribute.
        ValueError: If ``Num`` is not a valid integer literal.
    """
    if not os.path.exists(poi_file):
        return StatisticItem(XML_POI, [0, 0])

    poi_num = 0
    vde_pois = 0  # actual count

    # With no ``events`` argument iterparse reports only "end" events, so
    # attributes are available when each element is inspected.
    for _event, elem in ET.iterparse(poi_file):
        if elem.tag == "Poi":
            vde_pois += 1
        elif elem.tag == "Pois":
            poi_num = int(elem.get("Num"))

        # Drop the element's children, text and attributes once inspected.
        elem.clear()
    return StatisticItem(XML_POI, [vde_pois, poi_num])


if __name__ == "__main__":
    # Manual comparison of the two parsing strategies on a local sample file.
    # Other sample used during development:
    # C:\Users\shchshan\Desktop\vde\State_14120002\POI_1414000018.xml
    # Raw string so the backslashes in the Windows path are not escapes.
    sample_poi_file = r"C:\Users\shchshan\Desktop\vde\State_14120001\POI_1414000001.xml"
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(parse_poi_by_elementTree(sample_poi_file))
    print(parse_poi_by_iterparse(sample_poi_file))

 

以上是关于python 解析XML文件的主要内容,如果未能解决你的问题,请参考以下文章

从 XML 声明片段获取 XML 编码:部分内容解析不支持 XmlDeclaration

如何在 python 代码中解析多个 xml 文件?

从流输入中解析没有根元素的 XML 片段列表

python使用ElementTree解析XML文件

在Tomcat的安装目录下conf目录下的server.xml文件中增加一个xml代码片段,该代码片段中每个属性的含义与用途

DOM解析xml文件