python xml2json

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python xml2json相关的知识,希望对你有一定的参考价值。

# use beautiful soup for XML parsing
from bs4 import BeautifulSoup
# use json for writing json file and json formatting
import json
# use sys for displaying python version information
import sys

# show which Python interpreter version is running this script
print(sys.version_info)

## 2. Parse XML File

### 2.1 define the input XML path and the output JSON path

xml_file, json_file = './xml.xml', './json.dat'

## 3 supporting functions
### 3.1 build_tag: define a build tag function which will extract specific data for saving to json file



def _first_child_string(element):
    """Return the ``.string`` of the first child of *element*, or '' if none.

    *element* is the result of ``tag.find(...)``: it is None when the
    requested element does not exist, and it yields no children when the
    element is empty. Both cases give '' instead of raising.
    """
    if element is None:
        return ''
    first_child = next(iter(element), None)
    return '' if first_child is None else first_child.string


def build_tag(tag):
    """Build the dictionary that represents one XML element in the JSON output.

    Args:
        tag: a parsed XML element exposing ``find`` and ``findChildren``
            (a BeautifulSoup tag in this script).

    Returns:
        A dict with the keys 'Description', 'RelatedTerms' and 'Title'
        (inserted in that order). 'RelatedTerms' is a list holding one
        ``{'Relationship': ..., 'Title': ...}`` dict per Relationship element
        found below *tag*. A missing or empty Title/Description yields ''.
    """
    title_string = _first_child_string(tag.find('Title'))
    description_string = _first_child_string(tag.find('Description'))

    # Only Title elements whose grandparent is RelatedTerms belong to a
    # related term; this filters out the element's own Title.
    related_titles = [
        feature.string
        for feature in tag.findChildren('Title')
        if feature.parent.parent.name == 'RelatedTerms'
    ]
    relationships = [
        feature.string for feature in tag.findChildren('Relationship')
    ]

    # Pair each Relationship with the Title at the same position. When there
    # are fewer Titles than Relationships the Title is left blank rather than
    # indexing out of range.
    related_terms = [
        {
            'Relationship': relationship_string,
            'Title': related_titles[index] if index < len(related_titles) else '',
        }
        for index, relationship_string in enumerate(relationships)
    ]

    return {
        'Description': description_string,
        'RelatedTerms': related_terms,
        'Title': title_string,
    }

###


### 3.2 traverseXML: iterate through all XML tags in the document



# for each tag build a dictionary element with the features to include
def traverseXML(tag):
    """Convert every direct child element of *tag* into its dictionary form.

    Only the immediate children are visited (``recursive=False``); each one
    is handed to ``build_tag``.

    Returns:
        A list with one ``build_tag`` result per direct child, in document
        order.
    """
    direct_children = tag.findChildren(recursive=False)
    return [build_tag(element) for element in direct_children]

###

## 4. Main
### 4.1 open XML file into soup object

try:
    # the context manager closes the file handle once the parser has consumed
    # it; the bare open() call used before left it open
    with open(xml_file, encoding='UTF-8') as xml_handle:
        soup = BeautifulSoup(xml_handle, 'lxml-xml')
except Exception as error:
    print('[ERROR]: opening file :' + xml_file)
    print(error)
    # nothing below can work without a parsed document, so stop here instead
    # of failing later with a NameError on `soup`
    raise SystemExit(1)

### 4.2 optionally for debugging purposes display "pretty" format of XML document

# optionally display XML file structure
print(soup.prettify())


### 4.3 extract tags from XML document and build a dictionary and list of tags for JSON formatting


# tagDict will hold the whole converted document
tagDict = {}
# tagList holds one dict per XML element found below the root
tagList = []

# walk the top-level nodes of the parsed document
for root_element in soup.findChildren(recursive=False):
    # nodes without a tag name are skipped
    if not root_element.name:
        continue
    tagList = traverseXML(root_element)

# wrap the list of elements under the single key 'thesaurus'
tagDict = {'thesaurus': tagList}


### 4.4 write tagDict to json file

# serialise tagDict to json_file; indent=0 puts every item on its own line
# without leading spaces, and sort_keys=False keeps the insertion order

try:
    # json.dump emits ASCII-only text by default, so the explicit encoding
    # only makes the output independent of the platform default
    with open(json_file, 'w', encoding='utf-8') as output:
        json.dump(tagDict, output, sort_keys=False, indent=0)
except Exception as error:
    print('[ERROR]: writing to file :' + json_file)
    # also report the cause, as the handler for reading the XML file does
    print(error)

以上是关于python xml2json的主要内容,如果未能解决你的问题,请参考以下文章

使用 $.xml2Json 返回一个复杂的对象

为 API 调用 Xml2Json 制作 Json 模型

C# xml2json

python解析xml文件

vue中引入x2js(针对xml、json数据进行转换处理)

Jackson 学习笔记 XML转JSON