使用java NodeList合并xml文件
Posted
技术标签:
【中文标题】使用java NodeList合并xml文件【英文标题】:Merging xml file using java NodeList 【发布时间】:2013-01-12 04:51:22 【问题描述】:我正在尝试合并两个 xml 文件,如下所示,但我无法获得所需的输出,请帮助我,谢谢
Java 代码:
// Build a DOM parser that is configured to ignore XML comments.
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
domFactory.setIgnoringComments(true);
DocumentBuilder builder = domFactory.newDocumentBuilder();
// doc is the merge target (file1.xml); doc1 supplies the nodes to add (file2.xml).
Document doc = builder.parse(new File("file1.xml"));
Document doc1 = builder.parse(new File("file2.xml"));
// All <staff> elements of each document.
NodeList nodes = doc.getElementsByTagName("staff");
NodeList nodes1 = doc1.getElementsByTagName("staff");
// For every <staff> of file2.xml: deep-copy it into doc (nodes cannot be shared across documents) ...
for(int i=0;i<nodes1.getLength();i=i+1)
Node n= (Node) doc.importNode(nodes1.item(i), true);
// ... then append the copy to the PARENT of the i-th <staff> of file1.xml.
// NOTE(review): in the sample files that parent is <company>, so the copy is added as a
// sibling <staff> instead of its children being merged into the existing <staff>.
// The index i ranges over nodes1 but is also used to index nodes.
nodes.item(i).getParentNode().appendChild(n);
// Serialize the modified document to an in-memory string with indentation enabled.
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(doc);
transformer.transform(source, result);
// Write the serialized XML text to mergedxml.xml.
Writer output = null;
output = new BufferedWriter(new FileWriter("mergedxml.xml"));
String xmlOutput = result.getWriter().toString();
output.write(xmlOutput);
output.close();
System.out.println("merge complete");
文件1.xml
<company>
<staff>
<name>john</name>
<phone>465456433</phone>
<email>gmail1</email>
</staff>
</company>
文件2.xml
<company>
<staff>
<area>area1</area>
<city>city1</city>
</staff>
</company>
当前输出:
<company>
<staff>
<name>john</name>
<phone>465456433</phone>
<email>gmail1</email>
</staff>
<staff>
<area>area1</area>
<city>city1</city>
</staff>
</company>
预期输出:
<company>
<staff>
<name>john</name>
<phone>465456433</phone>
<email>gmail1</email>
<area>area1</area>
<city>city1</city>
</staff>
</company>
【问题讨论】:
另见：ibm.com/developerworks/xml/library/x-tipcombxslt 和 ***.com/questions/1510688/… 【参考方案1】:如果想自己动手实现合并，你应该这样做：
public static void mergeXML()
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = null;
Document doc = null;
Document doc2 = null;
try
db = dbf.newDocumentBuilder();
doc = db.parse(new File("D:\\Loic_Workspace\\Test2\\res\\test.xml"));
doc2 = db.parse(new File("D:\\Loic_Workspace\\Test2\\res\\test2.xml"));
NodeList ndListFirstFile = doc.getElementsByTagName("staff");
Node nodeArea = doc.importNode(doc2.getElementsByTagName("area").item(0), true);
Node nodeCity = doc.importNode(doc2.getElementsByTagName("city").item(0), true);
ndListFirstFile.item(0).appendChild(nodeArea);
ndListFirstFile.item(0).appendChild(nodeCity);
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer transformer = tFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new StringWriter());
transformer.transform(source, result);
Writer output = new BufferedWriter(new FileWriter("D:\\Loic_Workspace\\Test2\\res\\testFinal.xml"));
String xmlOutput = result.getWriter().toString();
output.write(xmlOutput);
output.close();
catch (ParserConfigurationException e)
// TODO Auto-generated catch block
e.printStackTrace();
catch (SAXException e)
// TODO Auto-generated catch block
e.printStackTrace();
catch (IOException e)
// TODO Auto-generated catch block
e.printStackTrace();
catch (TransformerException e)
// TODO Auto-generated catch block
e.printStackTrace();
testFinal.xml 的最终输出:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<company>
<staff>
<name>john</name>
<phone>465456433</phone>
<email>gmail1</email>
<area>area1</area>
<city>city1</city>
</staff>
</company>
如你所愿;-)
希望对你有帮助,
【讨论】:
奇怪，如果不先调用 Document.importNode，appendChild 会抛出 DOMException【参考方案2】:问题是，您想将子元素附加到“staff”元素，但您实际执行的是：
nodes.item(i).getParentNode().appendChild(n);
这意味着您取的是列表中某个“staff”节点的父节点，而该父节点是“company”。因此，您是把一个新的“staff”节点（从 doc1 导入的节点）附加到了 doc 的“company”节点上。
现在,您要做的是遍历 doc1 的“staff”子节点,并将它们一一附加到 doc 的“staff”节点。 所以你会想改变nodes1的定义如下:
// Retrieving child nodes of first "staff" element of doc1
NodeList nodes1 = doc1.getElementsByTagName("staff").item(0).getChildNodes();
然后通过替换来更改您附加的节点
nodes.item(i).getParentNode().appendChild(n);
替换为：
nodes.item(0).appendChild(n);
所以现在您要将 doc1 的所有“staff”子节点(/!\ 仅用于第一个“staff”元素)附加到 doc 的第一个“staff”元素
注意 1：不要用遍历列表 A 的迭代变量 (i) 去选取另一个列表中的元素，除非您清楚自己在做什么（例如两个列表的长度相同）
注意 2:该解决方案会将 doc1 的第一个“staff”元素的节点附加到 doc 的第一个“staff”元素。您可能会想在这里和那里添加一些迭代。
【讨论】:
【参考方案3】:此解决方案适用于在合并之前需要迭代和验证某些内容的文件。
file1.xml:
<?xml version="1.0" encoding="UTF-8"?>
<reactions>
<reaction>
<ID>07402</ID>
<type>irreversible</type>
<substrate>15666</substrate>
<product>07756</product>
</reaction>
<reaction>
<ID>03063</ID>
<type>irreversible</type>
<substrate>00916</substrate>
<product>04712</product>
</reaction>
</reactions>
file2.xml:
<?xml version="1.0" encoding="UTF-8"?><reactions>
<reaction>
<ID>00001</ID>
<reactionName>polyphosphate polyphosphohydrolase</reactionName>
<reactionDescription> Polyphosphate + n H2O &lt;=&gt; (n+1) Oligophosphate</reactionDescription>
</reaction>
<reaction>
<ID>00002</ID>
<reactionName>Reduced ferredoxin:dinitrogen oxidoreductase (ATP-hydrolysing)</reactionName>
<reactionDescription> 16 ATP + 16 H2O + 8 Reduced ferredoxin &lt;=&gt; 8 e- + 16 Orthophosphate + 16 ADP + 8 Oxidized ferredoxin</reactionDescription>
</reaction>
<reaction>
<ID>03063</ID>
<reactionName>cephalosporin-C:2-oxoglutarate aminotransferase</reactionName>
<reactionDescription> Cephalosporin C + 2-Oxoglutarate &lt;=&gt; (7R)-7-(5-Carboxy-5-oxopentanoyl)aminocephalosporinate + D-Glutamate</reactionDescription>
</reaction>
<reaction>
<ID>07402</ID>
<reactionName>(7R)-7-(4-carboxybutanamido)cephalosporanate amidohydrolase</reactionName>
<reactionDescription> (7R)-7-(4-Carboxybutanamido)cephalosporanate + H2O &lt;=&gt; 7-Aminocephalosporanic acid + Glutarate</reactionDescription>
</reaction>
</reactions>
结果.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<reactions>
<reaction>
<ID>07402</ID>
<type>irreversible</type>
<substrate>15666</substrate>
<product>07756</product>
<reactionName>(7R)-7-(4-carboxybutanamido)cephalosporanate amidohydrolase</reactionName>
<reactionDescription> (7R)-7-(4-Carboxybutanamido)cephalosporanate + H2O &lt;=&gt; 7-Aminocephalosporanic acid + Glutarate</reactionDescription>
</reaction>
<reaction>
<ID>03063</ID>
<type>irreversible</type>
<substrate>00916</substrate>
<product>04712</product>
<reactionName>cephalosporin-C:2-oxoglutarate aminotransferase</reactionName>
<reactionDescription> Cephalosporin C + 2-Oxoglutarate &lt;=&gt; (7R)-7-(5-Carboxy-5-oxopentanoyl)aminocephalosporinate + D-Glutamate</reactionDescription>
</reaction>
</reactions>
执行此操作的 Java 程序:
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.StringWriter;
import java.io.Writer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Enriches the {@code <reaction>} elements of file1.xml with descriptive child elements
 * taken from the {@code <reaction>} elements of file2.xml that carry the same
 * {@code <ID>} text, and writes the merged document to Result.xml.
 */
public class MergeXML {

    /** Child elements copied from a matching reaction, in the order they are appended. */
    private static final String[] COPIED_FIELDS = {
        "reactionName", "reactionAlternativeName", "reactionDescription"
    };

    /**
     * Parses both input files, merges the matching reactions into the first document and
     * serializes it with indentation. Any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document secondaryMetabolismXML =
                    db.parse(new File("/home/bioinfo/workspace/teste/src/file1.xml"));
            Document generalMetabolismXML =
                    db.parse(new File("/home/bioinfo/workspace/teste/src/file2.xml"));

            mergeReactions(secondaryMetabolismXML, generalMetabolismXML);

            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            // Let the transformer write the file itself: the bytes on disk then use the
            // same encoding as the XML declaration, instead of the platform default
            // charset that FileWriter would apply.
            transformer.transform(
                    new DOMSource(secondaryMetabolismXML),
                    new StreamResult(new File("/home/bioinfo/workspace/teste/src/Result.xml")));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends, to every reaction of {@code target}, copies of the {@link #COPIED_FIELDS}
     * children of each reaction in {@code source} whose ID text is equal. Reactions
     * without an ID child are skipped.
     */
    private static void mergeReactions(Document target, Document source) {
        NodeList secondaryReactions = target.getElementsByTagName("reaction");
        NodeList generalReactions = source.getElementsByTagName("reaction");

        for (int s = 0; s < secondaryReactions.getLength(); s++) {
            Node secondaryReaction = secondaryReactions.item(s);
            Node secondaryId = getChildrenByNodeName(secondaryReaction, "ID");
            if (secondaryId == null) {
                continue; // nothing to match on
            }
            String id = secondaryId.getTextContent();

            for (int g = 0; g < generalReactions.getLength(); g++) {
                Node generalReaction = generalReactions.item(g);
                Node generalId = getChildrenByNodeName(generalReaction, "ID");
                if (generalId == null || !id.equals(generalId.getTextContent())) {
                    continue;
                }
                for (String fieldName : COPIED_FIELDS) {
                    Node field = getChildrenByNodeName(generalReaction, fieldName);
                    if (field != null) {
                        // Nodes are owned by their document; import a deep copy first.
                        secondaryReaction.appendChild(target.importNode(field, true));
                    }
                }
            }
        }
    }

    /**
     * Returns the first direct child of {@code node} whose node name equals
     * {@code nodeName}, ignoring case.
     *
     * @param node the parent whose direct children are searched
     * @param nodeName the node name to look for (case-insensitive)
     * @return the first matching child, or {@code null} if there is none
     */
    public static Node getChildrenByNodeName(Node node, String nodeName) {
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeName().equalsIgnoreCase(nodeName)) {
                return child;
            }
        }
        return null;
    }
}
【讨论】:
以上是关于使用java NodeList合并xml文件的主要内容,如果未能解决你的问题,请参考以下文章
具有 NodeList 属性的 Java XML 字符串(解析)
带有xml DOM的Chrome getElementsByTagName()返回不正确的NodeList [重复]