解析xml

Posted wuqiqing_1

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了解析xml相关的知识,希望对你有一定的参考价值。



   工作中经常会用到JAXP相关的代码,为了方便,这里做一些总结(可参考《JAXP验证》)。

   JAXP只是定义了一套通过Java操作XML文件的统一API,主要提供SAX和DOM(JAXP遵循W3C的DOM标准)两种方式,两者的API结构非常相似。值得注意的是,JAXP只定义了API框架,本身并不提供XML解析的实现。我们可以直接使用默认的API参数获得默认的解析器(JDK 1.4默认自带解析器Crimson;JDK 5.0起默认使用Xerces,org.apache.xerces被放到了com.sun包里),也可以通过多种方式更换JAXP使用的解析器,例如:通过虚拟机启动参数、工厂方法参数等。

   例子使用一个简单的SPRING配置文件来做测试,因为使用SPRING3.0环境,所以测试代码中也使用了一些SPRING的工具类。

/**
 * JAXP TEST&DEMO 
 */
public class JAXPTest 

	private static Logger logger = LoggerFactory.getLogger(JAXPTest.class);
	/** JAXP attribute used to configure the schema language for validation. */
	private static final String SCHEMA_LANGUAGE_ATTRIBUTE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";

	/** JAXP attribute value indicating the XSD schema language. */
	private static final String XSD_SCHEMA_LANGUAGE = "http://www.w3.org/2001/XMLSchema";

	public static void main(String[] args) 

		URL url = JAXPTest.class.getResource("/applicationcontext_test.xml");
		File file = new File(url.getFile());

		// testSAXParser(file);
		testDOMBuilder(file);
	

	/**
	 * 测试Dom方式解析XML
	 * 
	 * @param file
	 */
	public static void testDOMBuilder(File file) 
		try 
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			// 设置Namespace有效
			factory.setNamespaceAware(true);
			// 打开验证
			factory.setValidating(true);
			//设置验证的SCHEMA方式为XSD
			factory.setAttribute(SCHEMA_LANGUAGE_ATTRIBUTE, XSD_SCHEMA_LANGUAGE);
			DocumentBuilder documentBuilder = factory.newDocumentBuilder();
			// 解析的验证文件XSD来源
			documentBuilder.setEntityResolver(new EntityResolver() 
				@Override
			public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException 
				Map<String, String> schemaMappings = new HashMap<String, String>();
				try 
				Properties mappings = PropertiesLoaderUtils.loadAllProperties("META-INF/spring.schemas", null);
					CollectionUtils.mergePropertiesIntoMap(mappings, schemaMappings);
					ResourceLoader rl = new DefaultResourceLoader();
					Resource resource = rl.getResource(schemaMappings.get(systemId));
					InputSource inputSource = new InputSource(resource.getInputStream());
					inputSource.setPublicId(publicId);
					inputSource.setSystemId(systemId);
					return inputSource;
					 catch (Exception e) 
						e.printStackTrace();
					
					return null;
				
			);

			// 解析错误处理
			documentBuilder.setErrorHandler(new ErrorHandler() 
				@Override
				public void warning(SAXParseException exception) throws SAXException 
					logger.warn(exception.getMessage());
				

				@Override
				public void fatalError(SAXParseException exception) throws SAXException 
					logger.error(exception.getMessage());
				

				@Override
				public void error(SAXParseException exception) throws SAXException 
					logger.error(exception.getMessage());
				
			);
			Document document = documentBuilder.parse(file);

			Element root = document.getDocumentElement();
			printNode(root);
		 catch (ParserConfigurationException e) 
			// TODO Auto-generated catch block
			e.printStackTrace();
		 catch (SAXException e) 
			// TODO Auto-generated catch block
			e.printStackTrace();
		 catch (IOException e) 
			// TODO Auto-generated catch block
			e.printStackTrace();
		

	

	/**
	 * 递归打印document的主要节点
	 * 
	 * @param e
	 */
	private static void printNode(Element e) 
		if (e.hasChildNodes()) 
			NodeList subList = e.getChildNodes();
			for (int i = 0; i < subList.getLength(); i++) 
				Node n = subList.item(i);
				if (n instanceof Element) 
					printNode((Element) n);
				
			
		 else 
			StringBuffer sb = new StringBuffer();
			sb.append("<").append(e.getNodeName());
			if (e.hasAttributes()) 
				NamedNodeMap attr = e.getAttributes();
				for (int i = 0; i < attr.getLength(); i++) 
					sb.append(" ").append(attr.item(i).getNodeName()).append("=\\"").append(attr.item(i).getNodeValue()).append("\\"");
				
			
			sb.append(">");

			String content = e.getNodeValue();
			if (StringUtils.isNotEmpty(content)) 
				sb.append(content);
			
			sb.append("</" + e.getNodeName() + ">");
			System.out.println(sb);

		
	

	/**
	 * SAX解析测试
	 * 
	 * @param file
	 */
	public static void testSAXParser(File file) 

		try 
			SAXParserFactory factory = SAXParserFactory.newInstance();
			factory.setNamespaceAware(true);
			factory.setValidating(true);

			SAXParser parser = factory.newSAXParser();
			parser.parse(file, new SAXParserHandler());
		 catch (ParserConfigurationException e) 
			e.printStackTrace();
		 catch (SAXException e) 
			e.printStackTrace();
		 catch (IOException e) 
			e.printStackTrace();
		
	

	/**
	 * SAP解析事件回调
	 * 
	 * @author zhangpu
	 * 
	 */
	static class SAXParserHandler extends DefaultHandler 
		/**
		 * uri: Namespace URI, localName: 没有前缀的节点名称, qName: 节点全名,包括NAMEPSACE前缀,
		 * Attributes: 属性
		 */
		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException 
			logger.info("startElement - localName:" + localName + "; qName:" + qName + "; uri:" + uri);
			for (int i = 0; i < attributes.getLength(); i++) 
				logger.info("attribute_" + i + " - " + attributes.getLocalName(i) + " : " + attributes.getValue(i));
			
		

		@Override
		public void endElement(String uri, String localName, String qName) throws SAXException 
			logger.info("endElement - localName:" + localName + "; qName:" + qName + "; uri:" + uri);
		

		@Override
		public void characters(char[] ch, int start, int length) throws SAXException 
			String content = new String(ch, start, length);
			content = content.trim();
			if (content.length() > 0) 
				logger.info("characters: " + content);
			
		

	


在上面代码的DOM解析方式中,如果XML文档内容合法(如下所示):

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns:context="http://www.springframework.org/schema/context" xmlns:aop="http://www.springframework.org/schema/aop"
	xmlns:tx="http://www.springframework.org/schema/tx"
	xsi:schemaLocation="http://www.springframework.org/schema/beans
                     http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
                     http://www.springframework.org/schema/context
                     http://www.springframework.org/schema/context/spring-context-3.0.xsd
                     http://www.springframework.org/schema/aop
                     http://www.springframework.org/schema/aop/spring-aop-3.0.xsd
                     http://www.springframework.org/schema/tx
                     http://www.springframework.org/schema/tx/spring-tx-3.0.xsd">

	<context:annotation-config />

	<context:property-placeholder location="classpath:database.properties" />

	<bean id="userManager" class="main.spring.UserManagerImpl" />

</beans>

结果:

   <context:annotation-config></context:annotation-config><context:property-placeholder ignore-resource-not-found="false" ignore-unresolvable="false" local-override="false" location="classpath:database.properties" system-properties-mode="FALLBACK"></context:property-placeholder><bean autowire="default" autowire-candidate="default" class="main.spring.UserManagerImpl" id="userManager" lazy-init="default"></bean>

如果XML文档中有非法(不符合XSD定义的)的内容:

  

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns:context="http://www.springframework.org/schema/context" xmlns:aop="http://www.springframework.org/schema/aop"
	xmlns:tx="http://www.springframework.org/schema/tx"
	xsi:schemaLocation="http://www.springframework.org/schema/beans
                     http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
                     http://www.springframework.org/schema/context
                     http://www.springframework.org/schema/context/spring-context-3.0.xsd
                     http://www.springframework.org/schema/aop
                     http://www.springframework.org/schema/aop/spring-aop-3.0.xsd
                     http://www.springframework.org/schema/tx
                     http://www.springframework.org/schema/tx/spring-tx-3.0.xsd">

	<context:annotation-config />

	<context:property-placeholder location="classpath:database.properties" />

	<bean id="userManager" class="main.spring.UserManagerImpl" />
	
	<anyotherWithoutNode></anyotherWithoutNode>

</beans>

   结果:

2012-10-28 18:57:22,270 ERROR [main.xml.JAXPTest] - <cvc-complex-type.2.4.a: Invalid content starting with element 'anyotherWithoutNode'. The content must match '(("http://www.springframework.org/schema/beans":description)0-1,(("http://www.springframework.org/schema/beans":import)|("http://www.springframework.org/schema/beans":alias)|("http://www.springframework.org/schema/beans":bean)|(WC[##other:"http://www.springframework.org/schema/beans"])0-UNBOUNDED)0-UNBOUNDED)'.>
<context:annotation-config></context:annotation-config>
<context:property-placeholder ignore-resource-not-found="false" ignore-unresolvable="false" local-override="false" location="classpath:database.properties" system-properties-mode="FALLBACK"></context:property-placeholder>
<bean autowire="default" autowire-candidate="default" class="main.spring.UserManagerImpl" id="userManager" lazy-init="default"></bean>
<anyotherWithoutNode></anyotherWithoutNode>





以上是关于解析xml的主要内容,如果未能解决你的问题,请参考以下文章

全名解析和数据库比较

XML CDATA

雷林鹏分享: XML CDATA

解析xml

解析xml

XML 特殊字符处理和 CDATA