使用 POI 读取 Word docx 中的书签替换书签内容(汉字或合并外部文档内容)
Posted catoop
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 POI 读取 Word docx 中的书签替换书签内容(汉字或合并外部文档内容)相关的知识,希望对你有一定的参考价值。
通过操作书签可以实现 word 模板替换变量的功能场景。本文以下代码内容可以直接复制使用,并能正常编译运行。
添加 maven 依赖
<dependencies>
<!-- Apache POI OOXML module; the service class below imports org.apache.poi.xwpf.usermodel types -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version>
</dependency>
<!-- NOTE(review): no import shown in this listing references poi-scratchpad; confirm whether it is required -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.2</version>
</dependency>
<!-- poi-tl: source of com.deepoove.poi.xwpf.NiceXWPFDocument and XmlXWPFDocumentMerge, which the classes below extend -->
<dependency>
<groupId>com.deepoove</groupId>
<artifactId>poi-tl</artifactId>
<version>1.12.0</version>
</dependency>
</dependencies>
操作书签的代码
1、ShanhyXWPFDocumentMerge.java
package org.example;
import com.deepoove.poi.xwpf.NiceXWPFDocument;
import com.deepoove.poi.xwpf.XmlXWPFDocumentMerge;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* 文档合并
*
* @author shanhy
*/
public class ShanhyXWPFDocumentMerge extends XmlXWPFDocumentMerge
/**
* 将一个文档合并到另外一个文档指定段落的相对位置
*
* @param source 当前文档
* @param mergeIterator 被合入的文档
* @param targetParagraphNode 目标段落Node
* @param deleteTargetParagraph 是否删除目标段落自身
* @return
* @throws Exception
*/
public ShanhyXWPFDocument mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception
CTBody body = source.getDocument().getBody();
List<String> addParts = createMergeableStrings(source, mergeIterator);
String[] startEnd = truncatedStartEndXmlFragment(body);
// CTP mergedContainer = paragraph.getCTP();
XmlOptions options = new XmlOptions();
options.setUseSameLocale(((DomImpl.Dom) targetParagraphNode).locale());
CTP mergedBody = CTP.Factory.parse(startEnd[0] + String.join("", addParts) + startEnd[1], options);
// instead insert xml-fragment?
// new XWPFParagraph(CTP.Factory.parse(mergedBody.getDomNode(), options), source);
Node mergedContainerParentNode = targetParagraphNode.getParentNode();
// 将引入文档的整体插入到目标段落之前
mergedContainerParentNode.insertBefore(mergedBody.getDomNode(), targetParagraphNode);
if(deleteTargetParagraph)
// 删除掉目标段落自身
mergedContainerParentNode.removeChild(targetParagraphNode);
// mergedContainer.getDomNode().appendChild(CTP.Factory.parse(mergedBody.getDomNode(), options).getDomNode());
// mergedContainer.set(mergedBody);
// String xmlText = truncatedOverlapWP(body);
// body.set(CTBody.Factory.parse(xmlText));
// return source.generate(true);
return source;
/**
* 反射调用父类方法
*
* @param methodName
* @param params
* @throws NoSuchMethodException
* @throws InvocationTargetException
* @throws IllegalAccessException
*/
private Object invokeSuperMethod(String methodName, Class<?>[] paramClasses, Object[] params) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException
Method method = this.getClass().getSuperclass().getDeclaredMethod(methodName, paramClasses);
method.setAccessible(true);
return method.invoke(this, params);
@SuppressWarnings("unchecked")
private List<String> createMergeableStrings(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator)
try
Object obj = invokeSuperMethod("createMergeableStrings", new Class[]NiceXWPFDocument.class, Iterator.class, new Object[]source, mergeIterator);
if (obj instanceof List<?>)
return (List<String>) obj;
catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e)
throw new RuntimeException(e);
return new ArrayList<>();
private String[] truncatedStartEndXmlFragment(CTBody body)
try
Object obj = invokeSuperMethod("truncatedStartEndXmlFragment", new Class[]CTBody.class, new Object[]body);
return (String[]) obj;
catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e)
throw new RuntimeException(e);
private String truncatedOverlapWP(CTBody body)
try
Object obj = invokeSuperMethod("truncatedOverlapWP", new Class[]CTBody.class, new Object[]body);
return (String) obj;
catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e)
throw new RuntimeException(e);
2、ShanhyXWPFDocument.java
package org.example;
import com.deepoove.poi.xwpf.NiceXWPFDocument;
import org.w3c.dom.Node;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
/**
* 处理 Docx 文档内容处理
*
* @author shanhy
*/
public class ShanhyXWPFDocument extends NiceXWPFDocument
public ShanhyXWPFDocument(InputStream in) throws IOException
super(in);
/**
* 将一个文档合并到另外一个文档指定段落的相对位置
*
* @param source 当前文档
* @param mergeIterator 被合入的文档
* @param targetParagraphNode 目标段落Node
* @param deleteTargetParagraph 是否删除目标段落自身
*
* @throws Exception
*/
public void mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception
new ShanhyXWPFDocumentMerge().mergeToParagraphBefore(this, mergeIterator, targetParagraphNode, deleteTargetParagraph);
3、ShanhyDocxBookmarkService.java
package org.example;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.impl.store.DomImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* 书签替换处理类
*
* @author shanhy
*/
public class ShanhyDocxBookmarkService
/**
* 因为docx为xml格式的结构,一下为docx中定义的部分常量引用
**/
public static final String RUN_NODE_NAME = "w:r";
public static final String TEXT_NODE_NAME = "w:t";
public static final String BOOKMARK_START_TAG = "w:bookmarkStart";
public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";
public static final String BOOKMARK_ID_ATTR_ID = "w:id";
public static final String NODENAME_BODY = "w:body";
public static final String NODENAME_PARAGRAPH = "w:p";
public static final String BOOKMARK_ID_ATTR_NAME = "w:name";
public static final String STYLE_NODE_NAME = "w:rPr";
public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr";
/**
* 读取 docx 文件中的所有书签(注意不支持书签嵌套,书签嵌套书签的情况只识别最外层书签)
*
* @param docx
*/
public List<Node> getBookmarksFromDocx(XWPFDocument docx)
Node bodyNode = docx.getDocument().getBody().getDomNode();
// 递归读取 bookmarkStart 节点,返回bookmarkNode集合(<w:bookmarkStart w:id="1" w:name="书签名称"/>)
List<Node> bookmarkNodeList = new ArrayList<>();
getBookmarksFromNode(bodyNode, bookmarkNodeList);
return bookmarkNodeList;
/**
* 递归解析所有Node节点,将bookmark记录到集合中
*
* @param node
* @param bookmarkNodeList
*/
public void getBookmarksFromNode(Node node, List<Node> bookmarkNodeList)
if (node.getNodeName().equals(BOOKMARK_START_TAG))
bookmarkNodeList.add(node);
else if (node.getNodeName().equals(BOOKMARK_END_TAG) || node.getNodeName().equals(PARAGRAPH_PROPERTIES_NAME))
// Nothing
else
NodeList childNodes = node.getChildNodes();
for (int i = 0, j = childNodes.getLength(); i < j; i++)
Node childNode = childNodes.item(i);
getBookmarksFromNode(childNode, bookmarkNodeList);
/**
* docx 文件中书签的替换
*
* @param docx
* @param outputStream
* @param dataMap
* @throws IOException
*/
public void replaceDocxBookmarks(ShanhyXWPFDocument docx, OutputStream outputStream, Map<String, Object> dataMap) throws Exception
// 获取所有书签
List<Node> startBookmarkList = getBookmarksFromDocx(docx);
// 替换书签内容
for (Node startBookmarkNode : startBookmarkList)
String bookmarkName = startBookmarkNode.getAttributes().getNamedItem("w:name").getNodeValue();
if (dataMap.containsKey(bookmarkName))
Object data = dataMap.get(bookmarkName);
if (data instanceof String) // 内容是文本
replaceDocxBookmarkFromString(getFirstParentParagraphByNode(startBookmarkNode, docx), startBookmarkNode, (String) data);
else if (data instanceof ShanhyXWPFDocument) // 内容是外部 docx 文档
replaceDocxBookmarkFromDocx(docx, getFirstParentNodeByNode(startBookmarkNode, docx), startBookmarkNode, (ShanhyXWPFDocument) data);
else
throw new RuntimeException("替换书签的内容源数据格式暂不支持");
docx.write(outputStream);
docx.close();
/**
* 将一个docx文档替换到docx的书签中
* 1.要求书签尽量设置在文档的换行起点
* 2.被合入的docx会作为换行起点开始合入,如果bookmark没有设置在一个换行的起点,程序会自动从该bookmark节点开始寻找,
* 定位到父节点为body的对应p节点的下一个节点,然后将需要合入的文档所有内容追加到该p节点的下一个
*
* @param bookmarkParentParagraph
* @param startBookmarkNode
* @param content
*/
public void replaceDocxBookmarkFromString(XWPFParagraph bookmarkParentParagraph, Node startBookmarkNode, String content)
Node nextNode = startBookmarkNode.getNextSibling();
boolean contentReplaced = false;
while (nextNode != null)
// 节点是结束符
if (nextNode.getNodeName().equals(BOOKMARK_END_TAG))
break;
// 1.寻找startBookmark的下一个 w:r 节点,然后将该节点中的 w:t 节点的真正文字内容替换掉(这样可以保留原来的bookmark的文字样式)
if (!contentReplaced && nextNode.getNodeName().equals("w:r"))
NodeList runChildNodes = nextNode.getChildNodes();
for (int i = 0, j = runChildNodes.getLength(); i < j; i++)
if (runChildNodes.item(i).getNodeName().equals("w:t")) // w:t 是真正的文本内容
runChildNodes.item(i).getFirstChild().setNodeValue(content);
contentReplaced = true;
else
// 2.然后继续向下删除 endBookmark 之前的所有节点
startBookmarkNode.getParentNode().removeChild(nextNode);
nextNode = nextNode.getNextSibling();
//1-end
if (!contentReplaced)
XWPFRun run = bookmarkParentParagraph.createRun();
run.setText(content);
跪求POI根据模板生成word文档的例子。 注意啦,是用书签标记的方式,替换书签中的内容。
将 RTF/DOC/DOCX 文件插入 Word 文件中的书签位置
java poi 生成word表格怎么 让表格填充整个页面和合并单元格
分别使用docx4j,jacob将文字与图片插入word中书签位置
java用poi导出word文档,我要导出一个表格,表格的单元格中还要有一个表格,请问怎么实现
java 用POI 解析word中的表格,POI只能识别word中创建的表格。 如果表格是从Excel中copy过来的, POI无法识