使用 POI 读取 Word 中的书签替换书签内容(doc和docx)
Posted catoop
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用 POI 读取 Word 中的书签替换书签内容(doc和docx)相关的知识,希望对你有一定的参考价值。
通过操作书签可以实现 Word 模板变量替换的功能场景。本文以下的代码内容可以直接复制使用,并能正常编译运行。
添加 maven 依赖
<!-- Apache POI artifacts used by the demo class below; all three are pinned to the same version. -->
<dependencies>
<!-- OOXML support; the demo imports org.apache.poi.xwpf.* (docx) classes. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.2</version>
</dependency>
<!-- NOTE(review): none of the imports in the demo class obviously come from poi-examples; confirm whether this dependency is actually required. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-examples</artifactId>
<version>5.2.2</version>
</dependency>
<!-- Legacy binary formats; the demo imports org.apache.poi.hwpf.* (doc) classes. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.2</version>
</dependency>
</dependencies>
操作书签的代码
package org.example;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.Node;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class BookmarkDemo
/**
* 因为docx为xml格式的结构,一下为docx中定义的部分常量引用
**/
public static final String RUN_NODE_NAME = "w:r";
public static final String TEXT_NODE_NAME = "w:t";
public static final String BOOKMARK_START_TAG = "w:bookmarkStart";
public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";
public static final String BOOKMARK_ID_ATTR_NAME = "w:id";
public static final String STYLE_NODE_NAME = "w:rPr";
public static void main(String[] args) throws IOException
// 读取 doc 文件中的所有书签
InputStream inputStream = Files.newInputStream(Paths.get("D:\\\\Desktop\\\\Welcome2.doc"));
getBookmarksByDoc(inputStream);
// 读取 doc 文件中的所有书签
InputStream inputStream2 = Files.newInputStream(Paths.get("D:\\\\Desktop\\\\Welcome.docx"));
getBookmarksByDocx(inputStream2);
// 替换文件中的 bookmark 内容
InputStream inputStream3 = Files.newInputStream(Paths.get("D:\\\\Desktop\\\\Welcome.docx"));
Map<String, String> dataMap = new HashMap<>();
dataMap.put("strong", "单红宇");
dataMap.put("footnotes", "李小雨");
replaceBookmarksByDocx(inputStream3, Files.newOutputStream(Paths.get("D:\\\\Desktop\\\\Welcome3.docx")), dataMap);
/**
* 读取 doc 文件中的所有书签
*
* @param inputStream
* @throws IOException
*/
public static void getBookmarksByDoc(InputStream inputStream) throws IOException
HWPFDocument wordDoc = new HWPFDocument(inputStream);
Bookmarks bookmarks = wordDoc.getBookmarks();
for (int b = 0; b < bookmarks.getBookmarksCount(); b++)
Bookmark bookmark = bookmarks.getBookmark(b);
String bookMarkText = new Range(bookmark.getStart(), bookmark.getEnd(), wordDoc).text();
System.out.println("[" + bookmark.getStart() + "; "
+ bookmark.getEnd() + "]: " + bookmark.getName() + " = " + bookMarkText);
/**
* 读取 docx 文件中的所有书签
*
* @param inputStream
* @throws IOException
*/
public static void getBookmarksByDocx(InputStream inputStream) throws IOException
XWPFDocument docx = new XWPFDocument(inputStream);
List<XWPFParagraph> paragraphList = docx.getParagraphs();
for (XWPFParagraph xwpfParagraph : paragraphList)
CTP ctp = xwpfParagraph.getCTP();
for (int dwI = 0; dwI < ctp.sizeOfBookmarkStartArray(); dwI++)
CTBookmark bookmark = ctp.getBookmarkStartArray(dwI);
String bookmarkName = bookmark.getName();
// 因为 docx 的结构是xml格式,它不像 doc 文档那样有具体的start和end值来定位一个bookmark的值范围,
// 所有如果你想要读取 docx 文档中bookmark的内容,你需要从 bookmarkStart 节点开始逐级逐层依次解析xml文
// 件的 nodeValue 值并进行拼接,直至读取到下一个名为 bookmarkEnd 的节点为止。
// 下面这个方法 getBookmarkTextContent() 是一个没有实现的方法,如有需要请自行实现
// String bookmarkTextContent = getBookmarkTextContent(bookmark);
System.out.println(bookmarkName);
/**
* docx 文件中书签的替换
*
* @param inputStream
* @param outputStream
* @param dataMap
* @throws IOException
*/
public static void replaceBookmarksByDocx(InputStream inputStream, OutputStream outputStream, Map<String, String> dataMap) throws IOException
XWPFDocument document = new XWPFDocument(inputStream).getXWPFDocument();
List<XWPFParagraph> paragraphList = document.getParagraphs();
for (XWPFParagraph xwpfParagraph : paragraphList)
CTP ctp = xwpfParagraph.getCTP();
for (int dwI = 0; dwI < ctp.sizeOfBookmarkStartArray(); dwI++)
CTBookmark bookmark = ctp.getBookmarkStartArray(dwI);
if (dataMap.containsKey(bookmark.getName()))
XWPFRun run = xwpfParagraph.createRun();
run.setText(dataMap.get(bookmark.getName()));
Node firstNode = bookmark.getDomNode();
Node nextNode = firstNode.getNextSibling();
while (nextNode != null)
// 循环查找结束符
String nodeName = nextNode.getNodeName();
if (nodeName.equals(BOOKMARK_END_TAG))
break;
// 删除中间的非结束节点,即删除原书签内容
Node delNode = nextNode;
nextNode = nextNode.getNextSibling();
ctp.getDomNode().removeChild(delNode);
if (nextNode == null)
// 始终找不到结束标识的,就在书签前面添加
ctp.getDomNode().insertBefore(run.getCTR().getDomNode(), firstNode);
else
// 找到结束符,将新内容添加到结束符之前,即内容写入bookmark中间
ctp.getDomNode().insertBefore(run.getCTR().getDomNode(), nextNode);
document.write(outputStream);
document.close();
/**
* doc 文件中书签的替换
*
* @param inputStream
* @param outputStream
* @param dataMap
* @throws IOException
*/
public static void replaceBookmarksByDoc(InputStream inputStream, OutputStream outputStream,
Map<String, String> dataMap) throws IOException
HWPFDocument document = new HWPFDocument(inputStream);
Bookmarks bookmarks = document.getBookmarks();
for (int dwI = 0; dwI < bookmarks.getBookmarksCount(); dwI++)
Bookmark bookmark = bookmarks.getBookmark(dwI);
if (dataMap.containsKey(bookmark.getName()))
Range range = new Range(bookmark.getStart(), bookmark.getEnd(), document);
range.replaceText(dataMap.get(bookmark.getName()), false);
document.write(outputStream);
(END)
以上是关于使用 POI 读取 Word 中的书签替换书签内容(doc和docx)的主要内容,如果未能解决你的问题,请参考以下文章
跪求POI根据模板生成word文档的例子。 注意啦,是用书签标记的方式,替换书签中的内容。