doc以及docx文档转html文件(同时解析图片音频和视频)
Posted 整理是一切的开始
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了doc以及docx文档转html文件(同时解析图片音频和视频)相关的知识,希望对你有一定的参考价值。
话不多说:项目中遇到了将doc/docx转换为html的需求,例如文件预览——可以先把doc/docx文件转换成html,然后再进行预览。
1,加入解析word所需的依赖(Apache POI 以及 xdocreport)
<!-- Dependencies for converting docx/doc documents to HTML -->
<!-- xdocreport document module, version 1.0.6 -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>1.0.6</version>
</dependency>
<!-- Apache POI artifacts, all pinned to the same version (3.14) -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<!-- xdocreport XWPF-to-XHTML converter, same 1.0.6 version as the xdocreport document module -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
<!-- NOTE(review): both poi-ooxml-schemas (3.14) and ooxml-schemas (1.3) are declared; confirm that both are really needed -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
2,代码实现
// Directory used for images extracted during conversion; injected from the "file.upload" property.
@Value("${file.upload}") // example value: D:\\test\\files
private String path;
/**
 * Demo endpoint mapped to "/testDocx": converts one fixed sample .docx file
 * to an HTML file by delegating to docx2Html.
 */
@RequestMapping("/testDocx")
public void testDocx() throws ParserConfigurationException, TransformerException, IOException {
    // Hard-coded sample locations; both files live in the same folder.
    final String sourceDocx = "D:\\\\工作\\\\表单电子化\\\\02-test.docx";
    final String targetHtml = "D:\\\\工作\\\\表单电子化\\\\02-test.html";
    docx2Html(sourceDocx, targetHtml);
}
/**
 * Demo endpoint mapped to "/testDoc": converts one fixed sample .doc file
 * to an HTML file by delegating to doc2Html.
 */
@RequestMapping("/testDoc")
public void testDoc() throws ParserConfigurationException, TransformerException, IOException {
    // Hard-coded sample locations; both files live in the same folder.
    final String sourceDoc = "D:\\\\工作\\\\表单电子化\\\\test(王大刚_java).doc";
    final String targetHtml = "D:\\\\工作\\\\表单电子化\\\\test(王大刚_java).html";
    doc2Html(sourceDoc, targetHtml);
}
/**
 * Converts a .docx file to an HTML file.
 *
 * <p>Embedded images are handed to a {@code FileImageExtractor} rooted at the configured
 * {@code path} directory, and image URIs are resolved against that same directory.
 * The input stream, the document and the output stream are all closed before this
 * method returns, including when the conversion fails.
 *
 * @param docxFileUrl   file-system path of the source .docx file
 * @param htmlOutPutUrl file-system path of the HTML file to write; missing parent
 *                      directories are created
 * @throws IOException                  if the source cannot be read or the output cannot be written
 * @throws TransformerException         kept in the signature for existing callers
 * @throws ParserConfigurationException kept in the signature for existing callers
 */
public void docx2Html(String docxFileUrl, String htmlOutPutUrl) throws TransformerException, IOException, ParserConfigurationException {
    try (FileInputStream docxIn = new FileInputStream(docxFileUrl);
         XWPFDocument document = new XWPFDocument(docxIn)) {
        XHTMLOptions options = XHTMLOptions.create().indent(4);

        // Export images into the configured folder and resolve their URIs against it.
        File imageFolder = new File(path);
        options.setExtractor(new FileImageExtractor(imageFolder));
        options.URIResolver(new FileURIResolver(imageFolder));

        File outFile = new File(htmlOutPutUrl);
        // A bare file name has no parent; only create directories when there is one.
        File parentDir = outFile.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }

        try (OutputStream out = new FileOutputStream(outFile)) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}
/**
 * Converts a legacy .doc file to an HTML file.
 *
 * <p>Pictures contained in the document are written into the configured {@code path}
 * directory, and the generated HTML refers to them by {@code path + File.separator + name}.
 * The HTML is serialized as UTF-8 and written through {@code writeFile}.
 *
 * @param docFileUrl    file-system path of the source .doc file
 * @param htmlOutPutUrl file-system path of the HTML file to write
 * @throws IOException                  if the source file is missing or unreadable, or a picture
 *                                      cannot be written once its target file has been opened
 * @throws ParserConfigurationException if no DOM document builder can be created
 * @throws TransformerException         if serializing the HTML DOM fails
 */
public void doc2Html(String docFileUrl, String htmlOutPutUrl) throws IOException, ParserConfigurationException, TransformerException {
    // A missing source file now surfaces as FileNotFoundException (an IOException)
    // instead of being logged and then failing later on a null stream.
    HWPFDocument wordDocument;
    try (FileInputStream fileInputStream = new FileInputStream(new File(docFileUrl))) {
        wordDocument = new HWPFDocument(fileInputStream);
    }

    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    wordToHtmlConverter.setPicturesManager(new PicturesManager() {
        @Override
        public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
            // The returned value is the picture location used in the generated HTML.
            return path + File.separator + suggestedName;
        }
    });
    wordToHtmlConverter.processDocument(wordDocument);

    // Save every picture into the configured directory.
    List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
    if (pics != null) {
        for (Picture pic : pics) {
            String pictureFile = path + File.separator + pic.suggestFullFileName();
            // try-with-resources closes each picture stream; the original leaked one per picture.
            try (FileOutputStream pictureOut = new FileOutputStream(pictureFile)) {
                pic.writeImageContent(pictureOut);
            } catch (FileNotFoundException e) {
                // Best effort, as before: a picture that cannot be created does not abort the conversion.
                logger.error("===>failed to save picture: " + pictureFile, e);
            }
        }
    }

    Document htmlDocument = wordToHtmlConverter.getDocument();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

    // Decode with the same charset the serializer used; the platform default
    // charset would corrupt non-ASCII text on systems that are not UTF-8.
    writeFile(out.toString("utf-8"), htmlOutPutUrl);
}
/**
 * Writes {@code content} to the file at {@code path}, encoded as UTF-8.
 * An existing file is overwritten.
 *
 * <p>This method is best effort: an I/O failure (including one raised while the
 * buffered data is flushed on close) is reported by printing the stack trace and
 * is not propagated to the caller.
 *
 * @param content text to write
 * @param path    file-system path of the target file
 */
public static void writeFile(String content, String path) {
    // try-with-resources closes both streams even if one close fails, and routes a
    // flush-on-close failure into the catch block instead of silently dropping it.
    try (FileOutputStream fos = new FileOutputStream(new File(path));
         BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"))) {
        bw.write(content);
    } catch (IOException ioe) {
        // FileNotFoundException is an IOException, so it is handled here as well.
        ioe.printStackTrace();
    }
}
以上是关于doc以及docx文档转html文件(同时解析图片音频和视频)的主要内容,如果未能解决你的问题,请参考以下文章