PHP读取Word内容,哪个大神说下解决思路
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了PHP读取Word内容,那个大神说下解决思路相关的知识,希望对你有一定的参考价值。
COM组件可以但是两次就会卡死,不做考虑
说phpWord的 ,麻烦仔细看过再说话
你看一下phpword给的demo,里面有读取word的示例,效果也一样很差。
追问
很是头疼啊。。。。
参考技术A
读取用PHPWord没问题,我这里是在替换时出了问题。
ueditor如何实现word文档的导入和下载功能?
网上没有找到合适的解决方法,UEditor官网上提示说“word导入编辑上线”,然并卵,我们个人还是不知道如何实现。请大神给解答!
最新完整版JSP版的UEditor 1.4.3
1、UEditor没有提供word的导入功能,只能说是粘贴复制。
2、方案:用poi来提供word导入,思路是将word转换为html输出,再用UEditor提供的setContent()方法将html的内容添加到编辑器中。
方案缺点,一是poi对word文本的格式获取必须按setContent()可接受的方式进行;二是我暂时没发现poi可以提供获取段落格式(对齐方式、缩进量)的方法。
代码如下:
package demo;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
public class Word2Html
public static void main(String argv[])
String htmlContent = new String();
try
htmlContent = convert2Html("D://test//1.doc","D://test//1.html");
catch (Exception e)
e.printStackTrace();
System.out.println(htmlContent);
/**
* 将内容写入指定路径下的<a href="https://www.baidu.com/s?wd=html%E6%96%87%E4%BB%B6&tn=44039180_cpr&fenlei=mv6quAkxTZn0IZRqIHckPjm4nH00T1d9Pj9hrARdrAR1PhnLmWmL0ZwV5Hcvrjm3rH6sPfKWUMw85HfYnjn4nH6sgvPsT6KdThsqpZwYTjCEQLGCpyw9Uz4Bmy-bIi4WUvYETgN-TLwGUv3EnHnvrjDkP1mzPWRLrj6knj64rf" target="_blank" class="baidu-highlight">html文件</a>
* @param content
* @param path
*/
public static void writeFile(String content, String path)
FileOutputStream fos = null;
BufferedWriter bw = null;
try
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));
bw.write(content);
catch (FileNotFoundException fnfe)
fnfe.printStackTrace();
catch (IOException ioe)
ioe.printStackTrace();
finally
try
if (bw != null)
bw.close();
if (fos != null)
fos.close();
catch (IOException ie)
/**
* 转换为html
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
*/
public static String convert2Html(String fileName, String outPutFile)
throws TransformerException, IOException, ParserConfigurationException
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName)); //WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager()
/**
* 按建议名称保存图片
*/
public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches )
return "D:/test/"+suggestedName;
);
wordToHtmlConverter.processDocument(wordDocument);
//save pictures
List pics = wordDocument.getPicturesTable().getAllPictures();
if( pics!=null )
for(int i=0; i<pics.size(); i++)
Picture pic = (Picture)pics.get(i);
System.out.println();
try
pic.writeImageContent(new FileOutputStream("D:/test/" + pic.suggestFullFileName()));
catch (FileNotFoundException e)
e.printStackTrace();
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()), outPutFile);
return new String(out.toByteArray());
参考技术A
UEditor没有提供word的导入功能,只能说是粘贴复制。
另外的方案:用poi来提供word导入,思路是将word转换为html输出,再用UEditor提供的setContent()方法将html的内容添加到编辑器中。这个方案有两个缺点,一是poi对word文本的格式获取必须按setContent()可接受的方式进行;二是我暂时没发现poi可以提供获取段落格式(对齐方式、缩进量)的方法。
可以参考以下代码,这是从网上找的(不记得网页地址了,谢谢代码原主提供)
package demo;import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
public class Word2Html
public static void main(String argv[])
String htmlContent = new String();
try
htmlContent = convert2Html("D://test//1.doc","D://test//1.html");
catch (Exception e)
e.printStackTrace();
System.out.println(htmlContent);
/**
* 将内容写入指定路径下的html文件
* @param content
* @param path
*/
public static void writeFile(String content, String path)
FileOutputStream fos = null;
BufferedWriter bw = null;
try
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));
bw.write(content);
catch (FileNotFoundException fnfe)
fnfe.printStackTrace();
catch (IOException ioe)
ioe.printStackTrace();
finally
try
if (bw != null)
bw.close();
if (fos != null)
fos.close();
catch (IOException ie)
/**
* 转换为html
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
*/
public static String convert2Html(String fileName, String outPutFile)
throws TransformerException, IOException, ParserConfigurationException
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName)); //WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager()
/**
* 按建议名称保存图片
*/
public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches )
return "D:/test/"+suggestedName;
);
wordToHtmlConverter.processDocument(wordDocument);
//save pictures
List pics = wordDocument.getPicturesTable().getAllPictures();
if( pics!=null )
for(int i=0; i<pics.size(); i++)
Picture pic = (Picture)pics.get(i);
System.out.println();
try
pic.writeImageContent(new FileOutputStream("D:/test/" + pic.suggestFullFileName()));
catch (FileNotFoundException e)
e.printStackTrace();
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()), outPutFile);
return new String(out.toByteArray());
本回答被提问者采纳
参考技术B
这个编辑器有几个函数,可以在编辑器加载完后给编辑器赋值。具体你搜一下帮助文档。
ueditor_1_2_0-gbk完整包里面有个_examples文件夹,看一下帮助吧,里面获得内容、写入内容、判断是否有内容等函数都有,而且还有示例。
editor.getContent()
editor.setContent()
追问
这只是对编辑器进行的get和set,并不能实现Word文档的导入呢。
参考技术C
1、选中表格;
2、选择表格工具布局选项卡,在对齐方式处,根据需要选择一种即可,如图所示。
以上是关于PHP读取Word内容,那个大神说下解决思路的主要内容,如果未能解决你的问题,请参考以下文章
用php 读取word 文档内容 比如:word文档为试题等等
富文本编辑器内容实现word导出下载,请各位大神们指点,感激不尽