JAVA导出下载word文档(导出带富文本图片word)
Posted 闪电拉拉
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了JAVA导出下载word文档(导出带富文本图片word)相关的知识,希望对你有一定的参考价值。
导出并下载 Word 文档之前,需要先引入 jsoup 的 jar 包,用来解析和处理富文本中的 HTML 代码:
<dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.7.3</version> </dependency>
以下是几个必要的文件:
RichHtmlHandler.java
import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.UUID; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * @Description:富文本Html处理器,主要处理图片及编码 * */ public class RichHtmlHandler { private Document doc = null; private String html; private String docSrcParent = "paper.files"; //在paper.ftl文件里面找到,检索“Content-Location” private String docSrcLocationPrex = "file:///C:/D1324D12"; //在paper.ftl文件里面找到,检索“Content-Location” private String nextPartId = "01D2EB53.503F62F0"; //在paper.ftl文件里面找到,最末行 private String shapeidPrex = "_x56fe__x7247__x0020"; private String spidPrex = "_x0000_i"; private String typeid = "#_x0000_t75"; private String handledDocBodyBlock; private List<String> docBase64BlockResults = new ArrayList<String>(); private List<String> xmlImgRefs = new ArrayList<String>(); private String srcPath = ""; public RichHtmlHandler(){} public String getDocSrcLocationPrex() { return docSrcLocationPrex; } public void setDocSrcLocationPrex(String docSrcLocationPrex) { this.docSrcLocationPrex = docSrcLocationPrex; } public String getNextPartId() { return nextPartId; } public void setNextPartId(String nextPartId) { this.nextPartId = nextPartId; } public String getHandledDocBodyBlock() { String raw= WordHtmlGeneratorHelper.string2Ascii(doc.getElementsByTag("body").html()); return raw.replace("=3D", "=").replace("=", "=3D"); } public String getRawHandledDocBodyBlock() { String raw= doc.getElementsByTag("body").html(); return raw.replace("=3D", "=").replace("=", "=3D"); } public List<String> getDocBase64BlockResults() { return docBase64BlockResults; } public List<String> getXmlImgRefs() { return xmlImgRefs; } public String getShapeidPrex() { return shapeidPrex; } public void setShapeidPrex(String shapeidPrex) { this.shapeidPrex = 
shapeidPrex; } public String getSpidPrex() { return spidPrex; } public void setSpidPrex(String spidPrex) { this.spidPrex = spidPrex; } public String getTypeid() { return typeid; } public void setTypeid(String typeid) { this.typeid = typeid; } public String getDocSrcParent() { return docSrcParent; } public void setDocSrcParent(String docSrcParent) { this.docSrcParent = docSrcParent; } public String getHtml() { return html; } public void setHtml(String html) { this.html = html; } public RichHtmlHandler(String html, String srcPath) { this.html = html; this.srcPath = srcPath; doc = Jsoup.parse(wrappHtml(this.html)); try { handledHtml(false); } catch (IOException e) { e.printStackTrace(); } } public void re_init(String html){ doc=null; doc = Jsoup.parse(wrappHtml(html)); docBase64BlockResults.clear(); xmlImgRefs.clear(); } /** * @Description: 获得已经处理过的HTML文件 * @param @return * @return String * @throws IOException * @throws */ public void handledHtml(boolean isWebApplication) throws IOException { Elements imags = doc.getElementsByTag("img"); System.out.println("doc:\n"+doc); if (imags == null || imags.size() == 0) { // 返回编码后字符串 return; //handledDocBodyBlock = WordHtmlGeneratorHelper.string2Ascii(html); } // 转换成word mht 能识别图片标签内容,去替换html中的图片标签 for (Element item : imags) { // 把文件取出来 String src = item.attr("src"); String srcRealPath = srcPath + src; // String thepaths = RichHtmlHandler.class.getClassLoader().getResource("").toString(); // System.out.println("src="+src+" thepaths="+thepaths); if (isWebApplication) { // String contentPath=RequestResponseContext.getRequest().getContextPath(); // if(!StringUtils.isEmpty(contentPath)){ // if(src.startsWith(contentPath)){ // src=src.substring(contentPath.length()); // } // } // // srcRealPath = RequestResponseContext.getRequest().getSession() // .getServletContext().getRealPath(src); } File imageFile = new File(srcRealPath); String imageFielShortName = imageFile.getName(); String fileTypeName = 
WordImageConvertor.getFileSuffix(srcRealPath); String docFileName = "image" + UUID.randomUUID().toString() + "."+ fileTypeName; String srcLocationShortName = docSrcParent + "/" + docFileName; String styleAttr = item.attr("style"); // 样式 //高度 String imagHeightStr=item.attr("height"); if(StringUtils.isEmpty(imagHeightStr)){ imagHeightStr = getStyleAttrValue(styleAttr, "height"); } //宽度 String imagWidthStr=item.attr("width");; if(StringUtils.isEmpty(imagWidthStr)){ imagWidthStr = getStyleAttrValue(styleAttr, "width"); } imagHeightStr = imagHeightStr.replace("px", ""); imagWidthStr = imagWidthStr.replace("px", ""); if(StringUtils.isEmpty(imagHeightStr)){ //去得到默认的文件高度 imagHeightStr="0"; } if(StringUtils.isEmpty(imagWidthStr)){ imagWidthStr="0"; } int imageHeight = Integer.parseInt(imagHeightStr); int imageWidth = Integer.parseInt(imagWidthStr); // 得到文件的word mht的body块 String handledDocBodyBlock = WordImageConvertor.toDocBodyBlock(srcRealPath, imageFielShortName, imageHeight, imageWidth,styleAttr, srcLocationShortName, shapeidPrex, spidPrex, typeid); //这里的顺序有点问题:应该是替换item,而不是整个后面追加 //doc.rreplaceAll(item.toString(), handledDocBodyBlock); item.after(handledDocBodyBlock); // item.parent().append(handledDocBodyBlock); item.remove(); // 去替换原生的html中的imag String base64Content = WordImageConvertor.imageToBase64(srcRealPath); String contextLoacation = docSrcLocationPrex + "/" + docSrcParent + "/" + docFileName; String docBase64BlockResult = WordImageConvertor.generateImageBase64Block(nextPartId, contextLoacation, fileTypeName, base64Content); docBase64BlockResults.add(docBase64BlockResult); String imagXMLHref = "<o:File HRef=3D\"" + docFileName + "\"/>"; xmlImgRefs.add(imagXMLHref); } } private String getStyleAttrValue(String style, String attributeKey) { if (StringUtils.isEmpty(style)) { return ""; } // 以";"分割 String[] styleAttrValues = style.split(";"); for (String item : styleAttrValues) { // 在以 ":"分割 String[] keyValuePairs = item.split(":"); if 
(attributeKey.equals(keyValuePairs[0])) { return keyValuePairs[1]; } } return ""; } private String wrappHtml(String html){ // 因为传递过来都是不完整的doc StringBuilder sb = new StringBuilder(); sb.append("<html>"); sb.append("<body>"); sb.append(html); sb.append("</body>"); sb.append("</html>"); return sb.toString(); } public String getData(List<String> list){ String data = ""; if (list != null && list.size() > 0) { for (String string : list) { data += string + "\n"; } } return data; } }
WordHtmlGeneratorHelper.java
import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.List; import java.util.Map; import org.apache.commons.beanutils.PropertyUtils; import org.springframework.util.ReflectionUtils; import org.apache.commons.lang3.StringUtils; import org.springframework.util.ReflectionUtils.FieldCallback; /** * @Description:word 网页导出(单文件网页导出,mht文件格式) * */ public class WordHtmlGeneratorHelper { /** * @Description: 将字符换成3Dus-asci,十进制Accsii码 * @param @param source * @param @return * @return String * @throws */ public static String string2Ascii(String source){ if(source==null || source==""){ return null; } StringBuilder sb=new StringBuilder(); char[] c=source.toCharArray(); for(char item : c){ String itemascii=""; if(item>=19968 && item<40623){ itemascii=itemascii="&#"+(item & 0xffff)+";"; }else{ itemascii=item+""; } sb.append(itemascii); } return sb.toString(); } /** * @Description: 将object的所有属性值转成成3Dus-asci编码值 * @param @param object * @param @return * @return T * @throws */ public static <T extends Object> T handleObject2Ascii(final T toHandleObject){ class myFieldsCallBack implements FieldCallback{ @Override public void doWith(Field f) throws IllegalArgumentException, IllegalAccessException { if(f.getType().equals(String.class)){ //如果是字符串类型 f.setAccessible(true); String oldValue=(String)f.get(toHandleObject); if(!StringUtils.isEmpty(oldValue)){ f.set(toHandleObject, string2Ascii(oldValue)); } //f.setAccessible(false); } } } ReflectionUtils.doWithFields(toHandleObject.getClass(), new myFieldsCallBack()); return toHandleObject; } public static <T extends Object> List<T> handleObjectList2Ascii(final List<T> toHandleObjects){ for (T t : toHandleObjects) { handleObject2Ascii(t); } return toHandleObjects; } public static void handleAllObject(Map<String, Object> dataMap){ //去处理数据 for (Map.Entry<String, Object> entry : dataMap.entrySet()){ Object item=entry.getValue(); //判断object是否是primitive type 
if(isPrimitiveType(item.getClass())){ if(item.getClass().equals(String.class)){ item=WordHtmlGeneratorHelper.string2Ascii((String)item); entry.setValue(item); } }else if(isCollection(item.getClass())){ for (Object itemobject : (Collection)item) { WordHtmlGeneratorHelper.handleObject2Ascii(itemobject); } }else{ WordHtmlGeneratorHelper.handleObject2Ascii(item); } } } public static String joinList(List<String> list,String join ){ StringBuilder sb=new StringBuilder(); for (String t : list) { sb.append(t); if(!StringUtils.isEmpty(join)){ sb.append(join); } } return sb.toString(); } private static boolean isPrimitiveType(Class<?> clazz){ return clazz.isEnum() || CharSequence.class.isAssignableFrom(clazz) || Number.class.isAssignableFrom(clazz) || Date.class.isAssignableFrom(clazz); } private static boolean isCollection(Class<?> clazz){ return Collection.class.isAssignableFrom(clazz); } }
WordImageConvertor.java
import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.math.BigDecimal; import java.util.UUID; import javax.imageio.ImageIO; import org.apache.commons.codec.binary.Base64; import sun.misc.BASE64Encoder; /** * @Description:WORD 文档图片转换器 * */ public class WordImageConvertor { //private static Const WORD_IMAGE_SHAPE_TYPE_ID=""; /** * @Description: 将图片转换成base64编码的字符串 * @param @param imageSrc 文件路径 * @param @return * @return String * @throws IOException * @throws */ public static String imageToBase64(String imageSrc) throws IOException{ //判断文件是否存在 File file=new File(imageSrc); if(!file.exists()){ throw new FileNotFoundException("文件不存在!"); } StringBuilder pictureBuffer = new StringBuilder(); FileInputStream input=new FileInputStream(file); ByteArrayOutputStream out = new ByteArrayOutputStream(); //读取文件 //BufferedInputStream bi=new BufferedInputStream(in); Base64 base64=new Base64(); BASE64Encoder encoder=new BASE64Encoder(); byte[] temp = new byte[1024]; for(int len = input.read(temp); len != -1;len = input.read(temp)){ out.write(temp, 0, len); //out(pictureBuffer.toString()); //out.reset(); } pictureBuffer.append(new String( base64.encodeBase64Chunked(out.toByteArray()))); //pictureBuffer.append(encoder.encodeBuffer(out.toByteArray())); /*byte[] data=new byte[input.available()]; input.read(data); pictureBuffer.append(base64.encodeBase64String (data));*/ input.close(); /*BASE64Decoder decoder=new BASE64Decoder(); FileOutputStream write = new FileOutputStream(new File("c:\\test2.jpg")); //byte[] decoderBytes = decoder.decodeBuffer (pictureBuffer.toString()); byte[] decoderBytes = base64.decodeBase64(pictureBuffer.toString()); write.write(decoderBytes); write.close();*/ return pictureBuffer.toString(); } public static String toDocBodyBlock( String imageFilePath, String imageFielShortName, int imageHeight, int imageWidth, 
String imageStyle, String srcLocationShortName, String shapeidPrex,String spidPrex,String typeid){ //shapeid //mht文件中针对shapeid的生成好像规律,其内置的生成函数没法得知,但是只要保证其唯一就行 //这里用前置加32位的uuid来保证其唯一性。 String shapeid=shapeidPrex; shapeid+=UUID.randomUUID().toString(); //spid ,同shapeid处理 String spid=spidPrex; spid+=UUID.randomUUID().toString(); /* <!--[if gte vml 1]><v:shape id=3D"_x56fe__x7247__x0020_0" o:spid=3D"_x0000_i10= 26" type=3D"#_x0000_t75" alt=3D"725017921264249223.jpg" style=3D‘width:456.7= 5pt; height:340.5pt;visibility:visible;mso-wrap-style:square‘> <v:imagedata src=3D"file9462.files/image001.jpg" o:title=3D"725017921264= 249223"/> </v:shape><![endif]--><![if !vml]><img width=3D609 height=3D454 src=3D"file9462.files/image002.jpg" alt=3D725017921264249223.jpg v:shapes= =3D"_x56fe__x7247__x0020_0"><![endif]>*/ StringBuilder sb1=new StringBuilder(); sb1.append(" <!--[if gte vml 1]>"); sb1.append("<v:shape id=3D\"" + shapeid+"\""); sb1.append("\n"); sb1.append(" o:spid=3D\""+ spid +"\"" ); sb1.append(" type=3D\""+ typeid +"\" alt=3D\"" + imageFielShortName +"\""); sb1.append("\n"); sb1.append( " style=3D‘ " + generateImageBodyBlockStyleAttr(imageFilePath,imageHeight,imageWidth) + imageStyle +"‘"); sb1.append(">"); sb1.append("\n"); sb1.append(" <v:imagedata src=3D\"" + srcLocationShortName +"\"" ); sb1.append("\n"); sb1.append(" o:title=3D\"" + imageFielShortName.split("\\.")[0]+"\"" ); sb1.append("/>"); sb1.append("</v:shape>"); sb1.append("<![endif]-->"); //以下是为了兼容游览器显示时的效果,但是如果是纯word阅读的话没必要这么做。 /* StringBuilder sb2=new StringBuilder(); sb2.append(" <![if !vml]>"); sb2.append("<img width=3D"+imageWidth +" height=3D" +imageHeight + " src=3D\"" + srcLocationShortName +"\" alt=" +imageFielShortName+ " v:shapes=3D\"" + shapeid +"\">"); sb2.append("<![endif]>");*/ //return sb1.toString()+sb2.toString(); return sb1.toString(); } /** * @Description: 生成图片的base4块 * @param @param nextPartId * @param @param contextLoacation * @param @param ContentType * @param @param base64Content 
* @param @return * @return String * @throws */ public static String generateImageBase64Block(String nextPartId,String contextLoacation, String fileTypeName,String base64Content){ /*--=_NextPart_01D188DB.E436D870 Content-Location: file:///C:/70ED9946/file9462.files/image001.jpg Content-Transfer-Encoding: base64 Content-Type: image/jpeg base64Content */ StringBuilder sb=new StringBuilder(); sb.append("\n"); sb.append("\n"); sb.append("------=_NextPart_"+nextPartId); sb.append("\n"); sb.append("Content-Location: "+ contextLoacation); sb.append("\n"); sb.append("Content-Transfer-Encoding: base64"); sb.append("\n"); sb.append("Content-Type: " + getImageContentType(fileTypeName)); sb.append("\n"); sb.append("\n"); sb.append(base64Content); return sb.toString(); } private static String generateImageBodyBlockStyleAttr(String imageFilePath, int height,int width){ StringBuilder sb=new StringBuilder(); BufferedImage sourceImg; try { sourceImg = ImageIO.read(new FileInputStream(imageFilePath)); if(height==0){ height=sourceImg.getHeight(); } if(width==0){ width=sourceImg.getWidth(); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } //将像素转化成pt BigDecimal heightValue=new BigDecimal(height*12/16); heightValue= heightValue.setScale(2, BigDecimal.ROUND_HALF_UP); BigDecimal widthValue=new BigDecimal(width*12/16); widthValue= widthValue.setScale(2, BigDecimal.ROUND_HALF_UP); sb.append("height:"+heightValue +"pt;"); sb.append("width:"+widthValue +"pt;"); sb.append("visibility:visible;"); sb.append("mso-wrap-style:square; "); return sb.toString(); } private static String getImageContentType(String fileTypeName){ String result="image/jpeg"; //http://tools.jb51.net/table/http_content_type if(fileTypeName.equals("tif") || fileTypeName.equals("tiff")){ result="image/tiff"; }else if(fileTypeName.equals("fax")){ result="image/fax"; }else if(fileTypeName.equals("gif")){ result="image/gif"; }else if(fileTypeName.equals("ico")){ 
result="image/x-icon"; }else if(fileTypeName.equals("jfif") || fileTypeName.equals("jpe") ||fileTypeName.equals("jpeg") ||fileTypeName.equals("jpg")){ result="image/jpeg"; }else if(fileTypeName.equals("net")){ result="image/pnetvue"; }else if(fileTypeName.equals("png") || fileTypeName.equals("bmp") ){ result="image/png"; }else if(fileTypeName.equals("rp")){ result="image/vnd.rn-realpix"; }else if(fileTypeName.equals("rp")){ result="image/vnd.rn-realpix"; } return result; } public static String getFileSuffix(String srcRealPath){ int lastIndex = srcRealPath.lastIndexOf("."); String suffix = srcRealPath.substring(lastIndex + 1); // String suffix = srcRealPath.substring(srcRealPath.indexOf(".")+1); return suffix; } }
Test.java
以上是关于JAVA导出下载word文档(导出带富文本图片word)的主要内容,如果未能解决你的问题,请参考以下文章
java实现word导入导出富文本(含图片)-附完整测试用例