Android实战开发篇:解析读取复杂的 Word、Excel、PPT 文档

Posted 彭老希

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“Android实战开发篇:解析读取复杂的 Word、Excel、PPT 文档”相关的知识,希望对你有一定的参考价值。

一、jar包导入

Android实战开发篇 读取Word文档的 doc 与 docx 格式文本内容

二、文档读取工具 - 转换格式 html

import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.text.TextUtils;
import android.util.Log;
import android.util.Xml;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.util.CellRangeAddress;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

/**
 * @ClassName : DocumenReadingContentUtil.java
 * @Function : 文档读取工具 - 转换格式 html
 * @Description :
 * @Idea :
 * {@link  }
 * @Encourage :Do everything you can right now, and then decide.
 *              全力以赴,历而后择。
 * @date : 2021/8/23
 */
public class DocumenReadingContentUtil {

    // Buffer size constant; not referenced in the visible part of the class.
    // NOTE(review): presumably the chunk size for stream copies further down — confirm.
    static final int BUFFER = 2048;
    // Shared (static) picture counter; its use is not visible in this part of the file.
    public static int presentPicture = 0;
    // Text range of the .doc document; readDOC() iterates its paragraphs.
    public Range range = null;
    // Parsed .doc document handle; not referenced in the visible methods.
    // NOTE(review): presumably assigned by getRange() — confirm.
    public HWPFDocument hwpf = null;
    // Path of the generated HTML file; opened for writing by readDOC()/readDOCX()
    // and published through returnPath by read().
    public String htmlPath;
    // Picture output path; its use is not visible in this part of the file.
    public String picturePath;
    // Raw-typed list of pictures; element type is not established by the visible code.
    public List pictures;
    // Iterator over the tables of the .doc document; consumed by readDOC().
    public TableIterator tableIterator;
    // Target width; the only assignment in view is commented out in the constructor.
    public int screenWidth;
    // Stream writing the generated HTML; opened in readDOC()/readDOCX().
    public FileOutputStream output;
    // File object for htmlPath, created in readDOC()/readDOCX().
    public File myFile;
    // "file:///" URL of the generated HTML, set by read(); empty until a conversion ran.
    public String returnPath = "";
    // Not referenced in the visible methods.
    public String data = "";
    // Not referenced in the visible methods.
    StringBuffer lsb = new StringBuffer();
    // Path of the source document; read() selects the converter from its extension.
    private String nameStr;

    /**
     * Stores the path of the document to convert and immediately runs the conversion.
     *
     * <p>The constructor name must match the enclosing class
     * ({@code DocumenReadingContentUtil}); under any other name Java parses the
     * declaration as a method without a return type and the class does not compile.
     *
     * @param namepath path of the source document; {@code read()} selects the
     *                 converter from its file extension
     */
    public DocumenReadingContentUtil(String namepath) {
        // Disabled in the original and kept for reference:
        // this.screenWidth =
        // this.getWindowManager().getDefaultDisplay().getWidth() -
        // 10; // set the width to the screen width minus 10
        this.nameStr = namepath;
        read();
    }

    /**
     * Returns the display value of a spreadsheet cell.
     *
     * <ul>
     *   <li>String cells yield their rich-text content as a {@code String}.</li>
     *   <li>Date-formatted numeric cells yield the date formatted as {@code yyyy-MM-dd}.</li>
     *   <li>Other numeric cells yield the number formatted with the pattern {@code #0.###}
     *       (at most three fraction digits).</li>
     *   <li>Everything else (blank cells, other cell types, a {@code null} cell) yields
     *       the empty string.</li>
     * </ul>
     *
     * <p>The previous version also built a {@code BigDecimal} from the numeric value and
     * rounded it, but the result was overwritten on the next statement; the only effect
     * left was a {@code NumberFormatException} for NaN/infinite values. That dead
     * computation has been removed.
     *
     * @param cell the cell to read; may be {@code null}
     * @return the formatted value, never {@code null}
     * @throws IOException declared for signature compatibility with existing callers
     */
    private static Object getCellValue(HSSFCell cell) throws IOException {
        if (cell == null) {
            // A missing cell is treated like a blank one.
            return "";
        }
        if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
            return cell.getRichStringCellValue().toString();
        }
        if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
            if (HSSFDateUtil.isCellDateFormatted(cell)) {
                // Formatter is created per call: SimpleDateFormat is not thread-safe.
                return new SimpleDateFormat("yyyy-MM-dd").format(cell.getDateCellValue());
            }
            return new DecimalFormat("#0.###").format(cell.getNumericCellValue());
        }
        // Blank cells and every cell type not handled above.
        return "";
    }

    /**
     * Looks up the merged region containing the given cell and reports how many
     * columns that region spans.
     *
     * @param sheet   the worksheet whose merged regions are searched
     * @param cellRow row index of the cell being tested
     * @param cellCol column index of the cell being tested
     * @return the column span of the first merged region containing the cell,
     *         or 0 when the cell is not inside any merged region
     * @throws IOException declared by the signature; nothing in this body throws it directly
     */
    private static int getMergerCellRegionCol(HSSFSheet sheet, int cellRow,
                                              int cellCol) throws IOException {
        final int regionCount = sheet.getNumMergedRegions();
        for (int index = 0; index < regionCount; index++) {
            CellRangeAddress region = (CellRangeAddress) sheet.getMergedRegion(index);
            boolean rowInside = cellRow >= region.getFirstRow()
                    && cellRow <= region.getLastRow();
            boolean colInside = cellCol >= region.getFirstColumn()
                    && cellCol <= region.getLastColumn();
            if (rowInside && colInside) {
                // First matching region wins.
                return region.getLastColumn() - region.getFirstColumn() + 1;
            }
        }
        return 0;
    }

    /**
     * Looks up the merged region containing the given cell and reports how many
     * rows that region spans.
     *
     * <p>The previous version computed the span but ended with {@code return 0;},
     * so callers never received the merged row count. The computed value is now
     * returned.
     *
     * @param sheet   the worksheet whose merged regions are searched
     * @param cellRow row index of the cell being tested
     * @param cellCol column index of the cell being tested
     * @return the row span of the first merged region containing the cell,
     *         or 0 when the cell is not inside any merged region
     * @throws IOException declared by the signature; nothing in this body throws it directly
     */
    private static int getMergerCellRegionRow(HSSFSheet sheet, int cellRow,
                                              int cellCol) throws IOException {
        int retVal = 0;
        int sheetMergerCount = sheet.getNumMergedRegions();
        for (int i = 0; i < sheetMergerCount; i++) {
            CellRangeAddress cra = (CellRangeAddress) sheet.getMergedRegion(i);
            int firstRow = cra.getFirstRow();    // first row of the merged region
            int firstCol = cra.getFirstColumn(); // first column of the merged region
            int lastRow = cra.getLastRow();      // last row of the merged region
            int lastCol = cra.getLastColumn();   // last column of the merged region
            boolean insideRegion = cellRow >= firstRow && cellRow <= lastRow
                    && cellCol >= firstCol && cellCol <= lastCol;
            if (insideRegion) {
                retVal = lastRow - firstRow + 1; // number of merged rows
                break;
            }
        }
        return retVal;
    }

    /**
     * Deletes a file, or a directory together with everything below it.
     *
     * <p>Does nothing for {@code null} or for a path that does not exist. The previous
     * version dereferenced {@code file.list()} / {@code file.listFiles()} without a null
     * check; both return {@code null} for a missing path or an unreadable directory,
     * which caused a {@code NullPointerException}.
     *
     * <p>Deletion is best-effort: the result of {@link File#delete()} is not reported.
     *
     * @param file the file or directory to remove; may be {@code null}
     */
    public static void deleteAll(File file) {
        if (file == null) {
            return;
        }
        // listFiles() yields null for plain files, missing paths and I/O errors,
        // so a single null check covers every non-traversable case.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                deleteAll(child);
            }
        }
        // Removes the file itself, or the directory once it has been emptied.
        file.delete();
    }

    /**
     * Dispatches on the extension of {@code nameStr} and runs the matching conversion,
     * then publishes the resulting HTML location through {@code returnPath}.
     *
     * <p>Handled extensions: {@code .doc}, {@code .docx}, {@code .xls}, {@code .xlsx}.
     * Matching is case-sensitive. For {@code .xls} only the output file is prepared;
     * the call to the XLS reader is disabled in this version.
     */
    public void read() {
        if (this.nameStr.endsWith(".doc")) {
            this.getRange();
            this.makeFile();
            this.readDOC();
            publishHtmlPath();
        }

        if (this.nameStr.endsWith(".docx")) {
            this.makeFile();
            this.readDOCX();
            publishHtmlPath();
        }

        if (this.nameStr.endsWith(".xls")) {
            try {
                this.makeFile();
                // this.readXLS();  (XLS conversion call is disabled)
                publishHtmlPath();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        if (this.nameStr.endsWith(".xlsx")) {
            this.makeFile();
            this.readXLSX();
            publishHtmlPath();
        }
    }

    /** Exposes the generated HTML file as a file URL and echoes its path to stdout. */
    private void publishHtmlPath() {
        returnPath = "file:///" + this.htmlPath;
        // this.view.loadUrl("file:///" + this.htmlPath);
        System.out.println("htmlPath" + this.htmlPath);
    }

    /* 读取word中的内容写到sdcard上的.html文件中 */
    public void readDOC() {

        try {
            myFile = new File(htmlPath);
            output = new FileOutputStream(myFile);
            String head = "<html><meta charset=\\"utf-8\\"><body>";
            String tagBegin = "<p>";
            String tagEnd = "</p>";
            output.write(head.getBytes());
            // 得到页面所有的段落数
            int numParagraphs = range.numParagraphs();
            // 遍历段落数
            for (int i = 0; i < numParagraphs; i++) {
                // 得到文档中的每一个段落
                Paragraph p = range.getParagraph(i);
                if (p.isInTable()) {
                    int temp = i;
                    if (tableIterator.hasNext()) {
                        String tableBegin = "<table style=\\"border-collapse:collapse\\" border=1 bordercolor=\\"black\\">";
                        String tableEnd = "</table>";
                        String rowBegin = "<tr>";
                        String rowEnd = "</tr>";
                        String colBegin = "<td>";
                        String colEnd = "</td>";
                        Table table = tableIterator.next();
                        output.write(tableBegin.getBytes());
                        int rows = table.numRows();
                        for (int r = 0; r < rows; r++) {
                            output.write(rowBegin.getBytes());
                            TableRow row = table.getRow(r);
                            int cols = row.numCells();
                            int rowNumParagraphs = row.numParagraphs();
                            int colsNumParagraphs = 0;
                            for (int c = 0; c < cols; c++) {
                                output.write(colBegin.getBytes());
                                TableCell cell = row.getCell(c);
                                int max = temp + cell.numParagraphs();
                                colsNumParagraphs = colsNumParagraphs
                                        + cell.numParagraphs();
                                for (int cp = temp; cp < max; cp++) {
                                    Paragraph p1 = range.getParagraph(cp);
                                    output.write(tagBegin.getBytes());
                                    writeParagraphContent(p1);
                                    output.write(tagEnd.getBytes());
                                    temp++;
                                }
                                output.write(colEnd.getBytes());
                            }
                            int max1 = temp + rowNumParagraphs;
                            for (int m = temp + colsNumParagraphs; m < max1; m++) {
                                temp++;
                            }
                            output.write(rowEnd.getBytes());
                        }
                        output.write(tableEnd.getBytes());
                    }
                    i = temp;
                } else {
                    output.write(tagBegin.getBytes());
                    writeParagraphContent(p);
                    output.write(tagEnd.getBytes());
                }
            }
            String end = "</body></html>";
            output.write(end.getBytes());
            output.close();
        } catch (Exception e) {

            System.out.println("readAndWrite Exception:" + e.getMessage());
            e.printStackTrace();
        }
    }

    public void readDOCX() {
        String river = "";
        try {
            // new一个File,路径为html文件
            this.myFile = new File(this.htmlPath);
            // new一个流,目标为html文件
            this.output = new FileOutputStream(this.myFile);
            // 定义头文件,我在这里加了utf-8,不然会出现乱码
            String head = "<!DOCTYPE><html><meta charset=\\"utf-8\\"><body>";
            String end = "</body></html>";
            // 段落开始,标记开始
            String tagBegin = "<p>";
            // 段落结束
            String tagEnd = "</p>";
            String tableBegin = "<table style=\\"border-collapse:collapse\\" border=1 bordercolor=\\"black\\">";
            String tableEnd = "</table>";
            String rowBegin = "<tr>";
            String rowEnd = "</tr>";
            String colBegin = "<td>";
            String colEnd = "</td>";
            String style = "style=\\"";
            // 写如头部
            this.output.write(head.getBytes());
            ZipFile xlsxFile = new ZipFile(new File(this.nameStr));
            ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");
            InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
            XmlPullParser xmlParser = Xml.newPullParser();
            xmlParser.setInput(inputStream, "utf-8");
            int evtType = xmlParser.getEventType();
            // 是表格 用来统计 列 行 数
            boolean isTable = false;
            // 大小状态
            boolean isSize = false;
            // 颜色状态
            boolean isColor = false;
            // 居中状态
            boolean isCenter = false;
            // 居右状态
            boolean isRight = false;
            // 是斜体
            boolean isItalic = false;
            // 是下划线
            boolean isUnderline = false;
            // 加粗
            boolean isBold = false;
            // 在那个r中
            boolean isR = false;
            boolean isStyle = false;
            // docx 压缩包中的图片名 iamge1 开始 所以索引从1开始
            int pictureIndex = 1;
            while (evtType != XmlPullParser.END_DOCUMENT) {
                switch (evtType) {
                    // 开始标签
                    case XmlPullParser.START_TAG:
                        String tag = xmlParser.getName();

                        if (tag.equalsIgnoreCase("r")) {
                            isR = true;
                        }
                        if (tag.equalsIgnoreCase("u")) {
                            // 判断下划线
                            isUnderline = true;
                        }
                        if (tag.equalsIgnoreCase("jc")) {
                            // 判断对齐方式
                            String align = xmlParser.getAttributeValue(0);
                            if (align.equals("center")) {
                                this.output.write("<center>".getBytes());
                                isCenter = true;
                            }
                            if (align.equals("right")) {
                                this.output.write("<div align=\\"right\\">"
                                        .getBytes());
                                isRight = true;
                            }
                        }

                        if (tag.equalsIgnoreCase("color")) {
                            // 判断颜色
                            String color = xmlParser.getAttributeValue(0);
                            this.output.write(("<span style=\\"color:" + color + ";\\">").getBytes());
                            isColor = true;
                        }
                        if (tag.equalsIgnoreCase("sz")) {
                            // 判断大小
                            if (isR == true) {
                                int size = decideSize(Integer.valueOf(xmlParser.getAttributeValue(0)));
                                this.output.write(("<font size=" + size + ">").getBytes());
                                isSize = true;
                            }
                        }
                        // 下面是表格处理
                        if (tag.equalsIgnoreCase("tbl")) {
                            // 检测到tbl 表格开始
                            this.output.write(tableBegin.getBytes());
                            isTable = true;
                        }
                        if (tag.equalsIgnoreCase("tr")) {
                            // 行
                            this.output.write(rowBegin.getBytes());
                        }
                        if (tag.equalsIgnoreCase("tc")) {
                            // 列
                            this.output.write(colBegin.getBytes());
                        }

                        if (tag.equalsIgnoreCase("pic")) {
                            // 检测到标签 pic 图片
                            String entryName_jpeg = "word/media/image"
                                    + pictureIndex + ".jpeg";
                            String entryName_png = "word/media/image"
                                    + pictureIndex + ".png";
                            String entryName_gif = "word/media/image"
                                    + pictureIndex + ".gif";
                            String entryName_wmf = "word/media/image"
                                    + pictureIndex + ".wmf";
                            ZipEntry sharePicture = null;
                            InputStream pictIS = null;
                            sharePicture = xlsxFile.getEntry(entryName_jpeg);
                            // 一下为读取docx的图片 转化为流数组
                            if (sharePicture == null) {
                                sharePicture = xlsxFile.getEntry(entryName_png);
                            }
                            if (sharePicture == null) {
                                sharePicture = xlsxFile.getEntry(entryName_gif);
                            }
                            if (sharePicture == null) {
                                sharePicture = xlsxFile.getEntry(entryName_wmf);
                            }

                            if (sharePicture != null) {
                                pictIS = xlsxFile.getInputStream(sharePicture);
                                ByteArrayOutputStream pOut = new ByteArrayOutputStream();
                                byte[

(原文代码在此处被截断。)

以上是关于“Android实战开发篇:解析读取复杂的 Word、Excel、PPT 文档”的主要内容,如果未能解决你的问题,请参考以下文章:

限时下载 |《 Android物联网开发从入门到实战》国内第一本开发书籍!

Android实战开发篇 获取控件宽高

Android开发实战宝典,附答案+考点,源码解析

JavaCV开发详解之21补充篇1:使用javacv读取海康大华平台和海康大华摄像头sdk回调视频裸流并解析

JavaCV开发详解之21补充篇1:使用javacv读取海康大华平台和海康大华摄像头sdk回调视频裸流并解析

某大厂开发者对于Android多线程的总结,实战解析