Android实战开发篇 解析读取复杂Word、Excel、PPT
Posted 彭老希
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Android实战开发篇 解析读取复杂Word、Excel、PPT相关的知识,希望对你有一定的参考价值。
一、jar包导入
Android实战开发篇 读取Word文档的 doc 与 docx 格式文本内容
二、文档读取工具 - 转换格式 html
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.text.TextUtils;
import android.util.Log;
import android.util.Xml;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFPalette;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.util.CellRangeAddress;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
/**
 * @ClassName : DocumenReadingContentUtil.java
 * @Function : Document reading utility - converts documents to HTML
 * @Description :
 * @Idea :
 * @Encourage : Do everything you can right now, and then decide.
 * @date : 2021/8/23
 */
public class DocumenReadingContentUtil {
// Buffer size constant; presumably a byte count for stream copies — it is not used in the visible part of the file.
static final int BUFFER = 2048;
// NOTE(review): looks like a running picture counter shared by all instances — confirm against the picture-handling code.
public static int presentPicture = 0;
// Paragraph range that readDOC() walks; presumably populated by getRange(), which read() calls first for .doc files.
public Range range = null;
// NOTE(review): presumably the parsed .doc document that `range` is taken from — confirm in getRange().
public HWPFDocument hwpf = null;
// Path of the generated HTML file; readDOC()/readDOCX() open it for writing and read() exposes it via returnPath.
public String htmlPath;
// NOTE(review): presumably the directory or file path where extracted pictures are stored — confirm in makeFile().
public String picturePath;
// Raw list; presumably the pictures extracted from the document — element type is not visible here.
public List pictures;
// Supplies the next Table each time readDOC() meets a paragraph that is inside a table.
public TableIterator tableIterator;
// Only referenced by a commented-out assignment in the constructor (screen width minus 10).
public int screenWidth;
// Stream onto the HTML file at htmlPath; opened by readDOC()/readDOCX().
public FileOutputStream output;
// File object for htmlPath, created together with `output`.
public File myFile;
// "file:///" + htmlPath, set by read() once a conversion branch has run; empty until then.
public String returnPath = "";
// Not used in the visible part of the file.
public String data = "";
// Not used in the visible part of the file.
StringBuffer lsb = new StringBuffer();
// Path of the source document, set by the constructor; read() dispatches on its extension.
private String nameStr;
/**
 * Creates the reader and immediately converts the given document by calling {@link #read()}.
 *
 * <p>The constructor must carry the name of the enclosing class
 * ({@code DocumenReadingContentUtil}); under any other name the declaration is a
 * method without a return type and does not compile.
 *
 * @param namepath path of the source document; its extension selects the conversion branch
 */
public DocumenReadingContentUtil(String namepath) {
    // screenWidth was meant to be "screen width - 10", but that lookup is disabled
    // because this class has no access to a WindowManager.
    this.nameStr = namepath;
    read();
}
/**
 * Returns the display value of a cell.
 *
 * <ul>
 *   <li>string cells: the rich-text content as a plain string;</li>
 *   <li>date-formatted numeric cells: the date rendered as {@code yyyy-MM-dd};</li>
 *   <li>other numeric cells: the number rendered with at most three decimals
 *       ({@code #0.###});</li>
 *   <li>every other cell type, and a {@code null} cell: the empty string.</li>
 * </ul>
 *
 * @param cell the cell to read, may be {@code null}
 * @return the formatted value, never {@code null}
 * @throws IOException declared for interface compatibility; not thrown by this body
 */
private static Object getCellValue(HSSFCell cell) throws IOException {
    if (cell == null) {
        // Rows can contain undefined cells; treat them like blank ones.
        return "";
    }
    int cellType = cell.getCellType();
    if (cellType == HSSFCell.CELL_TYPE_STRING) {
        return cell.getRichStringCellValue().toString();
    }
    if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
        if (HSSFDateUtil.isCellDateFormatted(cell)) {
            Date date = cell.getDateCellValue();
            return new SimpleDateFormat("yyyy-MM-dd").format(date);
        }
        // The previous BigDecimal rounding was overwritten right away and could throw
        // NumberFormatException for NaN/Infinity, so only the DecimalFormat result is kept.
        return new DecimalFormat("#0.###").format(cell.getNumericCellValue());
    }
    return "";
}
/**
 * Finds the first merged region that contains the given cell and reports how many
 * columns that region spans.
 *
 * @param sheet   the worksheet whose merged regions are searched
 * @param cellRow row index of the cell being checked
 * @param cellCol column index of the cell being checked
 * @return the column span of the containing merged region, or 0 when the cell is not merged
 * @throws IOException declared for interface compatibility; not thrown by this body
 */
private static int getMergerCellRegionCol(HSSFSheet sheet, int cellRow,
        int cellCol) throws IOException {
    final int regionCount = sheet.getNumMergedRegions();
    for (int idx = 0; idx < regionCount; idx++) {
        CellRangeAddress region = (CellRangeAddress) sheet.getMergedRegion(idx);
        boolean rowInside = region.getFirstRow() <= cellRow && cellRow <= region.getLastRow();
        boolean colInside = region.getFirstColumn() <= cellCol && cellCol <= region.getLastColumn();
        if (rowInside && colInside) {
            // Inclusive bounds, hence the +1.
            return region.getLastColumn() - region.getFirstColumn() + 1;
        }
    }
    return 0;
}
/**
 * Finds the first merged region that contains the given cell and reports how many
 * rows that region spans.
 *
 * @param sheet   the worksheet whose merged regions are searched
 * @param cellRow row index of the cell being checked
 * @param cellCol column index of the cell being checked
 * @return the row span of the containing merged region, or 0 when the cell is not merged
 * @throws IOException declared for interface compatibility; not thrown by this body
 */
private static int getMergerCellRegionRow(HSSFSheet sheet, int cellRow,
        int cellCol) throws IOException {
    int retVal = 0;
    int sheetMergerCount = sheet.getNumMergedRegions();
    for (int i = 0; i < sheetMergerCount; i++) {
        CellRangeAddress cra = (CellRangeAddress) sheet.getMergedRegion(i);
        int firstRow = cra.getFirstRow(); // first row of the merged region
        int firstCol = cra.getFirstColumn(); // first column of the merged region
        int lastRow = cra.getLastRow(); // last row of the merged region
        int lastCol = cra.getLastColumn(); // last column of the merged region
        boolean inRows = cellRow >= firstRow && cellRow <= lastRow;
        boolean inCols = cellCol >= firstCol && cellCol <= lastCol;
        if (inRows && inCols) {
            retVal = lastRow - firstRow + 1; // inclusive bounds, hence the +1
            break;
        }
    }
    // Return the computed span; the method previously ended with a hard-coded 0.
    return retVal;
}
/**
 * Deletes a file, or a directory together with everything below it.
 *
 * <p>A {@code null} argument, a path that does not exist, and a directory whose
 * content cannot be listed are tolerated: nothing is thrown, and whatever can be
 * deleted is deleted.
 *
 * @param file the file or directory to remove, may be {@code null}
 */
public static void deleteAll(File file) {
    if (file == null) {
        return;
    }
    // listFiles() yields null for plain files, missing paths and unreadable directories,
    // so the null check doubles as the "is a listable directory" test.
    File[] children = file.listFiles();
    if (children != null) {
        for (File child : children) {
            deleteAll(child);
        }
    }
    // Children are gone by now (or there were none), so the entry itself can be removed.
    file.delete();
}
/**
 * Runs the conversion that matches the extension of the source path and, when a
 * branch has run, stores the {@code file:///} URL of the generated HTML in
 * {@code returnPath}.
 *
 * <p>Handled extensions: {@code .doc}, {@code .docx}, {@code .xls} (the HTML file is
 * prepared but the sheet reader call is disabled) and {@code .xlsx}. Any other
 * extension leaves {@code returnPath} untouched.
 */
public void read() {
    if (this.nameStr.endsWith(".doc")) {
        this.getRange();
        this.makeFile();
        this.readDOC();
        publishHtmlPath();
    }
    if (this.nameStr.endsWith(".docx")) {
        this.makeFile();
        this.readDOCX();
        publishHtmlPath();
    }
    if (this.nameStr.endsWith(".xls")) {
        try {
            this.makeFile();
            // this.readXLS();
            publishHtmlPath();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (this.nameStr.endsWith(".xlsx")) {
        this.makeFile();
        this.readXLSX();
        publishHtmlPath();
    }
}

/** Exposes the generated HTML file as a file URL and logs its path. */
private void publishHtmlPath() {
    returnPath = "file:///" + this.htmlPath;
    System.out.println("htmlPath" + this.htmlPath);
}
/* 读取word中的内容写到sdcard上的.html文件中 */
public void readDOC() {
try {
myFile = new File(htmlPath);
output = new FileOutputStream(myFile);
String head = "<html><meta charset=\\"utf-8\\"><body>";
String tagBegin = "<p>";
String tagEnd = "</p>";
output.write(head.getBytes());
// 得到页面所有的段落数
int numParagraphs = range.numParagraphs();
// 遍历段落数
for (int i = 0; i < numParagraphs; i++) {
// 得到文档中的每一个段落
Paragraph p = range.getParagraph(i);
if (p.isInTable()) {
int temp = i;
if (tableIterator.hasNext()) {
String tableBegin = "<table style=\\"border-collapse:collapse\\" border=1 bordercolor=\\"black\\">";
String tableEnd = "</table>";
String rowBegin = "<tr>";
String rowEnd = "</tr>";
String colBegin = "<td>";
String colEnd = "</td>";
Table table = tableIterator.next();
output.write(tableBegin.getBytes());
int rows = table.numRows();
for (int r = 0; r < rows; r++) {
output.write(rowBegin.getBytes());
TableRow row = table.getRow(r);
int cols = row.numCells();
int rowNumParagraphs = row.numParagraphs();
int colsNumParagraphs = 0;
for (int c = 0; c < cols; c++) {
output.write(colBegin.getBytes());
TableCell cell = row.getCell(c);
int max = temp + cell.numParagraphs();
colsNumParagraphs = colsNumParagraphs
+ cell.numParagraphs();
for (int cp = temp; cp < max; cp++) {
Paragraph p1 = range.getParagraph(cp);
output.write(tagBegin.getBytes());
writeParagraphContent(p1);
output.write(tagEnd.getBytes());
temp++;
}
output.write(colEnd.getBytes());
}
int max1 = temp + rowNumParagraphs;
for (int m = temp + colsNumParagraphs; m < max1; m++) {
temp++;
}
output.write(rowEnd.getBytes());
}
output.write(tableEnd.getBytes());
}
i = temp;
} else {
output.write(tagBegin.getBytes());
writeParagraphContent(p);
output.write(tagEnd.getBytes());
}
}
String end = "</body></html>";
output.write(end.getBytes());
output.close();
} catch (Exception e) {
System.out.println("readAndWrite Exception:" + e.getMessage());
e.printStackTrace();
}
}
public void readDOCX() {
String river = "";
try {
// new一个File,路径为html文件
this.myFile = new File(this.htmlPath);
// new一个流,目标为html文件
this.output = new FileOutputStream(this.myFile);
// 定义头文件,我在这里加了utf-8,不然会出现乱码
String head = "<!DOCTYPE><html><meta charset=\\"utf-8\\"><body>";
String end = "</body></html>";
// 段落开始,标记开始
String tagBegin = "<p>";
// 段落结束
String tagEnd = "</p>";
String tableBegin = "<table style=\\"border-collapse:collapse\\" border=1 bordercolor=\\"black\\">";
String tableEnd = "</table>";
String rowBegin = "<tr>";
String rowEnd = "</tr>";
String colBegin = "<td>";
String colEnd = "</td>";
String style = "style=\\"";
// 写如头部
this.output.write(head.getBytes());
ZipFile xlsxFile = new ZipFile(new File(this.nameStr));
ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");
InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
XmlPullParser xmlParser = Xml.newPullParser();
xmlParser.setInput(inputStream, "utf-8");
int evtType = xmlParser.getEventType();
// 是表格 用来统计 列 行 数
boolean isTable = false;
// 大小状态
boolean isSize = false;
// 颜色状态
boolean isColor = false;
// 居中状态
boolean isCenter = false;
// 居右状态
boolean isRight = false;
// 是斜体
boolean isItalic = false;
// 是下划线
boolean isUnderline = false;
// 加粗
boolean isBold = false;
// 在那个r中
boolean isR = false;
boolean isStyle = false;
// docx 压缩包中的图片名 iamge1 开始 所以索引从1开始
int pictureIndex = 1;
while (evtType != XmlPullParser.END_DOCUMENT) {
switch (evtType) {
// 开始标签
case XmlPullParser.START_TAG:
String tag = xmlParser.getName();
if (tag.equalsIgnoreCase("r")) {
isR = true;
}
if (tag.equalsIgnoreCase("u")) {
// 判断下划线
isUnderline = true;
}
if (tag.equalsIgnoreCase("jc")) {
// 判断对齐方式
String align = xmlParser.getAttributeValue(0);
if (align.equals("center")) {
this.output.write("<center>".getBytes());
isCenter = true;
}
if (align.equals("right")) {
this.output.write("<div align=\\"right\\">"
.getBytes());
isRight = true;
}
}
if (tag.equalsIgnoreCase("color")) {
// 判断颜色
String color = xmlParser.getAttributeValue(0);
this.output.write(("<span style=\\"color:" + color + ";\\">").getBytes());
isColor = true;
}
if (tag.equalsIgnoreCase("sz")) {
// 判断大小
if (isR == true) {
int size = decideSize(Integer.valueOf(xmlParser.getAttributeValue(0)));
this.output.write(("<font size=" + size + ">").getBytes());
isSize = true;
}
}
// 下面是表格处理
if (tag.equalsIgnoreCase("tbl")) {
// 检测到tbl 表格开始
this.output.write(tableBegin.getBytes());
isTable = true;
}
if (tag.equalsIgnoreCase("tr")) {
// 行
this.output.write(rowBegin.getBytes());
}
if (tag.equalsIgnoreCase("tc")) {
// 列
this.output.write(colBegin.getBytes());
}
if (tag.equalsIgnoreCase("pic")) {
// 检测到标签 pic 图片
String entryName_jpeg = "word/media/image"
+ pictureIndex + ".jpeg";
String entryName_png = "word/media/image"
+ pictureIndex + ".png";
String entryName_gif = "word/media/image"
+ pictureIndex + ".gif";
String entryName_wmf = "word/media/image"
+ pictureIndex + ".wmf";
ZipEntry sharePicture = null;
InputStream pictIS = null;
sharePicture = xlsxFile.getEntry(entryName_jpeg);
// 一下为读取docx的图片 转化为流数组
if (sharePicture == null) {
sharePicture = xlsxFile.getEntry(entryName_png);
}
if (sharePicture == null) {
sharePicture = xlsxFile.getEntry(entryName_gif);
}
if (sharePicture == null) {
sharePicture = xlsxFile.getEntry(entryName_wmf);
}
if (sharePicture != null) {
pictIS = xlsxFile.getInputStream(sharePicture);
ByteArrayOutputStream pOut = new ByteArrayOutputStream();
byte[以上是关于Android实战开发篇 解析读取复杂WordExcelPPT的主要内容,如果未能解决你的问题,请参考以下文章
限时下载 |《 Android物联网开发从入门到实战》国内第一本开发书籍!
JavaCV开发详解之21补充篇1:使用javacv读取海康大华平台和海康大华摄像头sdk回调视频裸流并解析