poi大数据读写excel

Posted 2020-08-09

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了poi大数据读写excel相关的知识，希望对你有一定的参考价值。

一、描述

前端时间写了注解方式Excel的读取和写入，它是根据注解完成Excel的操作，虽说支持大数据，但对于超大数据就无能为力了，因为它的读写期间都是将所有数据放入系统内存的，除非你有超大的内存。

因项目需要对超大数据的Excel读写操作，于是网上找了个超大数据的读写代码，这个不需要太大内存。并对此进行了简单的修改。

原理如下：

Excel超大数据读取：抽象Excel2007读取器，excel2007的底层数据结构是xml文件，采用SAX的事件驱动的方法解析 xml，需要继承DefaultHandler，在遇到文件内容时，事件会触发，这种做法可以大大降低内存的耗费，特别使用于大数据量的文件。

Excel超大数据写入：抽象excel2007读入器，先构建.xlsx一张模板，改写模板中的sheet.xml, 使用这种方法写入.xlsx文件，不需要太大的内存。

先看调用示例：

String file = "E:/导入测试数据.xlsx";
ExcelReader reader = new ExcelReader() {
public void getRows(int sheetIndex, int curRow, List<String> rowList) {
System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
}
};
reader.process(file, 1);
String file = "E:/导出测试数据.xlsx";
ExcelWriter writer = new ExcelWriter() {
public void generate() throws Exception {
// 电子表格开始
this.beginSheet();
for (int rownum = 0; rownum < 100; rownum++) {
// 插入新行
this.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this.createCell(0, "第 " + rownum + " 行");
this.createCell(1, 34343.123456789);
this.createCell(2, "23.67%");
this.createCell(3, "12:12:23");
this.createCell(4, "2014-10-11 12:12:23");
this.createCell(5, "true");
this.createCell(6, "false");
// 结束行
this.endRow();
}
// 电子表格结束
this.endSheet();
}
};
writer.process(file);
}

这里只展示了对数据的读取和写入，如果正式保存到数据库时建议读取一部分（如100条）再写入一次数据库，尽量不要读取一条就写入一条，这样会非常耗费资源。?

import java.io.InputStream;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* Excel超大数据读取，抽象Excel2007读取器，excel2007的底层数据结构是xml文件，采用SAX的事件驱动的方法解析
* xml，需要继承DefaultHandler，在遇到文件内容时，事件会触发，这种做法可以大大降低 内存的耗费，特别使用于大数据量的文件。
* @version 2014-9-2
*/
public abstract class ExcelReader extends DefaultHandler {
// 共享字符串表
private SharedStringsTable sst;
// 上一次的内容
private String lastContents;
private boolean nextIsString;
private int sheetIndex = -1;
private List<String> rowList = new ArrayList<String>();
// 当前行
private int curRow = 0;
// 当前列
private int curCol = 0;
// 日期标志
private boolean dateFlag;
// 数字标志
private boolean numberFlag;
private boolean isTElement;
/**
* 遍历工作簿中所有的电子表格
* @param filename
* @throws Exception
*/
public void process(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
/**
* 只遍历一个电子表格，其中sheetId为要遍历的sheet索引，从1开始，1-3
* @param filename
* @param sheetId
* @throws Exception
*/
public void process(String filename, int sheetId) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// 根据 rId# 或 rSheet# 查找sheet
InputStream sheet2 = r.getSheet("rId" + sheetId);
sheetIndex++;
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst)
throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// System.out.println("startElement: " + localName + ", " + name + ", " + attributes);
// c => 单元格
if ("c".equals(name)) {
// 如果下一个元素是 SST 的索引，则将nextIsString标记为true
String cellType = attributes.getValue("t");
if ("s".equals(cellType)) {
nextIsString = true;
} else {
nextIsString = false;
}
// 日期格式
String cellDateType = attributes.getValue("s");
if ("1".equals(cellDateType)) {
dateFlag = true;
} else {
dateFlag = false;
}
String cellNumberType = attributes.getValue("s");
if ("2".equals(cellNumberType)) {
numberFlag = true;
} else {
numberFlag = false;
}
}
// 当元素为t时
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
// 置空
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// System.out.println("endElement: " + localName + ", " + name);
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if (nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
.toString();
} catch (Exception e) {
}
}
// t元素也包含字符串
if (isTElement) {
String value = lastContents.trim();
rowList.add(curCol, value);
curCol++;
isTElement = false;
// v => 单元格的值，如果单元格是字符串则v标签的值为该字符串在SST中的索引
// 将单元格内容加入rowlist中，在这之前先去掉字符串前后的空白符
} else if ("v".equals(name)) {
String value = lastContents.trim();
value = value.equals("") ? " " : value;
try {
// 日期格式处理
if (dateFlag) {
Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MM/yyyy");
value = dateFormat.format(date);
}
// 数字类型处理
if (numberFlag) {
BigDecimal bd = new BigDecimal(value);
value = bd.setScale(3, BigDecimal.ROUND_UP).toString();
}
} catch (Exception e) {
// 转换失败仍用读出来的值
}
rowList.add(curCol, value);
curCol++;
} else {
// 如果标签名称为 row ，这说明已到行尾，调用 optRows() 方法
if (name.equals("row")) {
getRows(sheetIndex + 1, curRow, rowList);
rowList.clear();
curRow++;
curCol = 0;
}
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
// 得到单元格内容的值
lastContents += new String(ch, start, length);
}
/**
* 获取行数据回调
* @param sheetIndex
* @param curRow
* @param rowList
*/
public abstract void getRows(int sheetIndex, int curRow, List<String> rowList);
/**
* 测试方法
*/
public static void main(String[] args) throws Exception {
String file = "E:/导入测试数据.xlsx";
ExcelReader reader = new ExcelReader() {
public void getRows(int sheetIndex, int curRow, List<String> rowList) {
System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
}
};
reader.process(file, 1);
}
}

以下为写入程序

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* Excel超大数据写入，抽象excel2007读入器，先构建.xlsx一张模板，改写模板中的sheet.xml,
* 使用这种方法 写入.xlsx文件，不需要太大的内存
* @version 2014-9-2
*/
public abstract class ExcelWriter {
private SpreadsheetWriter sw;
/**
* 写入电子表格的主要流程
*
* @param fileName
* @throws Exception
*/
public void process(String fileName) throws Exception {
// 建立工作簿和电子表格对象
XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet("sheet1");
// 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xml
String sheetRef = sheet.getPackagePart().getPartName().getName();
// 保存模板
FileOutputStream os = new FileOutputStream("template.xlsx");
wb.write(os);
os.close();
// 生成xml文件
File tmp = File.createTempFile("sheet", ".xml");
Writer fw = new FileWriter(tmp);
sw = new SpreadsheetWriter(fw);
generate();
fw.close();
// 使用产生的数据替换模板
File templateFile = new File("template.xlsx");
FileOutputStream out = new FileOutputStream(fileName);
substitute(templateFile, tmp, sheetRef.substring(1), out);
out.close();
// 删除文件之前调用一下垃圾回收器，否则无法删除模板文件
System.gc();
// 删除临时模板文件
if (templateFile.isFile() && templateFile.exists()) {
templateFile.delete();
}
}
/**
* 类使用者应该使用此方法进行写操作
*
* @throws Exception
*/
public abstract void generate() throws Exception;
public void beginSheet() throws IOException {
sw.beginSheet();
}
public void insertRow(int rowNum) throws IOException {
sw.insertRow(rowNum);
}
public void createCell(int columnIndex, String value) throws IOException {
sw.createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, double value) throws IOException {
sw.createCell(columnIndex, value, -1);
}
public void endRow() throws IOException {
sw.endRow();
}
public void endSheet() throws IOException {
sw.endSheet();
}
/**
*
* @param zipfile
* the template file
* @param tmpfile
* the XML file with the sheet data
* @param entry
* the name of the sheet entry to substitute, e.g.
* xl/worksheets/sheet1.xml
* @param out
* the stream to write the result to
*/
private static void substitute(File zipfile, File tmpfile, String entry,
OutputStream out) throws IOException {
ZipFile zip = new ZipFile(zipfile);
ZipOutputStream zos = new ZipOutputStream(out);
@SuppressWarnings("unchecked")
Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries();
while (en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if (!ze.getName().equals(entry)) {
zos.putNextEntry(new ZipEntry(ze.getName()));
InputStream is = zip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
zos.putNextEntry(new ZipEntry(entry));
InputStream is = new FileInputStream(tmpfile);
copyStream(is, zos);
is.close();
zos.close();
}
private static void copyStream(InputStream in, OutputStream out)
throws IOException {
byte[] chunk = new byte[1024];
int count;
while ((count = in.read(chunk)) >= 0) {
out.write(chunk, 0, count);
}
}
/**
* 在写入器中写入电子表格
*
*/
public static class SpreadsheetWriter {
private final Writer _out;
private int _rownum;
private static String LINE_SEPARATOR = System
.getProperty("line.separator");
public SpreadsheetWriter(Writer out) {
_out = out;
}
public void beginSheet() throws IOException {
_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ "<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">");
_out.write("<sheetData>" + LINE_SEPARATOR);
}
public void endSheet() throws IOException {
_out.write("</sheetData>");
_out.write("</worksheet>");
}
/**
* 插入新行
*
* @param rownum
* 以0开始
*/
public void insertRow(int rownum) throws IOException {
_out.write("<row r=\"" + (rownum + 1) + "\">" + LINE_SEPARATOR);
this._rownum = rownum;
}
/**
* 插入行结束标志
*/
public void endRow() throws IOException {
_out.write("</row>" + LINE_SEPARATOR);
}
/**
* 插入新列
*
* @param columnIndex
* @param value
* @param styleIndex
* @throws IOException
*/
public void createCell(int columnIndex, String value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex)
.formatAsString();
_out.write("<c r=\"" + ref + "\" t=\"inlineStr\"");
if (styleIndex != -1)
_out.write(" s=\"" + styleIndex + "\"");
_out.write(">");
_out.write("<is><t>" + encoderXML(value) + "</t></is>");
_out.write("</c>");
}
public void createCell(int columnIndex, String value)
throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, double value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex)
.formatAsString();
_out.write("<c r=\"" + ref + "\" t=\"n\"");
if (styleIndex != -1)
_out.write(" s=\"" + styleIndex + "\"");
_out.write(">");
_out.write("<v>" + value + "</v>");
_out.write("</c>");
}
public void createCell(int columnIndex, double value)
throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, Calendar value, int styleIndex)
throws IOException {
createCell(columnIndex, DateUtil.getExcelDate(value, false),
styleIndex);
}
}
// XML Encode
private static final String[] xmlCode = new String[256];
static {
// Special characters
xmlCode[‘\‘‘] = "‘";
xmlCode[‘\"‘] = "\""; // double quote
xmlCode[‘&‘] = "&"; // ampersand
xmlCode[‘<‘] = "<"; // lower than
xmlCode[‘>‘] = ">"; // greater than
}
/**
* <p>
* Encode the given text into xml.
* </p>
*
* @param string
* the text to encode
* @return the encoded string
*/
public static String encoderXML(String string) {
if (string == null)
return "";
int n = string.length();
char character;
String xmlchar;
StringBuffer buffer = new StringBuffer();
// loop over all the characters of the String.
for (int i = 0; i < n; i++) {
character = string.charAt(i);
// the xmlcode of these characters are added to a StringBuffer
// one by one
try {
xmlchar = xmlCode[character];
if (xmlchar == null) {
buffer.append(character);
} else {
buffer.append(xmlCode[character]);
}
} catch (ArrayIndexOutOfBoundsException aioobe) {
buffer.append(character);
}
}
return buffer.toString();
}
/**
* 测试方法
*/
public static void main(String[] args) throws Exception {
String file = "E:/导出测试数据.xlsx";
ExcelWriter writer = new ExcelWriter() {
public void generate() throws Exception {
// 电子表格开始
this.beginSheet();
for (int rownum = 0; rownum < 100; rownum++) {
// 插入新行
this.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this.createCell(0, "第 " + rownum + " 行");
this.createCell(1, 34343.123456789);
this.createCell(2, "23.67%");
this.createCell(3, "12:12:23");
this.createCell(4, "2014-10-11 12:12:23");
this.createCell(5, "true");
this.createCell(6, "false");
// 结束行
this.endRow();
}
// 电子表格结束
this.endSheet();
}
};
writer.process(file);
}
}

来自为知笔记(Wiz)

以上是关于poi大数据读写excel的主要内容，如果未能解决你的问题，请参考以下文章