poi大数据读写excel
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了poi大数据读写excel相关的知识,希望对你有一定的参考价值。
一、描述
前端时间写了注解方式Excel的读取和写入,它是根据注解完成Excel的操作,虽说支持大数据,但对于超大数据就无能为力了,因为它的读写期间都是将所有数据放入系统内存的,除非你有超大的内存。
因项目需要对超大数据的Excel读写操作,于是网上找了个超大数据的读写代码,这个不需要太大内存。并对此进行了简单的修改。
原理如下:
Excel超大数据读取:抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低内存的耗费,特别使用于大数据量的文件。
Excel超大数据写入:抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, 使用这种方法 写入.xlsx文件,不需要太大的内存。
先看调用示例:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | String file = "E:/导入测试数据.xlsx" ; ExcelReader reader = new ExcelReader() { public void getRows( int sheetIndex, int curRow, List<String> rowList) { System.out.println( "Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList); } }; reader.process(file, 1 ); String file = "E:/导出测试数据.xlsx" ; ExcelWriter writer = new ExcelWriter() { public void generate() throws Exception { // 电子表格开始 this .beginSheet(); for ( int rownum = 0 ; rownum < 100 ; rownum++) { // 插入新行 this .insertRow(rownum); // 建立新单元格,索引值从0开始,表示第一列 this .createCell( 0 , "第 " + rownum + " 行" ); this .createCell( 1 , 34343.123456789 ); this .createCell( 2 , "23.67%" ); this .createCell( 3 , "12:12:23" ); this .createCell( 4 , "2014-10-11 12:12:23" ); this .createCell( 5 , "true" ); this .createCell( 6 , "false" ); // 结束行 this .endRow(); } // 电子表格结束 this .endSheet(); } }; writer.process(file); } |
这里只展示了对数据的读取和写入,如果正式保存到数据库时建议读取一部分(如100条)再写入一次数据库,尽量不要读取一条就写入一条,这样会非常耗费资源。?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | import java.io.InputStream; import java.math.BigDecimal; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** * Excel超大数据读取,抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 * xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低 内存的耗费,特别使用于大数据量的文件。 * @version 2014-9-2 */ public abstract class ExcelReader extends DefaultHandler { // 共享字符串表 private SharedStringsTable sst; // 上一次的内容 private String lastContents; private boolean nextIsString; private int sheetIndex = - 1 ; private List<String> rowList = new ArrayList<String>(); // 当前行 private int curRow = 0 ; // 当前列 private int curCol = 0 ; // 日期标志 private boolean dateFlag; // 数字标志 private boolean numberFlag; private boolean isTElement; /** * 遍历工作簿中所有的电子表格 * @param filename * @throws Exception */ public void process(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0 ; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } /** * 只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3 * @param filename * @param sheetId * @throws Exception */ public void process(String filename, int sheetId) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // 根据 rId# 或 rSheet# 查找sheet InputStream sheet2 = r.getSheet( "rId" + sheetId); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" ); this .sst = sst; parser.setContentHandler( this ); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // System.out.println("startElement: " + localName + ", " + name + ", " + attributes); // c => 单元格 if ( "c" .equals(name)) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue( "t" ); if ( "s" .equals(cellType)) { nextIsString = true ; } else { nextIsString = false ; } // 日期格式 String cellDateType = attributes.getValue( "s" ); if ( "1" .equals(cellDateType)) { dateFlag = true ; } else { dateFlag = false ; } String cellNumberType = attributes.getValue( "s" ); if ( "2" .equals(cellNumberType)) { numberFlag = true ; } else { numberFlag = false ; } } // 当元素为t时 if ( "t" .equals(name)) { isTElement = true ; } else { isTElement = false ; } // 置空 lastContents = "" ; } public void endElement(String uri, String localName, String name) throws SAXException { // System.out.println("endElement: " + localName + ", " + name); // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // t元素也包含字符串 if (isTElement) { String value = lastContents.trim(); rowList.add(curCol, value); curCol++; isTElement = false ; // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 } else if ( "v" .equals(name)) { String value = lastContents.trim(); value = value.equals( "" ) ? " " : value; try { // 日期格式处理 if (dateFlag) { Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value)); SimpleDateFormat dateFormat = new SimpleDateFormat( "dd/MM/yyyy" ); value = dateFormat.format(date); } // 数字类型处理 if (numberFlag) { BigDecimal bd = new BigDecimal(value); value = bd.setScale( 3 , BigDecimal.ROUND_UP).toString(); } } catch (Exception e) { // 转换失败仍用读出来的值 } rowList.add(curCol, value); curCol++; } else { // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals( "row" )) { getRows(sheetIndex + 1 , curRow, rowList); rowList.clear(); curRow++; curCol = 0 ; } } } public void characters( char [] ch, int start, int length) throws SAXException { // 得到单元格内容的值 lastContents += new String(ch, start, length); } /** * 获取行数据回调 * @param sheetIndex * @param curRow * @param rowList */ public abstract void getRows( int sheetIndex, int curRow, List<String> rowList); /** * 测试方法 */ public static void main(String[] args) throws Exception { String file = "E:/导入测试数据.xlsx" ; ExcelReader reader = new ExcelReader() { public void getRows( int sheetIndex, int curRow, List<String> rowList) { System.out.println( "Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList); } }; reader.process(file, 1 ); } } |
以下为写入程序
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Writer; import java.util.Calendar; import java.util.Enumeration; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; import org.apache.poi.hssf.util.CellReference; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; /** * Excel超大数据写入,抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, * 使用这种方法 写入.xlsx文件,不需要太大的内存 * @version 2014-9-2 */ public abstract class ExcelWriter { private SpreadsheetWriter sw; /** * 写入电子表格的主要流程 * * @param fileName * @throws Exception */ public void process(String fileName) throws Exception { // 建立工作簿和电子表格对象 XSSFWorkbook wb = new XSSFWorkbook(); XSSFSheet sheet = wb.createSheet( "sheet1" ); // 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xml String sheetRef = sheet.getPackagePart().getPartName().getName(); // 保存模板 FileOutputStream os = new FileOutputStream( "template.xlsx" ); wb.write(os); os.close(); // 生成xml文件 File tmp = File.createTempFile( "sheet" , ".xml" ); Writer fw = new FileWriter(tmp); sw = new SpreadsheetWriter(fw); generate(); fw.close(); // 使用产生的数据替换模板 File templateFile = new File( "template.xlsx" ); FileOutputStream out = new FileOutputStream(fileName); substitute(templateFile, tmp, sheetRef.substring( 1 ), out); out.close(); // 删除文件之前调用一下垃圾回收器,否则无法删除模板文件 System.gc(); // 删除临时模板文件 if (templateFile.isFile() && templateFile.exists()) { templateFile.delete(); } } /** * 类使用者应该使用此方法进行写操作 * * @throws Exception */ public abstract void generate() throws Exception; public void beginSheet() throws IOException { sw.beginSheet(); } public void insertRow( int rowNum) throws IOException { sw.insertRow(rowNum); } public void createCell( int columnIndex, String value) throws IOException { sw.createCell(columnIndex, value, - 1 ); } public void createCell( int columnIndex, double value) throws IOException { sw.createCell(columnIndex, value, - 1 ); } public void endRow() throws IOException { sw.endRow(); } public void endSheet() throws IOException { sw.endSheet(); } /** * * @param zipfile * the template file * @param tmpfile * the XML file with the sheet data * @param entry * the name of the sheet entry to substitute, e.g. * xl/worksheets/sheet1.xml * @param out * the stream to write the result to */ private static void substitute(File zipfile, File tmpfile, String entry, OutputStream out) throws IOException { ZipFile zip = new ZipFile(zipfile); ZipOutputStream zos = new ZipOutputStream(out); @SuppressWarnings ( "unchecked" ) Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries(); while (en.hasMoreElements()) { ZipEntry ze = en.nextElement(); if (!ze.getName().equals(entry)) { zos.putNextEntry( new ZipEntry(ze.getName())); InputStream is = zip.getInputStream(ze); copyStream(is, zos); is.close(); } } zos.putNextEntry( new ZipEntry(entry)); InputStream is = new FileInputStream(tmpfile); copyStream(is, zos); is.close(); zos.close(); } private static void copyStream(InputStream in, OutputStream out) throws IOException { byte [] chunk = new byte [ 1024 ]; int count; while ((count = in.read(chunk)) >= 0 ) { out.write(chunk, 0 , count); } } /** * 在写入器中写入电子表格 * */ public static class SpreadsheetWriter { private final Writer _out; private int _rownum; private static String LINE_SEPARATOR = System .getProperty( "line.separator" ); public SpreadsheetWriter(Writer out) { _out = out; } public void beginSheet() throws IOException { _out.write( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" _out.write( "<sheetData>" + LINE_SEPARATOR); } public void endSheet() throws IOException { _out.write( "</sheetData>" ); _out.write( "</worksheet>" ); } /** * 插入新行 * * @param rownum * 以0开始 */ public void insertRow( int rownum) throws IOException { _out.write( "<row r=\"" + (rownum + 1 ) + "\">" + LINE_SEPARATOR); this ._rownum = rownum; } /** * 插入行结束标志 */ public void endRow() throws IOException { _out.write( "</row>" + LINE_SEPARATOR); } /** * 插入新列 * * @param columnIndex * @param value * @param styleIndex * @throws IOException */ public void createCell( int columnIndex, String value, int styleIndex) throws IOException { String ref = new CellReference(_rownum, columnIndex) .formatAsString(); _out.write( "<c r=\"" + ref + "\" t=\"inlineStr\"" ); if (styleIndex != - 1 ) _out.write( " s=\"" + styleIndex + "\"" ); _out.write( ">" ); _out.write( "<is><t>" + encoderXML(value) + "</t></is>" ); _out.write( "</c>" ); } public void createCell( int columnIndex, String value) throws IOException { createCell(columnIndex, value, - 1 ); } public void createCell( int columnIndex, double value, int styleIndex) throws IOException { String ref = new CellReference(_rownum, columnIndex) .formatAsString(); _out.write( "<c r=\"" + ref + "\" t=\"n\"" ); if (styleIndex != - 1 ) _out.write( " s=\"" + styleIndex + "\"" ); _out.write( ">" ); _out.write( "<v>" + value + "</v>" ); _out.write( "</c>" ); } public void createCell( int columnIndex, double value) throws IOException { createCell(columnIndex, value, - 1 ); } public void createCell( int columnIndex, Calendar value, int styleIndex) throws IOException { createCell(columnIndex, DateUtil.getExcelDate(value, false ), styleIndex); } } // XML Encode private static final String[] xmlCode = new String[ 256 ]; static { // Special characters xmlCode[ ‘\‘‘ ] = "‘" ; xmlCode[ ‘\"‘ ] = "\ "" ; // double quote xmlCode[ ‘&‘ ] = "&" ; // ampersand xmlCode[ ‘<‘ ] = "<" ; // lower than xmlCode[ ‘>‘ ] = ">" ; // greater than } /** * <p> * Encode the given text into xml. * </p> * * @param string * the text to encode * @return the encoded string */ public static String encoderXML(String string) { if (string == null ) return "" ; int n = string.length(); char character; String xmlchar; StringBuffer buffer = new StringBuffer(); // loop over all the characters of the String. for ( int i = 0 ; i < n; i++) { character = string.charAt(i); // the xmlcode of these characters are added to a StringBuffer // one by one try { xmlchar = xmlCode[character]; if (xmlchar == null ) { buffer.append(character); } else { buffer.append(xmlCode[character]); } } catch (ArrayIndexOutOfBoundsException aioobe) { buffer.append(character); } } return buffer.toString(); } /** * 测试方法 */ public static void main(String[] args) throws Exception { String file = "E:/导出测试数据.xlsx" ; ExcelWriter writer = new ExcelWriter() { public void generate() throws Exception { // 电子表格开始 this .beginSheet(); for ( int rownum = 0 ; rownum < 100 ; rownum++) { // 插入新行 this .insertRow(rownum); // 建立新单元格,索引值从0开始,表示第一列 this .createCell( 0 , "第 " + rownum + " 行" ); this .createCell( 1 , 34343.123456789 ); this .createCell( 2 , "23.67%" ); this .createCell( 3 , "12:12:23" ); this .createCell( 4 , "2014-10-11 12:12:23" ); this .createCell( 5 , "true" ); this .createCell( 6 , "false" ); // 结束行 this .endRow(); } // 电子表格结束 this .endSheet(); } }; writer.process(file); } } |
以上是关于poi大数据读写excel的主要内容,如果未能解决你的问题,请参考以下文章
POI读写大数据量excel,解决超过几万行而导致内存溢出的问题