java 读取 500M文件中间的内容
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了java 读取 500M文件中间的内容相关的知识,希望对你有一定的参考价值。
大家好:
这是本人第一次问问题,希望大家多多帮忙。
情况:有500M的一个文件,假设我要读取的东西在文件的内容在文件的中后部,那我要怎么处理才能保证速度?
我自己尝试了很多,没有发现好方法。
BufferedReader中的readLine()也要文件头开始一行一行读起,这样很慢。我知道有skip()这样的函数,但是我没办法确定我要跳过多少个字符,所以用不来,我想知道有个mark()的函数是怎么用的,是不是可以再某处进行标记?如果有成功运行的源码希望能贴出来研究下。
nio包也试过了,速度也不怎么样。
PS:不要说什么把数据存入DB这样的建议啊,要是这样的话就不用在这里问了。就是不能用DB。
联系邮箱:2008ruc@sina.com
500M的就是txt的文件,大家不必把重心发在为什么有这么大的文件上啊。我看了下大家的答案,我觉得2楼的“2个线程。一个读,一个显示”比较有可能显示。但是我不知道具体处理啊。谢谢啊 。
还有“把文件中后部分都一次读入内存”这个思路很对,但是会内存溢出的。
import java.io.File;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
public class t
public static void main(String[] args) throws Exception
final int BUFFER_SIZE = 0x300000;// 缓冲去大小为3M
File f = new File("C:\\Documents and Settings\\XHY\\桌面\\a.txt");
/**
* map(FileChannel.MapMode mode,long position, long size)
* mode - 根据是按只读、读取/写入或专用(写入时拷贝)来映射文件,分别为 FileChannel.MapMode 类中所定义的 READ_ONLY、READ_WRITE 或 PRIVATE 之一
* position - 文件中的位置,映射区域从此位置开始;必须为非负数
* size - 要映射的区域大小;必须为非负数且不大于 Integer.MAX_VALUE
* 所以若想读取文件后半部分内容,如例子所写;若想读取文本后1/8内容,需要这样写map(FileChannel.MapMode.READ_ONLY, f.length()*7/8,f.length()/8)
*/
MappedByteBuffer inputBuffer = new RandomAccessFile(f, "r").getChannel().map(FileChannel.MapMode.READ_ONLY, f.length()/2,f.length()/2);
byte[] dst = new byte[BUFFER_SIZE];// 每次读出3M的内容
Long start = System.currentTimeMillis();
for (int offset = 0; offset < inputBuffer.capacity(); offset += BUFFER_SIZE)
if (inputBuffer.capacity() - offset >= BUFFER_SIZE)
for (int i = 0; i < BUFFER_SIZE; i++)
dst[i] = inputBuffer.get(offset + i);
else
for (int i = 0; i < inputBuffer.capacity() - offset; i++)
dst[i] = inputBuffer.get(offset + i);
int length = (inputBuffer.capacity()%BUFFER_SIZE==0)?BUFFER_SIZE:inputBuffer.capacity()%BUFFER_SIZE;
// System.out.println(new String(dst,0,length));//new String(dst,0,length)这样可以取出缓存保存的字符串,可以对其进行操作
long end = System.currentTimeMillis();
System.out.println("读取文件文件一半内容花费:"+(end-start)+"毫秒");
参考技术A 把文件中后部分都一次读入内存,然后对内存读写就可以了。大文件不适合用缓冲。 参考技术B 文件中间的内容必须有特定的场景才可以进行判断,但是可以通过下面的方法来实现所有的内容输出,之后再内容输出的位置增加必要的判断即可。
可以通过BufferedReader 流的形式进行流缓存,之后通过readLine方法获取到缓存的内容。
BufferedReader bre = null;
try
String file = "D:/test/test.txt";
bre = new BufferedReader(new FileReader(file));//此时获取到的bre就是整个文件的缓存流
while ((str = bre.readLine())!= null) // 判断最后一行不存在,为空结束循环
System.out.println(str);//原样输出读到的内容,此部分内容可以根据世界需要进行必要的判断
;
备注: 流用完之后必须close掉,如上面的就应该是:bre.close(),否则bre流会一直存在,直到程序运行结束。 参考技术C 你可以获取文件的长度,skip长度一半的字节。 参考技术D 你要啥子速度》
就是全速读取500MB文件,也要10来秒,你看看用记事本打开一个500MB的文件嘛,别说是个有格式文件了。
2个线程。一个读,一个显示
java读取excel
package com.skywares.safety.utils;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletInputStream;
import javax.servlet.ServletOutputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.google.common.base.Strings;
public class ExcelUtil {
/**
* 导入excel文件,返回List<Map<String, Object>>
*
* @Author 不告诉你
* @date 2016-09-09 15:25:35
* @param Object
* in 数据输出流(FileInputStream,ServletInputStream)
* @return
*/
public static List<Map<String, Object>> importExcel(Object in) throws Exception {
Workbook tempWorkbook = null;
Sheet fromsheet = null;
Row fromRow = null;
Cell fromCell = null;
Row headerRow = null;
Cell headerCell = null;
List<Map<String, Object>> list = new ArrayList<>();
if (in instanceof InputStream) {
tempWorkbook = create((InputStream) in);
}else if (in instanceof FileInputStream) {
tempWorkbook = create((FileInputStream) in);
}else if (in instanceof ServletInputStream) {
tempWorkbook = create((ServletInputStream) in);
}else {
return null;
}
for (int i = 0; i < tempWorkbook.getNumberOfSheets(); i++) {
fromsheet = tempWorkbook.getSheetAt(i);
if (fromsheet == null || fromsheet.getLastRowNum() == 0 || fromsheet.getLastRowNum() < 3) {
continue;
}
removeRow(fromsheet, 1);
headerRow = fromsheet.getRow(0);
for (int k = 1; k < fromsheet.getLastRowNum() + 1; k++) {
Map<String, Object> map = new HashMap<String, Object>();
fromRow = fromsheet.getRow(k);
if (fromRow == null || fromRow.getPhysicalNumberOfCells() == 0) {
continue;
}
for (int j = headerRow.getFirstCellNum(); j < headerRow.getPhysicalNumberOfCells(); j++) {
fromCell = fromRow.getCell(j);
headerCell = headerRow.getCell(j);
if (fromCell == null || headerCell == null) {
continue;
}
String headerCellValue = headerCell.getStringCellValue();
String[] headerCellValues = headerCellValue.split("\.");
if (Strings.isNullOrEmpty(headerCellValue) || headerCellValues.length < 2) {
continue;
}
int cType = fromCell.getCellType();
switch (cType) {
case Cell.CELL_TYPE_STRING:
map.put(headerCellValues[1], fromCell.getRichStringCellValue().toString().trim());
break;
case Cell.CELL_TYPE_NUMERIC:
map.put(headerCellValues[1], fromCell.getNumericCellValue());
break;
case Cell.CELL_TYPE_FORMULA:
map.put(headerCellValues[1], fromCell.getCellFormula());
break;
case Cell.CELL_TYPE_BOOLEAN:
map.put(headerCellValues[1], fromCell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_ERROR:
map.put(headerCellValues[1], fromCell.getErrorCellValue());
break;
default:
map.put(headerCellValues[1], fromCell.getRichStringCellValue().toString().trim());
break;
}
}
list.add(map);
}
}
tempWorkbook.close();
return list;
}
/**
* 根据excel模版创建文件(map<String, Object>或者javaBean或者ResultSet数据源)
*
* @Author 不告诉你
* @date 2016-09-05 11:11:11
* @param String
* tempPath 模版路径(模版xls只能导出2003,模版xlsx只能导出2007)
* @param String
* sheetName 生成工作表的名字(为空时,默认为sheet1)
* @param Object
* data map<String, Object>或者javaBean或者ResultSet数据源
* @param Object
* os 数据输出流(FileOutputStream,ServletOutputStream)
* @return
*/
public static void paddingExcel(String tempPath, String sheetName, Object data, Object os) throws Exception {
tempExportExcel(tempPath, sheetName, null, null, os, data);
}
/**
* 根据excel模版创建文件(model)
*
* @Author 不告诉你
* @date 2016-09-05 11:11:11
* @param String
* tempPath 模版路径(模版xls只能导出2003,模版xlsx只能导出2007)
* @param String
* sheetName 生成工作表的名字(为空时,默认为sheet1)
* @param Object
* data model List数据源
* @param Object
* os 数据输出流(FileOutputStream,ServletOutputStream)
* @param Object
* headerData 固定数据源map<String, Object>或者javaBean或者ResultSet
* @return
*/
public static void javaBeanToExcel(String tempPath, String sheetName, Object data, Object os,
Object headerData) throws Exception {
tempExportExcel(tempPath, sheetName, data, null, os, headerData);
}
/**
* 根据excel模版创建文件(ResultSet rs)
*
* @Author 不告诉你
* @date 2016-09-05 11:11:11
* @param String
* tempPath 模版路径(模版xls只能导出2003,模版xlsx只能导出2007)
* @param String
* sheetName 生成工作表的名字(为空时,默认为sheet1)
* @param ResultSet
* rs 结果集 List数据源
* @param Object
* os 数据输出流(FileOutputStream,ServletOutputStream)
* @param Object
* headerData 固定数据源map<String, Object>或者javaBean或者ResultSet
* @return
*/
public static void resultSetToExcel(String tempPath, String sheetName, ResultSet rs, Object os, Object headerData)
throws Exception {
tempExportExcel(tempPath, sheetName, null, rs, os, headerData);
}
private static void tempExportExcel(String tempPath, String sheetName, Object data, ResultSet rs, Object os,
Object headerData) throws Exception {
if (!System.getProperty("os.name").equalsIgnoreCase("Linux")) {
tempPath = tempPath.substring(1);
}
String version = tempPath.substring(tempPath.lastIndexOf("."));
if (version.equals(".xls")) {
tempExport2003Excel(tempPath, sheetName, data, rs, os, headerData);
} else if (version.equals(".xlsx")) {
tempExport2007Excel(tempPath, sheetName, data, rs, os, headerData);
} else {
return;
}
}
private static void tempExport2007Excel(String tempPath, String sheetName, Object data, ResultSet rs,
Object os, Object headerData) throws Exception {
OutputStream out = null;
Workbook tempWorkbook = null;
Sheet fromsheet = null;
Cell fromCell = null;
try {
if (os instanceof FileOutputStream) {
out = (FileOutputStream) os;
}else if (os instanceof ServletOutputStream) {
out = (ServletOutputStream) os;
}else {
return;
}
if (Strings.isNullOrEmpty(sheetName)) {
sheetName = "sheet1";
}
tempWorkbook = new XSSFWorkbook(OPCPackage.open(new FileInputStream(tempPath)));
fromsheet = (XSSFSheet) tempWorkbook.getSheetAt(0);
if (fromsheet == null || fromsheet.getLastRowNum() == 0) {
return;
}
tempWorkbook.setSheetName(0, sheetName);
if (headerData != null) {
int firstrow = fromsheet.getFirstRowNum();
int lastrow = fromsheet.getLastRowNum();
for (int i = firstrow; i < lastrow; i++) {
fromCell = fromsheet.getRow(i).getCell(0);
String start = fromCell.getStringCellValue();
if ("data.start".equalsIgnoreCase(start.trim())) {
lastrow = i;
break;
}
if (i == (lastrow - 1)) {
return;
}
}
paddingExcel(fromsheet, firstrow, lastrow, headerData);
}
if (data != null || rs != null) {
int firstrow = fromsheet.getFirstRowNum();
int lastrow = fromsheet.getLastRowNum();
for (int i = firstrow; i < lastrow; i++) {
fromCell = fromsheet.getRow(i).getCell(0);
String start = fromCell.getStringCellValue();
if ("data.start".equalsIgnoreCase(start.trim())) {
firstrow = i;
break;
}
if (i == (lastrow - 1)) {
return;
}
}
if (data != null) {
copySheets(fromsheet, firstrow, lastrow, data, null);
}
if (rs != null) {
copySheets(fromsheet, firstrow, lastrow, null, rs);
}
removeRow(fromsheet, firstrow);
removeRow(fromsheet, firstrow);
removeRow(fromsheet, firstrow);
}
tempWorkbook.write(out);
out.flush();
tempWorkbook.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@SuppressWarnings("resource")
private static void tempExport2003Excel(String tempPath, String sheetName, Object data, ResultSet rs,
Object os, Object headerData) throws Exception {
OutputStream out = null;
Workbook tempWorkbook = null;
Sheet fromsheet = null;
Cell fromCell = null;
try {
if (os instanceof FileOutputStream) {
out = (FileOutputStream) os;
}else if (os instanceof ServletOutputStream) {
out = (ServletOutputStream) os;
}else {
return;
}
if (Strings.isNullOrEmpty(sheetName)) {
sheetName = "sheet1";
}
tempWorkbook = new HSSFWorkbook(new FileInputStream(tempPath));
fromsheet = (HSSFSheet) tempWorkbook.getSheetAt(0);
if (fromsheet == null || fromsheet.getLastRowNum() == 0) {
return;
}
tempWorkbook.setSheetName(0, sheetName);
if (headerData != null) {
int firstrow = fromsheet.getFirstRowNum();
int lastrow = fromsheet.getLastRowNum();
for (int i = firstrow; i < lastrow; i++) {
fromCell = fromsheet.getRow(i).getCell(0);
String start = fromCell.getStringCellValue();
if ("data.start".equalsIgnoreCase(start.trim())) {
lastrow = i;
break;
}
if (i == (lastrow - 1)) {
return;
}
}
paddingExcel(fromsheet, firstrow, lastrow, headerData);
}
if (data != null || rs != null) {
int firstrow = fromsheet.getFirstRowNum();
int lastrow = fromsheet.getLastRowNum();
for (int i = firstrow; i < lastrow; i++) {
fromCell = fromsheet.getRow(i).getCell(0);
String start = fromCell.getStringCellValue();
if ("data.start".equalsIgnoreCase(start.trim())) {
firstrow = i;
break;
}
if (i == (lastrow - 1)) {
return;
}
}
if (data != null) {
copySheets(fromsheet, firstrow, lastrow, data, null);
}
if (rs != null) {
copySheets(fromsheet, firstrow, lastrow, null, rs);
}
removeRow(fromsheet, firstrow);
removeRow(fromsheet, firstrow);
removeRow(fromsheet, firstrow);
}
tempWorkbook.write(out);
out.flush();
tempWorkbook.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@SuppressWarnings("unchecked")
private static void copySheets(Sheet fromsheet, int firstrow, int lastrow, Object data, ResultSet rs)
throws Exception {
if ((firstrow == -1) || (lastrow == -1) || lastrow < firstrow) {
return;
}
Row fromRow = null;
Row newRow = null;
Cell newCell = null;
Cell fromCell = null;
RichTextString textValue = null;
CellStyle fromCellStyle = null;
fromCell = fromsheet.getRow(firstrow + 2).getCell(0);
String end = fromCell.getStringCellValue();
if (!"data.end".equalsIgnoreCase(end.trim())) {
return;
}
if (data != null) {
int iRow = firstrow + 3;
List<Object> javaBean = (List<Object>) data;
int j=0;
for (Object o : javaBean) {
j++;
Map<String, Object> map = FormatUtil.javaBeanToMap(o);
newRow = fromsheet.createRow(iRow);
fromRow = fromsheet.getRow(firstrow + 1);
for (int i = fromRow.getFirstCellNum(); i < fromRow.getPhysicalNumberOfCells(); i++) {
fromCell = fromRow.getCell(i);
newCell = newRow.createCell(i);
if (fromCell == null) {
continue;
}
fromCellStyle = fromCell.getCellStyle();
newCell.setCellStyle(fromCellStyle);
for (Map.Entry<String, Object> entry : map.entrySet()) {
Object value = entry.getValue();
String fromCellValue = fromCell.getStringCellValue();
if (entry.getKey().equalsIgnoreCase(fromCellValue.split("\.")[1].trim())) {
if (value instanceof Date) {
Date date = (Date) value;
Calendar cDate = Calendar.getInstance();
cDate.setTime(date);
int hh = cDate.get(Calendar.HOUR_OF_DAY); // 获取当前小时
int mm = cDate.get(Calendar.MINUTE); // 获取当前分钟
int ss = cDate.get(Calendar.SECOND); // 获取当前秒
if (hh == 0 && mm == 0 && ss == 0) {
newCell.setCellValue(DateUtil.dateToString(date, "yyyy-MM-dd"));
} else {
newCell.setCellValue(DateUtil.dateToString(date, "yyyy-MM-dd HH:mm:ss"));
}
}
if (value instanceof Integer) {
int intValue = (Integer) value;
newCell.setCellValue(intValue);
} else if (value instanceof Float) {
float fValue = (Float) value;
if (fromsheet instanceof HSSFSheet) {
textValue = new HSSFRichTextString(String.valueOf(fValue));
}
if (fromsheet instanceof XSSFSheet) {
textValue = new XSSFRichTextString(String.valueOf(fValue));
}
newCell.setCellValue(textValue);
} else if (value instanceof Double) {
double dValue = (Double) value;
if (fromsheet instanceof HSSFSheet) {
textValue = new HSSFRichTextString(String.valueOf(dValue));
}
if (fromsheet instanceof XSSFSheet) {
textValue = new XSSFRichTextString(String.valueOf(dValue));
}
newCell.setCellValue(textValue);
} else if (value instanceof Long) {
long longValue = (Long) value;
newCell.setCellValue(longValue);
} else if (value instanceof String) {
newCell.setCellValue(value.toString());
} else if (value instanceof Boolean) {
if((boolean) value) {
newCell.setCellValue("是");
}else {
newCell.setCellValue("否");
}
}
break;
}
if(fromCellValue.split("\.")[1].trim().equals("autoNumber")){
newCell.setCellValue(j);
}
}
}
iRow++;
}
}
if (rs != null) {
return;
}
}
/**
*
* @param fromsheet
* @param firstrow
* @param lastrow
* @param data
* map<String, Object>或者javaBean或者ResultSet
* @throws Exception
*/
@SuppressWarnings("unchecked")
private static void paddingExcel(Sheet fromsheet, int firstrow, int lastrow, Object data) throws Exception {
if ((firstrow == -1) || (lastrow == -1) || lastrow < firstrow) {
return;
}
Row fromRow = null;
Cell fromCell = null;
RichTextString textValue = null;
if (data != null) {
if (data instanceof ResultSet) {
return;
} else {
Map<String, Object> map = null;
if (data instanceof Map<?, ?>) {
map = (Map<String, Object>) data;
} else {
map = FormatUtil.javaBeanToMap(data);
}
for (int i = firstrow; i < lastrow; i++) {
fromRow = fromsheet.getRow(i);
for (int j = fromRow.getFirstCellNum(); j < fromRow.getPhysicalNumberOfCells(); j++) {
fromCell = fromRow.getCell(j);
if (fromCell == null) {
continue;
}
String fromCellValue = fromCell.getStringCellValue();
if (Strings.isNullOrEmpty(fromCellValue) || (fromCellValue.split("\.").length < 2)) {
continue;
}
if ("data.start".equalsIgnoreCase(fromCellValue.trim())) {
return;
}
for (Map.Entry<String, Object> entry : map.entrySet()) {
Object value = entry.getValue();
if (entry.getKey().equalsIgnoreCase(fromCellValue.split("\.")[1].trim())) {
if (value instanceof Date) {
Date date = (Date) value;
Calendar cDate = Calendar.getInstance();
cDate.setTime(date);
int hh = cDate.get(Calendar.HOUR_OF_DAY); // 获取当前小时
int mm = cDate.get(Calendar.MINUTE); // 获取当前分钟
int ss = cDate.get(Calendar.SECOND); // 获取当前秒
if (hh == 0 && mm == 0 && ss == 0) {
fromCell.setCellValue(DateUtil.dateToString(date, "yyyy-MM-dd"));
} else {
fromCell.setCellValue(DateUtil.dateToString(date, "yyyy-MM-dd HH:mm:ss"));
}
}
if (value instanceof Integer) {
int intValue = (Integer) value;
fromCell.setCellValue(intValue);
} else if (value instanceof Float) {
float fValue = (Float) value;
if (fromsheet instanceof HSSFSheet) {
textValue = new HSSFRichTextString(String.valueOf(fValue));
}
if (fromsheet instanceof XSSFSheet) {
textValue = new XSSFRichTextString(String.valueOf(fValue));
}
fromCell.setCellValue(textValue);
} else if (value instanceof Double) {
double dValue = (Double) value;
if (fromsheet instanceof HSSFSheet) {
textValue = new HSSFRichTextString(String.valueOf(dValue));
}
if (fromsheet instanceof XSSFSheet) {
textValue = new XSSFRichTextString(String.valueOf(dValue));
}
fromCell.setCellValue(textValue);
} else if (value instanceof Long) {
long longValue = (Long) value;
fromCell.setCellValue(longValue);
} else if (value instanceof String) {
fromCell.setCellValue(value.toString());
} else if (value instanceof Boolean) {
if((boolean) value) {
fromCell.setCellValue("是");
}else {
fromCell.setCellValue("否");
}
}
break;
}
}
}
}
}
}
}
private static Workbook create(InputStream in) throws Exception {
if (!in.markSupported()) {
in = new PushbackInputStream(in, 8);
}
if (POIFSFileSystem.hasPOIFSHeader(in)) {
return new HSSFWorkbook(in);
}
if (POIXMLDocument.hasOOXMLHeader(in)) {
return new XSSFWorkbook(OPCPackage.open(in));
}
throw new IllegalArgumentException("你的excel版本目前poi解析不了");
}
private static void removeRow(Sheet sheet, int rowIndex) {
int lastRowNum = sheet.getLastRowNum();
if (rowIndex >= 0 && rowIndex < lastRowNum) {
sheet.shiftRows(rowIndex + 1, lastRowNum, -1);
}
if (rowIndex == lastRowNum) {
Row removingRow = sheet.getRow(rowIndex);
if (removingRow != null) {
sheet.removeRow(removingRow);
}
}
}
}
以上是关于java 读取 500M文件中间的内容的主要内容,如果未能解决你的问题,请参考以下文章