Java实现通过POI读取Excel文件提取手机号码并去重
Posted Jame
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Java实现通过POI读取Excel文件提取手机号码并去重相关的知识,希望对你有一定的参考价值。
添加所需依赖:
<!-- apache poi 操作Microsoft Document -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- Apache POI - Java API To Access Microsoft Format Files -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- Apache POI - Java API To Access Microsoft Format Files -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<!-- poi eventmodel方式 依赖包 -->
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
代码如下:
package com.cy.exceldata.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.cy.exceldata.util.SheetHandler;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
- 数据量比较大(8万条以上)的excel文件解析,将excel文件解析为 行列坐标-值的形式存入map中,此方式速度快,内存耗损小 但只能读取excle文件
- 提供处理单个sheet方法 processOneSheet(String filename) 以及处理多个sheet方法 processAllSheets(String filename)
*
*/
public class LargeExcelFileReadUtil {
private LinkedHashMap<String, String>rowContents=new LinkedHashMap<String, String>();
private SheetHandler sheetHandler;
public LinkedHashMap<String, String> getRowContents() {
return rowContents;
}
public void setRowContents(LinkedHashMap<String, String> rowContents) {
this.rowContents = rowContents;
}
public SheetHandler getSheetHandler() {
return sheetHandler;
}
public void setSheetHandler(SheetHandler sheetHandler) {
this.sheetHandler = sheetHandler;
}
//处理一个sheet
public void processOneSheet(String filename) throws Exception {
InputStream sheet2=null;
OPCPackage pkg =null;
try {
pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
sheet2 = r.getSheet("rId1");
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
setRowContents(sheetHandler.getRowContents());
}catch (Exception e) {
e.printStackTrace();
throw e;
}finally{
if(pkg!=null){
pkg.close();
}
if(sheet2!=null){
sheet2.close();
}
}
}
//处理多个sheet
public void processAllSheets(String filename) throws Exception {
OPCPackage pkg =null;
InputStream sheet=null;
try{
pkg=OPCPackage.open(filename);
XSSFReader r = new XSSFReader( pkg );
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while(sheets.hasNext()) {
System.out.println("Processing new sheet:\\n");
sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
}
}catch (Exception e) {
e.printStackTrace();
throw e;
}finally{
if(pkg!=null){
pkg.close();
}
if(sheet!=null){
sheet.close();
}
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser =
XMLReaderFactory.createXMLReader(
"com.sun.org.apache.xerces.internal.parsers.SAXParser"
);
setSheetHandler(new SheetHandler(sst));
ContentHandler handler = (ContentHandler) sheetHandler;
parser.setContentHandler(handler);
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
//测试
@Test
public void test ()throws Exception {
Long time=System.currentTimeMillis();
LargeExcelFileReadUtil example = new LargeExcelFileReadUtil();
example.processOneSheet("C:/Users/Administrator/Desktop/data/test/模拟数据406250.xlsx");
Long endtime=System.currentTimeMillis();
LinkedHashMap<String, String> map=example.getRowContents();
Iterator<Entry<String, String>> it= map.entrySet().iterator();
final Pattern CHINA_PATTERN = Pattern.compile("^((13[0-9])|(14[0,1,4-9])|(15[0-3,5-9])|(16[2,5,6,7])|(17[0-8])|(18[0-9])|(19[0-3,5-9]))\\\\d{8}$");
int count=0;
int phoneNum = 0;
String prePos="";
Set<String> numSet = new HashSet<String>();
while (it.hasNext()){
Map.Entry<String, String> entry=(Map.Entry<String, String>)it.next();
String pos=entry.getKey();
if(!pos.substring(1).equals(prePos)){
prePos=pos.substring(1);
count++;
}
if(isMobile(entry.getValue())){
numSet.add(entry.getValue());
}
}
for (String s : numSet) {
System.out.println(s);
phoneNum = phoneNum + 1;
}
System.out.println("号码数量有: "+phoneNum);
System.out.println("解析数据"+count+"条;耗时"+(endtime-time)/1000+"秒");
}
public static boolean isMobile(final String str) {
Pattern p = null;
Matcher m = null;
boolean b = false;
p = Pattern.compile("^[1][3,4,5,7,8][0-9]{9}$"); // 验证手机号
m = p.matcher(str);
b = m.matches();
return b;
}
}
以上是关于Java实现通过POI读取Excel文件提取手机号码并去重的主要内容,如果未能解决你的问题,请参考以下文章
java通过apache poi框架读取2007版Excel文件
要用Java POI读取Excel文件中的数据,并且实现对数据的格式(时间,整形,字符)校验,输入错误信息