Apex(Salesforce/SFDC)CSV 解析器
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用 Salesforce(SFDC)Apex 语言编写的 CSV 解析器的相关知识(代码为 Apex,并非 Java),希望对你有一定的参考价值。
/**
 * Verifies that CsvParser output can be wrapped by CsvData and read back
 * by header name, including a quoted value that contains a line break.
 */
@isTest
public class CsvDataTest {

    /** Asserts the size of a parsed row, then each of its values in order. */
    private static void assertRow(List<String> expected, List<String> actual) {
        System.assertEquals(expected.size(), actual.size());
        for (Integer col = 0; col < expected.size(); col++) {
            System.assertEquals(expected[col], actual[col]);
        }
    }

    @isTest
    public static void testDataParser(){
        List<String> expectedHeaders = new List<String>{'hone', 'htwo', 'hthree'};
        List<String> expectedValues = new List<String>{'one', 'two\ntwo', 'three'};

        // Raw parse: one header row plus one data row; spaces around unquoted fields are dropped.
        List<List<String>> parsed = CsvParser.Parse('hone, htwo, hthree\none,"two\ntwo",three');
        System.assertEquals(2, parsed.size());
        assertRow(expectedHeaders, parsed[0]);
        assertRow(expectedValues, parsed[1]);

        // Wrapped view: the first row becomes the headers, the second a header -> value map.
        CsvData wrapped = new CsvData(parsed);
        System.assertEquals(1, wrapped.dataRows.size());
        System.assertEquals(3, wrapped.headers.size());
        Map<String, String> record = wrapped.dataRows[0];
        for (Integer col = 0; col < expectedHeaders.size(); col++) {
            System.assertEquals(expectedValues[col], record.get(expectedHeaders[col]));
        }
    }
}
/**
 * Wraps parsed CSV rows (lists of column values) and exposes them as maps keyed by
 * header name. The first non-empty, non-comment row is taken as the header row.
 *
 * This class is not optimized for performance but for convenience. If you need more
 * performance, use the CsvParser class directly and work with the raw lists.
 */
public class CsvData {
    /** Largest column count seen across the header row and all data rows. */
    public Integer fieldCount {get;private set;}
    /** Column names, copied from the first non-empty, non-comment row. */
    public List<String> headers {get;set;}
    /** Rows whose first value starts with '#', re-joined with commas. */
    public List<String> commentRows {get;set;}
    /** One map per data row: header name -> value for the columns the row actually has. */
    public List<Map<String, String>> dataRows {get;set;}

    /**
     * Builds the header list, comment list and per-row maps from raw rows.
     *
     * @param rows parsed rows; a null list is treated as empty and null rows are skipped.
     *             Values beyond the number of headers are not mapped, and headers with
     *             no corresponding value in a short row are left out of that row's map.
     */
    public CsvData(List<List<String>> rows){
        headers = new List<String>();
        commentRows = new List<String>();
        dataRows = new List<Map<String, String>>();
        fieldCount = 0;

        // Treat a missing input the same as an empty one instead of failing on iteration.
        List<List<String>> safeRows = (rows == null) ? new List<List<String>>() : rows;

        for (List<String> row : safeRows) {
            if (row == null || row.isEmpty()) {
                continue;
            }
            // Parser has dropped some data as it doesn't preserve comment lines but this is good enough for now
            if (row[0] != null && row[0].startsWith('#')) {
                commentRows.add(String.join(row, ','));
                continue;
            }
            fieldCount = Math.max(fieldCount, row.size());

            if (headers.isEmpty()) {
                // Copy so that later changes to headers do not alter the caller's row list.
                headers = new List<String>(row);
                continue;
            }

            Map<String, String> rowMap = new Map<String, String>();
            Integer mappedColumns = Math.min(headers.size(), row.size());
            for (Integer i = 0; i < mappedColumns; i++) {
                rowMap.put(headers[i], row[i]);
            }
            dataRows.add(rowMap);
        }
    }
}
/**
 * Unit tests for CsvParser.Parse(String): line endings, quoting, escaped quotes,
 * embedded separators and line breaks, surrounding spaces, and a bulk input.
 */
@isTest
public class CsvParserTest {

    /** Parses the text and asserts the number of rows before handing them back. */
    private static List<List<String>> parseExpecting(String csvText, Integer expectedRowCount) {
        List<List<String>> parsed = CsvParser.Parse(csvText);
        System.assertEquals(expectedRowCount, parsed.size());
        return parsed;
    }

    /** Asserts the size of a row, then each of its values in order. */
    private static void assertRow(List<String> expected, List<String> actual) {
        System.assertEquals(expected.size(), actual.size());
        for (Integer col = 0; col < expected.size(); col++) {
            System.assertEquals(expected[col], actual[col]);
        }
    }

    @isTest
    public static void testSimpleParse(){
        List<List<String>> parsed = parseExpecting('one,two,three', 1);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
    }

    // A trailing LF must not produce an extra row.
    @isTest
    public static void testEndingTwo(){
        List<List<String>> parsed = parseExpecting('one,two,three\n', 1);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
    }

    // A trailing CRLF must not produce an extra row.
    @isTest
    public static void testEndingThree(){
        List<List<String>> parsed = parseExpecting('one,two,three\r\n', 1);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
    }

    // A trailing lone CR must not produce an extra row.
    @isTest
    public static void testEndingFour(){
        List<List<String>> parsed = parseExpecting('one,two,three\r', 1);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
    }

    @isTest
    public static void testQuotedParse(){
        List<List<String>> parsed = parseExpecting('one,"two",three', 1);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
    }

    // A doubled quote inside a quoted field yields a single literal quote.
    @isTest
    public static void testQuotedWQuoteParse(){
        List<List<String>> parsed = parseExpecting('one,"two""two",three', 1);
        assertRow(new List<String>{'one', 'two"two', 'three'}, parsed[0]);
    }

    // A comma inside a quoted field is data, not a separator.
    @isTest
    public static void testQuotedWCommaParse(){
        List<List<String>> parsed = parseExpecting('one,"two,two",three', 1);
        assertRow(new List<String>{'one', 'two,two', 'three'}, parsed[0]);
    }

    @isTest
    public static void testMultiLineParse(){
        List<List<String>> parsed = parseExpecting('one,two,three\nfour,five,six', 2);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
        assertRow(new List<String>{'four', 'five', 'six'}, parsed[1]);
    }

    // Mixed CRLF and LF record terminators.
    @isTest
    public static void testMultiLineParse2(){
        List<List<String>> parsed = parseExpecting('one,two,three\r\nfour,five,six\n', 2);
        assertRow(new List<String>{'one', 'two', 'three'}, parsed[0]);
        assertRow(new List<String>{'four', 'five', 'six'}, parsed[1]);
    }

    // CR and LF characters inside quotes are kept exactly as written.
    @isTest
    public static void testMultiLineParseLiteral(){
        List<List<String>> parsed = parseExpecting('one,"two\r\n\rtwo",three\r\nfour,five,six\n', 2);
        assertRow(new List<String>{'one', 'two\r\n\rtwo', 'three'}, parsed[0]);
        assertRow(new List<String>{'four', 'five', 'six'}, parsed[1]);
    }

    // Spaces around fields are dropped; spaces inside quotes are kept.
    @isTest
    public static void testSpaces(){
        List<List<String>> parsed = parseExpecting(' one, " two\ntwo" , three ', 1);
        assertRow(new List<String>{'one', ' two\ntwo', 'three'}, parsed[0]);
    }

    @isTest
    public static void testBrokenLineParse(){
        List<List<String>> parsed = parseExpecting('one,"two\ntwo",three', 1);
        assertRow(new List<String>{'one', 'two\ntwo', 'three'}, parsed[0]);
    }

    // 1000 identical records, each containing a quoted line break.
    @isTest
    public static void testBulk(){
        String record = 'one,"two\ntwo",three\n';
        List<String> copies = new List<String>();
        while (copies.size() < 1000) {
            copies.add(record);
        }
        String largeText = String.join(copies, '');

        List<List<String>> parsed = parseExpecting(largeText, 1000);
        assertRow(new List<String>{'one', 'two\ntwo', 'three'}, parsed[0]);
    }
}
/**
 * CSV parser ported from http://www.boyet.com/articles/csvparser.html
 *
 * Takes in CSV text and expands it to rows of column values. Pair it with the
 * CsvData class to get a more structured, easier-to-use view keyed by header name.
 *
 * The parser is a small recursive-descent reader over an ICharTokenizer that
 * pushes fields and record boundaries into an ICsvConsumer. The field separator
 * and the quote character are taken from the public config field.
 */
public class CsvParser {
    /** Parser settings; created with defaults by the constructor and free to adjust before parsing. */
    public CsvParserConfig config;

    public CsvParser(){
        this.config = new CsvParserConfig();
    }

    /**
     * Parses CSV content held in a Blob using the default configuration.
     *
     * @param bytes CSV content; null yields an empty result
     * @return one list of field values per record
     */
    public static List<List<String>> Parse(Blob bytes){
        String text = (bytes == null) ? null : bytes.toString();
        return Parse(text);
    }

    /**
     * Parses CSV text using the default configuration.
     *
     * @param text CSV content; null or empty yields an empty result
     * @return one list of field values per record
     */
    public static List<List<String>> Parse(String text){
        CsvParser parser = new CsvParser();
        DefaultCsvConsumer rowCollector = new DefaultCsvConsumer();
        DefaultStringCharTokenizer charSource = new DefaultStringCharTokenizer(parser.config, text);
        parser.Parse(charSource, rowCollector);
        return rowCollector.getRows();
    }

    /**
     * Parses everything the reader supplies, reporting fields and record ends to the consumer.
     *
     * @throws CsvParserTooMuchDataException when a record is followed by unexpected characters
     * @throws CsvParserNoTermQuoteException when a quoted field is never closed
     */
    public void Parse(ICharTokenizer reader, ICsvConsumer consumer) {
        parseCsvFile(reader, consumer);
    }

    /** Field separator from config, falling back to ',' when it is unset. */
    private String fieldSeparator() {
        return String.isEmpty(config.Separator) ? ',' : config.Separator;
    }

    /** Quote character from config, falling back to '"' when it is unset. */
    private String textQuote() {
        return String.isEmpty(config.TextEnclusure) ? '"' : config.TextEnclusure;
    }

    /** Reads records until the reader reports end of input, then signals end of file. */
    private void parseCsvFile(ICharTokenizer reader, ICsvConsumer consumer) {
        while (reader.Peek() != config.EOF) {
            parseCsvRecord(reader, consumer);
        }
        consumer.SignalEndOfFile();
    }

    /** Reads one record: a separator-delimited list of fields followed by a line break or end of input. */
    private void parseCsvRecord(ICharTokenizer reader, ICsvConsumer consumer) {
        parseCsvStringList(reader, consumer);
        String ch = reader.Read();
        if (ch == config.EOF) {
            // Push EOF back so the caller's Peek() still sees it, and treat it as a record end.
            reader.Unread(ch);
            ch = '\n';
        }
        if (ch != '\n') {
            throw new CsvParserTooMuchDataException('End of record was expected but more data exists.');
        }
        consumer.SignalEndOfRecord();
    }

    /** Reads fields for as long as each one is followed by the separator. */
    private void parseCsvStringList(ICharTokenizer reader, ICsvConsumer consumer) {
        String separator = fieldSeparator();
        String ch;
        do {
            parseRawString(reader, consumer);
            ch = reader.Read();
        } while (ch == separator);
        reader.Unread(ch);
    }

    /** True for the separator, a line break, or end of input. */
    private Boolean isFieldTerminator(String c) {
        return (c == fieldSeparator()) || (c == '\n') || (c == config.EOF);
    }

    /** True for a space or tab, unless that character is itself the configured separator. */
    private Boolean isSpace(String c) {
        return ((c == ' ') || (c == '\t')) && (c != fieldSeparator());
    }

    /** Consumes a run of spaces/tabs, leaving the first other character unread. */
    private void parseOptionalSpaces(ICharTokenizer reader) {
        String ch;
        do {
            ch = reader.Read();
        } while (isSpace(ch));
        reader.Unread(ch);
    }

    /** Reads one field together with any spaces around it. */
    private void parseRawString(ICharTokenizer reader, ICsvConsumer consumer) {
        parseOptionalSpaces(reader);
        parseRawField(reader, consumer);
        if (!isFieldTerminator(reader.Peek())) {
            parseOptionalSpaces(reader);
        }
    }

    /** Reads a quoted or simple field (or an empty one) and hands its value to the consumer. */
    private void parseRawField(ICharTokenizer reader, ICsvConsumer consumer) {
        String fieldValue = '';
        String ch = reader.Peek();
        if (!isFieldTerminator(ch)) {
            if (ch == textQuote()) {
                fieldValue = parseQuotedField(reader);
            } else {
                fieldValue = parseSimpleField(reader);
            }
        }
        consumer.ConsumeField(fieldValue);
    }

    /** Reads a field wrapped in quotes and returns its content without the surrounding quotes. */
    private String parseQuotedField(ICharTokenizer reader) {
        System.debug('parse quoted field');
        reader.Read(); // read and discard initial quote
        String field = parseEscapedField(reader);
        String ch = reader.Read();
        if (ch != textQuote()) {
            reader.Unread(ch);
            throw new CsvParserNoTermQuoteException('Quoted field has no terminating double quote');
        }
        return field;
    }

    /** Reads the inside of a quoted field, turning each doubled quote into one literal quote. */
    private String parseEscapedField(ICharTokenizer reader) {
        String quote = textQuote();
        String sb = parseSubField(reader);
        System.debug('Sub 1:'+sb);
        String ch = reader.Read();
        while (processDoubleQuote(reader, ch)) {
            sb += quote;
            sb += parseSubField(reader);
            System.debug('Sub 2:'+sb);
            ch = reader.Read();
        }
        reader.Unread(ch);
        return sb;
    }

    /** Reads literal characters (line breaks untouched) up to the next quote or end of input. */
    private String parseSubField(ICharTokenizer reader) {
        String quote = textQuote();
        // Collect pieces and join once instead of re-allocating the string per character.
        List<String> pieces = new List<String>();
        String ch = reader.ReadLiteral();
        while ((ch != quote) && (ch != config.EOF)) {
            pieces.add(ch);
            ch = reader.ReadLiteral();
        }
        reader.Unread(ch);
        return String.join(pieces, '');
    }

    /** True for characters that end an unquoted field: space/tab, a field terminator, or a quote. */
    private Boolean isBadSimpleFieldChar(String c) {
        return isSpace(c) || isFieldTerminator(c) || (c == textQuote());
    }

    /** Reads an unquoted field up to the first space, terminator or quote. */
    private String parseSimpleField(ICharTokenizer reader) {
        System.debug('parse simple');
        // Collect pieces and join once instead of re-allocating the string per character.
        List<String> pieces = new List<String>();
        String ch = reader.Read();
        while (!isBadSimpleFieldChar(ch)) {
            pieces.add(ch);
            ch = reader.Read();
        }
        reader.Unread(ch);
        return String.join(pieces, '');
    }

    /** When ch and the next character are both quotes, consumes the second one and returns true. */
    private Boolean processDoubleQuote(ICharTokenizer reader, String ch) {
        String quote = textQuote();
        if ((ch == quote) && (reader.Peek() == quote)) {
            reader.Read(); // discard second quote of double
            return true;
        }
        return false;
    }

    /** Character source for the parser, with a single character of push-back. */
    public interface ICharTokenizer {
        /** Returns the next character without consuming it. */
        String Peek();
        /** Consumes and returns the next character. */
        String Read();
        /** Consumes and returns the next character; used by the parser inside quoted fields. */
        String ReadLiteral();
        /** Pushes one character back so that it is returned by the next read or peek. */
        void Unread(String text);
    }

    /** Receiver for the fields and record/file boundaries found by the parser. */
    public interface ICsvConsumer {
        void SignalEndOfFile();
        void SignalEndOfRecord();
        void ConsumeField(String text);
    }

    /** Thrown when a record is followed by something other than a line break or end of input. */
    public class CsvParserTooMuchDataException extends Exception {}
    /** Thrown when a quoted field is not closed by a quote. */
    public class CsvParserNoTermQuoteException extends Exception{}
    /** Thrown by the tokenizer when more than one character is pushed back. */
    public class CharTokenizerException extends Exception {}

    /** Parser settings. Separator and TextEnclusure are expected to be single characters. */
    public class CsvParserConfig {
        /** Value the tokenizer returns once the input is exhausted. */
        public String EOF = null;
        /** Quote character that wraps fields containing separators, quotes or line breaks. */
        public String TextEnclusure = '"';
        /** Character that separates fields within a record. */
        public String Separator = ',';
    }

    // Doesn't have the most respect for memory or performance, TODO optimize
    /** Tokenizer over an in-memory string, held as a list of one-character strings. */
    public class DefaultStringCharTokenizer implements ICharTokenizer {
        private List<String> s;
        private CsvParserConfig config;
        private Integer index;
        private Boolean haveUnreadChar;
        private String unreadChar;

        /**
         * @param config supplies the EOF marker returned when the input is exhausted
         * @param s      text to tokenize; null or empty is treated as no characters at all
         */
        public DefaultStringCharTokenizer(CsvParserConfig config, String s) {
            this.config = config;
            // Guard null/empty explicitly rather than relying on what split('') returns for them.
            this.s = String.isEmpty(s) ? new List<String>() : s.split('');
            index = 0;
            haveUnreadChar = false;
        }

        /** Steps over the CR of a CR LF pair so the pair is read as one line break. */
        private void skipCrInCrLf() {
            if ((s[index] == '\r') && (index + 1 < s.size()) && (s[index + 1] == '\n')) {
                index++;
            }
        }

        /** Reports a CR as LF so callers only ever deal with one line-break character. */
        private String mapCrToLf(String c) {
            if (c == '\r') {
                return '\n';
            }
            return c;
        }

        /** Returns the pushed-back character if any, else the next character (CR shown as LF), else EOF. */
        public String Peek() {
            if (haveUnreadChar) {
                return unreadChar;
            }
            if (index < s.size()) {
                return mapCrToLf(s[index]);
            }
            return config.EOF;
        }

        /** Consumes the next character, collapsing CR LF and a lone CR into a single LF. */
        public String Read() {
            if (haveUnreadChar) {
                haveUnreadChar = false;
                return unreadChar;
            }
            if (index < s.size()) {
                skipCrInCrLf();
                return mapCrToLf(s[index++]);
            }
            return config.EOF;
        }

        /** Consumes the next character exactly as stored, without any line-break mapping. */
        public String ReadLiteral() {
            if (haveUnreadChar) {
                haveUnreadChar = false;
                return unreadChar;
            }
            if (index < s.size()) {
                return s[index++];
            }
            return config.EOF;
        }

        /** Pushes back one character; a second push-back before it is re-read is an error. */
        public void Unread(String c) {
            if (haveUnreadChar) {
                throw new CsvParser.CharTokenizerException('Unread() cannot accept more than one pushed back character');
            }
            haveUnreadChar = true;
            unreadChar = c;
        }
    }

    /** Consumer that collects every record into a list of field-value lists. */
    public class DefaultCsvConsumer implements ICsvConsumer {
        List<List<String>> rows = new List<List<String>>();
        List<String> row = new List<String>();

        /** Returns the records collected so far. */
        public List<List<String>> getRows(){
            return rows;
        }

        public void ConsumeField(String s) {
            System.debug('[' + s + ']');
            row.add(s);
        }

        public void SignalEndOfRecord() {
            System.debug('[end of record]');
            rows.add(row);
            row = new List<String>();
        }

        public void SignalEndOfFile() {
            // capture end of row if needed
            if(!row.isEmpty()){
                SignalEndOfRecord();
            }
            System.debug('[end of file]');
        }
    }
}
以上是关于 Apex(Salesforce/SFDC)CSV 解析器的主要内容,如果未能解决你的问题,请参考以下文章