正则法导入txt文本

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了正则法导入txt文本相关的知识,希望对你有一定的参考价值。

package cc.pubone.project.gsexam.temp;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import cc.pubone.framework.Config;
import cc.pubone.framework.RequestImpl;
import cc.pubone.framework.ResponseImpl;
import cc.pubone.framework.User;
import cc.pubone.framework.data.QueryBuilder;
import cc.pubone.framework.data.Transaction;
import cc.pubone.framework.utility.FileUtil;
import cc.pubone.framework.utility.StringUtil;
import cc.pubone.misc.MockUniter;
import cc.pubone.platform.pub.NoUtil;
import cc.pubone.project.gsexam.admin.Category;
import cc.pubone.project.gsexam.schema.C30QuestionsOptionSchema;
import cc.pubone.project.gsexam.schema.C30QuestionsOptionSet;
import cc.pubone.project.gsexam.schema.C30QuestionsSchema;

public class ImportQuestions {

 public enum LineType {
  Headline1, Headline2, Question, Answer, ReferenceAnswer, AnswerAnalysis, BigQuestion, SubQuestion
 }

 public enum QuestionType {
  Headline, Normal, Big, Sub
 }
 
 public class Question {
  long id;
  long parentID = 0;
  String catalogCode;
  String type;
  String question;
  List<String> answers = new ArrayList<String>();
  String referenceAnswer;
  String answerAnalysis;

  public long getId() {
   return id;
  }
  public void setId(long id) {
   this.id = id;
  }
  public long getParentID() {
   return parentID;
  }
  public void setParentID(long parentID) {
   this.parentID = parentID;
  }
  public String getCatalogCode() {
   return catalogCode;
  }
  public void setCatalogCode(String catalogCode) {
   this.catalogCode = catalogCode;
  }
  public String getType() {
   return type;
  }
  public void setType(String type) {
   this.type = type;
  }
  public String getQuestion() {
   return question;
  }
  public void setQuestion(String question) {
   this.question = question;
  }
  public List<String> getAnswers() {
   return answers;
  }
  public void setAnswers(List<String> answers) {
   this.answers = answers;
  }
  public void addAnswers(List<String> answers) {
   if (this.answers==null) {
    this.answers = answers;
   } else {
    this.answers.addAll(answers);
   }
  }
  public String getReferenceAnswer() {
   return referenceAnswer;
  }
  public void setReferenceAnswer(String referenceAnswer) {
   this.referenceAnswer = referenceAnswer;
  }
  public String getAnswerAnalysis() {
   return answerAnalysis;
  }
  public void setAnswerAnalysis(String answerAnalysis) {
   this.answerAnalysis = answerAnalysis;
  }
  
  @Override
  public String toString() {
   return "Question [id=" + id + ", parentID=" + parentID
     + ", catalogCode=" + catalogCode + ", question=" + question
     + ", answers=" + answers + ", referenceAnswer="
     + referenceAnswer + ", answerAnalysis=" + answerAnalysis
     + "]";
  }
 }

 public static void main3(String[] args) {
  System.out.println(isBigQuestionLikeness("(四)、A县工商局拟对个体工商户B某经销过期食品的违法行为作出”罚款2000元、没收违法所得2000元”的行政处罚。告知期间,B某提出听证要求,问:", QuestionType.Sub));
 }
 
 public static void main(String[] args) throws Exception {
  MockUniter.mockUserLogin("admin");
  
  String rootCatalogCode = "0001";
  boolean autoInsertCatalog = false;
  
  String orgFilePath = Config.getContextRealPath()+"WEB-INF/classes/cc/pubone/project/gsexam/temp/原始2000题(2013.7.15).txt";
  String fixFilePath = Config.getContextRealPath()+"WEB-INF/classes/cc/pubone/project/gsexam/temp/试题文件格式校正.txt";
  String tmpFilePath = Config.getContextRealPath()+"WEB-INF/classes/cc/pubone/project/gsexam/temp/临时导入中间文件.txt";
  String orgContent = FileUtil.readText(orgFilePath);
  String fixContent = FileUtil.readText(fixFilePath);
  String tmpContent = orgContent;
  String[] fixArr = fixContent.split("[\r\n]+");
  for (int i = 1; i < fixArr.length; i+=2) {
   //System.out.println(fixArr[i]);
   tmpContent = StringUtil.replaceEx(tmpContent, fixArr[i-1], fixArr[i]);
  }
  FileUtil.writeText(tmpFilePath, tmpContent);
  
  fixArr = null;
  orgContent = null;
  fixContent = null;
  tmpContent = null;
  
  
  FileInputStream is = new FileInputStream(tmpFilePath);
  BufferedReader dr = new BufferedReader(new InputStreamReader(is));

  String lastCatalogCode = rootCatalogCode;
  String lastHeadline1 = null;
  String lastHeadline2 = null;
  String lastType = null;
  LineType lastLineType = null;
  QuestionType lastQuestionType = null;

  Question q = null;
  long lastBigQuestionID = 0;
  int lastNumFromBigQuestion=0;
  
  int countByHeadline2 = 0;
  int questionCount = 0;
  int subQuestionCount = 0;

  String line = dr.readLine();
  while (line != null) {
   // 剔除行开头的无用空白字符
   // line = line.replaceFirst("^  ", "");
   
   if (isBlankline(line)) {
    // 跳过空白行
    // System.out.println("跳过:" + line);

   } else if (isHandline1(line)) {
    lastLineType = LineType.Headline1;
    lastQuestionType = QuestionType.Headline;
    
    lastHeadline1 = getHandline1(line);
    
    String catalogCode = new QueryBuilder("select InnerCode from C30Category where CodeName=?", lastHeadline1).executeString();
    if (StringUtil.isNotEmpty(catalogCode)) {
     lastCatalogCode = catalogCode;
    } else if (autoInsertCatalog){
     if (!insertCatalog(rootCatalogCode, lastHeadline1)) {
      System.out.println("错误:无法创建分类:" + line);
     } else {
      continue;
     }
    } else {
     System.out.println("错误:未找到分类:" + line);
    }

   } else if (isHandline2(line)) {
    lastLineType = LineType.Headline2;
    lastQuestionType = QuestionType.Headline;
    
    lastHeadline2 = getHandline2(line);
    lastType = getQuestionType(lastHeadline2);
    
    String catalogCode = new QueryBuilder("select InnerCode from C30Category where CodeName=? and ParentCode=(select InnerCode from C30Category where CodeName=?)",
      lastHeadline2, lastHeadline1).executeString();
    if (StringUtil.isNotEmpty(catalogCode)) {
     lastCatalogCode = catalogCode;
    } else if (autoInsertCatalog) {
     String parentCatalogCode = new QueryBuilder("select InnerCode from C30Category where CodeName=?", lastHeadline1).executeString();
     if (StringUtil.isNotEmpty(parentCatalogCode)) {
      if (!insertCatalog(parentCatalogCode, lastHeadline2)) {
       System.out.println("错误:无法创建分类:" + line);
      } else {
       continue;
      }
     } else {
      System.out.println("错误:出现非预期情况:" + line);
     }
    } else {
     System.out.println("错误:未找到分类:" + line);
    }
    
    countByHeadline2 += getNumFromHandline2(line);

   } else if (isQuestion(line, lastLineType)) {
    lastLineType = LineType.Question;
    lastQuestionType = QuestionType.Normal;
    
    q = new ImportQuestions().new Question();
    q.setQuestion(getQuestion(line));
    q.setCatalogCode(lastCatalogCode);
    q.setType(lastType);

   } else if (isAnswer(line, lastLineType)) {
    lastLineType = LineType.Answer;
    if (q!=null) {
     q.addAnswers(getAnswers(line));
    } else {
     System.out.println("错误:答案没有对应题目:"+line);
    }

   } else if (isReferenceAnswer(line, lastLineType)) {
    lastLineType = LineType.ReferenceAnswer;
    if (q!=null) {
     q.setReferenceAnswer(getReferenceAnswer(line));
     q.setAnswerAnalysis(getAnswerAnalysis(line));
    } else {
     System.out.println("错误:参考答案没有对应题目:"+line);
    }
    
    boolean isMutiLine = false;
    int lastNumFromReferenceAnswer=0;
    line = dr.readLine();
    while (line!=null && !isBlankline(line) && !isHandline1(line) && !isHandline2(line) && !isQuestion(line, lastLineType)
      && !isSubQuestion(line, lastQuestionType)) {
     if (isBigQuestionLikeness(line, lastQuestionType)) {
      int num = getNumFromBigQuestionLikeness(line);
      if (lastNumFromReferenceAnswer + 1 == num) {
       lastNumFromReferenceAnswer = num;
       q.setAnswerAnalysis(q.getAnswerAnalysis() + "\r\n" + line);
      } else if (lastNumFromBigQuestion + 1 == num) {
       break;
      } else {
       System.out.println("错误:非期望情况:"+line);
      }
     } else {
      lastNumFromReferenceAnswer=0;
      q.setAnswerAnalysis(q.getAnswerAnalysis() + "\r\n" + line);
     }
     
     line = dr.readLine();
     isMutiLine = true;
    }
    
    if (isMutiLine) {
     // System.out.println(q);
    }
    
    // 插入数据并重置q为null
    insertQuestion(q);
    questionCount++;
    q = null;
    
    continue;
    
   } else if (isBigQuestionLikeness(line, lastQuestionType)) {
    lastLineType = LineType.BigQuestion;
    lastQuestionType = QuestionType.Big;
    lastNumFromBigQuestion = getNumFromBigQuestionLikeness(line);
    
    q = new ImportQuestions().new Question();
    q.setQuestion(getBigQuestion(line));
    q.setCatalogCode(lastCatalogCode);
    q.setType(lastType);
    // lastQuestionID = 1;
    // System.out.println(q);
    
   } else if (isSubQuestion(line, lastQuestionType)) {
    if (lastQuestionType.equals(QuestionType.Big)) {
     lastBigQuestionID = insertQuestion(q);
     questionCount++;
    }
    
    lastLineType = LineType.SubQuestion;
    lastQuestionType = QuestionType.Sub;
    
    q = new ImportQuestions().new Question();
    q.setQuestion(getSubQuestion(line));
    q.setCatalogCode(lastCatalogCode);
    q.setType("3");
    q.setParentID(lastBigQuestionID);
    
    System.out.println(q);
    subQuestionCount++;
    
   } else {
    if (lastLineType.equals(LineType.Question) || lastLineType.equals(LineType.BigQuestion) || lastLineType.equals(LineType.SubQuestion)) {
     q.setQuestion(q.getQuestion()+"\r\n"+line);
    } else {
     System.out.println("错误:未找到对应的解析规则:"+line);
    }
   }
   // System.out.println("----");

   line = dr.readLine();
  }
  
  System.out.println("共有题目数:"+countByHeadline2);
  System.out.println("解析到题目数:"+(questionCount-subQuestionCount));
 }
 
 

 /*
  * 参考答案:C;依据:《行政处罚法》第29条。   1.除法律另有规定,违法行为在多长时间内未被发现的,不再给予行政处罚。( )   A.6个月
  * B.1年 C.2年 D.3年
  */

}

以上是关于正则法导入txt文本的主要内容,如果未能解决你的问题,请参考以下文章

求notepad正则表达式。

正则表达式

使用正则表达式从 txt 中提取数据 [关闭]

将Repeater控件导入Excel表(正则清除a标签保留文本,img标签清除)

正则表达式的语法和简单应用

Linux Shell 文本处理工具