181Java8实现单线程外部排序

Posted by zhangchao19890805

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了181Java8实现单线程外部排序相关的知识,希望对你有一定的参考价值。

1. 生成用于测试排序的CSV文件

常量类 Const.java

package zhangchao.externalsort;

/**
 * Shared constants for the external-sort demo: the locations of the input,
 * output and intermediate files, and the number of records to generate.
 */
public class Const {
	// Path of the original, unsorted data file.
	public static final String ORIGIN_FILE = "D:\\testTemp\\origin_file.csv";

	// Number of records to generate (100 * 10000 = one million).
	public static final int MAX_ITEMS = 100 * 10000;

	// Path of the sorted output file.
	public static final String OUT_FILE = "D:\\testTemp\\out_file.csv";

	// Path of the temporary intermediate file.
	public static final String TEMP_MIDDLE_FILE = "D:\\testTemp\\temp_middle_file.txt";
}

创建没有排序的CSV文件。CreateData.java

package zhangchao.preparedata;

import java.io.*;
import java.util.Random;
import java.util.UUID;

import zhangchao.externalsort.Const;

/**
 * 创建一个没有排序的CSV数据文件
 * @author zhangchao
 *
 */
public class CreateData 
	
	/**
	 * 随机生成字符串
	 * @return 随机生成字符串
	 */
	private static String genName() 
		String[] arr = 
			"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", 
			"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
			"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", 
			"N", "O", "P", "Q", "R", "S", "T", "u", "v", "W", "X", "Y", "Z",
			"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", 
			"个", "好", "二", "黑", "科", "技", "地", "就", "里", "吗", "看", "图", "遇", 
			"啊", "吧", "版", "不", "别", "把", "被", "帮", "办", "过", "及", "奶", "胡"
		;
		Random random = new Random();
		int size = 100 + random.nextInt(100);
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < size; i++) 
			int index = random.nextInt(arr.length);
			sb.append(arr[index]);
		
		return sb.toString();
	

	
	public static void main(String[] args) 
		File file = new File(Const.ORIGIN_FILE);
		if (file.exists()) 
			file.delete();
		
		try 
			file.createNewFile();
		 catch (IOException e) 
			e.printStackTrace();
		
		
		FileOutputStream fos = null;
		BufferedWriter bw = null;
		try 
			fos = new FileOutputStream(file);
			bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
			// 生成CSV文件标题
			String title = "id,name,price\\r\\n";
			bw.write(title);
			
			for (int i = 0; i < Const.MAX_ITEMS; i++) 
				String id = UUID.randomUUID().toString().replaceAll("-", "");
				String name = genName();
				Random r = new Random();
				double price = r.nextDouble() * 100.0 + 0.01;
				StringBuilder sb = new StringBuilder();
				sb.append(id).append(",").append(name).append(",").append(price)
				.append("\\r\\n");
				bw.write(sb.toString());
			
			
		 catch (FileNotFoundException e) 
			e.printStackTrace();
		 catch (UnsupportedEncodingException e) 
			e.printStackTrace();
		 catch (IOException e) 
			e.printStackTrace();
		 finally 
			try 
				if (null != bw) 
					bw.flush();
					bw.close();
				
				
				if (null != fos) 
					fos.flush();
					fos.close();
				
				
			 catch (IOException e) 
				// TODO Auto-generated catch block
				e.printStackTrace();
			
		
		
	



2. 做预备工作,编写基础类

为了方便读取 CSV 文件的每条记录,我们需要编写 DTO,用来接收记录的各个属性。

ItemDto.java

package zhangchao.externalsort;

import java.math.BigDecimal;

/**
 * 对应CSV文件每条记录的DTO类
 * @author zhangchao
 *
 */
/**
 * DTO holding the fields of one record of the CSV file.
 *
 * @author zhangchao
 */
public class ItemDto {
	// Primary key.
	private String id;
	// Name.
	private String name;
	// Price.
	private BigDecimal price;
	// Position of the record in the file.
	// NOTE(review): the unit (byte offset vs. line number) is not visible here — confirm against the code that sets it.
	private Long filePosition;

	/**
	 * Returns a text form listing all four fields, in the shape
	 * {@code ItemDto [id=..., name=..., price=..., filePosition=...]}.
	 */
	@Override
	public String toString() {
		return "ItemDto [id=" + id + ", name=" + name + ", price=" + price + ", filePosition=" + filePosition + "]";
	}

	//      setters/getters

	/** @return the primary key */
	public String getId() {
		return id;
	}

	/** @return the position of the record in the file; {@code null} until set */
	public Long getFilePosition() {
		return filePosition;
	}

	/** @param filePosition the position of the record in the file */
	public void setFilePosition(Long filePosition) {
		this.filePosition = filePosition;
	}

	/** @param id the primary key */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the name */
	public String getName() {
		return name;
	}

	/** @param name the name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the price */
	public BigDecimal getPrice() {
		return price;
	}

	/** @param price the price */
	public void setPrice(BigDecimal price) {
		this.price = price;
	}
}

我们还需要编写一个类来传递生成临时中间文件的结果,类名是 TempMiddleFileResult 。

package zhangchao.externalsort;

/**
 * 创建临时中间文件的结果
 * @author zhangchao
 *
 */
/**
 * Result of creating the temporary intermediate file.
 *
 * @author zhangchao
 */
public class TempMiddleFileResult {
	// Whether the operation succeeded; false until set.
	private boolean flag = false;
	// Total number of lines in the file; 0 until set.
	private int lines = 0;

	/** @return {@code true} if the operation succeeded */
	public boolean getFlag() {
		return flag;
	}

	/** @param flag whether the operation succeeded */
	public void setFlag(boolean flag) {
		this.flag = flag;
	}

	/** @return the total number of lines in the file */
	public int getLines() {
		return lines;
	}

	/** @param lines the total number of lines in the file */
	public void setLines(int lines) {
		this.lines = lines;
	}
}

检查输出文件是否正确排序的工具类 CheckResult :

package zhangchao.externalsort;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;

public class CheckResult 
	public static void main(String args[])

		File file = new File(Const.OUT_FILE);
		if (!file.exists()) 
			System.out.print(false);
			return;
		
		FileInputStream fis = null;
		BufferedReader br = null;
		try 
			fis = new FileInputStream(file);
			br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
			String str = br.readLine();
			if (str.startsWith("id,"))
				str = br.readLine();
			
			if (null == str)
				System.out.print(false);
				return;
			
			ItemDto pre = null;
			ItemDto current = null;
			while (null != str) 
				str = str.trim();
				if (str.length() > 0 && !str.startsWith("id,")) 
					pre = current;
					String arr[] = str.split(",");
					current = new ItemDto();
					current.setId(arr[0]);
					current.setName(arr[1]);
					try 
						current.setPrice(new BigDecimal(arr[2]));
					 catch (Exception e) 
						System.out.println(str);
						throw e;
					
					
					if (null != pre) 
						BigDecimal prePrice = pre.getPrice();
						BigDecimal currPrice = current.getPrice();
						if (currPrice.compareTo(prePrice) < 0) 
							System.out.println("pre=" + pre);
							System.out.println("current=" + current);
							System.out.println("currPrice.compareTo(prePrice) < 0");
							System.out.println(false);
							return;
						
					
				
				str = br.readLine();
			
			System.out.print(true);
		 catch (FileNotFoundException e) 
			e.printStackTrace();
		 catch (UnsupportedEncodingException e) 
			e.printStackTrace();
		 catch (IOException e) 
			e.printStackTrace();
		 finally 
			try 
				if (null != br) 
					br.close();
				
				if (null != fis) 
					fis.close();
				
			 catch (IOException e) 
				e.printStackTrace();
			
		
		
	


3. 编写内部排序的类,用于做对比。

Test1 类是为了和内部排序做对比而编写的。它把原文件的内容全部读取到内存中再进行排序,当文件过大时就会因为内存不足而报错。

Test1.java

package zhangchao.externalsort;

import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.math.BigDecimal;

import zhangchao.preparedata.CreateData;


public class Test1 

	public static void main(String[] args) 
		File file = new File(Const.ORIGIN_FILE);
		if (!file.exists()) 
			System.out.println("No file");
			return;
		
		FileInputStream fis = null;
		BufferedReader br = null;
		FileOutputStream fos = null;
		BufferedWriter bw = null;
		try 
			List<ItemDto> itemDtoList = new ArrayList<ItemDto>();
			fis = new FileInputStream(file);
			br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
			String str = null;
			str = br.readLine();
			while(null != str) 
				if (!str.startsWith("id,")) 
					String arr[] = str.split(",");
					ItemDto itemDto = new ItemDto();
					itemDto.setId(arr[0]);
					itemDto.setName(arr[1]);
					itemDto.setPrice(new BigDecimal(arr[2]));
					itemDtoList.add(itemDto);
				
				str = br.readLine();
			
			itemDtoList.

(原文代码在此处被截断。)

以上是关于《181 Java8 实现单线程外部排序》的主要内容。如果未能解决你的问题,请参考以下文章:

php中在循环外部如何强制结束循环?

常见排序算法整理总结

外部排序

外部排序&多路归并排序

外部排序归并排序 败者树

常用的外部排序方法