堆排序的Verilog实现
Posted FPGA之家
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了堆排序的Verilog实现相关的知识,希望对你有一定的参考价值。
欢迎FPGA工程师加入官方微信技术群
最近在做项目的过程中遇到了一个排序问题,需要对4096个正值数据进行排序。由于使用的是Xilinx的Kintex系列FPGA,逻辑资源较为丰富,所以希望能利用逻辑资源尽量加速排序算法。但是自己对排序算法了解不多,所以目前根据网上的资料,选择了堆排序来实现。参考资料如下:
目前我实现的堆排序算法并没有很好地利用FPGA的并行特性,更多的还是利用FSM进行循环计算。希望可以借本文抛砖引玉,得到大神的指点,给我介绍一种可以利用FPGA并行计算特性、对大量数据进行快速排序的算法。大家要是有什么想法或者建议,一定要给我留言,拜托拜托。
这里的状态机均采用三段式写法实现,Verilog代码如下(请看明白原理再拿走代码,不然我这篇文章就没有意义了)
堆初始化模块:
`timescale 1ns / 1ps
//-----------------------------------------------------------------------------
// initial_stack
//
// Performs ONE sift-down pass on a max-heap that lives in an external
// single-port RAM. Starting at index `parent`, the value stored there is moved
// down the tree (children of node i are 2*i+1 and 2*i+2) until it is greater
// than or equal to its larger child, or until it has no child below `length`.
//
// Parameters
//   addr_width : width of the RAM address bus
//   data_width : width of one RAM word
//
// Ports
//   clk, rst_n : clock and asynchronous active-low reset
//   en         : sampled in IDLE; when high a pass is started
//   clr        : sampled in COMPLETE; when high the FSM returns to IDLE
//   done       : pulses high for one clock once the final write was issued
//   parent     : index of the node to sift down (latched while in IDLE)
//   length     : number of valid heap elements   (latched while in IDLE)
//   wea        : RAM write enable
//   addra      : RAM read/write address
//   data_we    : RAM write data
//   data_re    : RAM read data
//
// Timing note: data_re is sampled two clock edges after the edge that
// registers addra, i.e. the FSM expects a RAM with one cycle of read latency
// (NOTE(review): confirm against the actual RAM configuration).
//-----------------------------------------------------------------------------
module initial_stack
#(
    parameter addr_width = 5,
    parameter data_width = 8
)
(
    input                           clk,
    input                           rst_n,
    input                           en,
    input                           clr,
    output reg                      done,
    input      [addr_width - 1:0]   parent,
    input      [addr_width - 1:0]   length,
    output reg                      wea,
    output reg [addr_width - 1:0]   addra,
    output reg [data_width - 1:0]   data_we,
    input      [data_width - 1:0]   data_re
);

    // One-hot state encoding. These are internal constants, so they are
    // declared as localparam to keep them from being overridden by accident.
    localparam IDLE     = 6'b000001; // wait for en, latch parent/length
    localparam BEGIN    = 6'b000010; // read the value stored at parent
    localparam GET      = 6'b000100; // read both children, pick the larger
    localparam COMPARE  = 6'b001000; // compare sifted value with that child
    localparam WRITE    = 6'b010000; // move the child up, descend one level
    localparam COMPLETE = 6'b100000; // store sifted value, pulse done

    reg [5:0] state;
    reg [5:0] next_state;

    reg [7:0]              cnt;           // step counter inside a state
    reg [data_width - 1:0] temp;          // value being sifted down
    reg [data_width - 1:0] child_L;       // value of the left child
    reg [data_width - 1:0] child_R;       // value of the right child
    reg [data_width - 1:0] child_compare; // value of the larger child

    // One bit wider than the address so that 2*i+1 cannot wrap around and be
    // mistaken for a valid index when compared against length_r.
    reg [addr_width:0] parent_r;
    reg [addr_width:0] child_r;
    reg [addr_width:0] length_r;

    // FSM part 1: state register.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) state <= IDLE;
        else        state <= next_state;
    end

    // FSM part 2: next-state logic.
    // next_state holds the current state unless a branch overrides it, and
    // the default arm recovers from an illegal encoding, so no latch is
    // inferred and the FSM cannot lock up.
    always @(*) begin
        next_state = state;
        case (state)
            IDLE: begin
                if (en) next_state = BEGIN;
            end
            BEGIN: begin
                if (cnt == 8'd2) next_state = GET;
            end
            GET: begin
                // No child inside the heap: the sifted value has found its place.
                if (child_r >= length_r) next_state = COMPLETE;
                else if (cnt == 8'd4)    next_state = COMPARE;
            end
            COMPARE: begin
                if (temp >= child_compare) next_state = COMPLETE;
                else                       next_state = WRITE;
            end
            WRITE: begin
                if (cnt == 8'd1) next_state = GET;
            end
            COMPLETE: begin
                if (clr) next_state = IDLE;
            end
            default: next_state = IDLE;
        endcase
    end

    // FSM part 3: registered outputs and datapath.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // Every register gets a defined value; in particular wea must be
            // low so that no unknown write enable reaches the RAM after reset.
            done          <= 1'b0;
            wea           <= 1'b0;
            addra         <= {addr_width{1'b0}};
            data_we       <= {data_width{1'b0}};
            cnt           <= 8'd0;
            temp          <= {data_width{1'b0}};
            child_L       <= {data_width{1'b0}};
            child_R       <= {data_width{1'b0}};
            child_compare <= {data_width{1'b0}};
            parent_r      <= {(addr_width + 1){1'b0}};
            child_r       <= {(addr_width + 1){1'b0}};
            length_r      <= {(addr_width + 1){1'b0}};
        end
        else begin
            case (state)
                IDLE: begin
                    // Continuously track the inputs so the values present on
                    // the edge that leaves IDLE are the ones used.
                    parent_r <= {1'b0, parent};
                    length_r <= {1'b0, length};
                    child_r  <= 2*parent + 1'b1;
                    cnt      <= 8'd0;
                    child_R  <= {data_width{1'b0}};
                    child_L  <= {data_width{1'b0}};
                    done     <= 1'b0;
                    wea      <= 1'b0; // never leave a write enable pending
                end

                BEGIN: begin
                    if (cnt == 8'd0) begin
                        addra <= parent_r[addr_width - 1:0];
                        cnt   <= cnt + 1'b1;
                    end
                    else if (cnt == 8'd2) begin
                        temp <= data_re;   // value stored at parent
                        cnt  <= 8'd0;
                    end
                    else begin
                        cnt <= cnt + 1'b1; // wait for the RAM read latency
                    end
                end

                GET: begin
                    if (child_r >= length_r) begin
                        addra <= addra;
                    end
                    else begin
                        if (cnt == 8'd0) begin
                            addra <= child_r[addr_width - 1:0];        // left child
                            cnt   <= cnt + 1'b1;
                        end
                        else if (cnt == 8'd1) begin
                            addra <= child_r[addr_width - 1:0] + 1'b1; // right child
                            cnt   <= cnt + 1'b1;
                        end
                        else if (cnt == 8'd2) begin
                            child_L <= data_re;
                            cnt     <= cnt + 1'b1;
                        end
                        else if (cnt == 8'd3) begin
                            child_R <= data_re;
                            cnt     <= cnt + 1'b1;
                        end
                        else if (cnt == 8'd4) begin
                            // The right child only counts when it lies inside
                            // the heap; otherwise the value read for it is ignored.
                            if ((child_r + 1'b1 < length_r) && (child_R > child_L)) begin
                                child_r       <= child_r + 1'b1;
                                child_compare <= child_R;
                            end
                            else begin
                                child_r       <= child_r;
                                child_compare <= child_L;
                            end
                            cnt <= 8'd0;
                        end
                        else begin
                            cnt <= cnt + 1'b1;
                        end
                    end
                end

                COMPARE: begin
                    // Decision only; handled in the next-state logic.
                end

                WRITE: begin
                    if (cnt == 8'd0) begin
                        // Move the larger child up into the parent slot.
                        addra   <= parent_r[addr_width - 1:0];
                        wea     <= 1'b1;
                        data_we <= child_compare;
                        cnt     <= cnt + 1'b1;
                    end
                    else if (cnt == 8'd1) begin
                        // Descend: the chosen child becomes the new parent.
                        wea      <= 1'b0;
                        cnt      <= 8'd0;
                        parent_r <= child_r;
                        child_r  <= child_r*2 + 1'b1;
                    end
                    else begin
                        cnt <= cnt;
                    end
                end

                COMPLETE: begin
                    if (cnt == 8'd0) begin
                        // Drop the sifted value into its final position.
                        wea     <= 1'b1;
                        addra   <= parent_r[addr_width - 1:0];
                        data_we <= temp;
                        cnt     <= cnt + 1'b1;
                    end
                    else if (cnt == 8'd1) begin
                        wea  <= 1'b0;
                        cnt  <= cnt + 1'b1;
                        done <= 1'b1;
                    end
                    else if (cnt == 8'd2) begin
                        // done was high for one cycle; now wait here for clr.
                        done <= 1'b0;
                        cnt  <= 8'd2;
                    end
                end

                default: begin
                    // Illegal state: make sure nothing is written while the
                    // next-state logic steers back to IDLE.
                    wea  <= 1'b0;
                    done <= 1'b0;
                end
            endcase
        end
    end

endmodule
上层控制模块代码:
`timescale 1ns / 1ps
//-----------------------------------------------------------------------------
// TOP
//
// Heap-sort controller. It drives the sift-down engine `initial_stack` and the
// data RAM `Stack_RAM_2` in two phases:
//
//   Phase 1 (BEGIN/RANK/FINISH/DONE): build a max-heap by running a sift-down
//     for parent = stack_deepth/2 down to 0.
//   Phase 2 (READ/WRITE/RANK_2/FINISH_2/DONE_2): repeatedly swap the root with
//     the last element of the shrinking heap and sift the new root down, which
//     leaves the RAM sorted in ascending address order.
//
// After every sift-down, FINISH / FINISH_2 step the RAM address through
// 0 .. stack_deepth-1. The read data is not captured anywhere in this module;
// presumably the sweep exists so the RAM contents can be inspected in
// simulation.
//
// Parameters
//   addr_width   : RAM address width
//   data_width   : RAM word width
//   stack_deepth : number of elements to sort; must fit in addr_width bits
//
// NOTE(review): Stack_RAM_2 is defined outside this file and is instantiated
// without parameters, so its address/data widths must be generated to match
// addr_width/data_width.
//-----------------------------------------------------------------------------
module TOP
#(
    parameter addr_width   = 5,  // stack address width
    parameter data_width   = 8,  // stack data width
    parameter stack_deepth = 20  // stack depth (number of elements)
)
(
    input clk,
    input rst_n
);

    // One-hot state encoding (internal constants).
    localparam BEGIN    = 9'b0_0000_0001; // phase 1: start one sift-down
    localparam RANK     = 9'b0_0000_0010; // phase 1: wait for sift-down done
    localparam FINISH   = 9'b0_0000_0100; // phase 1: sweep RAM addresses
    localparam DONE     = 9'b0_0000_1000; // phase 1: heap is built
    localparam READ     = 9'b0_0001_0000; // phase 2: read root and last element
    localparam WRITE    = 9'b0_0010_0000; // phase 2: write them back swapped
    localparam RANK_2   = 9'b0_0100_0000; // phase 2: wait for sift-down done
    localparam FINISH_2 = 9'b0_1000_0000; // phase 2: sweep RAM addresses
    localparam DONE_2   = 9'b1_0000_0000; // phase 2: sort finished

    // Interface to the sift-down engine.
    reg                     en;      // start a sift-down pass
    reg                     clr;     // return the engine to idle
    wire                    done;    // one-cycle pulse: pass finished
    reg  [addr_width - 1:0] parent;  // node to sift down
    reg  [addr_width - 1:0] length;  // number of valid heap elements
    wire                    wea_initial;
    wire [addr_width - 1:0] addra;
    wire [data_width - 1:0] data_we_initial;

    // Interface to the RAM (after muxing between this FSM and the engine).
    wire                    wea;
    wire [addr_width - 1:0] RAM_addr;
    wire [data_width - 1:0] data_we;
    wire [data_width - 1:0] data_re;

    // FSM registers.
    reg  [8:0]              state;
    reg  [8:0]              next_state;
    reg  [addr_width - 1:0] cnt;          // phase 1: current parent; phase 2: step counter
    reg  [addr_width - 1:0] cnt2;         // phase 2: index of the last heap element
    reg  [addr_width - 1:0] addr;         // RAM address driven by this FSM
    reg                     initial_done; // set once the heap has been built
    reg  [data_width - 1:0] list_i;       // value read from index cnt2
    reg  [data_width - 1:0] list_0;       // value read from index 0 (heap root)
    reg                     wea_FSM;      // write enable from this FSM
    reg  [data_width - 1:0] data_we_FSM;  // write data from this FSM

    // FSM part 1: state register.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) state <= BEGIN;
        else        state <= next_state;
    end

    // FSM part 2: next-state logic.
    // Holding the current state by default plus a default arm avoids an
    // inferred latch and recovers from an illegal encoding.
    always @(*) begin
        next_state = state;
        case (state)
            BEGIN: next_state = RANK;
            RANK: begin
                if (done) next_state = FINISH;
            end
            FINISH: begin
                if (addr == stack_deepth - 1) begin
                    // cnt wraps to all ones after parent 0 has been processed.
                    if (cnt != {addr_width{1'b1}}) next_state = BEGIN;
                    else                           next_state = DONE;
                end
            end
            DONE: next_state = READ;
            READ: begin
                if (cnt == 3) next_state = WRITE;
            end
            WRITE: begin
                if (cnt == 2) next_state = RANK_2;
            end
            RANK_2: begin
                if (done) next_state = FINISH_2;
            end
            FINISH_2: begin
                if (addr == stack_deepth - 1) begin
                    if (cnt2 != 0) next_state = READ;
                    else           next_state = DONE_2;
                end
            end
            DONE_2: next_state = DONE_2; // sort finished, stay here
            default: next_state = BEGIN;
        endcase
    end

    // FSM part 3: registered outputs.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // All control registers get defined values so that no X reaches
            // the sift-down engine or the RAM after reset.
            cnt          <= stack_deepth/2;
            cnt2         <= {addr_width{1'b0}};
            addr         <= {addr_width{1'b1}};
            initial_done <= 1'b0;
            wea_FSM      <= 1'b0;
            data_we_FSM  <= {data_width{1'b0}};
            en           <= 1'b0;
            clr          <= 1'b0;
            parent       <= {addr_width{1'b0}};
            length       <= {addr_width{1'b0}};
            list_i       <= {data_width{1'b0}};
            list_0       <= {data_width{1'b0}};
        end
        else begin
            case (state)
                BEGIN: begin // phase 1: launch a sift-down at node cnt
                    en     <= 1'b1;
                    clr    <= 1'b0;
                    parent <= cnt;
                    length <= stack_deepth;
                end

                RANK: begin
                    clr <= 1'b0;
                    if (done) begin
                        cnt  <= cnt - 1'b1; // next (lower) parent
                        clr  <= 1'b1;
                        en   <= 1'b0;
                        addr <= {addr_width{1'b0}};
                    end
                end

                FINISH: begin
                    clr  <= 1'b0;
                    addr <= addr + 1'b1;
                end

                DONE: begin
                    initial_done <= 1'b1;             // heap has been built
                    cnt2         <= stack_deepth - 1; // last element of the heap
                    cnt          <= 0;
                end

                READ: begin // phase 2: fetch root and last element
                    if (cnt == 0) begin
                        addr <= 0;
                        cnt  <= cnt + 1'b1;
                    end
                    else if (cnt == 1) begin
                        addr <= cnt2;
                        cnt  <= cnt + 1'b1;
                    end
                    else if (cnt == 2) begin
                        list_0 <= data_re;
                        cnt    <= cnt + 1'b1;
                    end
                    else if (cnt == 3) begin
                        list_i <= data_re;
                        cnt    <= 0;
                    end
                    else begin
                        cnt <= cnt;
                    end
                end

                WRITE: begin // write the two values back swapped
                    if (cnt == 0) begin
                        wea_FSM     <= 1'b1;
                        addr        <= 0;
                        data_we_FSM <= list_i;
                        cnt         <= cnt + 1'b1;
                    end
                    else if (cnt == 1) begin
                        wea_FSM     <= 1'b1;
                        addr        <= cnt2;
                        data_we_FSM <= list_0;
                        cnt         <= cnt + 1'b1;
                    end
                    else if (cnt == 2) begin
                        // Launch a sift-down of the new root over the heap
                        // that no longer contains index cnt2.
                        wea_FSM <= 1'b0;
                        cnt     <= 0;
                        parent  <= 0;
                        length  <= cnt2;
                        en      <= 1'b1;
                    end
                    else begin
                        cnt <= cnt;
                    end
                end

                RANK_2: begin
                    if (done) begin
                        cnt2 <= cnt2 - 1'b1; // heap shrinks by one
                        clr  <= 1'b1;
                        en   <= 1'b0;
                        addr <= 0;
                    end
                end

                FINISH_2: begin
                    clr  <= 1'b0;
                    addr <= addr + 1'b1;
                end

                default: begin
                    // DONE_2 (and any illegal state): nothing to update.
                end
            endcase
        end
    end

    // Sift-down engine. The parameters are forwarded explicitly so that the
    // port widths stay consistent when TOP is instantiated with non-default
    // addr_width/data_width.
    initial_stack
    #(
        .addr_width(addr_width),
        .data_width(data_width)
    )
    U1
    (
        .clk(clk),
        .rst_n(rst_n),
        .en(en),
        .clr(clr),
        .done(done),
        .parent(parent),
        .length(length),
        .wea(wea_initial),
        .addra(addra),
        .data_we(data_we_initial),
        .data_re(data_re)
    );

    // RAM port arbitration: this FSM owns the RAM while it reads/writes or
    // sweeps the addresses; otherwise the sift-down engine owns it.
    assign wea      = (state == WRITE) ? wea_FSM : wea_initial;
    assign RAM_addr = (state == FINISH || state == READ || state == WRITE || state == FINISH_2) ? addr : addra;
    assign data_we  = (state == WRITE) ? data_we_FSM : data_we_initial;

    // Data RAM (defined outside this file).
    Stack_RAM_2 RAM1
    (
        .clka(clk),
        .wea(wea),
        .addra(RAM_addr),
        .dina(data_we),
        .douta(data_re)
    );

endmodule
仿真时RAM中的初始数据(共20个,对应stack_deepth = 20)为:1,3,4,5,2,6,9,7,8,0,11,15,13,19,20,16,12,10,14,11;
仿真结果如下:
可以看出第一次堆初始化后数据依旧无序。按照上述状态机经过30次初始化后,RAM数据如下:
本文的算法和代码可能还有许多缺点和我没有发现的bug,希望大家看见了能够告诉我。另外也希望能借此获得大家的指点,得到一个适合FPGA实现的排序算法,谢谢啦。
FPGA微信技术群
欢迎大家加入全国FPGA微信技术群,这里有一群热爱技术的工程师,在这里可以一起交流讨论技术!
用手指按住就可以加入FPGA全国技术群哦
FPGA IP核服务:各类优质IP核服务商,服务到位,有保障!有需求的可以直接联系群主!
FPGA技术群平台自营:Xilinx Altera 镁光、三星、海力士、ADI TI ST NXP 等品牌的优势代理分销商,欢迎大家有需求随时发型号清单,我们将在第一时间为您提供最优竞争力的报价!价格低于您原有供应商5%以上!欢迎询价-直接把需求发给群主!
FPGA技术群官方鸣谢品牌:Xilinx、Intel(Altera)、Microsemi(Actel)、Lattice、Vantis、QuickLogic、Lucent等
以上是关于堆排序的Verilog实现的主要内容,如果未能解决你的问题,请参考以下文章