串/并行乘法器的速度比较慢,本节在上节的基础上对电路进行改进以提高速度:引入流水线结构,先将相邻的两个部分积两两相加,再以加法树的形式逐级求和,在结构上形成流水化处理,从而缩短关键路径。
代码:8位×8位乘法器(输出16位乘积)
// Pipelined unsigned multiplier: dataa (8 bit) x datab (8 bit) -> dout (16 bit).
//
// Ports
//   clk   : clock, all registers update on the rising edge
//   rst   : asynchronous reset, active low (clears every pipeline register)
//   dataa : multiplicand
//   datab : multiplier
//   dout  : low 16 bits of the accumulated sum of partial products
//
// Pipeline (three register stages, so a result appears on dout after the
// third rising clock edge following the edge that sampled the operands):
//   stage 1 : eight shifted partial products, one per bit of datab
//   stage 2 : pairwise sums of neighbouring partial products
//   stage 3 : sum of the four pairwise sums
module Multiply(
    input  wire        clk,
    input  wire        rst,
    input  wire [7:0]  dataa,
    input  wire [7:0]  datab,
    output wire [15:0] dout
);

    // Multiplicand zero-extended to the product width so it can be shifted
    // into position without losing bits.
    wire [15:0] a_wide = {8'b0, dataa};

    // Stage 1 registers: partial product k is (dataa << k) gated by datab[k].
    reg [15:0] pp0, pp1, pp2, pp3, pp4, pp5, pp6, pp7;

    // Stage 2 registers: one extra bit to hold the carry of a 16-bit add.
    reg [16:0] sum01, sum23, sum45, sum67;

    // Stage 3 register: final accumulation of the four pair sums.
    reg [17:0] acc;

    // ---- Stage 1: generate partial products ----
    always @(posedge clk or negedge rst) begin
        if (!rst) begin
            pp0 <= 16'd0;
            pp1 <= 16'd0;
            pp2 <= 16'd0;
            pp3 <= 16'd0;
            pp4 <= 16'd0;
            pp5 <= 16'd0;
            pp6 <= 16'd0;
            pp7 <= 16'd0;
        end
        else begin
            // Replicating the multiplier bit across 16 bits forms an AND
            // mask: the shifted multiplicand passes through when the bit
            // is 1 and is forced to zero when the bit is 0.
            pp0 <= {16{datab[0]}} &  a_wide;
            pp1 <= {16{datab[1]}} & (a_wide << 1);
            pp2 <= {16{datab[2]}} & (a_wide << 2);
            pp3 <= {16{datab[3]}} & (a_wide << 3);
            pp4 <= {16{datab[4]}} & (a_wide << 4);
            pp5 <= {16{datab[5]}} & (a_wide << 5);
            pp6 <= {16{datab[6]}} & (a_wide << 6);
            pp7 <= {16{datab[7]}} & (a_wide << 7);
        end
    end

    // ---- Stage 2: add neighbouring partial products ----
    always @(posedge clk or negedge rst) begin
        if (!rst) begin
            sum01 <= 17'd0;
            sum23 <= 17'd0;
            sum45 <= 17'd0;
            sum67 <= 17'd0;
        end
        else begin
            sum01 <= pp1 + pp0;
            sum23 <= pp3 + pp2;
            sum45 <= pp5 + pp4;
            sum67 <= pp7 + pp6;
        end
    end

    // ---- Stage 3: combine the four pair sums ----
    always @(posedge clk or negedge rst) begin
        if (!rst)
            acc <= 18'd0;
        else
            acc <= (sum01 + sum23) + (sum45 + sum67);
    end

    // Only the low 16 bits are exported as the product.
    assign dout = acc[15:0];

endmodule
testbench代码为:
`timescale 1 ns/ 1 ps
// Self-checking testbench for the Multiply module.
//
// Changes relative to a plain stimulus-only bench:
//   * Stimulus is driven with nonblocking assignments so the DUT, which
//     samples on the same rising clock edge, always sees the previous
//     operand values (removes the blocking-assignment race).
//   * dataa/datab start from a defined value instead of X.
//   * Operands cover the full 8-bit range so every partial product of the
//     multiplier is exercised.
//   * A three-stage reference delay line mirrors the DUT pipeline and each
//     output is compared against it.
//   * The simulation terminates on its own and prints a summary.
module mult_test;
    reg         clk;
    reg         rst;
    reg  [7:0]  dataa;
    reg  [7:0]  datab;
    wire [15:0] dout;

    // Reference model: the expected product delayed by the same three
    // register stages the DUT uses.
    reg  [15:0] expect_s1;
    reg  [15:0] expect_s2;
    reg  [15:0] expect_s3;
    integer     errors;

    Multiply u1(
        .clk(clk),
        .rst(rst),
        .dataa(dataa),
        .datab(datab),
        .dout(dout)
    );

    // Reset is asserted (low) at time 0 and released at 10 ns, between the
    // rising clock edges at 5 ns and 15 ns.
    initial
    begin
        rst    = 0;
        clk    = 0;
        dataa  = 8'd0;
        datab  = 8'd0;
        errors = 0;
        #10 rst = 1;
    end

    // 10 ns clock period.
    always #5 clk = ~clk;

    // New random operands every cycle; $random is truncated to 8 bits.
    always @(posedge clk)
    begin
        dataa <= $random;
        datab <= $random;
    end

    // Reference pipeline and checker. The comparison reads dout and
    // expect_s3 before this edge's nonblocking updates take effect, so both
    // refer to the same pipeline slot.
    always @(posedge clk or negedge rst)
    begin
        if (!rst)
        begin
            expect_s1 <= 16'd0;
            expect_s2 <= 16'd0;
            expect_s3 <= 16'd0;
        end
        else
        begin
            expect_s1 <= dataa * datab;
            expect_s2 <= expect_s1;
            expect_s3 <= expect_s2;
            if (dout !== expect_s3)
            begin
                errors = errors + 1;
                $display("MISMATCH at %0t: dout=%0d expected=%0d", $time, dout, expect_s3);
            end
        end
    end

    // End the run after 2000 ns (a falling clock edge, so no check is cut
    // short) and report the outcome.
    initial
    begin
        #2000;
        if (errors == 0)
            $display("PASS: all products matched");
        else
            $display("FAIL: %0d mismatches", errors);
        $finish;
    end
endmodule