OpenCores
URL https://opencores.org/ocsvn/cr_div/cr_div/trunk

Subversion Repositories cr_div

[/] [cr_div/] [trunk/] [rtl/] [verilog/] [cr_div.v] - Blame information for rev 2

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 robfinch
`timescale 1ns / 1ps
2
// ============================================================================
3
//        __
4
//   \\__/ o\    (C) 2012-2013  Robert Finch, Stratford
5
//    \  __ /    All rights reserved.
6
//     \/_//     robfinch<remove>@opencores.org
7
//       ||
8
//
9
// Cached Reciprocal Divider
10
// - Allows divides to be performed in three clock cycles by storing the
11
//   reciprocal of the divisor in a cache, then using a multiply for
12
//   subsequent divides.
13
//
14
// This source file is free software: you can redistribute it and/or modify 
15
// it under the terms of the GNU Lesser General Public License as published 
16
// by the Free Software Foundation, either version 3 of the License, or     
17
// (at your option) any later version.                                      
18
//                                                                          
19
// This source file is distributed in the hope that it will be useful,      
20
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
21
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
22
// GNU General Public License for more details.                             
23
//                                                                          
24
// You should have received a copy of the GNU General Public License        
25
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
26
//
27
// ============================================================================
28
//
29
// ----------------------------------------------------------------------------
// cr_div - unsigned 32-bit divider using a cache of reciprocals.
//
// Ports
//   rst   : synchronous reset; returns the FSM to IDLE and invalidates every
//           cache entry.
//   clk   : clock, all state changes on the rising edge.
//   addr  : bits [7:2] select one of the 64 cache entries.
//   start : sampled in IDLE; begins a divide of a by b.
//   a, b  : dividend and divisor. They are NOT latched: a, b and addr are read
//           again in later states, so hold them stable until done is seen.
//   q, r  : quotient and remainder, both valid while done is high and held
//           until the next start.
//   done  : high for the one cycle the FSM spends in DONE.
//
// Operation
//   The reciprocal floor(2^32/b) is looked up (or computed by a 33-step
//   restoring divide and then cached), q is estimated as the upper half of
//   a * reciprocal, and the FIX state corrects the estimate.  Because the
//   reciprocal is truncated the estimate is either exact or one too small,
//   so a single compare/adjust is sufficient.
//
//   b == 1 returns q = a, r = 0 directly (2^32 does not fit in 32 bits).
//   b == 0 returns q = 32'hFFFFFFFF, r = a directly and leaves the cache
//   untouched.
//
// Latency note: the FIX state adds one cycle before done compared with
// returning the raw estimate.
// ----------------------------------------------------------------------------
module cr_div(rst, clk, addr, start, a, b, q, r, done);
parameter IDLE = 3'd1;
parameter RECIP = 3'd2;
parameter RECIP1 = 3'd3;
parameter DONE = 3'd4;
parameter FIX = 3'd5;             // quotient correction / remainder state
input rst;
input clk;
input [31:0] addr;
input start;
input [31:0] a;
input [31:0] b;
output [31:0] q;
output [31:0] r;
output done;

reg [2:0] state;
reg [31:0] bcache [0:63];         // 'b' is the cache tag
reg [31:0] recip_cache [0:63];    // cache of reciprocals
reg [63:0] valid;                 // one valid bit per cache entry, cleared by rst
wire [5:0] idx = addr[7:2];       // cache entry selected by addr
// A hit needs a valid entry as well as a matching tag, so an uninitialised
// tag memory can never be mistaken for a cached divisor.
wire hit = valid[idx] && (b == bcache[idx]);
wire [63:0] prod = recip_cache[idx] * a;
reg [31:0] q,r;
reg r_hi;                         // bit 32 of the working remainder in RECIP
reg [7:0] cnt;
wire cnt_done = cnt==8'd0;
assign done = state==DONE;

// Restoring-divide step. The working remainder {r_hi,r} can reach 2*b-1,
// which needs 33 bits once b exceeds 2^31. After the conditional subtract
// the value is below b again, so r1 always fits in 32 bits.
wire b0 = {1'b0,b} <= {r_hi,r};
wire [31:0] r1 = b0 ? r - b : r;

// Remainder implied by the current quotient estimate. The estimate never
// exceeds the true quotient, so this does not wrap; it is >= b exactly when
// the estimate is one short.
wire [31:0] rem_est = a - b * q;
wire q_low = rem_est >= b;

always @(posedge clk)
if (rst) begin
        state <= IDLE;
        cnt <= 8'd0;
        valid <= 64'd0;
end
else begin
if (!cnt_done)
        cnt <= cnt - 8'd1;
case(state)
IDLE:
        if (start) begin
                // The reciprocal is a fraction less than one, so the dividend (1)
                // is placed straight into the remainder field instead of being
                // shifted in through the quotient field. That skips the 32 cycles
                // that would only produce leading zeros.
                q <= 32'd0;
                r <= 32'd1;
                r_hi <= 1'b0;
                if (b==32'd0) begin
                        // Divide by zero: give a fixed result immediately rather than
                        // spending 33 cycles and caching a meaningless reciprocal.
                        q <= 32'hFFFFFFFF;
                        r <= a;
                        state <= DONE;
                end
                else if (b==32'd1) begin
                        q <= a;
                        r <= 32'd0;
                        state <= DONE;
                end
                // Fast path: the reciprocal is cached, so the quotient estimate is
                // available after a single multiply.
                else if (hit) begin
                        q <= prod[63:32];
                        state <= FIX;
                end
                else
                        state <= RECIP;
                cnt <= 8'd33;           // overrides the decrement above
        end
// Compute the reciprocal (33 shift/subtract steps) and cache it once the
// counter has run out.
RECIP:
        if (!cnt_done) begin
                q <= {q[30:0],b0};
                {r_hi,r} <= {r1,q[31]};
        end
        else begin
                bcache[idx] <= b;
                recip_cache[idx] <= q;
                valid[idx] <= 1'b1;
                state <= RECIP1;
        end
// Estimate the quotient using the newly cached reciprocal.
RECIP1:
        begin
                q <= prod[63:32];
                state <= FIX;
        end
// Correct the estimate and produce the remainder, so that q and r are both
// final when done is raised. This costs a multiply and a subtract.
FIX:
        begin
                if (q_low) begin
                        q <= q + 32'd1;
                        r <= rem_est - b;
                end
                else
                        r <= rem_est;
                state <= DONE;
        end
// Results are presented for one cycle with done high.
DONE:
        begin
        $display("==========================");
        $display("a=%d,b=%d",a,b);
        $display("q=%d,r=%d",q,r);
        $display("rc[%h]=%d",addr[7:2],recip_cache[addr[7:2]]);
        $display("==========================");
        state <= IDLE;
        end
// Recover from any undefined state encoding.
default:
        state <= IDLE;
endcase
end

endmodule
123
 
124
// ----------------------------------------------------------------------------
// cr_div_tb - test bench for cr_div.
//
// Walks addr upward in steps of 4. Each address selects one (a,b) pair; a
// one-cycle start pulse is issued whenever addr has just changed, and addr
// advances when the divider reports done. The run ends once the vector at
// addr 32'h100 (which re-uses cache entry 0) has completed.
//
// Results are checked against the simulator's own / and % one clock after
// done, the point at which both q and r have settled. Vectors with b == 0
// are not checked.
// ----------------------------------------------------------------------------
module cr_div_tb();

reg rst;
reg clk;
reg start;
wire done;
wire [31:0] q,r;
reg [31:0] a,b;
reg [7:0] cycles;
reg [31:0] addr,oaddr;
reg chk_pending;                 // a finished divide is waiting to be checked
reg [31:0] chk_a,chk_b;          // operands of the divide being checked
integer errors;

// Reset is moved off the clock edges (rising edges fall on multiples of 20)
// so the reset sequence does not race with the clocked logic. start is driven
// only by the clocked block below.
initial begin
        clk = 1;
        rst = 0;
        errors = 0;
        #105 rst = 1;
        #100 rst = 0;
end

always #10 clk = ~clk;  //  50 MHz

cr_div u1
(
        .rst(rst),
        .clk(clk),
        .start(start),
        .addr(addr),
        .a(a),
        .b(b),
        .q(q),
        .r(r),
        .done(done)
);

// Stimulus table: operands follow addr combinationally, so they are already
// stable on the clock edge at which start is sampled.
always @(addr)
case(addr)
10'h00:  begin a = 32'd10005; b = 32'd27; end
10'h04:  begin a = 32'd9999; b = 32'd21; end
10'h08:  begin a = 32'd9999; b = 32'd0; end
10'h0C:  begin a = 32'hFFFFFFFF; b = 32'd1; end
10'h10: begin a = 32'h36969; b = 27; end
10'h14:  begin a = 32'd0; b = 32'hFFFFFFFF; end
10'h18:  begin a = 32'd1; b = 32'hFFFFFFFF; end
10'h1C:  begin a = 32'hFFFFFFFF; b = 32'd2; end
10'h100:begin a = 32'd3721; b = 32'd27; end                      // <- this one simulates a loop (hits the same cache address as h00)
default:        begin a = 32'd999; b = 32'd99;  end
endcase

always @(posedge clk)
if (rst) begin
        addr <= 32'd0;
        // Deliberately different from addr so that the first cycle after reset
        // sees "addr changed" and launches the first divide.
        oaddr <= 32'hFFFFFFFF;
        cycles <= 8'h0;
        start <= 1'b0;
        chk_pending <= 1'b0;
end
else begin
        start <= 1'b0;
        cycles <= cycles + 8'd1;
        oaddr <= addr;
        if (done)
                addr <= addr + 32'd4;
        if (addr != oaddr) begin
                start <= 1'b1;
                cycles <= 8'h00;
        end

        // Remember the operands when done is seen and compare on the next clock.
        chk_pending <= done;
        if (done) begin
                chk_a <= a;
                chk_b <= b;
        end
        if (chk_pending && chk_b != 32'd0) begin
                if (q !== chk_a / chk_b || r !== chk_a % chk_b) begin
                        errors = errors + 1;
                        $display("ERROR: %d / %d gave q=%d,r=%d",chk_a,chk_b,q,r);
                end
        end

        $display("addr=%h,a=%d,b=%d,q=%d,r=%d,done=%d,cycles=%d",addr,a,b,q,r,done,cycles);

        // addr moves past 32'h100 only after that vector has finished.
        if (addr > 32'h100) begin
                $display("cr_div_tb finished: %0d error(s)",errors);
                $finish;
        end
end

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.