OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [mpyop.v] - Blame information for rev 209

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 209 dgisselq
////////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    mpyop.v
4
//
5
// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core
6
//
7
// Purpose:     This code has been pulled from the cpuops.v file so as to
8
//              encapsulate the multiply component--the one component that
9
//      (can't be) formally verified well, and so must be abstracted away.
10
//      This separation was done to support potential future abstraction.
11
//
12
//
13
// Creator:     Dan Gisselquist, Ph.D.
14
//              Gisselquist Technology, LLC
15
//
16
////////////////////////////////////////////////////////////////////////////////
17
//
18
// Copyright (C) 2015-2019, Gisselquist Technology, LLC
19
//
20
// This program is free software (firmware): you can redistribute it and/or
21
// modify it under the terms of  the GNU General Public License as published
22
// by the Free Software Foundation, either version 3 of the License, or (at
23
// your option) any later version.
24
//
25
// This program is distributed in the hope that it will be useful, but WITHOUT
26
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28
// for more details.
29
//
30
// You should have received a copy of the GNU General Public License along
31
// with this program.  (It's in the $(ROOT)/doc directory.  Run make with no
32
// target there if the PDF file isn't present.)  If not, see
33
// <http://www.gnu.org/licenses/> for a copy.
34
//
35
// License:     GPL, v3, as defined and found on www.gnu.org,
36
//              http://www.gnu.org/licenses/gpl.html
37
//
38
//
39
////////////////////////////////////////////////////////////////////////////////
40
//
41
//
42
`default_nettype        none
43
//
44
module  mpyop(i_clk,i_reset, i_stb, i_op, i_a, i_b, o_valid, o_busy, o_result, o_hi);
45
        // The following parameter selects which multiply algorithm we use.
46
        // Timing performance is strictly dependent upon it.
47
        parameter       IMPLEMENT_MPY = 1;
48
        input   wire            i_clk, i_reset, i_stb;
49
        input   wire    [1:0]    i_op; // 2'b00=MPY, 2'b10=MPYUHI, 2'b11=MPYSHI
50
        input   wire    [31:0]   i_a, i_b;
51
        output  wire            o_valid; // True if we'll be valid on the next clock;
52
        output  wire            o_busy; // The multiply is busy if true
53
        output  wire    [63:0]   o_result; // Where we dump the multiply result
54
        output  reg             o_hi;   // Return the high half of the multiply
55
 
56
 
57
        // A 4-way multiplexer can be done in one 6-LUT.
58
        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with
59
        //      the Xilinx multiplexer fabric that follows. 
60
        // Given that we wish to apply this multiplexer approach to 33-bits,
61
        // this will cost a minimum of 132 6-LUTs.
62
 
63
// i_stb instead of this_is_a_multiply_op
64
// o_result
65
// o_busy
66
// o_done
67
        generate
68
        if (IMPLEMENT_MPY == 0)
69
        begin : MPYNONE // No multiply support.
70
 
71
                assign  o_result   = 64'h00;
72
                assign  o_busy     = 1'b0;
73
                assign  o_valid    = i_stb;
74
                always @(*) o_hi = 1'b0; // Not needed
75
 
76
`ifdef  VERILATOR
77
                // verilator lint_off UNUSED
78
                wire    [32+32+5-1:0]    mpy_unused;
79
                assign  mpy_unused = { i_clk, i_reset, i_stb, i_op, i_a, i_b };
80
                // verilator lint_on  UNUSED
81
`endif
82
        end else begin : IMPY
83
        if (IMPLEMENT_MPY == 1)
84
        begin : MPY1CK // Our single clock option (no extra clocks)
85
 
86
                wire    signed  [63:0]   w_mpy_a_input, w_mpy_b_input;
87
 
88
                assign  w_mpy_a_input = {{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
89
                assign  w_mpy_b_input = {{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
90
 
91
                assign  o_result = w_mpy_a_input * w_mpy_b_input;
92
 
93
                assign  o_busy  = 1'b0;
94
                assign  o_valid = 1'b0;
95
                always @(*) o_hi = i_op[1];
96
 
97
`ifdef  VERILATOR
98
                // verilator lint_off UNUSED
99
                wire    [3:0]    mpy_unused;
100
                assign  mpy_unused = { i_clk, i_reset, i_stb, i_op[1] };
101
                // verilator lint_on  UNUSED
102
`endif
103
 
104
        end else begin: MPN1
105
        if (IMPLEMENT_MPY == 2)
106
        begin : MPY2CK // Our two clock option (ALU must pause for 1 clock)
107
 
108
                reg     signed  [63:0]   r_mpy_a_input, r_mpy_b_input;
109
                always @(posedge i_clk)
110
                begin
111
                        r_mpy_a_input <={{(32){(i_a[31])&(i_op[0])}},i_a[31:0]};
112
                        r_mpy_b_input <={{(32){(i_b[31])&(i_op[0])}},i_b[31:0]};
113
                end
114
 
115
                assign  o_result = r_mpy_a_input * r_mpy_b_input;
116
                assign  o_busy  = 1'b0;
117
 
118
                reg     mpypipe;
119
                initial mpypipe = 1'b0;
120
                always @(posedge i_clk)
121
                        if (i_reset)
122
                                mpypipe <= 1'b0;
123
                        else
124
                                mpypipe <= (i_stb);
125
 
126
                assign  o_valid = mpypipe; // this_is_a_multiply_op;
127
                always @(posedge i_clk)
128
                if (i_stb)
129
                        o_hi  <= i_op[1];
130
 
131
        end else begin : MPN2
132
        if (IMPLEMENT_MPY == 3)
133
        begin : MPY3CK // Our three clock option (ALU pauses for 2 clocks)
134
                reg     signed  [63:0]   r_smpy_result;
135
                reg             [63:0]   r_umpy_result;
136
                reg     signed  [31:0]   r_mpy_a_input, r_mpy_b_input;
137
                reg             [1:0]    mpypipe;
138
                reg             [1:0]    r_sgn;
139
 
140
                initial mpypipe = 2'b0;
141
                always @(posedge i_clk)
142
                        if (i_reset)
143
                                mpypipe <= 2'b0;
144
                        else
145
                        mpypipe <= { mpypipe[0], i_stb };
146
 
147
                // First clock
148
                always @(posedge i_clk)
149
                begin
150
                        r_mpy_a_input <= i_a[31:0];
151
                        r_mpy_b_input <= i_b[31:0];
152
                        r_sgn <= { r_sgn[0], i_op[0] };
153
                end
154
 
155
                // Second clock
156
`ifdef  VERILATOR
157
                wire    signed  [63:0]   s_mpy_a_input, s_mpy_b_input;
158
                wire            [63:0]   u_mpy_a_input, u_mpy_b_input;
159
 
160
                assign  s_mpy_a_input = {{(32){r_mpy_a_input[31]}},r_mpy_a_input};
161
                assign  s_mpy_b_input = {{(32){r_mpy_b_input[31]}},r_mpy_b_input};
162
                assign  u_mpy_a_input = {32'h00,r_mpy_a_input};
163
                assign  u_mpy_b_input = {32'h00,r_mpy_b_input};
164
                always @(posedge i_clk)
165
                        r_smpy_result <= s_mpy_a_input * s_mpy_b_input;
166
                always @(posedge i_clk)
167
                        r_umpy_result <= u_mpy_a_input * u_mpy_b_input;
168
`else
169
 
170
                wire            [31:0]   u_mpy_a_input, u_mpy_b_input;
171
 
172
                assign  u_mpy_a_input = r_mpy_a_input;
173
                assign  u_mpy_b_input = r_mpy_b_input;
174
 
175
                always @(posedge i_clk)
176
                        r_smpy_result <= r_mpy_a_input * r_mpy_b_input;
177
                always @(posedge i_clk)
178
                        r_umpy_result <= u_mpy_a_input * u_mpy_b_input;
179
`endif
180
 
181
                always @(posedge i_clk)
182
                if (i_stb)
183
                        o_hi  <= i_op[1];
184
                assign  o_busy  = mpypipe[0];
185
                assign  o_result = (r_sgn[1])?r_smpy_result:r_umpy_result;
186
                assign  o_valid = mpypipe[1];
187
 
188
                // Results are then set on the third clock
189
        end else begin : MPN3
190
        if (IMPLEMENT_MPY == 4)
191
        begin : MPY4CK // The four clock option
192
                reg     [63:0]   r_mpy_result;
193
                reg     [31:0]   r_mpy_a_input, r_mpy_b_input;
194
                reg             r_mpy_signed;
195
                reg     [2:0]    mpypipe;
196
 
197
                // First clock, latch in the inputs
198
                initial mpypipe = 3'b0;
199
                always @(posedge i_clk)
200
                begin
201
                        // mpypipe indicates we have a multiply in the
202
                        // pipeline.  In this case, the multiply
203
                        // pipeline is a three stage pipeline, so we need 
204
                        // three bits in the pipe.
205
                        if (i_reset)
206
                                mpypipe <= 3'h0;
207
                        else begin
208
                                mpypipe[0] <= i_stb;
209
                                mpypipe[1] <= mpypipe[0];
210
                                mpypipe[2] <= mpypipe[1];
211
                        end
212
 
213
                        if (i_op[0]) // i.e. if signed multiply
214
                        begin
215
                                r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
216
                                r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
217
                        end else begin
218
                                r_mpy_a_input <= i_a[31:0];
219
                                r_mpy_b_input <= i_b[31:0];
220
                        end
221
                        // The signed bit really only matters in the
222
                        // case of 64 bit multiply.  We'll keep track
223
                        // of it, though, and pretend in all other
224
                        // cases.
225
                        r_mpy_signed  <= i_op[0];
226
 
227
                        if (i_stb)
228
                                o_hi  <= i_op[1];
229
                end
230
 
231
                assign  o_busy  = |mpypipe[1:0];
232
                assign  o_valid = mpypipe[2];
233
 
234
                // Second clock, do the multiplies, get the "partial
235
                // products".  Here, we break our input up into two
236
                // halves, 
237
                //
238
                //   A  = (2^16 ah + al)
239
                //   B  = (2^16 bh + bl)
240
                //
241
                // and use these to compute partial products.
242
                //
243
                //   AB = 2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl)
244
                //
245
                // Since we're following the FOIL algorithm to get here,
246
                // we'll name these partial products according to FOIL.
247
                //
248
                // The trick is what happens if A or B is signed.  In
249
                // those cases, the real value of A will not be given by
250
                //      A = (2^16 ah + al)
251
                // but rather
252
                //      A = (2^16 ah[31^] + al) - 2^31
253
                //  (where we have flipped the sign bit of A)
254
                // and so ...
255
                //
256
                // AB= (2^16 ah + al - 2^31) * (2^16 bh + bl - 2^31)
257
                //      = 2^32(ah*bh)
258
                //              +2^16 (ah*bl+al*bh)
259
                //              +(al*bl)
260
                //              - 2^31 (2^16 bh+bl + 2^16 ah+al)
261
                //              + 2^62
262
                //      = 2^32(ah*bh)
263
                //              +2^16 (ah*bl+al*bh)
264
                //              +(al*bl)
265
                //              - 2^31 (2^16 bh+bl + 2^16 ah+al - 2^31)
266
                //
267
                reg     [31:0]   pp_f, pp_l; // F and L from FOIL
268
                reg     [32:0]   pp_oi; // The O and I from FOIL
269
                reg     [32:0]   pp_s;
270
                always @(posedge i_clk)
271
                begin
272
                        pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
273
                        pp_oi<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0]
274
                                + r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
275
                        pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
276
                        // And a special one for the sign
277
                        if (r_mpy_signed)
278
                                pp_s <= 32'h8000_0000-(
279
                                        r_mpy_a_input[31:0]
280
                                        + r_mpy_b_input[31:0]);
281
                        else
282
                                pp_s <= 33'h0;
283
                end
284
 
285
                // Third clock, add the results and produce a product
286
                always @(posedge i_clk)
287
                begin
288
                        r_mpy_result[15:0] <= pp_l[15:0];
289
                        r_mpy_result[63:16] <=
290
                                { 32'h00, pp_l[31:16] }
291
                                + { 15'h00, pp_oi }
292
                                + { pp_s, 15'h00 }
293
                                + { pp_f, 16'h00 };
294
                end
295
 
296
                assign  o_result = r_mpy_result;
297
                // Fourth clock -- results are clocked into writeback
298
        end else begin : MPYSLOW
299
 
300
                // verilator lint_off UNUSED
301
                wire            unused_aux;
302
                wire    [65:0]   full_result;
303
                // verilator lint_on  UNUSED
304
 
305
                slowmpy #(.LGNA(6), .NA(33)) slowmpyi(i_clk, i_reset, i_stb,
306
                        { (i_op[0])&(i_a[31]), i_a },
307
                        { (i_op[0])&(i_b[31]), i_b }, 1'b0, o_busy,
308
                                o_valid, full_result, unused_aux);
309
 
310
                assign  o_result = full_result[63:0];
311
 
312
                always @(posedge i_clk)
313
                if (i_stb)
314
                        o_hi  <= i_op[1];
315
 
316
        end end end end end
317
        endgenerate // All possible multiply results have been determined
318
 
319
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.