OpenCores
URL https://opencores.org/ocsvn/openarty/openarty/trunk

Subversion Repositories openarty

[/] [openarty/] [trunk/] [rtl/] [cpu/] [cpuops.v] - Blame information for rev 30

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 dgisselq
///////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    cpuops.v
4
//
5
// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core
6
//
7
// Purpose:     This supports the instruction set reordering of operations
8
//              created by the second generation instruction set, as well as
9
//      the new operations of POPC (population count) and BREV (bit reversal).
10
//
11
//
12
// Creator:     Dan Gisselquist, Ph.D.
13
//              Gisselquist Technology, LLC
14
//
15
///////////////////////////////////////////////////////////////////////////
16
//
17
// Copyright (C) 2015, Gisselquist Technology, LLC
18
//
19
// This program is free software (firmware): you can redistribute it and/or
20
// modify it under the terms of  the GNU General Public License as published
21
// by the Free Software Foundation, either version 3 of the License, or (at
22
// your option) any later version.
23
//
24
// This program is distributed in the hope that it will be useful, but WITHOUT
25
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27
// for more details.
28
//
29
// License:     GPL, v3, as defined and found on www.gnu.org,
30
//              http://www.gnu.org/licenses/gpl.html
31
//
32
//
33
///////////////////////////////////////////////////////////////////////////
34
//
35
`define LONG_MPY
36
module  cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
37
                        o_illegal, o_busy);
38
        parameter       IMPLEMENT_MPY = 1;
39
        input           i_clk, i_rst, i_ce;
40
        input           [3:0]    i_op;
41
        input           [31:0]   i_a, i_b;
42
        input                   i_valid;
43
        output  reg     [31:0]   o_c;
44
        output  wire    [3:0]    o_f;
45
        output  reg             o_valid;
46
        output  wire            o_illegal;
47
        output  wire            o_busy;
48
 
49
        // Rotate-left pre-logic
        //
        // i_a is concatenated with itself and shifted left by the low five
        // bits of i_b.  The upper 32 bits of that 64-bit intermediate are the
        // rotated word.  Only the 32-bit result is kept; no carry bit is
        // produced here.
        wire    [63:0]  w_rol_tmp;
        wire    [31:0]  w_rol_result;

        assign  w_rol_tmp    = { i_a, i_a } << i_b[4:0];
        assign  w_rol_result = w_rol_tmp[63:32];
54
 
55
        // Shift pre-logic
        //
        // Every result is 33 bits: the 32-bit shifted word plus one extra bit
        // that catches the last bit shifted out.  The right shifts pad the
        // input with a low zero (extra bit is bit 0); the left shift pads it
        // with a high zero (extra bit is bit 32).
        wire            [32:0]  w_lsr_result, w_asr_result, w_lsl_result;
        wire    signed  [32:0]  w_pre_asr_input, w_pre_asr_shifted;
        wire                    w_shift_is_32, w_shift_over_32;

        // Logical shifts: a shift amount above 32 yields all zeros, a shift
        // of exactly 32 leaves only the final shifted-out bit.
        assign  w_shift_over_32 = ((|i_b[31:6])||(i_b[5]&&(i_b[4:0]!=0)));
        assign  w_shift_is_32   = (i_b[5]);

        // ASR: any shift of 32 or more fills the result with the sign bit
        assign  w_pre_asr_input   = { i_a, 1'b0 };
        assign  w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
        assign  w_asr_result = (|i_b[31:5]) ? {(33){i_a[31]}}
                                : w_pre_asr_shifted;

        // LSR
        assign  w_lsr_result = (w_shift_over_32) ? 33'h00
                                : ((w_shift_is_32) ? {32'h0,i_a[31]}
                                : ( { i_a, 1'b0 } >> (i_b[4:0]) ));

        // LSL
        assign  w_lsl_result = (w_shift_over_32) ? 33'h00
                                : ((w_shift_is_32) ? {i_a[0], 32'h0}
                                : ({1'b0, i_a } << i_b[4:0]));
69
 
70
        // Bit reversal pre-logic
        //
        // w_brev_result is i_b with its bit order mirrored: bit k of i_b
        // drives bit (31-k) of the result.  Pure wiring, no logic.
        wire    [31:0]  w_brev_result;
        genvar  k;
        generate
        for(k=0; k<32; k=k+1)
        begin : bit_reversal_cpuop
                assign w_brev_result[31-k] = i_b[k];
        end
        endgenerate
78
 
79
        // Popcount pre-logic
        //
        // Counts the set bits of i_b.  The 32 bits are first summed four at a
        // time into eight 6-bit group counts, and the group counts are then
        // added together.  Six bits are enough to hold the maximum count of
        // 32; the upper 26 bits of the result are always zero.
        wire    [31:0]  w_popc_result;
        wire    [5:0]   w_popc_group [0:7];
        genvar  g;
        generate
        for(g=0; g<8; g=g+1)
        begin : popcount_group_cpuop
                assign w_popc_group[g] =
                        ({5'h0,i_b[4*g  ]}+{5'h0,i_b[4*g+1]}
                        +{5'h0,i_b[4*g+2]}+{5'h0,i_b[4*g+3]});
        end
        endgenerate

        assign  w_popc_result[5:0] =
                  w_popc_group[0] + w_popc_group[1]
                + w_popc_group[2] + w_popc_group[3]
                + w_popc_group[4] + w_popc_group[5]
                + w_popc_group[6] + w_popc_group[7];
        assign  w_popc_result[31:6] = 26'h00;
91
 
92
        // Prelogic for our flags registers
        //
        // z, n and v are derived combinationally from o_c further below.
        // c and pre_sign are written by the ALU case statement.
        // set_ovfl records, for the operation just accepted, whether a change
        // of sign between i_a and the result should be reported as overflow.
        wire    z, n, v;
        reg     c, pre_sign, set_ovfl;
        // set_ovfl is a clocked register, so it is assigned with a
        // non-blocking assignment like every other flop in this module.
        always @(posedge i_clk)
                if (i_ce) // 1 LUT
                        set_ovfl <= (((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
                                ||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
                                ||(i_op == 4'h6) // LSL
                                ||(i_op == 4'h5)); // LSR

`ifdef  LONG_MPY
        // mpyhi:   selects the upper (1) or lower (0) half of the 64-bit product
        // mpybusy: high while a multiply is still moving through the pipeline
        reg     mpyhi;
        wire    mpybusy;
`endif
106
 
107
        // A 4-way multiplexer can be done in one 6-LUT.
108
        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with
109
        //      the Xilinx multiplexer fabric that follows. 
110
        // Given that we wish to apply this multiplexer approach to 33-bits,
111
        // this will cost a minimum of 132 6-LUTs.
112
        generate
113
        if (IMPLEMENT_MPY == 0)
114
        begin
115
                // ALU for builds without a hardware multiplier.
                //
                // On every clock with i_ce set, the sign of i_a is saved in
                // pre_sign, the carry is cleared, and the selected operation
                // writes o_c (and, for subtract/add/shifts, the carry).
                // Opcodes with no entry below take the default branch and
                // return i_b unchanged.
                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'h0: {c,o_c } <= {1'b0,i_a}-{1'b0,i_b}; // CMP/SUB
                        4'h1:    o_c   <= i_a & i_b;            // BTST/And
                        4'h2: {c,o_c } <= i_a + i_b;            // Add
                        4'h3:    o_c   <= i_a | i_b;            // Or
                        4'h4:    o_c   <= i_a ^ i_b;            // Xor
                        4'h5: {o_c,c } <= w_lsr_result[32:0];   // LSR
                        4'h6: {c,o_c } <= w_lsl_result[32:0];   // LSL
                        4'h7: {o_c,c } <= w_asr_result[32:0];   // ASR
`ifndef LONG_MPY
                        4'h8:    o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'h9:    o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
                        // 4'b1010: the unimplemented MPYU,
                        // 4'b1011: and the unimplemented MPYS
                        4'hc:    o_c   <= w_brev_result;        // BREV
                        4'hd:    o_c   <= w_popc_result;        // POPC
                        4'he:    o_c   <= w_rol_result;         // ROL
                        default: o_c   <= i_b;                  // MOV, LDI
                        endcase
                end
141
 
142
                // With no multiplier every operation finishes in one clock,
                // so the ALU never stalls.
                assign o_busy = 1'b0;
`ifdef  LONG_MPY
                // mpybusy is declared whenever LONG_MPY is defined and is read
                // by the o_valid logic.  Nothing in this branch drives it, so
                // tie it low rather than leaving the net floating.
                assign mpybusy = 1'b0;
`endif
143
 
144
                // Illegal-instruction detection for the no-multiplier build.
                //
                // w_mpy_opcode is high for the opcodes this configuration
                // cannot execute: 4'ha and 4'hb always, plus 4'h8 when
                // LONG_MPY is defined.  o_illegal is the registered version,
                // qualified by i_ce.
                wire    w_mpy_opcode;
`ifdef  LONG_MPY
                assign  w_mpy_opcode = ((i_op == 4'ha)||(i_op == 4'hb)
                                        ||(i_op == 4'h8));
`else
                assign  w_mpy_opcode = ((i_op == 4'ha)||(i_op == 4'hb));
`endif

                reg     r_illegal;
                always @(posedge i_clk)
                        r_illegal <= (i_ce)&&(w_mpy_opcode);

                assign o_illegal = r_illegal;
152
        end else begin
153
                //
154
                // Multiply pre-logic
155
                //
156
`ifdef  LONG_MPY
157
                reg     [63:0]   r_mpy_result;
158
                if (IMPLEMENT_MPY == 1)
159
                begin // Our two clock option (one clock extra)
160
                        // Two clock multiply.
                        //
                        // Clock 1: when i_ce is set, capture both operands,
                        //      extended to 65 bits.  The extension bits are
                        //      copies of the operand's sign bit when i_op[0]
                        //      is set and zero otherwise.
                        // Clock 2: when mpypipe is set, register the product.
                        //
                        // All registers use non-blocking assignments so the
                        // result read by the ALU case statement does not
                        // depend on the simulator's block evaluation order.
                        reg     signed  [64:0]  r_mpy_a_input, r_mpy_b_input;
                        reg                     mpypipe, x;

                        // mpypipe: a multiply opcode (4'h8, 4'ha, 4'hb) was
                        // accepted on the previous clock
                        initial mpypipe = 1'b0;
                        always @(posedge i_clk)
                                mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));

                        always @(posedge i_clk)
                        if (i_ce)
                        begin
                                r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},
                                                        i_a[31:0]};
                                r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},
                                                        i_b[31:0]};
                        end

                        // x only absorbs bit 64 of the product; it is not
                        // used anywhere else
                        always @(posedge i_clk)
                                if (mpypipe)
                                        {x, r_mpy_result} <= r_mpy_a_input
                                                        * r_mpy_b_input;

                        // Remember which half of the product was requested
                        always @(posedge i_clk)
                                if (i_ce)
                                        mpyhi  <= i_op[1];

                        assign  mpybusy = mpypipe;
181
                end else if (IMPLEMENT_MPY == 2)
182
                begin // The three clock option
183
                        reg     [31:0]  r_mpy_a_input, r_mpy_b_input;
                        reg             r_mpy_signed;
                        reg     [1:0]   mpypipe;

                        // Start with an empty pipeline, as the two clock
                        // option does, so mpybusy is defined from time zero.
                        initial mpypipe = 2'b00;

                        // First clock, latch in the inputs
                        always @(posedge i_clk)
                        begin
                                // mpypipe indicates we have a multiply in the
                                // pipeline.  In this case, the multiply
                                // pipeline is a two stage pipeline, so we need
                                // two bits in the pipe.
                                mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)
                                                        ||(i_op[3:0]==4'h8));
                                mpypipe[1] <= mpypipe[0];

                                // For a signed multiply the sign bit of each
                                // operand is inverted here; the pp_s term
                                // computed on the next clock compensates.
                                if (i_op[0]) // i.e. if signed multiply
                                begin
                                        r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
                                        r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
                                end else begin
                                        r_mpy_a_input <= i_a[31:0];
                                        r_mpy_b_input <= i_b[31:0];
                                end
                                // The signed bit really only matters in the
                                // case of 64 bit multiply.  We'll keep track
                                // of it, though, and pretend in all other
                                // cases.
                                r_mpy_signed  <= i_op[0];

                                // Non-blocking, to match every other register
                                // written in this block.
                                if (i_ce)
                                        mpyhi  <= i_op[1];
                        end

                        assign  mpybusy = |mpypipe;
217
 
218
                        // Second clock: form the partial products.
                        //
                        // Split each latched operand into 16-bit halves,
                        //
                        //   A' = (2^16 ah + al)
                        //   B' = (2^16 bh + bl)
                        //
                        // so that
                        //
                        //   A'B' = 2^32 (ah*bh) + 2^16 (ah*bl + al*bh) + (al*bl)
                        //
                        // The partial products are named after FOIL: pp_f is
                        // the "first" term, pp_oi the sum of the "outer" and
                        // "inner" terms, and pp_l the "last" term.
                        //
                        // For a signed multiply the first clock latched A' and
                        // B' with their sign bits inverted, i.e.
                        //
                        //   A = A' - 2^31,    B = B' - 2^31
                        //
                        // and therefore
                        //
                        //   AB = A'B' - 2^31 (A' + B') + 2^62
                        //      = A'B' + 2^31 (2^31 - (A' + B'))
                        //
                        // pp_s holds the correction (2^31 - (A' + B')), kept to
                        // 33 bits.  It is zero for an unsigned multiply.
                        reg     [31:0]  pp_f, pp_l; // F and L from FOIL
                        reg     [32:0]  pp_oi; // The O and I from FOIL
                        reg     [32:0]  pp_s;

                        wire    [15:0]  w_mpy_ah, w_mpy_al, w_mpy_bh, w_mpy_bl;
                        assign  w_mpy_ah = r_mpy_a_input[31:16];
                        assign  w_mpy_al = r_mpy_a_input[15: 0];
                        assign  w_mpy_bh = r_mpy_b_input[31:16];
                        assign  w_mpy_bl = r_mpy_b_input[15: 0];

                        always @(posedge i_clk)
                        begin
                                pp_f  <= w_mpy_ah * w_mpy_bh;
                                pp_oi <= w_mpy_ah * w_mpy_bl
                                        + w_mpy_al * w_mpy_bh;
                                pp_l  <= w_mpy_al * w_mpy_bl;
                                // And a special one for the sign
                                if (r_mpy_signed)
                                        pp_s <= 32'h8000_0000-(
                                                r_mpy_a_input[31:0]
                                                + r_mpy_b_input[31:0]);
                                else
                                        pp_s <= 33'h0;
                        end

                        // Third clock: add the partial products.  The low 16
                        // bits come straight from pp_l; everything else is
                        // summed into bits [63:16], where pp_oi sits at weight
                        // 2^16, pp_s at weight 2^31 and pp_f at weight 2^32.
                        always @(posedge i_clk)
                        begin
                                r_mpy_result[15:0] <= pp_l[15:0];
                                r_mpy_result[63:16] <=
                                        { 32'h00, pp_l[31:16] }
                                        + { 15'h00, pp_oi }
                                        + { pp_s, 15'h00 }
                                        + { pp_f, 16'h00 };
                        end
279
                end // Fourth clock -- results are available for writeback.
280
`else
281
                // 16x16 multiply, used when LONG_MPY is not defined.
                //
                // Each operand is the low 16 bits of its input with one extra
                // top bit: a copy of bit 15 when i_op[0] is set, zero
                // otherwise.  The low 32 bits of the product are registered
                // whenever i_ce is set.
                wire    signed  [16:0]  w_mpy_a_input, w_mpy_b_input;
                wire            [33:0]  w_mpy_result;
                reg             [31:0]  r_mpy_result;
                assign  w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };
                assign  w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };
                assign  w_mpy_result   = w_mpy_a_input * w_mpy_b_input;
                // Non-blocking: r_mpy_result is read by the ALU case statement
                // on the same clock edge, so a blocking assignment here would
                // make the value seen there depend on block evaluation order.
                always @(posedge i_clk)
                        if (i_ce)
                                r_mpy_result  <= w_mpy_result[31:0];
290
`endif
291
 
292
                //
                // Busy tracking
                //
                // r_busy is declared and driven ahead of the ALU case
                // statement because that statement reads it; identifiers must
                // be declared before their first use.
                //
                // r_busy goes high the clock after a multiply opcode is
                // accepted and, when LONG_MPY is defined, stays high for as
                // long as mpybusy is set.
                reg     r_busy;
                initial r_busy = 1'b0;
                always @(posedge i_clk)
                        r_busy <= (~i_rst)&&(i_ce)&&(i_valid)
`ifdef  LONG_MPY
                                        &&((i_op[3:1] == 3'h5)
                                                ||(i_op[3:0] == 4'h8))||mpybusy;
`else
                                        &&(i_op[3:1] == 3'h5);
`endif

                //
                // The master ALU case statement
                //
                // With i_ce set: save the sign of i_a, clear the carry, and
                // perform the operation selected by i_op.  Otherwise, while
                // r_busy is set, keep loading o_c from the multiplier result
                // register so the product is in place when o_valid rises.
                //
                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And
                        4'b0010:{c,o_c } <= i_a + i_b;          // Add
                        4'b0011:   o_c   <= i_a | i_b;          // Or
                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor
                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR
                        4'b0110:{c,o_c } <= w_lsl_result[32:0]; // LSL
                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR
`ifdef  LONG_MPY
                        4'b1000:   o_c   <= r_mpy_result[31:0]; // MPY
`else
                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
`ifdef  LONG_MPY
                        4'b1010:   o_c   <= r_mpy_result[63:32]; // MPYHU
                        4'b1011:   o_c   <= r_mpy_result[63:32]; // MPYHS
`else
                        4'b1010:   o_c   <= r_mpy_result; // MPYU
                        4'b1011:   o_c   <= r_mpy_result; // MPYS
`endif
                        4'b1100:   o_c   <= w_brev_result;      // BREV
                        4'b1101:   o_c   <= w_popc_result;      // POPC
                        4'b1110:   o_c   <= w_rol_result;       // ROL
                        default:   o_c   <= i_b;                // MOV, LDI
                        endcase
                end else if (r_busy)
`ifdef  LONG_MPY
                        o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];
`else
                        o_c <= r_mpy_result;
`endif

                assign o_busy = r_busy;

                // Every opcode is implemented when the multiplier is present
                assign o_illegal = 1'b0;
348
        end endgenerate
349
 
350
        // Condition flags, all derived from the registered result:
        //   z - result is zero
        //   n - result's top bit is set
        //   v - the operation was flagged by set_ovfl and the result's top
        //       bit differs from the saved top bit of i_a
        //   c - carry, written by the ALU case statement
        assign  z = (o_c == 32'h0);
        assign  n = o_c[31];
        assign  v = (set_ovfl)&&(pre_sign != o_c[31]);

        assign  o_f = { v, n, c, z };

        // o_valid: a result is available on the next clock.
        //   w_fast_done - a non-multiply operation is being accepted now
        //   w_slow_done - a multiply that was holding the ALU busy has
        //                 finished
        wire    w_fast_done, w_slow_done;
`ifdef  LONG_MPY
        assign  w_fast_done = (i_ce)&&(i_valid)
                                &&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8);
        assign  w_slow_done = (o_busy)&&(~mpybusy);
`else
        assign  w_fast_done = (i_ce)&&(i_valid)&&(i_op[3:1] != 3'h5);
        assign  w_slow_done = (o_busy);
`endif

        initial o_valid = 1'b0;
        always @(posedge i_clk)
                if (i_rst)
                        o_valid <= 1'b0;
                else
                        o_valid <= (w_fast_done)||(w_slow_done);
368
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.