OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [cpuops.v] - Blame information for rev 133

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 dgisselq
///////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    cpuops.v
4
//
5
// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core
6
//
7 69 dgisselq
// Purpose:     This supports the instruction set reordering of operations
8
//              created by the second generation instruction set, as well as
9
//      the new operations of POPC (population count) and BREV (bit reversal).
10 2 dgisselq
//
11 69 dgisselq
//
12 2 dgisselq
// Creator:     Dan Gisselquist, Ph.D.
13 69 dgisselq
//              Gisselquist Technology, LLC
14 2 dgisselq
//
15
///////////////////////////////////////////////////////////////////////////
16
//
17
// Copyright (C) 2015, Gisselquist Technology, LLC
18
//
19
// This program is free software (firmware): you can redistribute it and/or
20
// modify it under the terms of  the GNU General Public License as published
21
// by the Free Software Foundation, either version 3 of the License, or (at
22
// your option) any later version.
23
//
24
// This program is distributed in the hope that it will be useful, but WITHOUT
25
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
26
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27
// for more details.
28
//
29
// License:     GPL, v3, as defined and found on www.gnu.org,
30
//              http://www.gnu.org/licenses/gpl.html
31
//
32
//
33
///////////////////////////////////////////////////////////////////////////
34
//
35 133 dgisselq
// `define      LONG_MPY
36 69 dgisselq
module  cpuops(i_clk,i_rst, i_ce, i_valid, i_op, i_a, i_b, o_c, o_f, o_valid,
37 71 dgisselq
                        o_illegal, o_busy);
38 56 dgisselq
        // Multiplier selection.  0 builds the ALU without multiply hardware
        // (the multiply opcodes then raise o_illegal); any other value builds
        // the branch that contains the multiply logic.  When LONG_MPY is
        // defined, 1 selects the two clock multiply and 2 the three clock one.
        parameter       IMPLEMENT_MPY = 1;
39 2 dgisselq
        // i_clk: every register in this module is clocked on its rising edge.
        // i_rst: clears o_valid and keeps r_busy from being set.
        // i_ce:  enable; the ALU case statement only runs when this is high.
        input           i_clk, i_rst, i_ce;
40
        // Four bit opcode selecting the operation in the ALU case statement
        input           [3:0]    i_op;
41
        // The two 32-bit operands
        input           [31:0]   i_a, i_b;
42
        // Qualifies i_ce when generating o_valid and the busy indication
        input                   i_valid;
43
        // Registered 32-bit result
        output  reg     [31:0]   o_c;
44
        // Flags, packed as { v, n, c, z }
        output  wire    [3:0]    o_f;
45
        // Registered strobe: set the clock after an accepted operation, or
        // once a busy (multiply) operation has finished
        output  reg             o_valid;
46 56 dgisselq
        // Registered flag for a multiply opcode when IMPLEMENT_MPY == 0;
        // tied low when the multiply logic is built
        output  wire            o_illegal;
47 71 dgisselq
        // High while a multiply is still in progress; tied low when
        // IMPLEMENT_MPY == 0
        output  wire            o_busy;
48 2 dgisselq
 
49 62 dgisselq
        // Rotate-left pre-logic
        //
        // Two copies of i_a are placed side by side and shifted left by the
        // low five bits of i_b; the upper word of that 64-bit value is i_a
        // rotated left.  Only a 32-bit result is produced here -- no
        // carry-out is taken from the rotate.
        wire    [63:0]   w_rol_tmp    = { (2){i_a} } << i_b[4:0];
        wire    [31:0]   w_rol_result = w_rol_tmp[63:32];
54 62 dgisselq
 
55
        // Shift register pre-logic
        //
        // Both right shifts operate on { i_a, 1'b0 }: the extra low bit
        // catches the last bit shifted out, and the ALU case statement loads
        // it into the carry flag via {o_c,c} <= w_xsr_result.
        //
        // Shift amounts of 32 or more (any of i_b[31:5] set) are handled
        // explicitly: LSR returns zero, ASR returns 33 copies of the sign.
        //
        // The arithmetic shift is done on wires declared signed.  A
        // concatenation such as { i_a, 1'b0 } is always unsigned, and ">>>"
        // on an unsigned operand zero-fills exactly like ">>", which would
        // lose the sign of a negative i_a.
        wire    [32:0]           w_lsr_result, w_asr_result;
        wire    signed  [32:0]   w_pre_asr_input, w_pre_asr_shifted;
        assign  w_pre_asr_input   = { i_a, 1'b0 };
        assign  w_pre_asr_shifted = w_pre_asr_input >>> i_b[4:0];
        assign  w_asr_result = (|i_b[31:5])? {(33){i_a[31]}}
                                : w_pre_asr_shifted;            // ASR
        assign  w_lsr_result = (|i_b[31:5])? 33'h00
                                : ( { i_a, 1'b0 } >> (i_b[4:0]) );// LSR
61 2 dgisselq
 
62 69 dgisselq
        // Bit reversal pre-logic
        //
        // Pure wiring: bit k of i_b drives bit (31-k) of the result.
        wire    [31:0]   w_brev_result;
        genvar  k;
        generate
        for(k=0; k<32; k=k+1)
        begin : bit_reversal_cpuop
                assign w_brev_result[31-k] = i_b[k];
        end endgenerate
70 25 dgisselq
 
71 69 dgisselq
        // Popcount pre-logic
        //
        // Count the set bits of each nibble of i_b (0..4, hence three bits
        // apiece), then add the eight nibble counts into a six bit total
        // (0..32).  The upper 26 bits of the result are always zero.
        wire    [2:0]    w_popc_n0, w_popc_n1, w_popc_n2, w_popc_n3,
                         w_popc_n4, w_popc_n5, w_popc_n6, w_popc_n7;
        assign  w_popc_n0 = {2'h0,i_b[ 0]}+{2'h0,i_b[ 1]}+{2'h0,i_b[ 2]}+{2'h0,i_b[ 3]};
        assign  w_popc_n1 = {2'h0,i_b[ 4]}+{2'h0,i_b[ 5]}+{2'h0,i_b[ 6]}+{2'h0,i_b[ 7]};
        assign  w_popc_n2 = {2'h0,i_b[ 8]}+{2'h0,i_b[ 9]}+{2'h0,i_b[10]}+{2'h0,i_b[11]};
        assign  w_popc_n3 = {2'h0,i_b[12]}+{2'h0,i_b[13]}+{2'h0,i_b[14]}+{2'h0,i_b[15]};
        assign  w_popc_n4 = {2'h0,i_b[16]}+{2'h0,i_b[17]}+{2'h0,i_b[18]}+{2'h0,i_b[19]};
        assign  w_popc_n5 = {2'h0,i_b[20]}+{2'h0,i_b[21]}+{2'h0,i_b[22]}+{2'h0,i_b[23]};
        assign  w_popc_n6 = {2'h0,i_b[24]}+{2'h0,i_b[25]}+{2'h0,i_b[26]}+{2'h0,i_b[27]};
        assign  w_popc_n7 = {2'h0,i_b[28]}+{2'h0,i_b[29]}+{2'h0,i_b[30]}+{2'h0,i_b[31]};

        wire    [31:0]   w_popc_result;
        assign  w_popc_result = { 26'h00,
                 ({3'h0,w_popc_n0}+{3'h0,w_popc_n1}+{3'h0,w_popc_n2}+{3'h0,w_popc_n3})
                +({3'h0,w_popc_n4}+{3'h0,w_popc_n5}+{3'h0,w_popc_n6}+{3'h0,w_popc_n7}) };
83
 
84
        // Prelogic for our flags registers
        //
        //   z, n, v          combinatorial flags derived from o_c
        //   c                carry, registered by the ALU case statement
        //   pre_sign         sign bit of i_a, registered alongside the result
        //   set_ovfl         registered: this operation is allowed to report
        //                    overflow.  The overflow flag itself is
        //                    set_ovfl && (pre_sign != o_c[31]).
        //
        // set_ovfl is written with a nonblocking assignment, like every other
        // register clocked here, so its value never depends on the order in
        // which the simulator happens to run the clocked blocks.
        wire    z, n, v;
        reg     c, pre_sign, set_ovfl;
        always @(posedge i_clk)
                if (i_ce) // 1 LUT
                        set_ovfl <=(((i_op==4'h0)&&(i_a[31] != i_b[31]))//SUB&CMP
                                ||((i_op==4'h2)&&(i_a[31] == i_b[31])) // ADD
                                ||(i_op == 4'h6) // LSL
                                ||(i_op == 4'h5)); // LSR

`ifdef  LONG_MPY
        // mpyhi:   registered i_op[1]; picks the upper (1) or lower (0) word
        //          of the 64-bit product when a finished multiply is written
        //          to o_c.
        // mpybusy: driven by the multiply logic inside the generate block
        //          below while a multiply is in its pipeline.
        reg     mpyhi;
        wire    mpybusy;
`endif
98 62 dgisselq
 
99
        // A 4-way multiplexer can be done in one 6-LUT.
100
        // A 16-way multiplexer can therefore be done in 4x 6-LUT's with
101
        //      the Xilinx multiplexer fabric that follows. 
102
        // Given that we wish to apply this multiplexer approach to 33-bits,
103
        // this will cost a minimum of 132 6-LUTs.
104 56 dgisselq
        generate
105
        if (IMPLEMENT_MPY == 0)
106
        begin
107
                // ALU proper, for the build without a multiplier.
                //
                // On every enabled clock the sign of i_a is saved in pre_sign,
                // the carry defaults to zero, and the opcode selects what is
                // loaded into o_c (and, for the arithmetic and shift
                // operations, into c).
                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'h0:   {c,o_c } <= {1'b0,i_a}-{1'b0,i_b};     // CMP/SUB
                        4'h1:      o_c   <= i_a & i_b;                 // BTST/And
                        4'h2:   {c,o_c } <= {1'b0,i_a}+{1'b0,i_b};     // Add
                        4'h3:      o_c   <= i_a | i_b;                 // Or
                        4'h4:      o_c   <= i_a ^ i_b;                 // Xor
                        4'h5:   {o_c,c } <= w_lsr_result[32:0];        // LSR
                        4'h6:   {c,o_c } <= (|i_b[31:5])? 33'h00       // LSL
                                                : ({1'b0, i_a } << i_b[4:0]);
                        4'h7:   {o_c,c } <= w_asr_result[32:0];        // ASR
`ifndef LONG_MPY
                        4'h8:      o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'h9:      o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
                        // 4'ha (4'b1010): the unimplemented MPYU, and
                        // 4'hb (4'b1011): the unimplemented MPYS, both fall
                        //      through to the default arm below
                        4'hc:      o_c   <= w_brev_result;             // BREV
                        4'hd:      o_c   <= w_popc_result;             // POPC
                        4'he:      o_c   <= w_rol_result;              // ROL
                        default:   o_c   <= i_b;                       // MOV, LDI
                        endcase
                end
133 71 dgisselq
 
134
                // Without a multiplier nothing takes more than one clock
                assign o_busy = 1'b0;

                // Flag the opcodes this build cannot execute: 4'ha and 4'hb
                // always, plus 4'h8 when LONG_MPY is defined.  The flag is
                // registered, and is only raised when i_ce is set.
                reg     r_illegal;
                always @(posedge i_clk)
`ifdef  LONG_MPY
                        r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb)
                                ||(i_op == 4'h8));
`else
                        r_illegal <= (i_ce)&&((i_op == 4'ha)||(i_op == 4'hb));
`endif
                assign o_illegal = r_illegal;
144 56 dgisselq
        end else begin
145 62 dgisselq
                //
                // Multiply pre-logic
                //
                // With LONG_MPY defined a 32x32 multiply producing a 64-bit
                // r_mpy_result is built, pipelined over two (IMPLEMENT_MPY==1)
                // or three (IMPLEMENT_MPY==2) clocks.  Without it, a 16x16
                // multiply producing a 32-bit r_mpy_result is built.
                //
                // Every register below is written with a nonblocking
                // assignment.  r_mpy_result in particular is read by the
                // master ALU always block on the same clock edge, so a
                // blocking assignment here would make the value it sees
                // depend on simulator scheduling order.
                //
`ifdef  LONG_MPY
                reg     [63:0]   r_mpy_result;
                if (IMPLEMENT_MPY == 1)
                begin // Our two clock option (one clock extra)
                        reg     signed  [64:0]   r_mpy_a_input, r_mpy_b_input;
                        reg                     mpypipe, x;
                        initial mpypipe = 1'b0;
                        // mpypipe: a multiply opcode was accepted last clock
                        always @(posedge i_clk)
                                mpypipe <= (i_ce)&&((i_op[3:1]==3'h5)||(i_op[3:0]==4'h8));
                        // Latch the operands, sign extended to 65 bits only
                        // when i_op[0] requests a signed multiply
                        always @(posedge i_clk)
                        if (i_ce)
                        begin
                                r_mpy_a_input <= {{(33){(i_a[31])&(i_op[0])}},
                                                        i_a[31:0]};
                                r_mpy_b_input <= {{(33){(i_b[31])&(i_op[0])}},
                                                        i_b[31:0]};
                        end
                        // x soaks up the 65th bit of the product
                        always @(posedge i_clk)
                                if (mpypipe)
                                        {x, r_mpy_result} <= r_mpy_a_input
                                                        * r_mpy_b_input;
                        always @(posedge i_clk)
                                if (i_ce)
                                        mpyhi  <= i_op[1];
                        assign  mpybusy = mpypipe;
                end else if (IMPLEMENT_MPY == 2)
                begin // The three clock option
                        reg     [31:0]   r_mpy_a_input, r_mpy_b_input;
                        reg             r_mpy_signed;
                        reg     [1:0]    mpypipe;

                        // First clock, latch in the inputs
                        always @(posedge i_clk)
                        begin
                                // mpypipe indicates we have a multiply in the
                                // pipeline.  In this case, the multiply
                                // pipeline is a two stage pipeline, so we need
                                // two bits in the pipe.
                                mpypipe[0] <= (i_ce)&&((i_op[3:1]==3'h5)
                                                        ||(i_op[3:0]==4'h8));
                                mpypipe[1] <= mpypipe[0];

                                if (i_op[0]) // i.e. if signed multiply
                                begin
                                        r_mpy_a_input <= {(~i_a[31]),i_a[30:0]};
                                        r_mpy_b_input <= {(~i_b[31]),i_b[30:0]};
                                end else begin
                                        r_mpy_a_input <= i_a[31:0];
                                        r_mpy_b_input <= i_b[31:0];
                                end
                                // The signed bit really only matters in the
                                // case of 64 bit multiply.  We'll keep track
                                // of it, though, and pretend in all other
                                // cases.
                                r_mpy_signed  <= i_op[0];

                                if (i_ce)
                                        mpyhi  <= i_op[1];
                        end

                        assign  mpybusy = |mpypipe;

                        // Second clock, do the multiplies, get the "partial
                        // products".  Here, we break our input up into two
                        // halves,
                        //
                        //   A  = (2^16 ah + al)
                        //   B  = (2^16 bh + bl)
                        //
                        // and use these to compute partial products.
                        //
                        //   AB = (2^32 ah*bh + 2^16 (ah*bl + al*bh) + (al*bl)
                        //
                        // Since we're following the FOIL algorithm to get here,
                        // we'll name these partial products according to FOIL.
                        //
                        // The trick is what happens if A or B is signed.  In
                        // those cases the sign bit of each operand was flipped
                        // on the first clock.  Calling the flipped values,
                        // read as unsigned numbers, A' and B', the true signed
                        // values are
                        //      A = A' - 2^31,   B = B' - 2^31
                        // with A' = (2^16 ah + al) and B' = (2^16 bh + bl),
                        // and so ...
                        //
                        // AB = (A' - 2^31) * (B' - 2^31)
                        //    = A'B' - 2^31 (A' + B') + 2^62
                        //    = 2^32(ah*bh)
                        //              +2^16 (ah*bl+al*bh)
                        //              +(al*bl)
                        //              + 2^31 (2^31 - (A' + B'))
                        //
                        // The last term is the sign correction: pp_s holds
                        // (2^31 - (A' + B')) in 33 bits and is added in with a
                        // weight of 2^31 on the third clock.
                        //
                        reg     [31:0]   pp_f, pp_o, pp_i, pp_l;
                        reg     [32:0]   pp_s;
                        always @(posedge i_clk)
                        begin
                                pp_f<=r_mpy_a_input[31:16]*r_mpy_b_input[31:16];
                                pp_o<=r_mpy_a_input[31:16]*r_mpy_b_input[15: 0];
                                pp_i<=r_mpy_a_input[15: 0]*r_mpy_b_input[31:16];
                                pp_l<=r_mpy_a_input[15: 0]*r_mpy_b_input[15: 0];
                                // And a special one for the sign
                                if (r_mpy_signed)
                                        pp_s <= 32'h8000_0000-(
                                                r_mpy_a_input[31:0]
                                                + r_mpy_b_input[31:0]);
                                else
                                        pp_s <= 33'h0;
                        end

                        // Third clock, add the results and produce a product
                        always @(posedge i_clk)
                        begin
                                r_mpy_result[15:0] <= pp_l[15:0];
                                r_mpy_result[63:16] <=
                                        { 32'h00, pp_l[31:16] }
                                        + { 16'h00, pp_o }
                                        + { 16'h00, pp_i }
                                        + { pp_s, 15'h00 }
                                        + { pp_f, 16'h00 };
                        end
                end // Fourth clock -- results are available for writeback.
`else
                // 16x16 multiply.  Each operand is widened to 17 bits with a
                // copy of its bit 15 when i_op[0] (signed) is set, and with
                // zero otherwise, so one signed multiplier serves both the
                // signed and the unsigned opcode.
                wire    signed  [16:0]   w_mpy_a_input, w_mpy_b_input;
                wire            [33:0]   w_mpy_result;
                reg             [31:0]   r_mpy_result;
                assign  w_mpy_a_input ={ ((i_a[15])&(i_op[0])), i_a[15:0] };
                assign  w_mpy_b_input ={ ((i_b[15])&(i_op[0])), i_b[15:0] };
                assign  w_mpy_result   = w_mpy_a_input * w_mpy_b_input;
                always @(posedge i_clk)
                        if (i_ce)
                                r_mpy_result  <= w_mpy_result[31:0];
`endif
283 56 dgisselq
 
284 62 dgisselq
                //
                // The master ALU case statement
                //
                // r_busy is set for the clock(s) following an accepted
                // multiply.  While it is set (and i_ce is low) the finished
                // product is copied from r_mpy_result into o_c.
                //
                // r_busy is declared here, ahead of the always block that
                // tests it, so that the identifier is declared before its
                // first use.
                //
                reg     r_busy;
                initial r_busy = 1'b0;

                always @(posedge i_clk)
                if (i_ce)
                begin
                        pre_sign <= (i_a[31]);
                        c <= 1'b0;
                        casez(i_op)
                        4'b0000:{c,o_c } <= {1'b0,i_a}-{1'b0,i_b};// CMP/SUB
                        4'b0001:   o_c   <= i_a & i_b;          // BTST/And
                        4'b0010:{c,o_c } <= i_a + i_b;          // Add
                        4'b0011:   o_c   <= i_a | i_b;          // Or
                        4'b0100:   o_c   <= i_a ^ i_b;          // Xor
                        4'b0101:{o_c,c } <= w_lsr_result[32:0];  // LSR
                        4'b0110:{c,o_c } <= (|i_b[31:5])? 33'h00 : {1'b0, i_a } << i_b[4:0];     // LSL
                        4'b0111:{o_c,c } <= w_asr_result[32:0];  // ASR
`ifdef  LONG_MPY
                        4'b1000:   o_c   <= r_mpy_result[31:0]; // MPY
`else
                        4'b1000:   o_c   <= { i_b[15: 0], i_a[15:0] }; // LODIHI
`endif
                        4'b1001:   o_c   <= { i_a[31:16], i_b[15:0] }; // LODILO
`ifdef  LONG_MPY
                        4'b1010:   o_c   <= r_mpy_result[63:32]; // MPYHU
                        4'b1011:   o_c   <= r_mpy_result[63:32]; // MPYHS
`else
                        4'b1010:   o_c   <= r_mpy_result; // MPYU
                        4'b1011:   o_c   <= r_mpy_result; // MPYS
`endif
                        4'b1100:   o_c   <= w_brev_result;      // BREV
                        4'b1101:   o_c   <= w_popc_result;      // POPC
                        4'b1110:   o_c   <= w_rol_result;       // ROL
                        default:   o_c   <= i_b;                // MOV, LDI
                        endcase
                end else if (r_busy)
                        // A multiply is finishing: pick up its product
`ifdef  LONG_MPY
                        o_c <= (mpyhi)?r_mpy_result[63:32]:r_mpy_result[31:0];
`else
                        o_c <= r_mpy_result;
`endif

                // Busy starts when a valid multiply opcode is accepted outside
                // of reset.
                // NOTE(review): with LONG_MPY, "||mpybusy" binds more loosely
                // than the "&&" chain, so r_busy also follows mpybusy
                // regardless of i_rst -- confirm this is intended.
                always @(posedge i_clk)
                        r_busy <= (~i_rst)&&(i_ce)&&(i_valid)
`ifdef  LONG_MPY
                                        &&((i_op[3:1] == 3'h5)
                                                ||(i_op[3:0] == 4'h8))||mpybusy;
`else
                                        &&(i_op[3:1] == 3'h5);
`endif

                assign o_busy = r_busy;

                // With the multiplier built in, no opcode is flagged illegal
                assign o_illegal = 1'b0;
340 71 dgisselq
        end endgenerate
341 56 dgisselq
 
342 2 dgisselq
        // Flags, all derived combinatorially from the registered result:
        //   z  result is zero
        //   n  result is negative (bit 31 set)
        //   v  the operation may overflow (set_ovfl) and the sign of the
        //      result differs from the saved sign of i_a (pre_sign)
        //   c  carry, registered by the ALU case statement
        assign  n = o_c[31];
        assign  z = (o_c == 32'h0000);
        assign  v = (set_ovfl)&&(pre_sign != o_c[31]);

        assign  o_f = { v, n, c, z };

        // o_valid: cleared by reset.  Otherwise it is set either by an
        // accepted operation that is not a multiply, or by a busy ALU
        // (with LONG_MPY: a busy ALU whose multiply pipeline has drained).
        initial o_valid = 1'b0;
        always @(posedge i_clk)
        begin
                if (i_rst)
                        o_valid <= 1'b0;
                else
`ifdef  LONG_MPY
                        o_valid <= ((i_ce)&&(i_valid)
                                        &&(i_op[3:1] != 3'h5)&&(i_op[3:0] != 4'h8))
                                ||((o_busy)&&(~mpybusy));
`else
                        o_valid <= ((i_ce)&&(i_valid)&&(i_op[3:1] != 3'h5))
                                ||(o_busy);
`endif
        end
360 2 dgisselq
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.