OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [trunk/] [alu.sv] - Blame information for rev 153

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 Agner
//////////////////////////////////////////////////////////////////////////////////
2
// Engineer: Agner Fog
3
//
4
// Create Date:   2020-06-06
5
// Last modified: 2021-08-03
6
// Module Name: alu
7
// Project Name: ForwardCom soft core
8
// Target Devices: Artix 7
9
// Tool Versions: Vivado v. 2020.1
10
// License: CERN-OHL-W
11
// Description: Arithmetic-logic unit for general purpose registers.
12
// Executes add, subtract, bit manipulation, etc.
13
//////////////////////////////////////////////////////////////////////////////////
14
 
15
`include "defines.vh"
16
`include "subfunctions.vh"
17
 
18
module alu (
19
    input clock,                            // system clock
20
    input clock_enable,                     // clock enable. Used when single-stepping
21
    input reset,                            // system reset
22
    input valid_in,                         // data from previous stage ready
23
    input stall_in,                         // pipeline is stalled
24
 
25
    input [`CODE_ADDR_WIDTH-1:0] instruction_pointer_in, // address of current instruction
26
    input [31:0] instruction_in,            // current instruction, only first word used here
27
    input [`TAG_WIDTH-1:0] tag_val_in,      // instruction tag value
28
    input [1:0]  category_in,               // 00: multiformat, 01: single format, 10: jump
29
    input        mask_alternative_in,       // mask register and fallback register used for alternative purposes
30
    input [1:0]  result_type_in,            // type of result: 0: register, 1: system register, 2: memory, 3: other or nothing
31
    input        vector_in,                 // vector registers used
32
    input [6:0]  opx_in,                    // operation ID in execution unit. This is mostly equal to op1 for multiformat instructions
33
    input [5:0]  opj_in,                    // operation ID for conditional jump instructions
34
    input [2:0]  ot_in,                     // operand type
35
    input [5:0]  option_bits_in,            // option bits from IM3 or mask
36
    input [15:0] im2_bits_in,               // constant bits from IM2 as extra operand
37
 
38
    // monitor result buses:
39
    input write_en1,                        // a result is written to writeport1
40
    input [`TAG_WIDTH-1:0] write_tag1_in,   // tag of result in writeport1
41
    input [`RB1:0] writeport1_in,           // result bus 1
42
    input write_en2,                        // a result is written to writeport2
43
    input [`TAG_WIDTH-1:0] write_tag2_in,   // tag of result in writeport2
44
    input [`RB1:0] writeport2_in,           // result bus 2
45
    input [`TAG_WIDTH-1:0] predict_tag1_in, // result tag value on writeport1 in next clock cycle
46
    input [`TAG_WIDTH-1:0] predict_tag2_in, // result tag value on writeport2 in next clock cycle
47
 
48
    // Register values sampled from result bus in previous stages
49
    input [`RB:0] operand1_in,              // first register operand or fallback
50
    input [`RB:0] operand2_in,              // second register operand RS
51
    input [`RB:0] operand3_in,              // last register operand RT
52
    input [`MASKSZ:0] regmask_val_in,       // mask register
53
    input [`RB1:0] ram_data_in,             // memory operand from data ram
54
    input        opr2_from_ram_in,          // value of operand 2 comes from data ram
55
    input        opr3_from_ram_in,          // value of last operand comes from data ram
56
    input        opr1_used_in,              // operand1_in is needed
57
    input        opr2_used_in,              // operand2_in is needed
58
    input        opr3_used_in,              // operand3_in is needed
59
    input        regmask_used_in,           // regmask_val_in is needed
60
 
61
    output reg valid_out,                   // for debug display: alu is active
62
    output reg register_write_out,
63
    output reg [5:0] register_a_out,        // register to write
64
    output reg [`RB1:0] result_out,         // output result to destination register
65
    output reg [`TAG_WIDTH-1:0] tag_val_out,// instruction tag value
66
    output reg jump_out,                    //  jump instruction: jump taken
67
    output reg nojump_out,                  // jump instruction: jump not taken
68
    output reg [`CODE_ADDR_WIDTH-1:0] jump_pointer_out, // jump target to fetch unit
69
    output reg stall_out,                   // alu is waiting for an operand or not ready to receive a new instruction
70
    output reg stall_next_out,              // alu will be waiting in next clock cycle
71
    output reg error_out,                   // unknown instruction
72
    output reg error_parm_out,              // wrong parameter for instruction
73
 
74
    // outputs for debugger:
75
    output reg [31:0] debug1_out,           // debug information
76
    output reg [31:0] debug2_out            // temporary debug information
77
);
78
 
79
logic [`RB1:0] operand1;                    // first register operand RD or RU. bit `RB is 1 if invalid
80
logic [`RB1:0] operand2;                    // second register operand RS. bit `RB is 1 if invalid
81
logic [`RB1:0] operand3;                    // last register operand RT. bit `RB is 1 if invalid
82
logic [`MASKSZ:0] regmask_val;              // mask register
83
logic [1:0]  otout;                         // operand type for output
84
logic [5:0]  msb;                           // index to most significant bit
85
logic signbit2, signbit3;                   // sign bits of operands
86
logic [`RB1:0] sbit;                        // position of sign bit
87
logic [`RB1:0] result;                      // result for output
88
logic [1:0]  result_type;                   // type of result
89
logic [6:0]  opx;                           // operation ID in execution unit. This is mostly equal to op1 for multiformat instructions
90
logic [6:0]  opj;                           // operation ID for conditional jump
91
logic jump_result;                          // result of jump condition (needs inversion if opj[0])
92
 
93
logic mask_off;                             // result is masked off
94
logic stall;                                // waiting for operands
95
logic stall_next;                           // will be waiting for operands in next clock cycle
96
logic error;                                // unknown instruction
97
logic error_parm;                           // wrong parameter for instruction
98
logic jump_taken;                           // conditional jump is jumping
99
logic jump_not_taken;                       // conditional jump is not jumping or target follows immediately
100
logic normal_output;                        // normal register output
101
 
102
logic [`CODE_ADDR_WIDTH-1:0] nojump_target; // next address if not jumping
103
logic [`CODE_ADDR_WIDTH-1:0] relative_jump_target; // jump target for multiway relative jump
104
// It seems to be more efficient to truncate operands locally by ANDing with sizemask than to
105
// make separate wires for the truncated operands, because wiring is more expensive than logic:
106
logic [`RB1:0] sizemask;                    // mask for operand type
107
 
108
always_comb begin
109
    stall       = 0;
110
    stall_next  = 0;
111
    regmask_val = 0;
112
 
113
    // get all inputs
114
    if (regmask_val_in[`MASKSZ]) begin      // value missing
115
        if (write_en1 && regmask_val_in[`TAG_WIDTH-1:0] == write_tag1_in) begin
116
            regmask_val = writeport1_in;    // obtained from result bus 1 (which may be my own output)
117
        end else if (write_en2 && regmask_val_in[`TAG_WIDTH-1:0] == write_tag2_in) begin
118
            regmask_val = writeport2_in[(`MASKSZ-1):0]; // obtained from result bus 2
119
        end else begin
120
            if (regmask_used_in) begin
121
                stall = 1;                  // operand not ready
122
                if (regmask_val_in[`TAG_WIDTH-1:0] != predict_tag1_in && regmask_val_in[`TAG_WIDTH-1:0] != predict_tag2_in) begin
123
                    stall_next = 1;         // operand not ready in next clock cycle
124
                end
125
            end
126
        end
127
    end else begin  // value available
128
        regmask_val = regmask_val_in;
129
    end
130
 
131
    // result is masked off
132
    mask_off = regmask_used_in && regmask_val[`MASKSZ] == 0 && regmask_val[0] == 0 && !mask_alternative_in;
133
 
134
    operand1 = 0;
135
    if (operand1_in[`RB]) begin             // value missing
136
        if (write_en1 && operand1_in[`TAG_WIDTH-1:0] == write_tag1_in) begin
137
            operand1 = writeport1_in;       // obtained from result bus 1 (which may be my own output)
138
        end else if (write_en2 && operand1_in[`TAG_WIDTH-1:0] == write_tag2_in) begin
139
            operand1 = writeport2_in;       // obtained from result bus 2
140
        end else begin
141
            if (opr1_used_in) begin
142
                stall = 1;                  // operand not ready
143
                if (operand1_in[`TAG_WIDTH-1:0] != predict_tag1_in && operand1_in[`TAG_WIDTH-1:0] != predict_tag2_in) begin
144
                    stall_next = 1;         // operand not ready in next clock cycle
145
                end
146
            end
147
        end
148
    end else begin
149
        operand1 = operand1_in[`RB1:0];
150
    end
151
 
152
    operand2 = 0;
153
    if (opr2_from_ram_in) begin
154
        operand2 = ram_data_in;
155
    end else if (operand2_in[`RB]) begin    // value missing
156
        if (write_en1 && operand2_in[`TAG_WIDTH-1:0] == write_tag1_in) begin
157
            operand2 = writeport1_in;       // obtained from result bus 1 (which may be my own output)
158
        end else if (write_en2 && operand2_in[`TAG_WIDTH-1:0] == write_tag2_in) begin
159
            operand2 =  writeport2_in;      // obtained from result bus 2
160
        end else begin
161
            if (opr2_used_in /*&& !mask_off*/) begin  // mask_off removed because of critical timing
162
                stall = 1;                  // operand not ready
163
                if (operand2_in[`TAG_WIDTH-1:0] != predict_tag1_in && operand2_in[`TAG_WIDTH-1:0] != predict_tag2_in) begin
164
                    stall_next = 1;         // operand not ready in next clock cycle
165
                end
166
            end
167
        end
168
    end else begin // value available
169
        operand2 = operand2_in[`RB1:0];
170
    end
171
 
172
    operand3 = 0;
173
    if (opr3_from_ram_in) begin
174
        operand3 = ram_data_in;
175
    end else if (operand3_in[`RB]) begin    // value missing
176
        if (write_en1 && operand3_in[`TAG_WIDTH-1:0] == write_tag1_in) begin
177
            operand3 = writeport1_in;       // obtained from result bus 1 (which may be my own output)
178
        end else if (write_en2 && operand3_in[`TAG_WIDTH-1:0] == write_tag2_in) begin
179
            operand3 = writeport2_in;       // obtained from result bus 2
180
        end else begin
181
            if (opr3_used_in /*&& !mask_off*/) begin // mask_off removed because of critical timing
182
                stall = 1;                  // operand not ready
183
                if (operand3_in[`TAG_WIDTH-1:0] != predict_tag1_in && operand3_in[`TAG_WIDTH-1:0] != predict_tag2_in) begin
184
                    stall_next = 1;         // operand not ready in next clock cycle
185
                end
186
            end
187
        end
188
    end else begin // value available
189
        operand3 = operand3_in[`RB1:0];
190
    end
191
 
192
    opx = opx_in;       // operation ID in execution unit. This is mostly equal to op1 for multiformat instructions
193
    opj = opj_in;       // operation ID for conditional jump
194
    result = 0;
195
    jump_result = 0;
196
    otout = ot_in[1:0]; // operand type for output
197
    result_type = result_type_in;
198
    jump_taken = 0;
199
    jump_not_taken = 0;
200
    nojump_target = 0;
201
    relative_jump_target = 0;
202
    error = 0;
203
    error_parm = 0;
204
 
205
    // auxiliary variables depending on operand type
206
    case (ot_in[1:0])
207
    0: begin                     // 8 bit
208
        msb      = 7;            // most significant bit
209
        sbit     = 8'H80;        // sign bit
210
        sizemask = 8'HFF;        // mask off unused bits
211
        signbit2 = operand2[7];  // sign bit of operand 2
212
        signbit3 = operand3[7];  // sign bit of operand 3
213
        end
214
    1: begin                     // 16 bit
215
        msb      = 15;           // most significant bit
216
        sbit     = 16'H8000;     // sign bit
217
        sizemask = 16'HFFFF;     // mask off unused bits
218
        signbit2 = operand2[15]; // sign bit of operand 2
219
        signbit3 = operand3[15]; // sign bit of operand 3
220
        end
221
    2: begin                     // 32 bit
222
        msb      = 31;           // most significant bit
223
        sbit     = 32'H80000000; // sign bit
224
        sizemask = 32'HFFFFFFFF; // mask off unused bits
225
        signbit2 = operand2[31]; // sign bit of operand 2
226
        signbit3 = operand3[31]; // sign bit of operand 3
227
        end
228
    3: begin                     // 64 bit, or 32 if 64 bit not supported
229
        msb      = `RB1;         // most significant bit
230
        sbit     = {1'b1,{(`RB-1){1'b0}}}; // sign bit
231
        sizemask = ~(`RB'b0);    // mask off unused bits
232
        signbit2 = operand2[`RB1]; // sign bit of operand 2
233
        signbit3 = operand3[`RB1]; // sign bit of operand 3
234
        end
235
    endcase
236
 
237
 
238
 
239
    ////////////////////////////////////////////////
240
    //             Select ALU operation
241
    ////////////////////////////////////////////////
242
 
243
    if (opx == `II_MOVE || opx == `II_STORE) begin
244
        // simple move instructions
245
        result = operand3;
246
 
247
    end else if (opx == `IX_READ_SPEC || opx == `IX_WRITE_SPEC) begin
248
        // read or write special registers
249
        result = operand2;
250
 
251
    end else if (opx == `II_SIGN_EXTEND || opx == `II_SIGN_EXTEND_ADD || opx == `IX_RELATIVE_JUMP) begin
252
        // instructions involving sign extension
253
        logic [`RB1:0] sign_ex;    // result of sign extension
254
        logic [`RB1:0] sign_ex_sc; // result of sign extension and scaling
255
        otout = 3;                 // 64 bit output
256
        // sign extend:
257
        case (ot_in[1:0])
258
        0: sign_ex = {{56{operand3[ 7]}},operand3[7:0]};    // 8 bit
259
        1: sign_ex = {{48{operand3[15]}},operand3[15:0]};   // 16 bit
260
        2: sign_ex = {{32{operand3[31]}},operand3[31:0]};   // 32 bit
261
        3: sign_ex = operand3[`RB1:0];                      // 64 bit
262
        endcase
263
        if (opx == `II_SIGN_EXTEND_ADD) begin
264
            // scale sign_ex.
265
            // The shift count is limited to 3 (scale factor at most 8) here for timing reasons so that it fits a 6-input LUT
266
            // A full barrel shifter takes too much time
267
            case (option_bits_in[1:0])       // optional shift count in option bits
268
            0: sign_ex_sc =  sign_ex;        // scale factor 1
269
            1: sign_ex_sc = {sign_ex,1'b0};  // scale factor 2
270
            2: sign_ex_sc = {sign_ex,2'b0};  // scale factor 4
271
            3: sign_ex_sc = {sign_ex,3'b0};  // scale factor 8
272
            endcase
273
            result = sign_ex_sc + operand2;  // add
274
            if (|(option_bits_in[5:2])) error_parm = 1; // shift count > 3
275
 
276
        end else begin
277
            result = sign_ex;
278
        end
279
        if (opx == `IX_RELATIVE_JUMP) begin
280
            relative_jump_target = sign_ex + operand2[`RB1:2] - {1'b1,{(`CODE_ADDR_START-2){1'b0}}}; // subtract (code memory start)/4
281
            if (|(operand2[1:0])) error_parm = 1; // jump to misaligned address
282
        end
283
 
284
    end else if (opx == `II_COMPARE || (opx >= `II_MIN && opx <= `II_MAX_U)) begin
285
        // instructions involving signed and unsigned compare. operation defined by option bits
286
        logic b1, b2, b3, eq, less;  // intermediate results
287
        logic [`RB1:0] sbit1;
288
        b1 = 0; b2 = 0; b3 = 0; eq = 0; less = 0;
289
        // if the comparison is signed (option_bits_in[3] = 0), toggle the sign bit of both operands so that the unsigned '<' below gives the signed ordering
290
        sbit1 = option_bits_in[3] ? `RB'b0 : sbit;            // sign bit if signed
291
        eq = (operand2 & sizemask) == (operand3 & sizemask);  // operands are equal
292
        less = ((operand2 & sizemask) ^ sbit1) < ((operand3 & sizemask) ^ sbit1); // a < b, signed or unsigned
293
 
294
        if (option_bits_in[2:1] == 0) begin
295
            b1 = eq;              // a == b
296
        end else if (option_bits_in[2:1] == 1) begin
297
            b1 = less;            // a < b
298
        end else if (option_bits_in[2:1] == 2) begin
299
            b1 = ~less & ~eq;     // a > b
300
        end else begin
301
            logic [`RB1:0] absa;
302
            logic [`RB1:0] absb;
303
            absa = signbit2 ? -operand2 : operand2;      // abs(a)
304
            absb = signbit3 ? -operand3 : operand3;      // abs(b)
305
            b1 = (absa & sizemask) < (absb & sizemask);  // abs(a) < abs(b)
306
        end
307
        jump_result = b1;                                // result for conditional jump
308
        b2 = b1 ^ option_bits_in[0];                     // bit 0 of condition code inverts the result
309
 
310
        // alternative use of mask
311
        case (option_bits_in[5:4])
312
        2'b00: b3 = regmask_val[0] ? b2 : operand1[0];    // normal fallback
313
        2'b01: b3 = regmask_val[0] & b2 & operand1[0];    // mask & result & fallback
314
        2'b10: b3 = regmask_val[0] & (b2 | operand1[0]);  // mask & (result | fallback)
315
        2'b11: b3 = regmask_val[0] & (b2 ^ operand1[0]);  // mask & (result ^ fallback)
316
        endcase
317
 
318
        // copy remaining bits from mask
319
        if (opx == `II_COMPARE && instruction_in[`MASK] != 3'b111) begin
320
            result[`RB1:1] = regmask_val[(`MASKSZ-1):1];
321
        end
322
 
323
        if (opx >= `II_MIN) begin
324
            // min and max instructions
325
            result = b1 ? operand2 : operand3;
326
        end else if (regmask_used_in | mask_alternative_in) begin
327
            // combine result with rest of mask or NUMCONTR
328
            result = {regmask_val[(`MASKSZ-1):1],b3};  // get remaining bits from mask
329
        end else begin
330
            // normal compare
331
            result = b3;
332
        end
333
 
334
    end else if (opx == `II_ADD || opx == `II_SUB) begin
335
        // addition, subtraction, and conditional jumps involving addition or subtraction
336
        logic [`RB:0] bigresult;       // one extra bit on result for carry
337
        logic zero;                    // result is zero
338
        logic sign;                    // sign of result
339
        logic carry;                   // unsigned carry/borrow
340
        logic overflow;                // signed overflow
341
 
342
        if (~opx[0]) bigresult = operand2 + operand3; // add
343
        else         bigresult = operand2 - operand3; // subtract
344
        result = bigresult[`RB1:0];    // result without extra carry bit
345
 
346
        case (ot_in[1:0])
347
        0:  begin                      // 8 bit
348
            sign  = bigresult[7];      // sign bit
349
            carry = bigresult[8];      // carry out (unsigned overflow)
350
            end
351
        1:  begin                      // 16 bit
352
            sign  = bigresult[15];     // sign bit
353
            carry = bigresult[16];     // carry out (unsigned overflow)
354
            end
355
        2:  begin                      // 32 bit
356
            sign  = bigresult[31];     // sign bit
357
            carry = bigresult[32];     // carry out (unsigned overflow)
358
            end
359
        3:  begin                      // 64 bit (or 32)
360
            sign  = bigresult[`RB1];   // sign bit
361
            carry = bigresult[`RB];    // carry out (unsigned overflow)
362
            end
363
        endcase
364
        zero = ~|(result & sizemask);  // result is zero
365
        overflow = (signbit2 ^ signbit3 ^ ~opx[0]) & (signbit2 ^ sign); // signed overflow
366
 
367
        // jump condition
368
        case (opj[3:1])
369
        `IJ_SUB_JZ      >> 1: jump_result = zero;
370
        `IJ_SUB_JNEG    >> 1: jump_result = sign;
371
        `IJ_SUB_JPOS    >> 1: jump_result = ~sign & ~zero;
372
        `IJ_SUB_JOVFLW  >> 1: jump_result = overflow;
373
        `IJ_SUB_JBORROW >> 1: jump_result = carry;
374
        default:              jump_result = 0;
375
        endcase
376
 
377
    end else if (opx == `II_AND || opx == `II_OR || opx == `II_XOR) begin
378
        if (opx == `II_AND) begin
379
            // bitwise AND, and conditional jumps involving this
380
            result = operand2[`RB1:0] & operand3[`RB1:0];
381
        end else if (opx == `II_OR) begin
382
            // bitwise OR, and conditional jumps involving this
383
            result = operand2[`RB1:0] | operand3[`RB1:0];
384
        end else if (opx == `II_XOR) begin
385
            // bitwise XOR, and conditional jumps involving this
386
            result = operand2[`RB1:0] ^ operand3[`RB1:0];
387
        end
388
        jump_result = ~|(result & sizemask);     // zero condition for conditional jump
389
 
390
    end else if (opx >= `II_CLEAR_BIT && opx <= `II_TEST_BITS_OR) begin
391
        // various bit manipulation instructions
392
        logic [`RB1:0] onebit;                   // 1 in the position indicated by opr3
393
        logic rbit;                              // result bit from test
394
        rbit = 0;
395
        onebit = 0;
396
        if ((operand3 & sizemask) <= msb) onebit[operand3[5:0]] = 1'b1;// onebit = 1 ** opr3
397
 
398
        case (opx)
399
        `II_CLEAR_BIT:      result = operand2 & ~ onebit;
400
        `II_SET_BIT:        result = operand2 | onebit;
401
        `II_TOGGLE_BIT:     result = operand2 ^ onebit;
402
        `II_TEST_BIT:       begin
403
                                rbit =  |(operand2 & onebit);
404
                            end
405
        `II_TEST_BITS_OR:   begin
406
                                rbit =  |(operand2 & operand3 & sizemask);
407
                            end
408
        `II_TEST_BITS_AND:  begin
409
                                rbit = ~|(((operand2 & operand3) ^ operand3) & sizemask);
410
                            end
411
        endcase
412
        jump_result  = rbit;                           // jump condition for bit tests
413
 
414
        if (opx >= `II_TEST_BIT && opx <= `II_TEST_BITS_OR) begin
415
            // alternative use of mask and fallback in bit test instructions
416
            logic a, b, c;
417
            a = regmask_val[0] ^ option_bits_in[4];    // mask bit flipped by option bit 4
418
            b = rbit ^ option_bits_in[2];              // result bit flipped by option bit 2
419
            c = operand1[0] ^ option_bits_in[3];       // fallback bit flipped by option bit 3
420
            case (option_bits_in[1:0])                 // boolean operations controlled by option bits 1-0
421
            2'b00: result[0] = a ?  b : c;             // normal fallback
422
            2'b01: result[0] = a & (b & c);            // mask & result & fallback
423
            2'b10: result[0] = a & (b | c);            // mask & (result | fallback)
424
            2'b11: result[0] = a & (b ^ c);            // mask & (result ^ fallback)
425
            endcase
426
            if (option_bits_in[5]) begin               // copy remaining bits from mask or NUMCONTR
427
                result[`RB1:1] = regmask_val[(`MASKSZ-1):1];
428
            end
429
        end
430
 
431
    end else if ((opx >= `II_SHIFT_LEFT && opx <= `II_SHIFT_RIGHT_U) || opx == `II_FUNNEL_SHIFT
432
        || opx == `IX_MOVE_BITS1 || opx == `IX_MOVE_BITS2) begin
433
        // shift instructions and other instruction involving shift and rotate
434
 
435
        // Barrel shifters are expensive in terms of LUT use.
436
        // Make one universal barrel shifter to use for all shift and rotate instructions
437
        logic [(`RB*2-1):0] barrel;         // input to barrel shifter. 2x32 or 2x64 bits
438
        logic [`RB1:0] barrel_out;          // output from barrel shifter. 32 or 64 bits
439
        logic [5:0] shift_count1;           // shift count for barrel shifter
440
        logic [5:0] shift_count2;           // shift count for barrel shifter, limited
441
        logic overfl;                       // shift count overflows
442
        if (opx == `II_SHIFT_LEFT || opx == `II_ROTATE) begin
443
            shift_count1 = -operand3[5:0];
444
        end else begin
445
            shift_count1 =  operand3[5:0];
446
        end
447
 
448
        // select input for barrel shifter
449
        barrel = 0;
450
        if (ot_in[1:0] == 0) begin // 8 bits
451
            shift_count2 = shift_count1[2:0];
452
            if (opx == `II_SHIFT_LEFT || opx == `IX_MOVE_BITS1) begin
453
                barrel[15:8] = operand2[7:0];
454
                if (operand3[5:0] == 0) barrel[7:0] = operand2[7:0]; // no shift
455
            end else if (opx == `II_SHIFT_RIGHT_S) begin
456
                barrel[7:0]  = operand2[7:0];
457
                barrel[15:8] = {8{operand2[7]}}; // sign bit
458
            end else if (opx == `II_SHIFT_RIGHT_U || opx == `IX_MOVE_BITS2) begin
459
                barrel[7:0]  = operand2[7:0];
460
            end else if (opx == `II_ROTATE) begin
461
                barrel[7:0]  = operand2[7:0];
462
                barrel[15:8] = operand2[7:0];
463
            end else begin // funnel shift
464
                barrel[7:0]  = operand1[7:0];
465
                barrel[15:8] = operand2[7:0];
466
            end
467
        end else if (ot_in[1:0] == 1) begin // 16 bits
468
            shift_count2 = shift_count1[3:0];
469
            if (opx == `II_SHIFT_LEFT || opx == `IX_MOVE_BITS1) begin
470
                barrel[31:16] = operand2[15:0];
471
                if (operand3[5:0] == 0) barrel[15:0] = operand2[15:0]; // no shift
472
            end else if (opx == `II_SHIFT_RIGHT_S) begin
473
                barrel[15:0]  = operand2[15:0];
474
                barrel[31:16] = {16{operand2[15]}}; // sign bit
475
            end else if (opx == `II_SHIFT_RIGHT_U || opx == `IX_MOVE_BITS2) begin
476
                barrel[15:0]  = operand2[15:0];
477
            end else if (opx == `II_ROTATE) begin
478
                barrel[15:0]  = operand2[15:0];
479
                barrel[31:16] = operand2[15:0];
480
            end else begin // funnel shift
481
                barrel[15:0]  = operand1[15:0];
482
                barrel[31:16] = operand2[15:0];
483
            end
484
        end else if (ot_in[1:0] == 2 || `RB <= 32) begin // 32 bits (or 64 bits if not supported)
485
            shift_count2 = shift_count1[4:0];
486
            if (opx == `II_SHIFT_LEFT || opx == `IX_MOVE_BITS1) begin
487
                barrel[63:32] = operand2[31:0];
488
                if (operand3[5:0] == 0) barrel[31:0] = operand2[31:0]; // no shift
489
            end else if (opx == `II_SHIFT_RIGHT_S) begin
490
                barrel[31:0]  = operand2[31:0];
491
                barrel[63:32] = {32{operand2[31]}}; // sign bit
492
            end else if (opx == `II_SHIFT_RIGHT_U || opx == `IX_MOVE_BITS2) begin
493
                barrel[31:0]  = operand2[31:0];
494
            end else if (opx == `II_ROTATE) begin
495
                barrel[31:0]  = operand2[31:0];
496
                barrel[63:32] = operand2[31:0];
497
            end else begin // funnel shift
498
                barrel[31:0]  = operand1[31:0];
499
                barrel[63:32] = operand2[31:0];
500
            end
501
        end else begin // 64 bits (if supported)
502
            shift_count2 = shift_count1[5:0];
503
            if (opx == `II_SHIFT_LEFT || opx == `IX_MOVE_BITS1) begin
504
                barrel[127:64] = operand2[63:0];
505
                if (operand3[5:0] == 0) barrel[63:0] = operand2[63:0]; // no shift
506
            end else if (opx == `II_SHIFT_RIGHT_S) begin
507
                barrel[63:0]   = operand2[63:0];
508
                barrel[127:64] = {64{operand2[63]}}; // sign bit
509
            end else if (opx == `II_SHIFT_RIGHT_U || opx == `IX_MOVE_BITS2) begin
510
                barrel[63:0]  = operand2[63:0];
511
            end else if (opx == `II_ROTATE) begin
512
                barrel[63:0]  = operand2[63:0];
513
                barrel[127:64] = operand2[63:0];
514
            end else begin // funnel shift
515
                barrel[63:0]   = operand1[63:0];
516
                barrel[127:64] = operand2[63:0];
517
            end
518
        end
519
 
520
        // big barrel shifter
521
        barrel_out = barrel[shift_count2+:`RB];
522
 
523
        // select output
524
        overfl = (operand3 & sizemask) > msb; // check if shift count overflows
525
 
526
        if (opx == `IX_MOVE_BITS1 || opx == `IX_MOVE_BITS2) begin   // move_bits instruction
527
            // insert shift result in destination bit field
528
            integer i;
529
            for (i = 0; i < `RB; i++) begin
530
                if (i >= im2_bits_in[13:8] && i <= option_bits_in) result[i] = barrel_out[i];
531
                else result[i] = operand1[i];
532
            end
533
 
534
        end else if (overfl) begin
535
            if (opx == `II_SHIFT_RIGHT_S) result = {`RB{signbit2}}; // shift right overflows to sign bit
536
            else if (opx == `II_ROTATE) result = barrel_out;        // rotate has no overflow
537
            else result = 0;                                        // all other shifts overflow to zero
538
 
539
        end else begin
540
            result = barrel_out;  // result of shift or rotate
541
        end
542
 
543
 
544
    end else if (opx == `II_ADD_ADD) begin
545
        // 3-operand add. signs are controlled by option bits
546
        // (this is separate from the add and subtract operations with conditional jumps because the timing is critical)
547
        logic [`RB1:0] r1, r2, r3;
548
        r1 = option_bits_in[0] ? -operand1[`RB1:0] : operand1[`RB1:0];
549
        r2 = option_bits_in[1] ? -operand2[`RB1:0] : operand2[`RB1:0];
550
        r3 = option_bits_in[2] ? -operand3[`RB1:0] : operand3[`RB1:0];
551
        result = r1 + r2 + r3;
552
 
553
    end else if (opx == `II_SELECT_BITS) begin
554
        // select_bits instruction
555
        result = (operand1[`RB1:0] & operand3[`RB1:0]) | (operand2[`RB1:0] & ~operand3[`RB1:0]);
556
 
557
    // bit scan is critical in terms of timing. Several different implementations tried here:
558
    `define BITSCAN_BASED_ON_ROUNDP2
559
    `ifdef  BITSCAN_BASED_ON_ROUNDP2   // bit scan and roundp2 instructions combined. This takes less resources
560
 
561
    end else if (opx == `IX_BIT_SCAN || opx == `IX_ROUNDP2) begin
562
        //
563
        // using bit index method because this makes roundp2 simple
564
 
565
        logic [`RB1:0] a;              // intermediate results
566
        logic [`RB1:0] b;
567
        logic [`RB1:0] c;
568
        logic [`RB1:0] d;
569
        logic [6:0]    bitscan_result;
570
        logic [5:0]    r;
571
        logic          iszero;         // input is zero
572
        logic          ispow2;         // input is a power of 2
573
        r = 0; iszero = 0;
574
 
575
        a = operand2 & sizemask;
576
        ispow2 = ~|(a & (a-1));        // a is a power of 2
577
 
578
        if (opx == `IX_ROUNDP2 || operand3[0]) begin
579
            // bitscan reverse scan
580
            `ifdef SUPPORT_64BIT
581
                b = reversebits64(a);  // reverse order of bits (in subfunctions.vh)
582
                c = b & ~(b-1);        // isolate lowest 1-bit
583
                d = reversebits64(c);  // reverse back again
584
            `else
585
                b = reversebits32(a);  // reverse order of bits (in subfunctions.vh)
586
                c = b & ~(b-1);        // isolate lowest 1-bit
587
                d = reversebits32(c);  // reverse back again
588
            `endif
589
        end else begin
590
            // bitscan forward scan
591
            d = a & ~(a-1);            // isolate lowest 1-bit
592
        end
593
 
594
        // bitindex implemented in subfunctions.vh
595
        bitscan_result = bitindex(d);
596
        r = bitscan_result[6:1];
597
        iszero = bitscan_result[0];
598
 
599
        if (iszero) begin              // input is zero. output determined by option bit 4
600
            if (operand3[4]) begin
601
                result = ~(`RB'b0);    // return -1 if zero
602
            end else begin
603
                result = `RB'b0;       // return 0 if zero
604
            end
605
        end else if (opx == `IX_BIT_SCAN) begin
606
            result = r;                // output result
607
        end else if (!operand3[0] || ispow2) begin
608
            // roundp2 round down to nearest power of 2
609
            result = d;
610
        end else begin
611
            // round up to nearest power of 2
612
            if (signbit2) begin        // overflow
613
                result = operand3[5] ? ~(`RB'b0) : 0; // return 0 or -1 if overflow
614
            end else begin
615
                result = {d,1'b0};     // round up
616
            end
617
        end
618
 
619
    `else   // bit scan and roundp2 instructions implemented separately
620
 
621
    end else if (opx == `IX_ROUNDP2) begin
622
 
623
        logic [`RB1:0] a;              // intermediate results
624
        logic [`RB1:0] b;
625
        logic [`RB1:0] c;
626
        logic [`RB1:0] d;
627
        logic          iszero;         // input is zero
628
        logic          ispow2;         // input is a power of 2
629
 
630
        a = operand2 & sizemask;       // cut off input to desired operand size
631
        iszero = ~|a;                  // input is zero
632
        ispow2 = ~|(a & (a-1));        // input is a power of 2
633
 
634
        `ifdef SUPPORT_64BIT
635
            b = reversebits64(a);      // reverse order of bits (in subfunctions.vh)
636
            c = b & ~(b-1);            // isolate lowest 1-bit
637
            d = reversebits64(c);      // reverse back again
638
        `else
639
            b = reversebits32(a);      // reverse order of bits (in subfunctions.vh)
640
            c = b & ~(b-1);            // isolate lowest 1-bit
641
            d = reversebits32(c);      // reverse back again
642
        `endif
643
 
644
        if (iszero) begin              // input is zero. output determined by option bit 4
645
            if (operand3[4]) begin
646
                result = ~(`RB'b0);    // return -1 if zero
647
            end else begin
648
                result = 0;            // return 0 if zero
649
            end
650
        end else if (~operand3[0] | ispow2) begin
651
            // roundp2 round down to nearest power of 2
652
            result = d;
653
        end else begin
654
            // round up to nearest power of 2
655
            if (signbit2) begin        // overflow
656
                result = operand3[5] ? ~(`RB'b0) : 0; // return 0 or -1 if overflow
657
            end else begin
658
                result = {d,1'b0};     // round up
659
            end
660
        end
661
 
662
    end else if (opx == `IX_BIT_SCAN) begin
663
 
664
        logic [`RB1:0] a;              // input cut off to desired operand size
665
        logic [`RB1:0] b;              // input with bits reversed
666
        logic [`RB1:0] c;              // input bits reversed if forward scan
667
        logic [6:0]    r;              // bitscan result
668
        logic          iszero;         // input is zero
669
 
670
        a = operand2 & sizemask;       // cut off input to desired operand size
671
 
672
        // reverse bits if forward scan
673
        case (ot_in[1:0])
674
        0:  b = reversebits8(operand2[7:0]);         // 8 bit
675
        1:  b = reversebits16(operand2[15:0]);       // 16 bit
676
        `ifdef SUPPORT_64BIT
677
        3:  b = reversebits64(operand2[63:0]);       // 64 bit
678
        `endif
679
        default: b = reversebits32(operand2[31:0]);  // 32 bit
680
        endcase
681
 
682
        if (operand3[0]) c = a;        // reverse scan
683
        else             c = b;        // forward scan
684
 
685
        // bitscan function defined in subfunctions.vh
686
        r = bitscan64A(a);             // this implementation may be faster?
687
        //r = bitscan64C(c);           // alternative implementation
688
        iszero = r[0];                 // input is zero
689
 
690
        if (iszero) begin              // input is zero. output determined by option bit 4
691
            if (operand3[4]) begin
692
                result = ~(`RB'b0);    // return -1 if zero
693
            end else begin
694
                result = 0;            // return 0 if zero
695
            end
696
        end else begin
697
            result = r[6:1];           // normal bitscan result
698
        end
699
 
700
    `endif
701
 
702
 
703
    end else if (opx == `IX_POPCOUNT) begin
704
        // popcount instruction. The functions are in subfunctions.vh
705
        if (`RB <= 32) result = popcount32(operand2 & sizemask);
706
        else result = popcount64(operand2 & sizemask);
707
 
708
    end else if (opx == `IX_ABS) begin
709
        // abs instruction
710
        if (~signbit2) begin
711
            result = operand2;       // input is not negative
712
        end else if ((operand2 & ~sbit & sizemask) == 0) begin
713
            // overflow
714
            case (operand3[1:0])     // last operand determines what to do with overflow
715
            0: result = operand2;    // overflow wraps around
716
            1: result = ~sbit;       // overflow gives saturation
717
            2: result = 0;           // overflow gives 0
718
            endcase
719
        end else begin
720
            result = -operand2;      // input is negative. change sign
721
        end
722
 
723
    end else if (opx == `IX_TRUTH_TAB3) begin
724
        // truth_tab3 instruction
725
        // truth_table_lookup is in subfunctions.vh
726
        result = truth_table_lookup(operand1, operand2, operand3, im2_bits_in[7:0]);
727
        if (option_bits_in[0]) result[`RB1:1] = 0;   // output only bit 0
728
        else if (option_bits_in[1]) result[`RB1:1] = regmask_val[(`MASKSZ-1):1]; // remaining bits from mask
729
 
730
    end else if (opx == `IX_INSERT_HI) begin
731
        // insert constant into high 32 bits, leave low 32 bit unchanged
732
        `ifdef SUPPORT_64BIT
733
            result = {operand3[31:0],operand2[31:0]};
734
        `else
735
            result = operand2;
736
        `endif
737
 
738
    end else if (category_in == `CAT_JUMP) begin
739
        // jump instructions that have no corresponding general instruction
740
 
741
        if (opj[5:0] >= `IJ_INC_COMP_JBELOW && opj[5:0] <= `IJ_INC_COMP_JABOVE+1) begin
742
            // loop instruction: increment and jump if below/above
743
 
744
        `ifdef THIS_VERSION_IS_SLOW__IT_IS_NOT_USED
745
            // This version is slow because the addition and the compare both involve a big carry-lookahead circuit.
746
            // Use this version only if timing is not critical
747
            logic eq, less;
748
            result = operand2 + 1;     // increment
749
            eq = (result & sizemask) == (operand3 & sizemask);  // operands are equal
750
            less = ((result & sizemask) ^ sbit) < ((operand3 & sizemask) ^ sbit); // a+1 < b, signed
751
            if (opj[1]) begin
752
                jump_result = ~less & ~eq;   // above
753
            end else begin
754
                jump_result = less;          // below
755
            end
756
        `else
757
            // This version is faster because it does most of the compare in parallel with the addition
758
            logic less;                // a < b, signed
759
            logic result_equal_limit;  // a + 1 == b
760
            logic b_is_min;            // the limit b is INT_MIN. a+1 < b always false
761
            logic overflow1;           // a+1 overflows
762
            // The overflow check may not be important, but we want to make sure that the result is always
763
            // the same as if the increment and the compare are coded as two separate instructions
764
            result = operand2 + 1;     // increment
765
            less = ((operand2 & sizemask) ^ sbit) < ((operand3 & sizemask) ^ sbit); // a < b, signed
766
            overflow1 = ((operand2 & sizemask) ^ sbit) == sizemask;       // a+1 overflows
767
            b_is_min = ((operand3 & sizemask) ^ sbit) == 0;               // limit is INT_MIN, nothing is less than limit
768
            result_equal_limit = ((result ^ operand3) & sizemask) == 0;   // a + 1 == b
769
            if (opj[1]) begin          // increment_compare/jump_above
770
                // check if a+1 > b <=> !(a+1 <= b) <=> !(a < b || overflow)
771
                jump_result = ~(less | overflow1);  // a+1 > b
772
            end else begin    // increment_compare/jump_below
773
                // check if a+1 < b <=> (a < b && a+1 != b) || (overflow && b != INT_MIN)
774
                jump_result = (less & ~result_equal_limit) | (overflow1 & ~b_is_min);  // a + 1 < b
775
            end
776
 
777
        `endif
778
 
779
        end else if (opj[5:1] == `IJ_SUB_MAXLEN_JPOS >> 1) begin
780
 
781
            // vector loop instruction: subtract maximum vector length and jump if positive
782
            logic [`RB1:0] max_vector_length;
783
            logic sign;                     // sign of result
784
            logic zero;                     // result is zero
785
            if (`NUM_VECTOR_UNITS > 0) max_vector_length = `NUM_VECTOR_UNITS * 8;
786
            else max_vector_length = 8;     // make sure max_vector_length is not zero to avoid infinite loop
787
            result = operand2 - max_vector_length;
788
 
789
            zero = ~|(result & sizemask);
790
            case (ot_in[1:0])
791
            0:  sign = result[7];           // 8 bit
792
            1:  sign = result[15];          // 16 bit
793
            2:  sign = result[31];          // 32 bit
794
            3:  sign = result[`RB1];        // 64 bit (or 32)
795
            endcase
796
            `ifdef SUPPORT_64BIT
797
            if (instruction_in[`IL] == 1) begin
798
                // 64 bits in format C
799
                otout = 3;                  // 64 bit output
800
                sign = result[`RB1];
801
                zero = ~|result;
802
            end
803
            `endif
804
            jump_result = ~sign & ~zero;
805
 
806
        end
807
 
808
    end else if (opx == `II_NOP) begin
809
        // nop instruction. do nothing
810
 
811
    end else begin
812
        // unknown instruction. error
813
        error = 1;
814
 
815
    end
816
 
817
    if (vector_in) error = 1;  // Vector instructions not supported yet
818
 
819
    if (category_in == `CAT_JUMP) begin
820
 
821
        // manage conditional jump conditions
822
 
823
        logic [1:0] il;
824
        logic [2:0] mode;
825
        il = instruction_in[`IL];
826
        mode = instruction_in[`MODE];
827
 
828
        // calculate target if not jumping
829
        //instruction_length = il[1] ? il : 1;  // il cannot be 0 for jump instructions)
830
        nojump_target = instruction_pointer_in + il;
831
        // treat jump as not taken if jump target is equal to nojump target
832
        jump_not_taken = nojump_target == operand1_in;
833
 
834
        // detect jump result
835
        if (jump_result ^ opj[0]) jump_taken = 1;        // bit 0 of opj inverts the condition
836
 
837
        if (opj > `IJ_LAST_CONDITIONAL) jump_taken = 1;  // unconditional jump always taken
838
 
839
        if (opj == `IJ_TRAP) begin // trap and IJ_SYSCALL have same opj. Both will stop debugger
840
            jump_taken = 0;        // use trap as debug breakpoint. Resume execution in next instruction
841
        end
842
 
843
        // compare, test and indirect jumps have no register return. The decoder takes care of result_type = `RESULT_NONE;
844
    end
845
 
846
    // normal register output
847
    // regmask_used_in removed from this equation because of critical timing:
848
    normal_output = valid_in & ~stall & ~stall_in
849
    & (result_type == `RESULT_REG | result_type == `RESULT_SYS)
850
    & (regmask_val[0] | mask_alternative_in) & ~vector_in;
851
 
852
end
853
 
854
 
855
// outputs
856
// Output registers of the ALU stage.
// All outputs are updated on the rising edge of clock, and only while clock_enable is high.
// The block has five parts:
//   1. register result (result_out, register_a_out, tag_val_out, register_write_out)
//   2. jump / no-jump flags (jump_out, nojump_out)
//   3. jump target address (jump_pointer_out)
//   4. status outputs (valid, stall, error)
//   5. debugger outputs
// Every output is assigned with nonblocking assignments. Where the same output is assigned
// more than once in this block, the textually last assignment that executes wins.
always_ff @(posedge clock) if (clock_enable) begin

    // ---- 1. register result ----
    if (normal_output) begin
        // normal register output: result truncated to the output operand size, zero-extended
        case (otout)
        0: result_out <= result[7:0];
        1: result_out <= result[15:0];
        2: result_out <= result[31:0];
        3: result_out <= result[`RB1:0];
        endcase
        register_write_out <= ~reset;
        tag_val_out <= tag_val_in;
        // destination register number. high bit is 1 for system registers
        register_a_out <= {result_type[0],instruction_in[`RD]};

    end else if (!valid_in || stall || stall_in || result_type == `RESULT_MEM || result_type == `RESULT_NONE || vector_in) begin
        // no register result in this clock cycle.
        // stall_in must disable the output to avoid executing the same instruction twice.
        // note: the FPGA has no internal tri-state buffers. We need to simulate result bus by or'ing outputs,
        // so every output is driven to zero when this unit has nothing to write
        register_write_out <= 0;
        result_out <= 0;
        register_a_out <= 0;
        tag_val_out <= 0;

    end else /*if (!regmask_val[0] && !mask_alternative_in) */ begin
        // mask is zero. output is fallback (operand1), truncated to the output operand size
        case (otout)
        0: result_out <= operand1[7:0];
        1: result_out <= operand1[15:0];
        2: result_out <= operand1[31:0];
        3: result_out <= operand1[`RB1:0];
        endcase
        register_write_out <= ~reset;
        register_a_out <= {1'b0,instruction_in[`RD]};
        tag_val_out <= tag_val_in;
    end

    // ---- 2. jump / no-jump flags ----
    // jump_pointer_out is deliberately not assigned here: the target selection in part 3
    // covers every case, so any assignment made here would be overridden by it.
    if (stall || stall_in || !valid_in) begin
        // nothing is executed in this clock cycle
        jump_out <= 0;
        nojump_out <= 0;

    end else if (category_in == `CAT_JUMP) begin
        // additional output for conditional jump instructions
        if (jump_not_taken | ~jump_taken) begin
            // jump not taken, or jump target equals the address of the next instruction
            jump_out <= 0;
            nojump_out <= valid_in;
        end else begin // jump taken
            jump_out <= valid_in && !reset;
            nojump_out <= 0;
        end

    end else begin
        // not a jump instruction
        jump_out <= 0;
        nojump_out <= 0;
    end

    // ---- 3. jump target address ----
    // NOTE(review): because this selection is unconditional, jump_pointer_out carries the
    // jump target even when the jump is not taken (nojump_out = 1), and operand1_in for
    // non-jump instructions. nojump_target is never output. Confirm that the consumer of
    // nojump_out does not expect nojump_target on jump_pointer_out.
    if (opx == `IX_INDIRECT_JUMP) begin
        // special case for indirect jumps
        jump_pointer_out <= operand3[`RB1:2] - {1'b1,{(`CODE_ADDR_START-2){1'b0}}}; // jump target = (last operand - code memory start)/ 4

    end else if (opx == `IX_RELATIVE_JUMP) begin      // jump target is calculated
        jump_pointer_out <= relative_jump_target;

    end else begin
        jump_pointer_out <= operand1_in;              // jump target is calculated in previous stage
    end

    // ---- 4. status outputs ----
    valid_out <= !stall & valid_in & !reset;          // a valid output is produced
    stall_out <= stall  & valid_in & !reset;          // stalled. waiting for operand
    stall_next_out <= stall_next & valid_in & !reset; // predict stall in next clock cycle
    error_out <= error & valid_in & !reset;           // unknown instruction

    // wrong parameter. This includes a misaligned indirect jump target, i.e. the two low
    // bits of the target address in operand3 are not zero.
    // The misalignment check must be part of this single assignment: a separate nonblocking
    // assignment to error_parm_out earlier in the block would be overridden by this one and
    // the error would never be reported.
    // The check is suppressed while stall is set because the unit is then waiting for an
    // operand, so operand3 may not hold its final value yet.
    error_parm_out <= (error_parm | ((opx == `IX_INDIRECT_JUMP) & !stall & (|operand3[1:0])))
                      & valid_in & !reset;

    // ---- 5. outputs for debugger ----
    debug1_out <= 0;                                  // bits not listed below read as zero

    debug1_out[6:0]   <= opx;
    debug1_out[14:8]  <= opj;

    debug1_out[21:20] <= category_in;

    debug1_out[24]    <= stall;
    debug1_out[25]    <= stall_next;
    debug1_out[27]    <= error;

    debug1_out[28]    <= jump_taken;
    debug1_out[29]    <= jump_not_taken;
    debug1_out[30]    <= jump_result;
    debug1_out[31]    <= valid_in;

    // NOTE(review): unlike debug1_out, debug2_out is not cleared first, so the bits not
    // listed below are never assigned in this block.
    debug2_out[16]    <= opr1_used_in;
    debug2_out[17]    <= opr2_used_in;
    debug2_out[18]    <= opr3_used_in;
    debug2_out[19]    <= regmask_used_in;
    debug2_out[20]    <= mask_alternative_in;
    debug2_out[21]    <= mask_off;
    /*
    debug2_out[22]    <= regmask_val_in[0];
    debug2_out[23]    <= regmask_val_in[`MASKSZ];
    debug2_out[27:24] <= regmask_val[3:0];
    debug2_out[28]    <= regmask_val[`MASKSZ];
    */
end
963
 
964
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.