OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [trunk/] [fetch.sv] - Blame information for rev 117

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 26 Agner
//////////////////////////////////////////////////////////////////////////////////
2
// Engineer: Agner Fog
3
//
4
// Create Date:    2020-05-03
5
// Last modified:  2021-07-30
6
// Module Name:    fetch
7
// Project Name:   ForwardCom soft core
8
// Target Devices: Artix 7
9
// Tool Versions:  Vivado v. 2020.1
10
// License:        CERN-OHL-W v. 2 or later
11
// Description:    Instruction fetch and unconditional jump, call, and return
12
//
13
//////////////////////////////////////////////////////////////////////////////////
14
`include "defines.vh"
15
 
16
// code address to jump to when reset button is pressed
17
parameter max_loader_size   = (`MAX_LOADER_SIZE) << 2;          // loader size in bytes (assuming MAX_LOADER_SIZE counts 32-bit words -- TODO confirm in defines.vh)
18
parameter code_memory_start = 2**`CODE_ADDR_START;
19
parameter code_memory_size  = 2**(`CODE_ADDR_WIDTH+2);
20
//parameter code_memory_end   = code_memory_start + code_memory_size;
21
parameter loader_start_address = code_memory_size - max_loader_size;  // address of loader relative to code memory start, in bytes
22
 
23
// upper 7 bits of instruction word identifying unconditional jump or call
24
parameter instruction_jump_uncond = 7'b0111100; // next bit is 1 for call, 0 for jump. The rest is 24 bits signed offset
25
// upper 11 bits of instruction word identifying return instruction
26
parameter instruction_return  = 11'b01110111110;
27
// upper 11 bits of instruction word identifying sys_return instruction
28
parameter instruction_sys_return  = 11'b01111111110;
29
// upper 4 bits of any 1-word control transfer instruction
30
parameter instruction_jumpa = 4'b0111;
31
// upper 8 bits of any 2-word control transfer instruction
32
parameter instruction_jump2w = 8'b10101000;
33
// upper 8 bits of any 3-word control transfer instruction
34
parameter instruction_jump3w = 8'b11001000;
35
// OP1 field value for push and pop instructions (= 56, 57). The lowest bit is ignored in the comparison
36
parameter instruction_push_pop = 6'b111000;
37
// upper 11 bits of instruction word identifying read_perfs serializing instruction. Need M bit too
38
parameter instruction_read_perfs  = 11'b01000100101;
39
 
40
 
41
// Fetch module: fetch instructions from memory or code cache
42
module fetch
43
(   input clock,                                 // system clock (100 MHz)
44
    input clock_enable,                          // clock enable. Used when single-stepping
45
    input reset,                                 // system reset.
46
    input restart,                               // restart running program
47
    input valid_in,                              // valid data from code cache ready
48
    input stall_in,                              // a later stage in pipeline is stalled
49
    input jump_in,                               // a jump target is coming from the ALU. jump_pointer has been sent to the code cache
50
    input nojump_in,                             // signal from ALU that the jump target is the next instruction
51
    input [`CODE_ADDR_WIDTH-1:0] jump_pointer,   // jump target from ALU
52
    input [`CODE_DATA_WIDTH-1:0] read_data,      // data from code cache
53
    input [`CODE_ADDR_WIDTH-1:0] return_pop_data,// Return address popped here at return instruction
54
    output reg [`CODE_ADDR_WIDTH-2:0] read_addr_out, // read address relative to code memory start
55
    output reg read_enable_out,                  // code cache read enable
56
    output reg valid_out,                        // An instruction is ready for output to decoder
57
    output reg jump_out,                         // A jump instruction is bypassing the pipeline
58
    output reg [`CODE_ADDR_WIDTH-1:0] instruction_pointer_out, // address of current instruction
59
    output reg [95:0] instruction_out,           // current instruction, up to 3 words long
60
    output reg call_e_out,                       // Executing call instruction. push_data contains return address
61
    output reg return_e_out,                     // Executing return instruction. return address is available in advance on pop_data
62
    output reg stall_predict_out,                // Predict that decoder will use multiple clock cycles
63
    output reg [`CODE_ADDR_WIDTH-1:0] call_push_data_out, // Return address pushed here at call instruction
64
    output reg [31:0] debug1_out                 // temporary debug output
65
);
66
 
67
// Efficient handling of jumps, calls, and returns:
68
// Unconditional jumps, calls, and returns are executed directly in the fetch unit rather
69
// than waiting for the instruction to go through the pipeline.
70
// Conditional and indirect jumps must go to the ALU. The jump target address is fed from the ALU
71
// directly to the code cache in order to save one clock cycle.
72
// Direct calls and returns are communicating directly with the call stack.
73
// Indirect calls are handled in both fetch unit and ALU. The return address is pushed on the
74
// call stack by the fetch module while the target address comes from the ALU.
75
// Return addresses are obtained from the call stack. It takes one clock to send a call or return
76
// request to the call stack and another clock to retrieve the return address from the stack.
77
// Therefore, it is not possible to execute a return in the first clock cycle after another
78
// call or return. The fetch module does not check for this because the second return is delayed
79
// for a clock cycle anyway to wait for the target to be fetched from the code cache.
80
 
81
parameter fetch_buffer_size = 8; // number of 32-bit words in instruction fetch buffer
82
 
83
// Name suffixes on local variables:
84
// 0: relates to the instruction that is currently in output registers
85
// 1: relates to the instruction that is being generated in the current clock cycle
86
// 2: relates to the instruction that will be generated in the next clock cycle
87
 
88
reg [0:fetch_buffer_size-1][31:0] fetch_buffer;  // instruction buffer, (fetch_buffer_size) * 32-bit words
89
reg   unsigned [3:0] valid_words0;               // number of valid 32-bit words in fetch_buffer
90
logic unsigned [3:0] valid_words1;               // number of valid words in fetch_buffer in next clock cycle
91
logic unsigned [1:0] instruction_length0;        // length of current instruction, in 32-bit words
92
logic unsigned [1:0] instruction_length1;        // length of next instruction, in 32-bit words
93
logic unsigned [1:0] instruction_length2;        // length of 2. next instruction, in 32-bit words
94
logic instruction_ready0;                        // current instruction has been fetched
95
logic instruction_ready1;                        // instruction 1 will be dispatched in next clock cycle
96
 
97
logic [1:0] buffer_action;   // 0: idle. nothing dispatched. buffer is full or waiting for data
98
                             // 1: fill buffer. nothing dispatched. new data arriving from code cache
99
                             // 2: dispatch. instruction 0 is dispatched to the pipeline. shift down data
100
                             // 3: dispatch and fill.
101
logic shift_out0;                                // instruction 0 is dispatched in this clock cycle and fetch_buffer is shifted to get the next instruction to position 0
102
logic unsigned [1:0] dispatch_length0;           // length of dispatched instruction
103
logic send_next;                                 // send an address to code cache. true if buffer is sure not to overflow in next two clocks
104
logic [3:0] fetch_buffer_pos;                    // position where to write to fetch_buffer from cache
105
 
106
logic early_jump;                                // jump instruction detected in instruction 1 or 2
107
logic conditional_jump;                          // a conditional or indirect jump or call detected in instruction 1. Wait for ALU to find target
108
logic [1:0] call_instruction;                    // 1: any kind of call or trap detected in instruction 1 or 2. Push return address on stack
109
                                                 // 2: return or system return instruction detected. pop return address from stack
110
logic unsigned [`CODE_ADDR_WIDTH-1:0] early_jump_addr; // target address for early jump
111
reg unsigned [`CODE_ADDR_WIDTH:0] jump_target;   // save jump target address. may be calculated here for unconditional jump, or input from ALU for conditional jump
112
logic unsigned [`CODE_ADDR_WIDTH:0] reset_target;// Address of loader or restart code
113
reg restart_underway;                            // remember restarting is in process
114
 
115
logic unsigned [`CODE_ADDR_WIDTH-1:0] return_addr; // return address after call instruction
116
logic [31:0] word1;                              // first word of instruction 1
117
logic unsigned [`CODE_ADDR_WIDTH-1:0] instruction_pointer1; // address of instruction 1
118
 
119
reg [3:0] jump_case;  // for debug display only. may be removed
120
 
121
// It takes two clock cycles to fetch data from the code cache: one clock to send an address to
122
// the code cache, and one clock to send the data from the code cache.
123
// The following three registers keep track of the data that is underway:
124
// next_underway is tracking sequential code, target_underway is tracking jump targets,
125
// and wait_for_target tells that we are waiting for a jump target to be calculated and fetched.
126
 
127
reg [1:0] next_underway; // next_underway is a shift register indicating that code words are underway from the code cache
128
// next_underway is shifted right with zero extension
129
// next_underway[0]: data arrived from code cache
130
// next_underway[1]: next address has been sent to code cache
131
 
132
reg [2:0] target_underway;  // target_underway is a shift register indicating that a jump target is underway:
133
// target_underway is shifted right with zero extension
134
// 100: system reset
135
// 010: wait for target to be fetched from code cache
136
// 001: target code is inserted in fetch_buffer. Clear wait_for_target
137
 
138
reg wait_for_target; // wait_for_target indicates that an unconditional jump, call, or return
139
// is waiting for the target to be fetched from the code cache
140
 
141
reg wait_for_jump; // wait_for_jump indicates that a conditional or indirect jump or call
142
// has been dispatched and is waiting for the ALU to deliver the target address
143
 
144
 
145
// Analyze the status of fetch_buffer:
146
always_comb begin
147
 
148
    // if (restart == 0): Start address is loader address
149
    // if (restart == 1): Start address is restart address = loader address + 1
150
    reset_target = {loader_start_address >> 3, (restart | restart_underway)};
151
 
152
    // Find length and position of instruction 0
153
    if (valid_words0 > 0) begin
154
        instruction_length0 = fetch_buffer[0][31] ? fetch_buffer[0][31:30] : 2'b01; // the length of instruction 0
155
        // instruction 0 is ready if all words belonging to the instruction are fetched.
156
        instruction_ready0 = (valid_words0 >= instruction_length0) && !target_underway[0] && !wait_for_target;
157
        shift_out0 = instruction_ready0 & !stall_in & !reset & (!wait_for_jump | nojump_in);  // instruction 0 will be dispatched in this clock cycle
158
    end else begin
159
        // First instruction has not been fetched yet
160
        instruction_length0 = 0;
161
        instruction_ready0 = 0;
162
        shift_out0 = 0;
163
    end
164
 
165
    // number of words dispatched
166
    if (shift_out0)
167
        dispatch_length0 = instruction_length0;
168
    else
169
        dispatch_length0 = 0;
170
 
171
    // check if we can fill the buffer
172
    if ((target_underway[0] | early_jump | jump_in) & valid_in) begin  // overwrite buffer with new jump target
173
        send_next = 1;
174
        fetch_buffer_pos = 0;
175
    end else begin
176
        if (shift_out0) begin
177
            fetch_buffer_pos = valid_words0 - instruction_length0;
178
        end else begin
179
            fetch_buffer_pos = valid_words0;
180
        end
181
 
182
        // determine whether we will fetch the next doubleword from the code cache.
183
        // maybe this can be tweaked a little better, but make sure the fetch buffer cannot overflow in case of stalls
184
        if (next_underway[0] & valid_in & next_underway[1]) begin
185
            send_next = fetch_buffer_pos < fetch_buffer_size - 6;
186
        end else if ((next_underway[0] & valid_in) | next_underway[1]) begin
187
            send_next = fetch_buffer_pos < fetch_buffer_size - 4;
188
        end else begin
189
            send_next = fetch_buffer_pos < fetch_buffer_size - 2;
190
        end
191
    end
192
 
193
    buffer_action[0] = (next_underway[0] | target_underway[0]) & valid_in;  // fill buffer
194
    buffer_action[1] = shift_out0;  // instruction 0 dispatched. shift down buffer
195
 
196
    // predict if the next instruction, i.e. instruction 1, will be ready in next clock cycle
197
    if (target_underway[0] & valid_in) begin
198
        if (jump_target[0])
199
            valid_words1 = 1;  // jumping to an odd address. use only the upper half of read_data
200
        else
201
            valid_words1 = 2;  // jumping to even address. use 64 bits read_data
202
    end else if (wait_for_target) begin
203
        valid_words1 = 0;
204
    end else begin
205
        if (next_underway[0] & valid_in)
206
            valid_words1 = valid_words0 - dispatch_length0 + 2;
207
        else
208
            valid_words1 = valid_words0 - dispatch_length0;
209
    end
210
 
211
    // Find first word of instruction 1 for the sake of early jump detection and predecoding.
212
    //  (Here, I am shortening the critical path
213
    //   valid_words0 -> instruction_length0 -> instruction_ready0 -> shift_out0 -> dispatch_length0
214
    //   -> valid_words1 -> word1 -> instruction_length1 -> early_jump_addr -> instruction_pointer_out
215
    //   by postponing "if (valid_words1 != 0)")
216
    if (target_underway[0] && valid_in) begin  // get instruction1 from jump target
217
        if (jump_target[0]) begin
218
            word1 = read_data[63:32]; // jumping to odd address
219
        end else begin
220
            word1 = read_data[31:0];
221
        end
222
        instruction_pointer1 = jump_target;
223
        instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
224
    end else if (valid_words0 > instruction_length0) begin // instruction 1 is already in buffer
225
        word1 = fetch_buffer[instruction_length0];
226
        instruction_pointer1 = instruction_pointer_out + instruction_length0;
227
        instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
228
 
229
    end else if (valid_words0 == instruction_length0) begin // instruction 1 is going into buffer in this clock cycle
230
        word1 = read_data[31:0];
231
        instruction_pointer1 = instruction_pointer_out + instruction_length0;
232
        instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
233
 
234
    end else if (valid_words0 > 0) begin // instruction 1 is partially in buffer
235
        word1 = fetch_buffer[0];
236
        instruction_pointer1 = instruction_pointer_out;
237
        instruction_length1 = word1[31] ? word1[31:30] : 2'b01; // length of second instruction
238
 
239
    end else begin
240
        word1 = 0;
241
        instruction_pointer1 = 0; //64'HXXXXXXXXXXXXXXXX;
242
        instruction_length1  = 3; // indicate not ready
243
    end
244
 
245
 
246
    // Look for jump, call, and return instructions in instruction 1
247
    // in order to fetch target as early as possible.
248
    // This is done in the following way:
249
    // Unconditional jumps, calls, and returns are handled as early as possible in order
250
    // to fetch early from the target address and thereby save time. However,
251
    // we have to check if there is a preceding jump or call in a preceding position in
252
    // fetch_buffer before we execute a jump, call, or return in position 2.
253
    // Conditional and indirect jumps are detected when they are in position 0 in fetch_buffer,
254
    // and we have to wait for the ALU to find the target address.
255
    // Indirect calls are also detected when they are in position 0 in fetch_buffer:
256
    // the return address is pushed on the call stack while we wait for the ALU to find the target address.
257
    // The following variables tell what we have found here:
258
    // early_jump:    An unconditional jump, call, or return detected in position 1 or 2.
259
    // conditional_jump: A conditional or indirect jump or call is detected. Wait for ALU to find target
260
    // call_instruction: 1: any kind of call detected. Push return address on stack
261
    //                   2: a return or sys_return instruction detected. Pop return address from stack
262
 
263
    conditional_jump = 0;
264
    early_jump = 0;
265
    early_jump_addr = 0;
266
    call_instruction = 0;
267
    return_addr = 0;
268
 
269
    instruction_ready1 = (valid_words1 >= instruction_length1) & !reset && (!wait_for_jump | nojump_in);  // instruction 1 will be dispatched in next clock cycle
270
    //valid_out <= valid_words1 >= instruction_length1 & !reset && !early_jump & target_underway[2:1] == 0 & (!wait_for_jump | nojump_in);
271
 
272
 
273
    if (valid_words1 != 0 && word1[31:28] == instruction_jumpa) begin
274
        // Any single-word control transfer instruction is underway
275
        if ((word1[31:25] == instruction_jump_uncond) & !stall_in & (!wait_for_jump | nojump_in)) begin
276
            // unconditional jump or call instruction found in instruction 1
277
            early_jump = 1;
278
            early_jump_addr = $signed(word1[23:0]) + instruction_pointer1 + 1; // add 24-bit signed offset to address of end of instruction
279
            call_instruction = word1[24]; // 0: unconditional jump, 1: direct call
280
            return_addr = instruction_pointer1 + instruction_length1; // return address for call instruction
281
        end else if ((word1[31:21] == instruction_return || word1[31:21] == instruction_sys_return) & !stall_in & (!wait_for_jump | nojump_in)) begin
282
            // a return instruction is found in the first instruction
283
            early_jump = 1;
284
            early_jump_addr = return_pop_data;  // get return address from call stack
285
            call_instruction = 2;              // 2 means return instruction
286
            return_addr = 0;
287
        end else if ((word1[`OP1] == `IJ_JUMP_INDIRECT_MEM+1 || word1[`OP1] == `IJ_JUMP_RELATIVE+1 || word1[`OP1] == `IJ_SYSCALL) & !stall_in & (!wait_for_jump | nojump_in)) begin
288
            // an indirect call or system call instruction is found in the first instruction
289
            early_jump = 0;
290
            early_jump_addr = 0;
291
            return_addr = instruction_pointer1 + instruction_length1; // return address to push on call stack
292
            conditional_jump = 1;  // this instruction must go to the ALU
293
            if (word1[`OP1] == `IJ_TRAP && word1[`MODE] == 7) begin
294
                // Trap or breakpoint in format 1.7C (IJ_TRAP == IJ_SYSCALL)
295
                // The breakpoint instruction should not push a return address on the call stack as long
296
                // as it only activates single step mode without calling any interrupt service routine.
297
                // Note: this code must be changed if any traps or trap instructions go to an interrupt
298
                // service routine that ends with a return or a system return.
299
                // Setting call_instruction to 1 here will make the next return instruction fail if the
300
                // trap does not end with a return.
301
                call_instruction = 0;
302
            end else begin
303
                // All other indirect call and system call instructions
304
                call_instruction = 1;
305
            end
306
        end else begin
307
            // other conditional or indirect jump instruction found in instruction 1
308
            early_jump = 0;
309
            early_jump_addr = 0;
310
            call_instruction = 0;
311
            conditional_jump = 1;  // this instruction must go to the ALU
312
            return_addr = 0;
313
        end
314
 
315
    end else if (valid_words1 > 1 && word1[31:24] == instruction_jump2w) begin
316
        // any double-word jump or call instruction found in the instruction 1
317
        early_jump = 0;
318
        early_jump_addr = 0;
319
        conditional_jump = 1;           // this instruction must go to the ALU
320
        if (word1[5:0] == `IJ_JUMP_INDIRECT_MEM + 1  // indirect call
321
        ||  word1[5:0] == `IJ_JUMP_RELATIVE + 1  // call with relative pointer
322
        ||  word1[5:0] == `IJ_SYSCALL  // system call
323
        ||  word1[`OP1] == 7 // system call
324
        )   begin
325
            call_instruction = !stall_in & (!wait_for_jump | nojump_in);  // push return address on stack
326
            return_addr = instruction_pointer1 + instruction_length1;
327
        end else begin
328
            call_instruction = 0;
329
            return_addr = 0;
330
        end
331
 
332
    end else if (valid_words1 > 2 && word1[31:24] == instruction_jump3w) begin
333
        // any triple-word jump or call instruction found in first instruction
334
        early_jump = 0;
335
        early_jump_addr = 0;
336
        conditional_jump = 1;           // this instruction must go to the ALU
337
        if (word1[5:0] == `IJ_JUMP_INDIRECT_MEM+1  // 64-bit call
338
        ||  word1[5:0] == `IJ_SYSCALL  // system call
339
        ) begin
340
            call_instruction = !stall_in & (!wait_for_jump | nojump_in);  // push return address on stack
341
            return_addr = instruction_pointer1 + instruction_length1;
342
        end else begin
343
            call_instruction = 0;
344
            return_addr = 0;
345
        end
346
    end else if (valid_words1 != 0 && word1[31:21] == instruction_read_perfs && word1[`M]) begin
347
        // the serializing instruction read_perfs must flush the pipeline.
348
        // Use the conditional jump mechanism for this, and give a nojump_in when ready to resume feeding the pipeline
349
        conditional_jump = 1;           // serializing instruction read_perfs
350
    end
351
end
352
 
353
 
354
// Generate code for all possible inputs to each word in fetch_buffer.
355
// The current instruction is removed, and the rest of fetch_buffer is shifted down to make space for next 2 words of code
356
// Data from the code cache are inserted into the first vacant space of fetch_buffer
357
genvar i;
358
generate
359
    // generation loop for each word in fetch_buffer
360
    for (i = 0; i < fetch_buffer_size; i++) begin
361
        always_ff @(posedge clock) if (clock_enable) begin
362
 
363
            if (i < fetch_buffer_pos && buffer_action[1]) begin
364
                // instruction 0 is being dispatched. shift down
365
                fetch_buffer[i][31:0] <= fetch_buffer[i+instruction_length0][31:0];
366
 
367
            end else if (i == fetch_buffer_pos && buffer_action[0]) begin
368
                // load first word
369
                if (target_underway[0] & jump_target[0]) begin
370
                    // jumping to an odd address. use only upper half of read_data
371
                    fetch_buffer[i][31:0] <= read_data[63:32];
372
                end else begin
373
                    // load first word
374
                    fetch_buffer[i][31:0] <= read_data[31:0];
375
                end
376
 
377
            end else if (i == fetch_buffer_pos + 1 && buffer_action[0]) begin
378
                // load second word
379
                fetch_buffer[i][31:0] <= read_data[63:32];
380
 
381
            end
382
        end
383
    end
384
endgenerate
385
 
386
 
387
// Calculate read_addr and instruction_pointer in next clock cycle
388
// The registers named target_underway and wait_for_target indicate if we are waiting for a jump target
389
always_ff @(posedge clock) if (clock_enable) begin
390
 
391
    valid_words0 <= valid_words1;
392
    read_enable_out <= send_next;
393
 
394
    if (!stall_in) begin
395
        // send instruction to the decoder
396
        valid_out <= instruction_ready1 && !early_jump;
397
 
398
        // Unconditional jumps are bypassing the pipeline
399
        jump_out <= early_jump;
400
 
401
    end else if (instruction_ready1 && !early_jump) begin
402
 
403
        // Turn valid_out on, but not off, when there is stall_in.
404
        // This is necessary if there is a stall one instruction before a fast jump,
405
        // causing the jump bubble to be filled. Otherwise, it skips the first instruction after the jump
406
        valid_out <= 1;
407
    end
408
 
409
    jump_case <= 0;
410
 
411
    if (reset) begin
412
        // reset button pressed
413
        if (restart) restart_underway <= 1;
414
        next_underway <= 2'b00;
415
        target_underway <= 3'b100;
416
        wait_for_target <= 1;
417
        wait_for_jump <= 0;
418
        jump_target <= reset_target;
419
        read_addr_out <= reset_target >> 1;
420
        instruction_pointer_out <= reset_target;
421
        valid_words0 <= 0;
422
        read_enable_out <= 0;
423
        valid_out <= 0;
424
        jump_out <= 0;
425
 
426
    end else if (target_underway[2]) begin
427
        // first clock after reset
428
        jump_case <= 1;
429
        next_underway <= 2'b00;
430
        target_underway <= {1'b0,target_underway[2:1]}; // shift right to indicate when jump target arrives
431
        wait_for_target <= 1;  // skip all instructions until jump target arrives
432
        instruction_pointer_out <= reset_target;
433
        jump_target <= reset_target;
434
        read_addr_out <= reset_target >> 1;
435
 
436
    end else if (early_jump) begin
437
        // unconditional jump detected in instruction 1
438
        jump_case <= 2;
439
        next_underway <= 2'b00;
440
        target_underway <= 3'b010;     // wait 2 clock cycles for target
441
        read_addr_out <= early_jump_addr >> 1;
442
        jump_target <= early_jump_addr;
443
        restart_underway <= 0;
444
        if (!stall_in) begin
445
            wait_for_target <= 1;      // skip all instructions until jump target arrives
446
            wait_for_jump <= 0;
447
            instruction_pointer_out <= early_jump_addr;
448
        end
449
 
450
    end else if (conditional_jump && (instruction_ready1 & !stall_in || shift_out0)) begin
451
        // conditional jump detected in instruction 1
452
        jump_case <= 3;
453
        next_underway <= {send_next,next_underway[1]}; // shift right to indicate when data arrives
454
        target_underway <= 3'b000;  // no jump target is underway yet; the target address comes from the ALU
455
        // read address is two words ahead because reading takes 2 clock cycles
456
        if (send_next) begin
457
            read_addr_out <= read_addr_out + 1;
458
        end
459
        wait_for_jump <= 1; // wait for jump target address from ALU
460
        jump_target <= 0;
461
        wait_for_target <= 0;
462
        if (shift_out0) begin
463
            // point to next instruction
464
            instruction_pointer_out <= instruction_pointer_out + instruction_length0;
465
        end
466
        /*if (!stall_in) begin
467
            jump_target <= 0;
468
            wait_for_target <= 0;
469
        end*/
470
 
471
    end else if (target_underway[0] & valid_in) begin
472
        // a jump target has arrived from code cache. (ignore any subsequent jump instructions)
473
        restart_underway <= 0;
474
        jump_case <= 4;
475
        next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
476
        wait_for_target <= 0; // stop waiting for jump target
477
        target_underway <= 3'b000;
478
        read_addr_out <= read_addr_out + 1;
479
        if (!stall_in) begin
480
            instruction_pointer_out <= jump_target; // set address of current instruction
481
        end
482
 
483
    end else if (jump_in & wait_for_jump & valid_words1 >= instruction_length1) begin
484
        // a conditional or indirect jump instruction has been executed in ALU
485
        // the ALU has sent the target address directly to the code cache to save one clock cycle
486
        //next_underway <= 2'b00;
487
        restart_underway <= 0;
488
        jump_case <= 5;
489
        next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
490
        target_underway <= 3'b001;   // wait one clock cycle for target
491
        if (!stall_in) begin
492
            wait_for_jump <= 0;
493
            read_addr_out <= (jump_pointer >> 1) + 1;
494
            wait_for_target <= 1;
495
            jump_target <= jump_pointer;
496
            instruction_pointer_out <= jump_pointer;
497
        end
498
 
499
    end else if (nojump_in & wait_for_jump) begin
500
        // a conditional or indirect jump instruction has been executed in ALU
501
        // and the target is the next instruction
502
        //next_underway <= {send_next,next_underway[1]}; // shift right to indicate when data arrives
503
        restart_underway <= 0;
504
        jump_case <= 6;
505
        next_underway <= {send_next, next_underway[1]}; // shift right to indicate when data arrives
506
        target_underway <= 3'b000;   // no jump target to wait for: the target is the next instruction
507
        wait_for_target <= 0;
508
        wait_for_jump <= 0;
509
        if (send_next) begin
510
            read_addr_out <= read_addr_out + 1;
511
        end
512
        // if (!stall_in) begin
513
        if (shift_out0) begin
514
            instruction_pointer_out <= instruction_pointer_out + instruction_length0;
515
        end
516
 
517
    end else begin
518
        // no new jump instruction
519
        restart_underway <= 0;
520
        jump_case <= 7;
521
        next_underway <= {send_next,next_underway[1]};  // shift right to indicate when data arrives
522
        target_underway <= {1'b0,target_underway[2:1]}; // shift right to indicate when jump target arrives
523
 
524
        // make ready for next read. Least significant address bit ignored because data bus is double size
525
        // read address is two words ahead because reading takes 2 clock cycles
526
        if (send_next) begin
527
            read_addr_out <= read_addr_out + 1;
528
        end
529
        if (shift_out0) begin
530
            // point to next instruction
531
            instruction_pointer_out <= instruction_pointer_out + instruction_length0;
532
        end
533
 
534
    end
535
 
536
    // communicate with call stack as soon as a call or return instruction is detected.
537
    // checking reset and target_underway[2:1] to avoid sending the call_e_out
538
    // or return_e_out multiple times
539
    if (reset || target_underway[2:1] != 0) begin
540
        call_e_out <= 0;
541
        return_e_out <= 0;
542
        call_push_data_out <= 0;
543
    end else if (call_instruction == 1) begin
544
        call_e_out <= 1;
545
        return_e_out <= 0;
546
        call_push_data_out <= return_addr;
547
    end else if (call_instruction == 2) begin
548
        return_e_out <= 1;
549
        call_e_out <= 0;
550
        call_push_data_out <= 0;
551
    end else begin
552
        call_e_out <= 0;
553
        call_push_data_out <= 0;
554
        return_e_out <= 0;
555
    end
556
 
557
    // predict that decoder will use multiple clock cycles for push and pop instructions
558
    if (valid_words1 != 0 && word1[`IL] == 2'b01 && (word1[`MODE] == 3'b011 || (word1[`MODE] == 3'b00 && word1[`M]))
559
    && word1[`OP1] >> 1 == instruction_push_pop >> 1 && shift_out0) begin
560
        stall_predict_out <= 1;  // mode = 1.3 or 1.8, op1 = 56 or 57
561
    end else begin
562
        stall_predict_out <= 0;
563
    end
564
 
565
    // collect various signals for debugging purpose
566
    debug1_out[0]    <= early_jump;
567
    debug1_out[1]    <= conditional_jump;
568
    debug1_out[3]    <= stall_in;
569
 
570
    debug1_out[6:4]  <= valid_words1[2:0];
571
    debug1_out[7]    <= instruction_ready1;
572
 
573
    debug1_out[8]    <= buffer_action[0]; // fill buffer
574
    debug1_out[9]    <= buffer_action[1]; // shift_out0;
575
    debug1_out[11:10]<= dispatch_length0;
576
 
577
    debug1_out[15:12]<= fetch_buffer_pos;
578
 
579
    debug1_out[16]   <= send_next;
580
    debug1_out[17]   <= instruction_ready0;
581
    debug1_out[18]   <= nojump_in;
582
    debug1_out[19]   <= jump_in;
583
end
584
    // register variables are assigned to avoid an extra clock delay:
585
    assign debug1_out[21:20] = next_underway;
586
    assign debug1_out[23:22] = target_underway[1:0];
587
 
588
    assign debug1_out[27:24] = jump_case; // jump handling case
589
 
590
    assign debug1_out[28]  = wait_for_target;
591
    assign debug1_out[29]  = wait_for_jump;
592
    assign debug1_out[31]  = valid_out;
593
 
594
 
595
// output instruction, 1-3 words
596
assign instruction_out[31:0]  = fetch_buffer[0][31:0];
597
assign instruction_out[63:32] = fetch_buffer[1][31:0];
598
assign instruction_out[95:64] = fetch_buffer[2][31:0];
599
 
600
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.