OpenCores
URL https://opencores.org/ocsvn/m32632/m32632/trunk

Subversion Repositories m32632

[/] [m32632/] [trunk/] [rtl/] [DP_FPU.v] - Blame information for rev 15

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 9 ns32kum
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2
//
3
// This file is part of the M32632 project
4
// http://opencores.org/project,m32632
5
//
6
// Filename: DP_FPU.v
7
// Version:  1.0
8
// Date:     30 May 2015
9
//
10
// Copyright (C) 2015 Udo Moeller
11
// 
12
// This source file may be used and distributed without 
13
// restriction provided that this copyright statement is not 
14
// removed from the file and that any derivative work contains 
15
// the original copyright notice and the associated disclaimer.
16
// 
17
// This source file is free software; you can redistribute it 
18
// and/or modify it under the terms of the GNU Lesser General 
19
// Public License as published by the Free Software Foundation;
20
// either version 2.1 of the License, or (at your option) any 
21
// later version. 
22
// 
23
// This source is distributed in the hope that it will be 
24
// useful, but WITHOUT ANY WARRANTY; without even the implied 
25
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
26
// PURPOSE. See the GNU Lesser General Public License for more 
27
// details. 
28
// 
29
// You should have received a copy of the GNU Lesser General 
30
// Public License along with this source; if not, download it 
31
// from http://www.opencores.org/lgpl.shtml 
32
// 
33
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
34
//
35
//      Modules contained in this file:
36
//      1. PREPDATA     Prepare data for the big multiplier
37
//      2. BCDADDER     4 bit BCD adder
38
//      3. DFPU_BCD             Binary coded decimal (BCD) adder and subtractor
39
//      4. DFPU_ADDSUB  Double precision floating point adder and subtractor
40
//      5. DFPU_MISC    Double precision floating point miscellaneous operations 
41
//      6. DFPU_MUL             Double precision floating point multiplier
42
//      7. DIVI_PREP    Prepare data for the divider
43
//      8. DFPU_DIV             The divider for all divide opcodes : double, single and integer
44
//      9. DP_LOGIK             Control logic and result path for different functions
45
// 10. DP_FPU           Top level of long operations datapath
46
//
47 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
48 9 ns32kum
 
49 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
50 9 ns32kum
//
51
//      1. PREPDATA     Prepare data for the big multiplier
52
//
53 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
54 9 ns32kum
module PREPDATA ( START, MEI, DFLOAT, BWD, SRC1, SRC2,
                  MSD_1, MSD_2, LSD_1, LSD_2, LOAD_MSD, LOAD_LSD1, LOAD_LSD2 );

        // Operand preparation in front of the big multiplier.
        //   MEI    = 1 : the operands are narrowed to the integer size given by BWD
        //                and the upper parts (MSD_x) are forced to zero.
        //   MEI    = 0 : MSD_x carries a leading "1" followed by SRCx[19:0].
        // The LOAD_* outputs are decoded from START, MEI and DFLOAT.

        input    [1:0]  START;
        input           MEI,DFLOAT;
        input    [1:0]  BWD;
        input   [31:0]  SRC1,SRC2;

        output [52:32]  MSD_1,MSD_2;
        output  [31:0]  LSD_1,LSD_2;
        output          LOAD_MSD,LOAD_LSD1,LOAD_LSD2;

        // Size decode, only relevant when MEI is active
        wire            use_byte = MEI & (BWD == 2'b00);
        wire            use_word = MEI & (BWD == 2'b01);

        // Upper operand bits : masked to zero when MEI is active
        assign MSD_1 = {21{~MEI}} & {1'b1,SRC1[19:0]};
        assign MSD_2 = {21{~MEI}} & {1'b1,SRC2[19:0]};

        // Lower 32 bits : zero extended byte / word, otherwise passed through
        assign LSD_1 = use_byte ? {24'h000000,SRC1[7:0]}
                     : use_word ? {16'h0000,SRC1[15:0]}
                     :            SRC1;

        assign LSD_2 = use_byte ? {24'h000000,SRC2[7:0]}
                     : use_word ? {16'h0000,SRC2[15:0]}
                     :            SRC2;

        // Load strobes : with DFLOAT the data load is step 1 (START[0]), execute is step 2 (START[1])
        assign LOAD_MSD  = START[0] & (MEI | DFLOAT);
        assign LOAD_LSD1 = MEI ? (START[0] | (START[1] & DFLOAT)) : (START[1] & DFLOAT);
        assign LOAD_LSD2 = START[1] & (MEI | DFLOAT);

endmodule
90
 
91 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
92 9 ns32kum
//
93
//      2. BCDADDER     4 bit BCD adder
94
//
95 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
96 9 ns32kum
module BCDADDER ( A_IN, B_IN, CY_IN, SUBP, OUT, CY_OUT );

        // One BCD digit : OUT = A_IN + B_IN + CY_IN (SUBP = 0)
        //              or OUT = A_IN - B_IN - CY_IN (SUBP = 1),
        // with decimal correction. CY_OUT is the decimal carry resp. borrow.

        input    [3:0]  A_IN,B_IN;
        input           CY_IN;
        input           SUBP;

        output   [3:0]  OUT;
        output          CY_OUT;

        // 5 bit wide operands so that carry / borrow lands in bit 4
        wire     [4:0]  a_ext = {1'b0,A_IN};
        wire     [4:0]  b_ext = {1'b0,B_IN};
        wire     [4:0]  c_ext = {4'b0,CY_IN};

        wire     [4:0]  binary = SUBP ? (a_ext - b_ext - c_ext) : (a_ext + b_ext + c_ext);

        wire            borrow = binary[4];                                              // difference went negative
        wire            above9 = binary[4] | (binary[3] & (binary[2] | binary[1]));      // sum is 10 or more

        // Correction value : 6 after a borrow, 10 after a decimal overflow, else 0
        wire     [3:0]  fixup = SUBP ? (borrow ? 4'd6  : 4'd0)
                                     : (above9 ? 4'd10 : 4'd0);

        assign OUT    = binary[3:0] - fixup;
        assign CY_OUT = SUBP ? borrow : above9;

endmodule
118
 
119 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
120 9 ns32kum
//
121
//      3. DFPU_BCD             Binary coded decimal (BCD) adder and subtractor
122
//
123 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
124
module DFPU_BCD ( BCLK, BRESET, START, DO_BCD, BWD, SRC1, SRC2, CY_IN, SUBP, BCD_Q, CY_OUT, BCD_DONE );

        // Packed BCD adder / subtractor working on one byte (two digits) per clock.
        // Byte : 3 cycles in shortest case REG-REG, Word : 4 cycles and Double : 6 cycles
        input                   BCLK;
        input                   BRESET;
        input                   START;          // START[1]
        input                   DO_BCD;         // BCD Opcode is valid
        input    [1:0]          BWD;
        input   [31:0]          SRC1,SRC2;      // Source , Destination, data is stable during operation
        input                   CY_IN;          // comes from PSR
        input                   SUBP;           // SUBP = 1 : SUBP , 0 : ADDP

        output  reg     [31:0]  BCD_Q;
        output  reg             CY_OUT;         // goes to PSR if DONE is valid
        output                  BCD_DONE;

        reg                     active;         // operation in progress
        reg              [1:0]  idx;            // byte counter
        reg              [7:0]  src_byte;       // currently selected byte of SRC1
        reg              [7:0]  dst_byte;       // currently selected byte of SRC2

        wire     [7:0]          digits;         // two corrected BCD digits
        wire                    cy_first,cy_low,cy_high;

        // START    :  _/---\________________
        // idx      :  xxxxxx 0 x 1 x 2 x 3 x
        // BCD_DONE :  _____/---\____________  if BWD = Byte

        always @(posedge BCLK or negedge BRESET)
                if (!BRESET) active <= 1'b0;
                  else
                        active <= (START & DO_BCD) | (active & (BWD != idx));

        always @(posedge BCLK)
                if (START) idx <= 2'd0;
                  else     idx <= idx + {1'b0,active};

        // Byte selection : byte 0 together with START, the others follow the counter
        always @(*)
                casex ({START,idx})
                  3'b1_xx : begin src_byte = SRC1[7:0];   dst_byte = SRC2[7:0];   end
                  3'b0_00 : begin src_byte = SRC1[15:8];  dst_byte = SRC2[15:8];  end
                  3'b0_01 : begin src_byte = SRC1[23:16]; dst_byte = SRC2[23:16]; end
                  3'b0_1x : begin src_byte = SRC1[31:24]; dst_byte = SRC2[31:24]; end
                endcase

        // The first byte takes the carry from outside, later bytes the stored carry
        assign cy_first = START ? CY_IN : CY_OUT;

        // Operand A is the SRC2 digit, operand B the SRC1 digit
        BCDADDER        lsd_inst ( .A_IN(dst_byte[3:0]), .B_IN(src_byte[3:0]), .CY_IN(cy_first), .SUBP(SUBP),
                                   .OUT(digits[3:0]), .CY_OUT(cy_low) );

        BCDADDER        msd_inst ( .A_IN(dst_byte[7:4]), .B_IN(src_byte[7:4]), .CY_IN(cy_low), .SUBP(SUBP),
                                   .OUT(digits[7:4]), .CY_OUT(cy_high) );

        // Carry and result registers; each result byte has its own enable
        always @(posedge BCLK)
          begin
                CY_OUT <= cy_high;
                if (START)        BCD_Q[7:0]   <= digits;
                if (idx == 2'd0)  BCD_Q[15:8]  <= digits;
                if (idx == 2'd1)  BCD_Q[23:16] <= digits;
                if (idx[1])       BCD_Q[31:24] <= digits;
          end

        assign BCD_DONE = active & (BWD == idx);

endmodule
184
 
185 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
186 9 ns32kum
//
187
//      4. DFPU_ADDSUB  Double precision floating point adder and subtractor
188
//
189 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
190
module DFPU_ADDSUB ( BCLK, START, SRC1, SRC2, MAN1, MAN2, SRCFLAGS, BWD, SELECT, OUT, IOUT, CMPRES );

        // Double precision adder / subtractor. The same datapath also serves
        // MOViL (integer -> double), ROUNDLi/TRUNCLi/FLOORLi (double -> integer) and CMP.
        //
        // SELECT[1:0] == 2'b11 selects the ROUNDLi/TRUNCLi/FLOORLi group,
        // SELECT[3:2] then chooses between the three (see the car_ry logic below).
        // OUT layout : {nan, zero, sign, exponent[12:0], mantissa[51:0], 2 rest bits}

        input                   BCLK;
        input    [1:0]          START;
        input   [31:0]          SRC1,SRC2;      // The input data
        input   [20:0]          MAN1,MAN2;
        input    [5:0]          SRCFLAGS;       // NAN, ZERO and SIGN of operands
        input    [1:0]          BWD;            // size of integer
        input    [3:0]          SELECT;         // upper 2 bits : R.T.F. code

        output  [69:0]          OUT;
        output  [31:0]          IOUT;           // result of ROUNDLi/TRUNCLi/FLOORLi = R.T.F.
        output   [1:0]          CMPRES;

        reg             [69:0]  outreg;
        reg             [31:0]  IOUT;

        // MOViL   : 2 cycles
        // ROUNDLi : 3 cycles (+TRUNC & FLOOR)
        // ADD/SUB : 4 cycles
        // CMP     : 2 cycles

        // ++++++++++++++++++++++++++++++++++
        // MOViL : 1. Pipeline stage : needs 3 cycles

        reg     [31:8]          movdat;
        reg     [31:0]          movif;
        reg                     sign_movif;

        // Sign extension of the integer operand according to its size
        always @(BWD or SRC1)
                casex({BWD,SRC1[15],SRC1[7]})
                  4'b00x0 : movdat =  24'h0000_00;                      // Byte
                  4'b00x1 : movdat =  24'hFFFF_FF;
                  4'b010x : movdat = {16'h0000,SRC1[15:8]};             // Word
                  4'b011x : movdat = {16'hFFFF,SRC1[15:8]};
                default   : movdat = SRC1[31:8];                        // Double
                endcase

        // This pipeline stage is for better timing : absolute value of the integer
        always @(posedge BCLK) movif <= movdat[31] ? (32'h0 - {movdat,SRC1[7:0]}) : {movdat,SRC1[7:0]};  // -2^31 is kept !

        always @(posedge BCLK) sign_movif <= movdat[31];

        // ROUNDLi/TRUNCLi/FLOORLi : 1. pipeline stage : can the Opcode-Decoder deliver the 64 bit operand directly ?
        // From register "yes"

        reg                     ovflag,ovflag2;
        reg                     rovfl;
        reg                     minint;
        wire    [11:0]          rexdiff,rexo;
        wire                    ganzklein;      // Flag for 0 ("very small")

        assign rexdiff = 12'h41D - {1'b0,SRC1[30:20]};  // 4..0 is the right shift value : like Single FP same value space

        // ovflag2 at the end of rounding : Check for Overflow
        always @(posedge BCLK) rovfl <= (ovflag | ovflag2) & (SELECT[1:0] == 2'b11) & ~minint;

        // a large positive difference is a very small number :
        assign ganzklein = (~rexdiff[11] & (rexdiff[10:5] != 6'b0));    // 0 is implicit via SRC1[30:20]=0

        // Detection of Overflow
        assign rexo = ({1'b0,SRC1[30:20]} - {11'h1FF,~BWD[1]}); // subtract B/W = 3FF , D = 3FE

        always @(BWD or rexo)   // 0 is included implicitly
                casex (BWD)
                  2'b00 : ovflag = (~rexo[11] & (rexo[10:3] != 8'h0));  // Exponent 0..7 because -128.4 => -128
                  2'b01 : ovflag = (~rexo[11] & (rexo[10:4] != 7'h0));  // Exponent 0..15 look above
                default : ovflag = (~rexo[11] & (rexo[10:5] != 6'h0));  // but Exponent only 0..30
                endcase

        always @(posedge BCLK)
                if (START[1]) minint <= (SRC1 == 32'hC1E0_0000) & (SRC2 == 32'h0) & BWD[1];     // detection of -2^31

        // ++++++++++++++++++++++++++++++++++++
        // ADD/SUB : 1. Pipeline Stage : which operand is bigger ? Exchange if necessary
        // SUB/CMP : SRC2 - SRC1

        reg                     ex_null,ma_null,ex_msb,ma_msb;
        reg             [10:0]  expo1,expo2;
        wire            [11:0]  exdiff,exdiff12;
        wire            [20:0]  madiff;
        wire                    switch,nan,sign,sign1,sign2;
        reg              [5:0]  shift1,shift2;

                // Pipeline register :
        reg             [63:0]  muxsrc2;
        reg             [55:3]  pipe1;  // Numbers for right shifter
        reg              [5:0]  shift;
        reg                     vorz,addflag;   // vorz = sign of the result

        wire            [52:0]  muxsrc1;
        wire            [32:0]  lowdiff;

        assign nan = (SELECT[1:0] == 2'b11) ? SRCFLAGS[1] : (~SELECT[1] & (SRCFLAGS[3] | SRCFLAGS[1]));  // used at the end

        assign exdiff   = {1'b0,SRC2[30:20]} - {1'b0,SRC1[30:20]};      // Difference of Exponents
        assign madiff   = {1'b0,SRC2[19:0]}  - {1'b0,SRC1[19:0]};       // Difference of Mantissa
        assign exdiff12 = {1'b0,SRC1[30:20]} - {1'b0,SRC2[30:20]};      // Diff. Exponents exchanged

        always @(posedge BCLK)
                if (START[0])
                  begin
                        ex_null <= (exdiff[10:0] == 11'h0);
                        ma_null <= (madiff[19:0] == 20'h0);
                        ex_msb  <= exdiff[11];
                        ma_msb  <= madiff[20];
                        shift1  <= (exdiff[10:6]   != 5'h0) ? 6'h3F : exdiff[5:0];      // saturate the shift amount
                        shift2  <= (exdiff12[10:6] != 5'h0) ? 6'h3F : exdiff12[5:0];
                        expo1   <= SRC1[30:20];
                        expo2   <= SRC2[30:20];
                  end

        assign lowdiff = {1'b0,SRC2} - {1'b0,SRC1};     // LSD compare

        assign switch = ex_msb | (ex_null & (ma_msb | (ma_null & lowdiff[32])));        // exchange ?

        assign muxsrc1 = switch ? {MAN2,SRC2} : {MAN1,SRC1};

        always @(posedge BCLK)  // Pipeline Reg
          begin
                muxsrc2 <= switch  ? {expo1,MAN1,SRC1}   : {expo2,MAN2,SRC2};   // Incl. Exponent & "1" of mantissa
                pipe1 <= SELECT[1] ? (ganzklein ? 53'd0  : {1'b1,SRC1[19:0],SRC2}) : muxsrc1;    // Feeding of R.T.F.
                shift <= SELECT[1] ? {1'b0,rexdiff[4:0]} : (switch ? shift2 : shift1);
          end

        //      SRC2   SRC1     : switch = 0            SRC2   SRC1 : switch = 1
        //        5  +   3  : +(5 + 3) =  8               3  +   5  : +(5 + 3) =  8             SELECT[0] = 0
        //        5  + (-3) : +(5 - 3) =  2               3  + (-5) : -(5 - 3) = -2
        //      (-5) +   3  : -(5 - 3) = -2             (-3) +   5  : +(5 - 3) =  2
        //      (-5) + (-3) : -(5 + 3) = -8             (-3) + (-5) : -(5 + 3) = -8
        //        5  -   3  : +(5 - 3) =  2               3  -   5  : -(5 - 3) = -2             SELECT[0] = 1
        //        5  - (-3) : +(5 + 3) =  8               3  - (-5) : +(5 + 3) =  8
        //      (-5) -   3  : -(5 + 3) = -8             (-3) -   5  : -(5 + 3) = -8
        //      (-5) - (-3) : -(5 - 3) = -2             (-3) - (-5) : +(5 - 3) =  2

        assign sign1 = SRCFLAGS[4];
        assign sign2 = SRCFLAGS[5];

        always @(posedge BCLK)  // Pipeline Reg
          begin
                vorz    <= switch ? (SELECT[0] ^ sign1) : sign2;
                addflag <= ~(SELECT[0] ^ (sign1 ^ sign2));
          end

        // CMPF : 1. Pipeline Stage : first result : is stored one level higher in Reg

        assign CMPRES[1] = ~CMPRES[0] & (switch ? ~sign1 : sign2);       // look table above
        assign CMPRES[0] = (ex_null & ma_null & (sign1 == sign2) & (lowdiff == 33'h0)) | (SRCFLAGS[2] & SRCFLAGS[0]);

        // ++++++++++++++++++++++++++++++++++
        // ADD/SUB + ROUND/TRUNC : 2. Step : Barrelshifter to the right -->

        wire [55:0] brshifta,brshiftb,brshiftc,brshiftd,brshifte,brshiftf;

        // 5..33322222222221111111111   is this picture still correct ? Took over from Single FP
        // 5..2109876543210987654321098765432-10
        // 1..VVVVVVVVVVVVVVVVVVVVVVVV0000000-00        // last 2 bit for rounding

        // Each stage shifts by a power of two and ORs the shifted-out bits into the LSB (sticky bit)
        assign brshifta = shift[5] ? {32'h0,   pipe1[55:33],   (pipe1[32:3] != 30'h0)} : {pipe1,3'h0};
        assign brshiftb = shift[4] ? {16'h0,brshifta[55:17],(brshifta[16:0] != 17'h0)} : brshifta;
        assign brshiftc = shift[3] ? { 8'h0, brshiftb[55:9], (brshiftb[8:0] !=  9'h0)} : brshiftb;
        assign brshiftd = shift[2] ? { 4'h0, brshiftc[55:5], (brshiftc[4:0] !=  5'h0)} : brshiftc;
        assign brshifte = shift[1] ? { 2'h0, brshiftd[55:3], (brshiftd[2:0] !=  3'h0)} : brshiftd;
        assign brshiftf = shift[0] ? { 1'b0, brshifte[55:2], (brshifte[1:0] !=  2'h0)} : brshifte;

        // ++++++++++++++++++++++++++++++++++
        // ROUNDLi/TRUNCLi/FLOORLi : 3. Step : round to Integer

        reg             car_ry;
        wire  [1:0]     inex;
        wire [30:0]     compl;
        wire [31:0]     iadder;
        wire            restbits;

        assign restbits = (brshiftf[23:0] != 24'h0);
        assign inex     = {brshiftf[24],restbits};              // Inexact-Flag-Data transferred to multiplexer at the end

        always @(SELECT or sign1 or brshiftf or restbits or inex or ganzklein)
                casex (SELECT[3:2])
                    2'b00 : car_ry = sign1 ^ (((brshiftf[25:24] == 2'b11) & ~restbits) | (inex == 2'b11));      // ROUNDLi
                    2'b1x : car_ry = sign1 ? (~ganzklein & (inex == 2'b00)) : 1'b0;     // +numbers like TRUNCLi, -numbers round to "-infinity"
                  default : car_ry = sign1;     // TRUNCLi , simple cut off
                endcase

        // Two's complement of a negative value : invert here, the "+1" is part of car_ry
        assign compl  = sign1 ? ~brshiftf[55:25] : brshiftf[55:25];

        assign iadder = {sign1,compl} + {31'h0,car_ry};

        always @(posedge BCLK) IOUT <= minint ? 32'h8000_0000 : iadder;

        always @(iadder or BWD or sign1)        // special overflow detection i.e. -129 to -255 at Byte
                casex (BWD)                     // or 127.9 -> 128 = error !
                  2'b00 : ovflag2 = (iadder[8]  != iadder[7]);  // Byte
                  2'b01 : ovflag2 = (iadder[16] != iadder[15]); // Word
                default : ovflag2 = 1'b0;
                endcase

        // ++++++++++++++++++++++++++++++++++
        // ADD/SUB : 3. Step : Addition or Subtraction

        wire    [67:0]  result;
        wire    [55:0]  blshifti;
        wire    [12:0]  shiftl;
        wire            shift_32;
        wire    [65:0]  add_q;

        // The central adder : the subtraction needs 3 Guard-Bits after LSB for correct rounding
        assign result = {1'b0,muxsrc2,3'b000} + (addflag ? {12'h0,brshiftf} : {12'hFFF,~brshiftf}) + {67'd0,~addflag};

        assign blshifti = SELECT[1] ? {movif,24'h0} : result[55:0];      // Feeding of MOViL, comes from Register

        assign shiftl = SELECT[1] ? 13'h041E : {1'b0,result[67:56]};    // MOViL

        assign shift_32 = (blshifti[55:24] == 32'h0);

        // In case of ADD the result bypasses the barrelshifter : LSB of exponent has changed
        assign add_q = (muxsrc2[53] != result[56]) ? {result[67:3],(result[2:0] != 3'b000)}
                                                   : {result[67:56],result[54:2],(result[1:0] != 2'b00)} ;

        // ++++++++++++++++++++++++++++++++++
        // ADD/SUB : 4. Step : Barrelshifter left for SUB and MOViF :

        wire            shift_16,shift_8,shift_4,shift_2,shift_1,zero;
        wire  [1:0]     lsb_bl;
        wire [55:0]     blshifta,blshiftb,blshiftc,blshiftd,blshifte,blshiftf;
        wire [12:0]     expol;

        // Normalisation : every stage shifts left when its upper bits are all zero
        assign blshifta = shift_32 ? {blshifti[23:0],32'h0} : blshifti;
        assign shift_16 = (blshifta[55:40] == 16'h0);
        assign blshiftb = shift_16 ? {blshifta[39:0],16'h0}      : blshifta;
        assign shift_8  = (blshiftb[55:48] == 8'h00);
        assign blshiftc = shift_8  ? {blshiftb[47:0],8'h0}       : blshiftb;
        assign shift_4  = (blshiftc[55:52] == 4'h0);
        assign blshiftd = shift_4  ? {blshiftc[51:0],4'h0}       : blshiftc;
        assign shift_2  = (blshiftd[55:54] == 2'b00);
        assign blshifte = shift_2  ? {blshiftd[53:0],2'b0}       : blshiftd;
        assign shift_1  = ~blshifte[55];
        assign blshiftf = shift_1  ? {blshifte[54:0],1'b0}       : blshifte;

        // Overflow at ROUNDLi/TRUNCLi/FLOORLi is shown in overflow of exponent , SELECT[1] is then 1
        assign expol = shiftl - {7'h00,shift_32,shift_16,shift_8,shift_4,shift_2,shift_1};

        // Inexact at ROUNDLi/TRUNCLi/FLOORLi : evaluation for all one level higher.
        // Only SELECT[1:0] is compared : a test of the full 4 bit SELECT against 2'b11 is zero
        // extended to 4'b0011 and would match ROUNDLi only, so that TRUNCLi and FLOORLi would
        // take their rest bits from the MOViL shifter path instead of "inex".
        assign lsb_bl = (SELECT[1:0] == 2'b11) ? inex : {blshiftf[2],(blshiftf[1:0] != 2'b0)};

        assign zero =  (~SELECT[1] & SRCFLAGS[2] & SRCFLAGS[0])
                     | ((blshifti == 56'h0) & ((~addflag & ~SELECT[1]) | (SELECT[1:0] == 2'b10)));

        assign sign = SELECT[1] ? sign_movif : (vorz & ~zero);  // sign for MOViL

        // 2. Pipeline register for ADD , SUB and MOViL
        always @(posedge BCLK)
                outreg <= (addflag & ~SELECT[1]) ? {nan,zero,sign,1'b0,add_q}
                                                 : {nan,zero,sign,expol,blshiftf[54:3],lsb_bl};

        // ++++++++++++++++++++++++++++++++++

        // A detected R.T.F. overflow replaces the two upper exponent bits by 2'b01
        assign OUT = {outreg[69:67],(rovfl ? 2'b01 : outreg[66:65]),outreg[64:0]};

endmodule
449
 
450 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
451 9 ns32kum
//
452
//      5. DFPU_MISC    Double precision floating point miscellaneous operations 
453
//
454 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
455 9 ns32kum
module DFPU_MISC ( BCLK, START, SRC1, SRC2, SRCFLAG, MIMUX, MODE, OUT );

        // Miscellaneous long operations : MOVFL, MOVLF, LOGBf and SCALBf.
        //   MODE[3]   = 0 : format conversion, MODE[0] = 1 : MOVFL , 0 : MOVLF
        //   MODE[3:2] = 11 : LOGB , 10 : SCALB ; MODE[1] = 1 picks the single precision fields
        // OUT layout : {nan, zero, sign, exponent[12:0], mantissa[51:0], 2 rest bits}

        input                   BCLK;
        input                   START;
        input   [31:0]          SRC1,SRC2;
        input                   SRCFLAG;
        input                   MIMUX;
        input    [3:0]          MODE;
        output  [69:0]          OUT;

        reg             [69:0]  OUT;
        reg             [63:0]  opnd;           // operand register : {upper word, SRC2}

        wire            [10:0]  scale_long;
        wire             [7:0]  scale_single;

        // Operand load : the upper word is either SRC1 or the lower word with its exponent
        // field replaced by the SCALB sum
        always @(posedge BCLK)
                if (START)
                  begin
                        opnd[63:32] <= MIMUX ? {opnd[31],scale_long,opnd[19:0]} : SRC1;
                        opnd[31:0]  <= SRC2;
                  end

        wire     [7:0]  exp_s = opnd[62:55];    // exponent field, single format
        wire    [10:0]  exp_l = opnd[62:52];    // exponent field, double format

        // +++++++++++++++++++++++++++  MOVFL / MOVLF  +++++++++++++++++++++++++++++

        wire            src_nan  = MODE[0] ? (exp_s == 8'hFF) : (exp_l == 11'h7FF);
        wire            src_zero = MODE[0] ? (exp_s == 8'h00) : (exp_l == 11'h000);
        wire            src_sign = opnd[63] & ~src_zero;

        wire    [12:0]  widen_exp = {5'b0,exp_s} + 13'h0380;    // -7F + 3FF

        reg     [12:0]  narrow_exp;

        always @(*)
                if (exp_l > 11'h47E)            narrow_exp = 13'h0FFF;  // above the single range
                  else
                    if (exp_l < 11'h381)        narrow_exp = 13'h0;     // below the single range
                      else                      narrow_exp = {2'b0,{4{opnd[62]}},opnd[58:52]};

        wire    [69:0]  conv_res = MODE[0]
                        ? {src_nan,src_zero,src_sign,widen_exp, opnd[54:32],31'h0}                                       // MOVFL
                        : {src_nan,src_zero,src_sign,narrow_exp,opnd[51:29],28'h0,opnd[29:28],(opnd[27:0] != 28'h0)};   // MOVLF

        // +++++++++++++++++++++++++++  LOGBf  +++++++++++++++++++++++++++++++++++

        // exponent equal to the bias : the result carries only the zero flag
        wire            logb_is_zero = MODE[1] ? (exp_s == 8'h7F) : (exp_l == 11'h3FF);

        wire     [9:0]  exp_low   = MODE[1] ? {{3{~opnd[62]}},opnd[61:55]} : opnd[61:52];
        wire     [9:0]  magnitude = opnd[62] ? (exp_low + 10'h001) : ~exp_low;

        // detection of leading "1"
        wire            skip8 = (magnitude[9:2] == 8'h00);
        wire     [9:0]  norm8 = skip8 ? {magnitude[1:0],8'h00} : magnitude;
        wire            skip4 = (norm8[9:6] == 4'h0);
        wire     [9:0]  norm4 = skip4 ? {norm8[5:0],4'h0}      : norm8;
        wire            skip2 = (norm4[9:8] == 2'b00);
        wire     [9:0]  norm2 = skip2 ? {norm4[7:0],2'b0}      : norm4;
        wire            skip1 = ~norm2[9];
        wire     [8:0]  frac  = skip1 ? {norm2[7:0],1'b0}      : norm2[8:0];    // top bit is hidden "1"

        wire     [4:0]  pos = 5'h08 - {1'b0,skip8,skip4,skip2,skip1};

        // upper exponent bits, formed differently for F and L
        wire     [6:0]  exp_hi = MODE[1] ? {{4{~pos[4]}},{3{pos[4]}}} : {~pos[4],{6{pos[4]}}};

        wire    [69:0]  logb_res = logb_is_zero ? {2'b01,68'h0}
                                                : {2'b00,~opnd[62],2'b00,exp_hi,pos[3:0],frac,45'h0};

        // ++++++++++++++++++++++++  SCALBf  ++++++++++++++++++++++++++++++++++

        assign scale_single = SRCFLAG ?  8'h00  : (opnd[39:32] + opnd[30:23]);
        assign scale_long   = SRCFLAG ? 11'h000 : (opnd[42:32] + opnd[30:20]);

        wire    [69:0]  scalb_res = MODE[1]
                        ? {2'b00,opnd[31],5'b0,scale_single,opnd[22:0],opnd[28:1],3'b000}       // no rounding of Single Data
                        : {2'b00,opnd[63],2'b0,opnd[62:0],2'b00};

        // ++++++++++++++++++++++++  Output  ++++++++++++++++++++++++++++++++++

        always @(posedge BCLK)
                if (!MODE[3])           OUT <= conv_res;        // MOVLF/MOVFL
                  else
                    if (MODE[2])        OUT <= logb_res;        // LOGB
                      else              OUT <= scalb_res;       // SCALB

endmodule
534
 
535 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
536 9 ns32kum
//
537
//      6. DFPU_MUL             Double precision floating point multiplier
538
//
539 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
540 9 ns32kum
module DFPU_MUL ( BCLK, SRC1, SRC2, START, MRESULT, SRCFLAGS, OUT );

        // Result stage of the double precision multiplier : takes the 106 bit product
        // MRESULT, normalises by one position depending on MRESULT[105] and assembles
        // OUT = {nan, zero, sign, exponent[12:0], mantissa[51:0], 2 rest bits}.

        input                   BCLK;
        input   [31:0]          SRC1,SRC2;
        input                   START;          // that is START[0]
        input  [105:0]          MRESULT;
        input    [5:0]          SRCFLAGS;       // NAN and ZERO flags
        output  [69:0]          OUT;            // The result

        reg             [69:0]  OUT;
        reg             [12:0]  exp_sum;        // sum of both biased exponents

        wire            any_zero = SRCFLAGS[2] | SRCFLAGS[0];   // one is NULL -> NULL is the result
        wire            any_nan  = SRCFLAGS[3] | SRCFLAGS[1];   // one is NAN -> error
        wire            res_sign = (SRCFLAGS[5] ^ SRCFLAGS[4]) & ~any_zero;

        wire            top_set  = MRESULT[105];                // product occupies the top bit
        wire            sticky   = |MRESULT[50:0];

        always @(posedge BCLK)
                if (START) exp_sum <= {2'b00,SRC1[30:20]} + {2'b00,SRC2[30:20]};

        // Remove the bias once; one less is removed when the product has its top bit set
        wire    [12:0]  exp_res = exp_sum - (top_set ? 13'h03FE : 13'h03FF);

        // 52 Bit Mantissa plus two rest bits, taken one position higher when top_set
        wire    [51:0]  frac = top_set ? MRESULT[104:53] : MRESULT[103:52];
        wire     [1:0]  rest = top_set ? {MRESULT[52],(MRESULT[51] | sticky)}
                                       : {MRESULT[51],sticky};

        always @(posedge BCLK)
                OUT <= {any_nan,any_zero,res_sign,exp_res,frac,rest};

endmodule
575
 
576 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
577 9 ns32kum
//
578
//      7. DIVI_PREP    Prepare data for the divider
579
//
580 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
581 9 ns32kum
module DIVI_PREP (SRC, BWD, NOT_DEI, EXTDATA, DOUT, MSB, NULL, MINUS);

        // Operand preparation for the divider : extends SRC to 32 bit, delivers its
        // absolute value (DOUT) with the sign (MINUS), the position of the most
        // significant set bit (MSB) and a flag for DOUT == 0 (NULL).

        input   [31:0]          SRC;
        input    [1:0]          BWD;
        input                   NOT_DEI;
        input                   EXTDATA;

        output  [31:0]          DOUT;
        output   [4:0]          MSB;
        output                  NULL;
        output                  MINUS;

        reg             [31:0]  widened;

        // Without NOT_DEI and EXTDATA the operand is taken one size larger
        wire     [1:0]  size_sel = (NOT_DEI | EXTDATA) ? BWD : {(|BWD),1'b1};

        // Sign extension only if NOT_DEI is set, zero extension otherwise
        always @(*)
                if (size_sel[1])        widened = SRC;
                  else
                    if (size_sel[0])    widened = {{16{SRC[15] & NOT_DEI}},SRC[15:0]};
                      else              widened = {{24{SRC[7]  & NOT_DEI}},SRC[7:0]};

        assign MINUS = widened[31] & NOT_DEI;

        assign DOUT = MINUS ? (32'd0 - widened) : widened;      // absolute value

        // now find most significant set bit : every step keeps the half that holds it

        wire            in_hi16 = |DOUT[31:16];
        wire    [15:0]  part16  = in_hi16 ? DOUT[31:16]  : DOUT[15:0];
        wire            in_hi8  = |part16[15:8];
        wire     [7:0]  part8   = in_hi8  ? part16[15:8] : part16[7:0];
        wire            in_hi4  = |part8[7:4];
        wire     [3:0]  part4   = in_hi4  ? part8[7:4]   : part8[3:0];
        wire            in_hi2  = |part4[3:2];
        wire     [1:0]  part2   = in_hi2  ? part4[3:2]   : part4[1:0];

        assign NULL = ~(|part2);

        assign MSB  = {in_hi16,in_hi8,in_hi4,in_hi2,part2[1]};

endmodule
631
 
632 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
633 9 ns32kum
//
634
//      8. DFPU_DIV             The divider for all divide opcodes : double, single and integer
635
//
636 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
637
// DFPU_DIV : one shared divider for floating point division and all integer divide opcodes.
//
// The core produces two quotient bits per clock : the divisor (1x), 2x the divisor and 3x the
// divisor are subtracted from the partial remainder in parallel and the largest one that does
// not go negative is taken.
//
// Ports (START and OPCODE encodings are taken from the instantiation in DP_LOGIK) :
//   START[3]  go_divi : start of an integer division
//   START[2]  go_divf : start of a floating point division
//   START[1:0]          the START bits of DP_LOGIK
//   SRC1/MAN1 divisor  , SRC2/MAN2 dividend
//   SRCFLAGS  [5:4] sign bits , [3:0] NaN and Zero flags of the operands
//   FL        1 = single precision , 0 = double precision
//   BWD       integer operand size
//   OPCODE    [2] : 0 = DEI , [1:0] : 00 = QUO , 01 = REM , 10 = MOD , 11 = DIV
//   OUT       floating point result in the 70 bit internal format
//   DONE      result available, for float and for integer
//   DIVI_OUT  integer result
//   DVZ_TRAP  integer divide by zero
//   DEI_OVF   [0] quotient of DEI does not fit , [1] timing pulse
module DFPU_DIV ( BCLK, BRST, START, SRC1, SRC2, MAN1, MAN2, SRCFLAGS, FL, BWD, OPCODE, OUT, DONE, DIVI_OUT, DVZ_TRAP, DEI_OVF );
638 9 ns32kum
 
639
        // This version needs for Double 28+1 cycles if MAN1<MAN2 otherwise 28+2.
640
        // For Single it needs 13+1 cycles or 13+2.
641
 
642
        input                   BCLK,BRST;
643
        input    [3:0]   START;          // START & recognized Divider Operation
644
        input   [31:0]   SRC1,SRC2;      // input data
645
        input   [20:0]   MAN1,MAN2;
646
        input    [5:0]   SRCFLAGS;       // NAN and ZERO
647
        input                   FL;
648
        input    [1:0]   BWD;
649
        input    [2:0]   OPCODE;         // for all DIVi variants
650
 
651
        output  [69:0]   OUT;            // the result
652
        output  reg             DONE;           // Pipeline-Flag
653
        output  [63:0]   DIVI_OUT;       // for Integer Division
654
        output                  DVZ_TRAP;       // Divide by Zero Trap
655
        output   [1:0]   DEI_OVF;        // DEI Overflow
656
 
657
        // ++++++++++++++  for Integer Division  ++++++++++++++
658
        reg                             run_divi;
659
        reg                             divi_pipe1,divi_pipe2,divi_pipe3,divi_pipe4;
660
        reg                             neg_src1,neg_src2,nul_src2;
661
        reg              [4:0]   msb_src1;
662
        reg              [5:0]   msb_src2;
663
        reg             [31:0]   ivalue,src2_reg,pipe_reg;
664
        reg              [4:0]   divi_counter;
665
        reg                             sub_case;
666
        reg                             negativ;
667
        reg             [32:0]   divi_result;
668
        reg             [63:0]   DIVI_OUT;
669
        reg                             DVZ_TRAP,dvz_pipe;
670
        reg                             sel_in;
671
        reg             [62:0]   din_mux;
672
        reg                             dei_pipe;
673
        reg                             extdata;        // extended data : 2 data packets, only apply to DEI
674
        reg              [2:0]   addoff;
675
        reg                             next_msb2;
676
        reg             [31:0]   dei_result;
677
        reg              [1:0]   DEI_OVF;
678
 
679
        wire    [31:0]   i_in;
680
        wire    [37:0]   i_out;          // from DIVI_PREP : [37] = MINUS , [36:32] = MSB , [31:0] = DOUT
681
        wire     [6:0]   diff_msb;
682
        wire     [5:1]  shift_r;
683
        wire    [62:0]   shift_2;
684
        wire    [62:0]   shift_4;
685
        wire    [62:0]   shift_8;
686
        wire    [62:0]   shift_16;
687
        wire    [64:0]   shift_32;
688
        wire                    stop_divi,neg_flag;
689
        wire                    rest_null,plus_1,ist_null;
690
        wire                    not_dei;
691
        wire                    valdata;        // Data <> 0 at DEI
692
 
693
        // ++++++++++++++  Floating Point & calculation path  ++++++++
694
        reg             [69:0]   OUT;
695
        reg             [32:0]   save1;          // the divisor (1x)
696
        reg                             runflag;
697
        reg             [55:0]   dreimal;        // "three times" : 3x the divisor
698
        reg             [56:0]   divreg,divsr;   // divreg = partial remainder , divsr = quotient shift register
699
        reg             [31:0]   divreg_ext;
700
        reg             [12:0]   exponent;
701
 
702
        wire                    load_src1,load_src2;
703
        wire    [56:0]   sub1,sub2,sub3;
704
        wire    [32:0]   src_1;
705
        wire    [20:0]   man_1;
706
        wire    [12:0]   expoh,expol,offset;
707
        wire                    restlsb,restlow,resthigh;
708
        wire                    zero,nan,sign,ende;
709
        wire                    orlow_s,orlow_d;
710
        wire                    short;
711
 
712
        // +++++++++++++++++++++++++++  Integer Division, DEI  +++++++++++++++++++++++++++
713
 
714
        assign not_dei = OPCODE[2];     // 0 = DEI
715
        always @(posedge BCLK) if (START[3]) extdata <= ~START[1];      // during START[0] for SRC1 not valid
716
 
        // run_divi is set by a start with a non-zero first operand and held until divi_pipe4
717
        always @(posedge BCLK or negedge BRST)
718
                if (!BRST) run_divi <= 1'b0;
719
                        else
720
                                run_divi <= (START[3] & ~ist_null) | (~divi_pipe4 & run_divi);  // Abort at DVZ Trap
721
 
        // Start pipeline : with extdata the extra stage dei_pipe delays divi_pipe2 by one more cycle
722
        always @(posedge BCLK) divi_pipe1 <= START[3] & ~ist_null;      // no start if SRC1 = 0 : DVZ Trap
723
        always @(posedge BCLK) dei_pipe   <= divi_pipe1 & extdata;
724
        always @(posedge BCLK) divi_pipe2 <= extdata ? dei_pipe : divi_pipe1;
725
 
726
        always @(posedge BCLK) src2_reg <= SRC2;
727
 
        // DIVI_PREP is time shared : SRC1 is analysed while sel_in = 0, afterwards the registered SRC2.
        // NOTE(review): sel_in has no reset ; presumably it is 0 in the START[3] cycle - confirm.
728
        always @(posedge BCLK) sel_in <= START[3] | divi_pipe1; // two times data for DEI
729
        assign i_in = sel_in ? src2_reg : SRC1;
730
 
731
        DIVI_PREP prep_inst ( .SRC(i_in), .BWD(BWD), .NOT_DEI(not_dei), .EXTDATA(extdata | START[0]),
732
                                                  .DOUT(i_out[31:0]), .MSB(i_out[36:32]), .NULL(ist_null), .MINUS(i_out[37]) );
733
 
        // DVZ_TRAP follows START[3] & ist_null with two register stages
734
        always @(posedge BCLK) dvz_pipe <= START[3] & ist_null; // Pulse 1 cycle long
735
        always @(posedge BCLK) DVZ_TRAP <= dvz_pipe;    // one cycle later if DEI with extdata
736
 
        // Sign and MSB position of the operand analysed in the START[3] cycle
737
        always @(posedge BCLK)
738
                if (START[3])
739
                        begin
740
                                neg_src1 <= i_out[37];
741
                                msb_src1 <= i_out[36:32];
742
                        end
743
 
        // Sign and zero flag of the operand analysed in the divi_pipe1 cycle
744
        always @(posedge BCLK)
745
                if (divi_pipe1)
746
                        begin
747
                                nul_src2 <= ist_null;
748
                                neg_src2 <= i_out[37];
749
                        end
750
 
        // ivalue always holds the magnitude delivered by DIVI_PREP in the previous cycle
751
        always @(posedge BCLK) ivalue   <= i_out[31:0];
752
 
753
        // The following is only for DEI :
754
        always @(posedge BCLK) pipe_reg <= {32{extdata}} & ivalue;      // Register must be 0 if not used
755
 
        // addoff adds the operand size (8, 16 or 32) to the MSB position if a non-zero extended word is present
756
        assign valdata = extdata & ~ist_null;
757
        always @(BWD or valdata)
758
                casex (BWD)
759
                  2'b00   : addoff = {   1'b0,   1'b0,valdata};
760
                  2'b01   : addoff = {   1'b0,valdata,   1'b0};
761
                  default : addoff = {valdata,   1'b0,   1'b0};
762
                endcase
763
 
764 11 ns32kum
        always @(posedge BCLK) next_msb2 <= extdata & ist_null & divi_pipe1;    // Special case at DEI : MSD = 0
765 9 ns32kum
 
        // MSB position of the dividend, 6 bit because DEI can have up to 64 bit.
        // If the extended word was zero, the position of the following word is taken one cycle later.
766
        always @(posedge BCLK)
767
                if (divi_pipe1) msb_src2 <= {addoff[2],(addoff[1:0] | i_out[36:35]),i_out[34:32]};
768
                  else
769
                        if (next_msb2) msb_src2 <= {1'b0,i_out[36:32]};
770
 
771
        // Shifter for Source2 
772
 
        // Distance between the MSB of the dividend and the MSB of the divisor
773
        assign diff_msb = {1'b0,msb_src2} - {2'b0,msb_src1};
774
 
775
        // negative shift limited to 0 : Source2=0 calculated without special handling, result always 0
776
        assign shift_r = diff_msb[6] ? 5'd0 : diff_msb[5:1];    // LSB does not count
777
 
        // Dividend for the shifter : pipe_reg and ivalue joined according to the operand size
778
        always @(BWD or extdata or ivalue or pipe_reg)
779
                casex ({BWD,extdata})
780
                        3'b0x0  : din_mux = {31'b0,ivalue};     // the normal case for all except DEI
781
                        3'b001  : din_mux = {23'b0,pipe_reg,ivalue[7:0]};
782
                        3'b011  : din_mux = {15'b0,pipe_reg,ivalue[15:0]};
783
                        default : din_mux = {pipe_reg[30:0],ivalue};             // 63 Bit wide
784
                endcase
785
 
        // Left shifter controlled by shift_r : a stage shifts if its control bit is 0.
        // The larger shift_r is, the less the dividend is moved to the left.
786
        assign shift_2  = shift_r[1] ? din_mux : {din_mux[60:0], 2'b0};
787
        assign shift_4  = shift_r[2] ? shift_2 : {shift_2[58:0], 4'b0};
788
        assign shift_8  = shift_r[3] ? shift_4 : {shift_4[54:0], 8'b0};
789
        assign shift_16 = shift_r[4] ? shift_8 : {shift_8[46:0],16'b0};  // Result is 63 Bit wide
790
 
791
        // 65 Bit result because of DEI 
792 11 ns32kum
        assign shift_32 = shift_r[5] ? {1'b0,pipe_reg,ivalue} : {shift_16,2'b00};       // special case DEI : 32 times shift
793 9 ns32kum
 
        // divi_pipe3 is set by divi_pipe2 and stays set as long as the counter is not 0
794
        always @(posedge BCLK or negedge BRST)  // Flag for rounding, only if DEST <>0 
795
                if (!BRST) divi_pipe3 <= 1'b0;
796
                  else
797
                    divi_pipe3 <= divi_pipe2 | (divi_pipe3 & ~stop_divi);
798
 
        // Step counter : loaded with shift_r (only the lower 5 bits fit), then counted down to 0
799
        always @(posedge BCLK)
800
                if (divi_pipe2) divi_counter <= shift_r;
801
                  else divi_counter <= divi_counter - {4'b000,~stop_divi};      // should stop at 0 
802
 
803
        assign stop_divi = (divi_counter == 5'h0);      // calculation ready
804
 
        // divi_pipe4 marks the end of the integer division : it loads DIVI_OUT and sets DONE
805
        always @(posedge BCLK) divi_pipe4 <= divi_pipe3 & stop_divi;
806
 
807
        assign neg_flag  = neg_src1 ^ neg_src2;
808
        assign rest_null = (divreg[33:2] == 32'h0);     // remainder is zero
809
 
810
        always @(posedge BCLK) sub_case <= neg_flag & ~nul_src2;        // little help for MODi opcode
811
 
812
        // Result preparation :
813
        // DEST  SRC    QUO  REM /  DIV  MOD
814
        //  +33  +13 :   2    7  /   2    7
815
        //      +33  -13 :  -2    7  /  -3   -6
816
        //      -33  +13 :  -2   -7  /  -3    6
817
        //      -33  -13 :   2   -7  /   2   -7
        // divi_result[32] is the sign of the result, [31:0] the magnitude before sign correction.
        // MOD with different signs and a non-zero remainder delivers divisor - remainder.
818
        always @(*)
819
                case (OPCODE[1:0])
820
                  2'b00 : divi_result = {neg_flag,divsr[31:0]};          // QUO
821
                  2'b01 : divi_result = {neg_src2,divreg[33:2]};        // REM
822 11 ns32kum
                  2'b10 : divi_result = {neg_src1,((sub_case & ~rest_null) ? (save1[31:0] - divreg[33:2]) : divreg[33:2])};      // MOD
823 9 ns32kum
                  2'b11 : divi_result = {neg_flag,divsr[31:0]};          // DIV
824
                endcase
825
 
826
        always @(posedge BCLK) negativ <= divi_result[32];
827
 
        // plus_1 completes the two's complement of a negative result. At DIV with a non-zero remainder
        // only the inversion is done : ~q = -(q+1), see -3 instead of -2 in the table above.
828 11 ns32kum
        assign plus_1 = (OPCODE[1:0] == 2'b11) ? (negativ & rest_null) : negativ;        // Special case remainder = 0 at DIV
829 9 ns32kum
 
        // Upper half of the result : sign corrected value, or dei_result for DEI
830
        always @(posedge BCLK)
831 11 ns32kum
                if (divi_pipe4) DIVI_OUT[63:32] <= not_dei ? (({32{negativ}} ^ divi_result[31:0]) + {31'd0,plus_1}) : dei_result;
832 9 ns32kum
 
        // Lower half of the result : always divreg[33:2], the same bits that rest_null tests
833
        always @(posedge BCLK) if (divi_pipe4) DIVI_OUT[31:0] <= divreg[33:2];
834
 
        // DEI result without extdata : quotient and remainder share one word for 8 and 16 bit operands
835
        always @(extdata or BWD or divsr or divreg)
836
                casex ({extdata,BWD})
837
                  3'b000  : dei_result = {16'hxxxx,divsr[7:0],divreg[9:2]};
838
                  3'b001  : dei_result = {divsr[15:0],divreg[17:2]};
839
                  default : dei_result = divsr[31:0];
840
                endcase
841
 
842
        // +++++++++++++++++++++++++++  Calculation path for Division  ++++++++++++++++++++++++++++
843
 
        // runflag : a floating point division is running, set by START[2] and cleared by ende
844
        always @(posedge BCLK or negedge BRST)
845
                if (!BRST) runflag <= 1'b0;
846
                        else
847
                                runflag <= START[2] | (~ende & runflag);
848
 
849
        always @(posedge BCLK) DONE <= (ende & runflag) | divi_pipe4;
850
 
        // Divisor selection : integer magnitude, single precision mantissa with hidden bit, or double
851
        assign man_1 = (FL | run_divi) ? 21'h0 : MAN1;
852
        assign src_1 = run_divi ? {1'b0,ivalue} : ( FL ? {10'h001,SRC1[22:0]} : {SRC1,1'b0});
853
 
854
        assign load_src1 = START[2] | divi_pipe1;
855
 
856
        //                                                                                                              *2                 +       *1   
857 11 ns32kum
        always @(posedge BCLK) if (load_src1) dreimal <= {1'b0,man_1,src_1,1'b0} + {2'b00,man_1,src_1}; // 3x divisor, register is declared 56 bit wide
858 9 ns32kum
 
859
        always @(posedge BCLK) if (load_src1) save1 <= src_1;
860
 
        // Trial subtractions of 1x, 2x and 3x the divisor ; bit 56 set means the result is negative.
        // man_1 is used combinatorially here, only the lower part of the divisor is stored in save1.
861
        assign sub1 = divreg - {3'b000, man_1,save1     };
862
        assign sub2 = divreg - {2'b00 ,man_1,save1,1'b0};
863
        assign sub3 = divreg - {1'b0, dreimal         };
864
 
865
        assign load_src2 = START[2] | divi_pipe2;
866
 
        // Partial remainder : loaded with the dividend, afterwards each clock the largest non-negative
        // difference is taken and shifted left by 2, filled with the next 2 bits of divreg_ext.
867
        always @(posedge BCLK)
868 11 ns32kum
                if (load_src2) divreg <= divi_pipe2 ? {23'h0,shift_32[64:32]} : ( FL ? {34'h0_0000_0001,SRC2[22:0]} : {3'b0,MAN2,SRC2,1'b0});
869 9 ns32kum
                else
870
                        begin
871
                          casex ({sub3[56],sub2[56],sub1[56]})
872
                                3'b0xx : divreg <=   {sub3[54:0],divreg_ext[31:30]};
873
                                3'b10x : divreg <=   {sub2[54:0],divreg_ext[31:30]};
874
                                3'b110 : divreg <=   {sub1[54:0],divreg_ext[31:30]};
875
                          default  : divreg <= {divreg[54:0],divreg_ext[31:30]};
876
                          endcase
877
                        end
878
 
879
        always @(posedge BCLK)  // Extension Register for Integer Division
880
                if (load_src2) divreg_ext <= divi_pipe2 ? shift_32[31:0] : 32'd0;
881
                  else
882
                    divreg_ext <= {divreg_ext[29:0],2'b0};
883
 
        // Quotient shift register : 2 new bits per clock, 11 / 10 / 01 / 00 for 3x / 2x / 1x / nothing subtracted
884
        always @(posedge BCLK)
885
                if (load_src2) divsr <= 57'h0;
886
                else
887
                        begin
888
                          casex ({sub3[56],sub2[56],sub1[56]})
889
                                3'b0xx : divsr <= {divsr[54:0],2'b11};
890
                                3'b10x : divsr <= {divsr[54:0],2'b10};
891
                                3'b110 : divsr <= {divsr[54:0],2'b01};
892
                          default  : divsr <= {divsr[54:0],2'b00};
893
                          endcase
894
                        end
895
 
896
        // Overflow Detection for DEI : serial calculation
        // Sticky flag : set as soon as a 1 appears in the 2 quotient bits directly above the operand size
897
        always @(posedge BCLK)
898
                if (load_src2) DEI_OVF[0] <= 1'b0;
899 11 ns32kum
                  else DEI_OVF[0] <= DEI_OVF[0] | (BWD[1] ? |divsr[33:32] : (BWD[0] ? |divsr[17:16] : |divsr[9:8]));
900 9 ns32kum
 
901
        always @(posedge BCLK) DEI_OVF[1] <= divi_pipe4;        // Timing pulse for OVF, also for DIV and QUO
902
 
        // short : one of the operands is NaN or Zero, the division ends immediately
903
        assign short = (SRCFLAGS[3:0] != 4'h0) & runflag;
904
 
        // ende : the first 1 of the quotient has reached the top of the used width of divsr
905
        assign ende = ((FL ? (divsr[26] | divsr[25]) : (divsr[56] | divsr[55])) & runflag) | short;
906
 
907
        assign sign = (SRCFLAGS[4] ^ SRCFLAGS[5]) & ~zero;
908
        assign zero =  SRCFLAGS[2] & ~SRCFLAGS[0];               // SRC2 = NULL -> NULL as result
909
        assign nan  =  SRCFLAGS[3] | SRCFLAGS[1] | (SRCFLAGS[2] & SRCFLAGS[0]);
910
                        // one of both NAN or both 0 -> invalid Operation 
911
 
        // NOTE(review): divreg[56:27] is 30 bit wide but is compared with 29'b0. The literal is zero
        // extended, so the comparison is not affected.
912
        assign orlow_d = (divreg[56:27] != 29'b0) & ~zero & ~FL;        // is there a remainder ? [1:0] are always 0.
913
        assign orlow_s = (divreg[26:2]  != 25'b0) & ~zero;
914
 
        // Sticky information for rounding : low quotient bits ORed with "remainder not zero"
915
        assign restlsb  = divsr[0] | orlow_s;
916
        assign restlow  = (divsr[1:0] != 2'b00) | orlow_s | orlow_d;
917
        assign resthigh = divsr[2] | restlow;
918
 
        // Exponent difference dividend - divisor, the bias is added again by offset
919
        always @(posedge BCLK) if (START[0]) exponent <= FL ? ({5'b00,SRC2[30:23]} - {5'b00,SRC1[30:23]})
920
                                                                                                                : ({2'b00,SRC2[30:20]} - {2'b00,SRC1[30:20]});
921
        assign offset   = FL ? 13'h007E : 13'h03FE;
922
        assign expoh    = exponent + {offset[12:1],1'b1};       // Double = 3FF/3FE     Single = 7F/7E
923
        assign expol    = exponent + offset;                            // in case of normalizing
924
 
        // Result word : if the top quotient bit (divsr[26] single, divsr[56] double) is set, expoh and
        // the upper mantissa bits are used, otherwise expol and the mantissa one bit lower.
925
        always @(posedge BCLK)
926
          if (ende && runflag)
927
                casex ({FL,divsr[26],divsr[56]})
928 11 ns32kum
                  3'b11x : OUT <= {nan,zero,sign,expoh[9:8],expoh[7],expoh[7],expoh[7],expoh[7:0],divsr[25:3],28'b0,divsr[3:2],restlow};
929
                  3'b10x : OUT <= {nan,zero,sign,expol[9:8],expol[7],expol[7],expol[7],expol[7:0],divsr[24:2],28'b0,divsr[2:1],restlsb};
930 9 ns32kum
                  3'b0x1 : OUT <= {nan,zero,sign,expoh,divsr[55:3],resthigh};
931
                  3'b0x0 : OUT <= {nan,zero,sign,expol,divsr[54:2],restlow};
932
                endcase
933
 
934
endmodule
935
 
936 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
937 9 ns32kum
//
938
//      9. DP_LOGIK             Control logic and result path for different functions
939
//
940 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
941
module DP_LOGIK ( BCLK, BRESET, OPCODE, SRC1, SRC2, FSR, START, MRESULT, BWD, FL, MAN1, MAN2, WR_REG, CY_IN,
942 9 ns32kum
                                  COP_DONE, COP_OP, COP_IN,
943 11 ns32kum
                                  DOUT, TT_DP, DP_CMP, OVF_BCD, MEI, DFLOAT, DONE, UP_DP, CLR_LSB, WREN_L, LD_OUT_L, DVZ_TRAP, COP_GO );
944 9 ns32kum
 
945
// Definition of output word OUT of sub-modules : the hidden-bit of the mantissa is already gone
946
//
947
//   N Z S   Exponent                   Mantissa                                                                                                 Round
948
//   A E I  Double : 13 Bit             52 Bit                                                                                                           2 Bit
949
//   N R G  Single : 10 Bit     23 Bit                                                                                                           2 Bit
950
//     O N                                 -mmmm.mmmm.mmmm.mmmm.mmmm.mmm-.--                                                      -m.
951
//  -F-F-F-E.EEEE.EEEE.EEEE-MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.RR
952
//
953
//   6 6 6 6 6666 6655 5555 5555 4444 4444 4433 3333 3333 2222 2222 2211 1111 1111 0000 0000 00
954
//   9 8 7 6 5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 10
955
//
956
// Single FP delivers the exponent in a way, that it is identical for rounding :
957
//
958
//  Exponent 61 - 54 => kept
959
//  Bits 64 - 62 are filled with bit 61 , carry should come through
960
//  Exponent 62 => Bit 65  , Overflow
961
//  Exponent 63 => Bit 66  , Underflow
962
 
963
        input                   BCLK,BRESET;
964
        input    [7:0]   OPCODE;
965
        input   [31:0]   SRC1,SRC2;      // the input data
966
        input   [20:0]   MAN1,MAN2;      // the MSB of mantissa
967
        input    [8:3]  FSR;            // Floating Point Status Register
968
        input    [1:0]   START;
969
        input  [105:0]   MRESULT;        // Result of multiplier
970
        input    [1:0]   BWD;            // Size of integer
971
        input                   FL;
972
        input                   WR_REG;         // from DECODER
973
        input                   CY_IN;
974
        input                   COP_DONE;       // Coprozessor Interface
975
        input   [23:0]   COP_OP;
976
        input   [63:0]   COP_IN;
977
 
978
        output  [63:0]   DOUT;
979
        output   [4:0]   TT_DP;          // Trap-Info to FSR
980
        output   [2:0]   DP_CMP;         // CMPL result
981
        output   [3:0]   OVF_BCD;        // Integer Division Overflow + BCD Carry update
982
        output                  MEI,DFLOAT;
983
        output                  DONE,UP_DP;
984
        output                  CLR_LSB,WREN_L,LD_OUT_L;
985
        output                  DVZ_TRAP;
986
        output  reg             COP_GO;
987
 
988
        reg             [63:0]   DOUT;
989
        reg                             CLR_LSB;
990
        reg              [2:0]   DP_CMP;
991
        reg              [5:0]   preflags;
992
        reg              [5:0]   srcflags;
993
        reg             [69:0]   fpout;
994
        reg              [2:0]   tt;
995
        reg              [6:0]   select;
996
        reg              [4:0]   wctrl;
997
        reg              [2:1]  sequ;
998
        reg                             misc_op;
999
        reg                             misc_mux;
1000
        reg                     car_ry;
1001
        reg                             wr_part2;
1002
        reg                             up_flag;
1003
        reg                             ovf_div;
1004
 
1005
        wire                    zexp2,zman2,zexp1,zman1,znan1;
1006
        wire                    make_i;
1007
        wire                    scalbl,go_misc;
1008
        wire                    op_cmp;
1009
        wire    [69:0]   mulout,addout,divout,miscout;
1010
        wire                    go_divf,go_divi,divi_ops,div_done;
1011
        wire                    bcd_ops,man_ops;
1012
        wire    [31:0]   i_out;
1013
        wire    [63:0]   divi_out;
1014
        wire    [66:2]  rund,cy_val;    // Indexnumber like in xxxout
1015
        wire                    div_zero,overflow,underflow,inexact;
1016
        wire     [1:0]   cmpres;
1017
        wire    [63:0]   fp_out,fp_res;
1018
        wire                    wr_part1;
1019
        wire                    done_i;
1020
        wire    [31:0]   bcd_q;
1021
        wire                    bcd_done;
1022
        wire                    bcd_carry;
1023
        wire     [1:0]   dei_ovf;
1024
        wire                    quo_div;
1025
        wire                    copop;
1026
        wire                    copwr;
1027
 
1028
        // Control of datapath : together with START the Double Unit becomes active
1029
 
1030
        always @(OPCODE or FL)
1031
                casex (OPCODE)
1032
                  8'b1001_000x : select = 7'b00_01010;  // 0 1 0 :      MOViL
1033
                  8'b1001_010x : select = 7'b10_11000;  // MOVLF
1034
                  8'b1001_011x : select = 7'b01_11000;  // MOVFL
1035
                  8'b1001_100x : select = 7'b10_01011;  // 0 1 1 :      ROUNDLi
1036
                  8'b1001_101x : select = 7'b10_01011;  // 0 1 1 :  TRUNCLi
1037
                  8'b1001_111x : select = 7'b10_01011;  // 0 1 1 :      FLOORLi
1038
                  8'b1011_0000 : select = 7'bxx_01000;  // 0 0 0 :      ADDL
1039
                  8'b1011_0010 : select = 7'bxx_01001;  // 0 0 1 :      CMPL
1040
                  8'b1011_0100 : select = 7'bxx_01001;  // 0 0 1 :      SUBL
1041
                  8'b1011_1000 : select = 7'b11_01100;  // 1 0 1 :  DIVf , Default Float for srcflags
1042
                  8'b1011_1100 : select = 7'bxx_01100;  // 1 0 0 :      MULL
1043
                  8'b1011_0110 : select = 7'b11_11000;  // SCALBf , Default Float for srcflags
1044
                  8'b1011_0111 : select = {~FL,FL,5'b11000};    // LOGBf
1045
                  default      : select = 7'b0;
1046
                endcase
1047
 
1048
        assign MEI      = (OPCODE == 8'h79);
1049
        assign divi_ops = (OPCODE[7:2] == 6'b0111_11) | (OPCODE == 8'h7B);      // QUO/REM/MOD/DIV & DEI
1050
        assign go_divf  = (OPCODE == 8'hB8) & START[1];                                 // because of runflag in DIV Unit
1051
        assign go_divi  = divi_ops & (OPCODE[2] ? START[1] : START[0]);  // DEI starts with START[0]
1052
        assign bcd_ops  = (OPCODE == 8'h6F) | (OPCODE == 8'h6B);                // ADDP , SUBP
1053
 
1054 11 ns32kum
        assign man_ops  = (OPCODE == 8'hB1) | (OPCODE == 8'hB5) | (OPCODE == 8'hB9) | (OPCODE == 8'hBD);        // MOVf,NEGf,XXXf,ABSf
1055 9 ns32kum
 
1056
        assign DFLOAT   = (select[3] | copop) & ~FL;    // all Double Floating Point Operations for PREPDATA
1057 11 ns32kum
        assign make_i   = (select[2:0] == 3'b011) | divi_ops | bcd_ops;  // ROUND/TRUNC/FLOOR for output multiplexer
1058 9 ns32kum
        assign op_cmp   = (OPCODE == 8'hB2) & ~FL;
1059
        always @(posedge BCLK) misc_op <= select[4];    // for OUT-Multiplexer
1060
 
1061
        assign copop    = (OPCODE == 8'hDD);
1062 11 ns32kum
        assign copwr    = (COP_OP[18:17] == 2'd0) & (COP_OP[13:11] == 3'b111) & (COP_OP[7:5] == 3'b001);        // Custom Convert
1063 9 ns32kum
 
1064
        // very special solution for SCALBL
1065
        assign scalbl   = START[0] & ~FL & (OPCODE == 8'hB6);
1066
        assign go_misc  = START[1] | scalbl;
1067
        always @(posedge BCLK) misc_mux <= scalbl;      // switches at START[1] the input multiplexer
1068
 
1069
        // SRCFLAGS : special handling for operands is done locally
1070
 
1071
        assign zexp2 = (SRC2[30:20] == 11'd0);
1072
        assign zman2 = (SRC2[19:0] == 20'd0);
1073
        assign zexp1 = (SRC1[30:20] == 11'd0);
1074
        assign zman1 = (SRC1[19:0] == 20'd0);
1075
        assign znan1 = (SRC1[30:20] == 11'h7FF);
1076
 
1077
        always @(posedge BCLK)
1078
                if (START[0])
1079
                  begin
1080
                        srcflags[5] <= SRC2[31];
1081
                        srcflags[4] <= SRC1[31];
1082
                        preflags    <= {(SRC2[30:20] == 11'h7FF),zexp2,zman2,znan1,zexp1,zman1};
1083
                   end
1084
 
1085
        // case Definition : 00 : 0             , if START[i]=0 then there are always 2 long operands
1086
        //                                       01 : 1 Float Operand SCR1
1087
        //                                       10 : 1 Long Operand SRC1+SRC2
1088
        //                                       01 : 1 Float Operand SRC1
1089
 
1090
        always @(posedge BCLK)  // NaN 
1091
                if (START[1])
1092
                        casex ({START[0],select[6:5]})
1093 11 ns32kum
                           3'b0xx : srcflags[3] <= preflags[5] | (preflags[4] & (~preflags[3] | SRC2[31] | ~zexp2 | ~zman2));
1094
                           3'b111 : srcflags[3] <= (SRC2[30:23] == 8'hFF) | ((SRC2[30:23] == 8'd0) & ((SRC2[22:20] != 3'd0) | ~zman2)); // F:SRC2 = NaN
1095 9 ns32kum
                          default : srcflags[3] <= 1'b0;
1096
                        endcase
1097
 
1098
        always @(posedge BCLK)  // Zero : only exponent ! If denormalized => NaN !
1099
                if (START[0])
1100
                        casex ({START[1],select[6:5]})
1101
                           3'b0xx : srcflags[2] <= zexp2;       // L:(SRC1,SRC2) = Zero , SRC1 = MSB
1102
                           3'b111 : srcflags[2] <= (SRC2[30:23] == 8'd0);       // F:SRC2 = Zero
1103
                          default : srcflags[2] <= 1'b0;
1104
                        endcase
1105
 
1106
        always @(posedge BCLK)  // NaN 
1107
                if (START[1])
1108
                        casex ({START[0],select[6:5]})
1109 11 ns32kum
                           3'b0xx : srcflags[1] <= preflags[2] | (preflags[1] & (~preflags[0] | SRC1[31] | ~zexp1 | ~zman1));
1110
                           3'b1x1 : srcflags[1] <= (SRC1[30:23] == 8'hFF) | ((SRC1[30:23] == 8'd0) & ((SRC1[22:20] != 3'd0) | ~zman1)); // F:SRC1 = NaN
1111
                           3'b110 : srcflags[1] <= znan1 | (zexp1 & (~zman1 | SRC2[31] | ~zexp2 | ~zman2));     // L:(SRC1,SRC2) = NaN , SRC1 = MSB
1112 9 ns32kum
                          default : srcflags[1] <= 1'b0;
1113
                        endcase
1114
 
1115
        always @(posedge BCLK)  // Zero : only exponent ! If denormalized => NaN !
1116
                if (START[0])
1117
                        casex ({START[1],select[6:5]})
1118
                           3'b0xx : srcflags[0] <= zexp1;        // L:(SRC1,SRC2) = Zero , SRC1 = MSB
1119
                           3'b1x1 : srcflags[0] <= (SRC1[30:23] == 8'd0);        // F:SRC1 = Zero
1120
                           3'b110 : srcflags[0] <= zexp1;        // L:(SRC1,SRC2) = Zero , SRC1 = MSB
1121
                          default : srcflags[0] <= 1'b0;
1122
                        endcase
1123
 
1124
                        // The Sub-modules : 
1125
 
1126
        DFPU_ADDSUB as_inst     ( .BCLK(BCLK), .START(START), .SRC1(SRC1), .SRC2(SRC2),
1127
                                                  .MAN1({~srcflags[0],MAN1[19:0]}), .MAN2({~srcflags[2],MAN2[19:0]}),
1128
                                                  .SRCFLAGS(srcflags), .BWD(BWD), .SELECT({OPCODE[2:1],select[1:0]}),
1129
                                                  .OUT(addout), .IOUT(i_out), .CMPRES(cmpres) );
1130
 
1131
        DFPU_MUL mul_inst       ( .BCLK(BCLK), .SRC1(SRC1), .SRC2(SRC2), .START(START[0]), .MRESULT(MRESULT),
1132
                                                  .OUT(mulout), .SRCFLAGS(srcflags) );
1133
 
1134 11 ns32kum
        DFPU_DIV div_inst       ( .BCLK(BCLK), .BRST(BRESET), .START({go_divi,go_divf,START}), .SRC1(SRC1), .SRC2(SRC2),
1135 9 ns32kum
                                                  .MAN1(MAN1), .MAN2(MAN2), .SRCFLAGS(srcflags), .FL(FL), .OUT(divout), .DONE(div_done),
1136 11 ns32kum
                                                  .BWD(BWD), .OPCODE(OPCODE[2:0]), .DIVI_OUT(divi_out), .DVZ_TRAP(DVZ_TRAP), .DEI_OVF(dei_ovf) );
1137 9 ns32kum
 
1138 11 ns32kum
        DFPU_MISC misc_inst     ( .BCLK(BCLK), .START(go_misc), .SRC1(SRC1), .SRC2(SRC2), .SRCFLAG(srcflags[2]),
1139 9 ns32kum
                                                  .MIMUX(misc_mux), .MODE({OPCODE[5],OPCODE[0],FL,OPCODE[1]}), .OUT(miscout) );
1140
 
1141 11 ns32kum
        DFPU_BCD bcd_inst       ( .BCLK(BCLK), .BRESET(BRESET), .START(START[1]), .DO_BCD(bcd_ops), .BWD(BWD), .SRC1(SRC1), .SRC2(SRC2),
1142 9 ns32kum
                                                  .CY_IN(CY_IN), .SUBP(~OPCODE[2]), .BCD_Q(bcd_q), .CY_OUT(bcd_carry), .BCD_DONE(bcd_done) );
1143
 
1144
        // FP - path : selection of result and rounding :
1145
 
1146
        always @(misc_op or OPCODE or mulout or addout or divout or miscout)
1147
                casex ({misc_op,OPCODE[5],OPCODE[3:2]}) //OPCODE[5] only for Flags i.e. NAN 
1148
                  4'b1xxx : fpout = miscout;            // for MOVLF,MOVFL,SCALB & LOGB
1149
                  4'b0110 : fpout = divout;
1150
                  4'b0111 : fpout = mulout;
1151
                  default : fpout = addout;
1152
                endcase
1153
 
1154
        always @(FSR or fpout)  // Calculation of Carry according to rounding mode, fpout[67] = sign bit
1155
                casex (FSR[8:7])
1156
                  2'b00 : car_ry = ((fpout[1:0] == 2'b10) & fpout[2]) | (fpout[1:0] == 2'b11);    // round to nearest
1157
                  2'b10 : car_ry = ~fpout[67] & (fpout[1:0] != 2'b00);   // round to positiv infinity
1158
                  2'b11 : car_ry =  fpout[67] & (fpout[1:0] != 2'b00);   // round to negativ infinity
1159
                default : car_ry = 1'b0;                                                                // round to zero
1160
                endcase
1161
 
1162
        assign cy_val = {35'h0,(FL & car_ry),28'h0,(~FL & car_ry)};
1163
 
1164
        assign rund = {fpout[66:2]} + cy_val;
1165
 
1166
        // Detection of Div-by-0, Overflow, Underflow and Inexact : Exponent from [66:54] = 13 Bits
1167
        assign div_zero  = (srcflags[3:0] == 4'h1) & (OPCODE == 8'hB8);  // true FPU Divide by Zero
1168
        assign overflow  = ~rund[66] & (rund[65] | (rund[64:54] == 11'h7FF));
1169
        assign underflow = (rund[66] | (rund[65:54] == 12'h0)) & ~fpout[68];    // Zero-Flag
1170
        assign inexact   = (fpout[1:0] != 2'b00);
1171
 
1172
        always @(fpout or op_cmp or div_zero or overflow or underflow or inexact or FSR)
1173
                casex ({fpout[69],op_cmp,div_zero,overflow,FSR[3],underflow,FSR[5],inexact})    // [69] = NAN
1174
                        8'b1xxxxxxx : tt = 3'b101;      // Invalid operation
1175
                        8'b001xxxxx : tt = 3'b011;      // Divide by Zero
1176
                        8'b0001xxxx : tt = 3'b010;      // Overflow
1177
                        8'b000011xx : tt = 3'b001;      // Underflow
1178
                        8'b00000011 : tt = 3'b110;      // Inexact Result
1179
                        default         : tt = 3'b000;  // no error
1180
                endcase
1181
 
1182 11 ns32kum
        assign TT_DP = man_ops ? 5'd0 : {(inexact & ~op_cmp),(underflow & ~op_cmp),tt}; // at ABSf/NEGf no error : different to NS32381 !
1183 9 ns32kum
 
1184
        assign fp_res = FL ? {fpout[67],rund[61:31],rund[33:2]}
1185
                                           : {fpout[67],rund[64:2]};    // lower 32 bits identical
1186
 
1187
        // Underflow special case and get ZERO
1188
        assign fp_out = (underflow | fpout[68]) ? 64'h0 : fp_res;
1189
 
1190
        // 63..32 goes to memory if Word or Byte ! Also in ODD Register , 31..0 goes in EVEN Register
1191
        // DEI comes without WR_REG information
1192 11 ns32kum
        always @(make_i or copop or MEI or BWD or WR_REG or MRESULT or COP_IN or i_out or fp_out or divi_ops or divi_out or bcd_ops or bcd_q)
1193 9 ns32kum
                casex ({make_i,copop,MEI,BWD})
1194 11 ns32kum
                  5'b00100 : DOUT = {MRESULT[31:8], (WR_REG ? MRESULT[15:8]  : MRESULT[7:0]), MRESULT[31:0]};     // LSD always the same
1195 9 ns32kum
                  5'b00101 : DOUT = {MRESULT[31:16],(WR_REG ? MRESULT[31:16] : MRESULT[15:0]),MRESULT[31:0]};
1196
                  5'b0011x : DOUT =  MRESULT[63:0];
1197
                  5'b01xxx : DOUT =  COP_IN;    // true alignment in Coprocessor
1198 11 ns32kum
                  5'b1xxxx : DOUT = divi_ops ? divi_out : {(bcd_ops ? bcd_q : i_out),fp_out[31:0]};      // MSD is written first
1199 9 ns32kum
                  default  : DOUT = fp_out;
1200
                endcase
1201
 
1202
        always @(posedge BCLK) DP_CMP <= {(srcflags[3] | srcflags[1]),cmpres};  // Only valid if not NaN
1203
 
1204
        // Pipeline Control + Registerfile write control
1205
 
1206
        always @(posedge BCLK or negedge BRESET)
1207
                if (!BRESET) sequ <= 2'b00;
1208
                  else
1209
                        sequ <= {(sequ[1] & ~DONE),START[1]};
1210
 
1211
        always @(FL or OPCODE or copwr)
1212
                casex ({FL,OPCODE})     // WRITE Control : [2] = clr_lsb, [1] = wr_part2, [0] = wr_part1
1213
                  9'bx_1001_000x : wctrl = 5'b01_111;   // MOViL
1214
                  9'bx_1001_010x : wctrl = 5'b00_010;   // MOVLF
1215
                  9'bx_1001_011x : wctrl = 5'b01_111;   // MOVFL
1216
                  9'bx_1001_100x : wctrl = 5'b00_010;   // ROUNDLi      - DONE is one cycle earlier for this opcodes
1217
                  9'bx_1001_101x : wctrl = 5'b00_010;   // TRUNCLi      
1218
                  9'bx_1001_111x : wctrl = 5'b00_010;   // FLOORLi
1219
                  9'bx_1011_0000 : wctrl = 5'b01_111;   // ADDL
1220
                  9'bx_1011_0010 : wctrl = 5'b00_000;   // CMPL - via LD one cycle later in PSR
1221
                  9'bx_1011_0100 : wctrl = 5'b01_111;   // SUBL
1222
                  9'b1_1011_1000 : wctrl = 5'b10_001;   // DIVF - measured 18 cycles Reg-Reg
1223
                  9'b0_1011_1000 : wctrl = 5'b10_111;   // DIVL - measured 34 cycles Reg-Reg
1224
                  9'bx_1011_1100 : wctrl = 5'b01_111;   // MULL
1225
                  9'bx_0110_1x11 : wctrl = 5'b10_001;   // ADDP,SUBP
1226
                  9'bx_0111_1001 : wctrl = 5'b00_111;   // MEIi
1227
                  9'bx_0111_1011 : wctrl = 5'b10_111;   // DEIi
1228
                  9'bx_0111_11xx : wctrl = 5'b10_001;   // QUOi,REMi,MODi,DIVi
1229
                  9'b1_1011_011x : wctrl = 5'b00_010;   // SCALBF/LOGBF
1230
                  9'b0_1011_011x : wctrl = 5'b01_111;   // SCALBL/LOGBL
1231
                  9'bx_1101_1101 : wctrl = {4'b10_00,copwr};    // execute coprocessor opcode
1232
                  default        : wctrl = 5'b0;
1233
                endcase
1234
 
1235 11 ns32kum
        assign done_i = wctrl[4] ? (div_done | bcd_done | COP_DONE) : ( (wctrl[3] | ~WR_REG) ? sequ[2] : sequ[1] );
1236 9 ns32kum
        assign DONE = ~START[1] & done_i;       // DONE is valid for all opcodes
1237
 
1238
        assign wr_part1 = DONE & WR_REG & wctrl[0];
1239
 
1240
        always @(posedge BCLK) CLR_LSB  <= DONE & WR_REG & wctrl[2];
1241
        always @(posedge BCLK) wr_part2 <= DONE & WR_REG & wctrl[1];
1242
 
1243
        assign WREN_L   = wr_part1 | wr_part2;
1244
        assign LD_OUT_L = DONE & ~WR_REG;               // meaning is "Load Out-Reg from Long-Path"
1245
 
1246
        always @(posedge BCLK) up_flag <= DONE & ~wctrl[0];              // DONE one cycle later
1247 11 ns32kum
        assign UP_DP    = (select[3] & (wctrl[0] ? DONE : up_flag)) | man_ops;   // Update FSR Trap etc. : all FPU opcodes of DP_FPU
1248 9 ns32kum
 
1249
        // Overflow Trap for Division : DEI, QUO, DIV
1250
        assign quo_div = (OPCODE == 8'h7C) | (OPCODE == 8'h7F);
1251
        always @(*)
1252
                casex ({OPCODE[2],BWD})
1253
                   3'b100 : ovf_div = (divi_out[39] & SRC1[7]  & SRC2[7] ) & quo_div;
1254
                   3'b101 : ovf_div = (divi_out[47] & SRC1[15] & SRC2[15]) & quo_div;
1255
                   3'b11x : ovf_div = (divi_out[63] & SRC1[31] & SRC2[31]) & quo_div;
1256
                  default : ovf_div = dei_ovf[0] & (OPCODE == 8'h7B);    // DEI
1257
                endcase
1258
 
1259
        assign OVF_BCD = {dei_ovf[1],ovf_div,bcd_done,bcd_carry};       // to I_PFAD
1260
 
1261
        always @(posedge BCLK) COP_GO <= START[1] & copop;
1262
 
1263
endmodule
1264
 
1265 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1266 9 ns32kum
//
1267
// 10. DP_FPU           Top level of long operations datapath
1268
//
1269 11 ns32kum
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1270
module DP_FPU( BCLK, FL, BRESET, LD_LDQ, WR_REG, BWD, FSR, OPCODE, SRC1, SRC2, START, DONE, UP_DP, WREN_L,
               CLR_LSB, LD_OUT_L, DVZ_TRAP, DP_CMP, DP_OUT, DP_Q, TT_DP, CY_IN, OVF_BCD, COP_GO, COP_OP,
               COP_IN, COP_DONE, COP_OUT );

    // Top level of the long operations datapath.
    // Ties together three pieces:
    //   - DP_LOGIK  : receives the multiplier result and the raw operands and
    //                 produces DOUT, the status/trap outputs and the handshake signals
    //   - PREPDATA  : prepares the operand data for the big multiplier
    //   - a 53 x 53 bit unsigned multiplier fed from the operand registers MDA / MDB

    // ---------------------------------------------------------------- inputs
    input            BCLK, BRESET;
    input            FL, LD_LDQ, WR_REG, CY_IN, COP_DONE;
    input     [1:0]  BWD, START;
    input     [8:3]  FSR;
    input     [7:0]  OPCODE;
    input    [23:0]  COP_OP;
    input    [31:0]  SRC1, SRC2;
    input    [63:0]  COP_IN;

    // --------------------------------------------------------------- outputs
    output           DONE, UP_DP, WREN_L, CLR_LSB, LD_OUT_L, DVZ_TRAP, COP_GO;
    output    [2:0]  DP_CMP;
    output    [3:0]  OVF_BCD;
    output    [4:0]  TT_DP;
    output   [31:0]  DP_OUT, DP_Q;
    output  [127:0]  COP_OUT;

    // ------------------------------------------------------------- registers
    reg      [31:0]  DP_Q;          // low result word / SRC2 holding register
    reg      [52:0]  MDA, MDB;      // multiplier operands
    reg     [31:20]  RCOPA, RCOPB;  // SRC1[31:20] / SRC2[31:20] captured together with the MSD parts
                                    // NOTE(review): presumably sign + exponent of the operands - confirm

    // ------------------------------------------------------------------ nets
    wire             MEI, DFLOAT;
    wire             LOAD_MSD, LOAD_LSD1, LOAD_LSD2;
    wire     [31:0]  LSD_1, LSD_2;
    wire    [52:32]  MSD_1, MSD_2;
    wire     [63:0]  DOUT;
    wire    [105:0]  MRESULT;

    // Control and result logic of the datapath
    DP_LOGIK DOUBLE_U(
        // clock, reset, mode
        .BCLK(BCLK),
        .BRESET(BRESET),
        .FL(FL),
        .FSR(FSR),
        // operation and operands
        .OPCODE(OPCODE),
        .BWD(BWD),
        .START(START),
        .WR_REG(WR_REG),
        .CY_IN(CY_IN),
        .SRC1(SRC1),
        .SRC2(SRC2),
        // multiplier interface
        .MAN1(MDA[52:32]),
        .MAN2(MDB[52:32]),
        .MRESULT(MRESULT),
        .MEI(MEI),
        .DFLOAT(DFLOAT),
        // results and status
        .DOUT(DOUT),
        .DP_CMP(DP_CMP),
        .TT_DP(TT_DP),
        .OVF_BCD(OVF_BCD),
        .DVZ_TRAP(DVZ_TRAP),
        // handshake and register write control
        .DONE(DONE),
        .UP_DP(UP_DP),
        .WREN_L(WREN_L),
        .CLR_LSB(CLR_LSB),
        .LD_OUT_L(LD_OUT_L),
        // coprocessor interface
        .COP_GO(COP_GO),
        .COP_DONE(COP_DONE),
        .COP_OP(COP_OP),
        .COP_IN(COP_IN));

    // Operand preparation for the multiplier
    PREPDATA DP_PREP(
        .START(START),
        .BWD(BWD),
        .MEI(MEI),
        .DFLOAT(DFLOAT),
        .SRC1(SRC1),
        .SRC2(SRC2),
        .LOAD_MSD(LOAD_MSD),
        .LOAD_LSD1(LOAD_LSD1),
        .LOAD_LSD2(LOAD_LSD2),
        .MSD_1(MSD_1),
        .MSD_2(MSD_2),
        .LSD_1(LSD_1),
        .LSD_2(LSD_2));

    // unsigned multiplier 53 * 53 bits = 106 bits
    assign MRESULT = MDA * MDB;

    // Upper result word, or the stored DP_Q word while CLR_LSB is active
    assign DP_OUT = CLR_LSB ? DP_Q : DOUT[63:32];

    // DP_Q is loaded on any of the three enables; LD_LDQ selects SRC2 as
    // source, otherwise the lower 32 bits of DOUT are taken.
    always @(posedge BCLK)
        begin
            if (LD_LDQ | LD_OUT_L | WREN_L)
                DP_Q <= LD_LDQ ? SRC2 : DOUT[31:0];
        end

    // Operand registers: the two lower halves have their own load enables,
    // both upper parts and the RCOPx captures share LOAD_MSD.
    always @(posedge BCLK)
        begin
            if (LOAD_LSD1) MDA[31:0] <= LSD_1;
            if (LOAD_LSD2) MDB[31:0] <= LSD_2;
            if (LOAD_MSD)
                begin
                    MDA[52:32] <= MSD_1;
                    MDB[52:32] <= MSD_2;
                    RCOPA      <= SRC1[31:20];
                    RCOPB      <= SRC2[31:20];
                end
        end

    // Operand bus to the coprocessor: two 64 bit halves, each built from the
    // captured top 12 bits, 20 bits of the operand register and the current SRCx word.
    assign COP_OUT[127:64] = {RCOPA, MDA[51:32], SRC1};
    assign COP_OUT[63:0]   = {RCOPB, MDB[51:32], SRC2};

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.