1 |
29 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
2 |
9 |
ns32kum |
//
|
3 |
|
|
// This file is part of the M32632 project
|
4 |
|
|
// http://opencores.org/project,m32632
|
5 |
|
|
//
|
6 |
23 |
ns32kum |
// Filename: DP_FPU.v
|
7 |
29 |
ns32kum |
// Version: 3.0
|
8 |
|
|
// History: 2.0 of 14 August 2016
|
9 |
|
|
//				1.0 of 30 May 2015
|
10 |
|
|
// Date: 2 December 2018
|
11 |
9 |
ns32kum |
//
|
12 |
29 |
ns32kum |
// Copyright (C) 2018 Udo Moeller
|
13 |
9 |
ns32kum |
//
|
14 |
|
|
// This source file may be used and distributed without
|
15 |
|
|
// restriction provided that this copyright statement is not
|
16 |
|
|
// removed from the file and that any derivative work contains
|
17 |
|
|
// the original copyright notice and the associated disclaimer.
|
18 |
|
|
//
|
19 |
|
|
// This source file is free software; you can redistribute it
|
20 |
|
|
// and/or modify it under the terms of the GNU Lesser General
|
21 |
|
|
// Public License as published by the Free Software Foundation;
|
22 |
|
|
// either version 2.1 of the License, or (at your option) any
|
23 |
|
|
// later version.
|
24 |
|
|
//
|
25 |
|
|
// This source is distributed in the hope that it will be
|
26 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied
|
27 |
|
|
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
28 |
|
|
// PURPOSE. See the GNU Lesser General Public License for more
|
29 |
|
|
// details.
|
30 |
|
|
//
|
31 |
|
|
// You should have received a copy of the GNU Lesser General
|
32 |
|
|
// Public License along with this source; if not, download it
|
33 |
|
|
// from http://www.opencores.org/lgpl.shtml
|
34 |
|
|
//
|
35 |
29 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
36 |
9 |
ns32kum |
//
|
37 |
|
|
// Modules contained in this file:
|
38 |
|
|
// 1. PREPDATA Prepare data for the big multiplier
|
39 |
|
|
// 2. BCDADDER 4 bit BCD adder
|
40 |
|
|
// 3. DFPU_BCD Binary coded decimal (BCD) adder and subtractor
|
41 |
|
|
// 4. DFPU_ADDSUB Double precision floating point adder and subtractor
|
42 |
|
|
// 5. DFPU_MISC Double precision floating point miscellaneous operations
|
43 |
|
|
// 6. DFPU_MUL Double precision floating point multiplier
|
44 |
23 |
ns32kum |
// 7. SCANDIG Scan digit for leading one
|
45 |
|
|
// 8. DIVI_PREP Prepare data for the divider
|
46 |
|
|
// 9. DFPU_DIV The divider for all divide opcodes : double, single and integer
|
47 |
|
|
// 10. DP_LOGIK Control logic and result path for different functions
|
48 |
|
|
// 11. DP_FPU Top level of long operations datapath
|
49 |
9 |
ns32kum |
//
|
50 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
51 |
9 |
ns32kum |
|
52 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
53 |
9 |
ns32kum |
//
|
54 |
|
|
// 1. PREPDATA Prepare data for the big multiplier
|
55 |
|
|
//
|
56 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
57 |
9 |
ns32kum |
module PREPDATA (
    input        [1:0]  START,      // [0] : first step, [1] : second step
    input               MEI,        // selects the integer path (operands sized by BWD)
    input               DFLOAT,     // selects the double precision floating point path
    input        [1:0]  BWD,        // operand size for MEI : 00 = Byte, 01 = Word, else Double
    input       [31:0]  SRC1, SRC2, // raw source operands
    output     [52:32]  MSD_1, MSD_2,               // upper 21 bits fed to the multiplier
    output reg  [31:0]  LSD_1, LSD_2,               // lower 32 bits fed to the multiplier
    output              LOAD_MSD, LOAD_LSD1, LOAD_LSD2 );   // load strobes

    // Prepares the operands and load strobes for the big multiplier.
    //
    // Upper part : forced to zero for MEI, otherwise a leading "1" is placed
    // in front of bits 19..0 of the source word.
    assign MSD_1 = MEI ? 21'h0 : {1'b1, SRC1[19:0]};
    assign MSD_2 = MEI ? 21'h0 : {1'b1, SRC2[19:0]};

    // Lower part : for MEI with Byte or Word size the operand is zero extended
    // to 32 bits, in every other case the source word is passed unchanged.
    always @(*)
        begin
            casex ({MEI, BWD})
              3'b100  : LSD_1 = {24'h000000, SRC1[7:0]};
              3'b101  : LSD_1 = {16'h0000, SRC1[15:0]};
              default : LSD_1 = SRC1;
            endcase

            casex ({MEI, BWD})
              3'b100  : LSD_2 = {24'h000000, SRC2[7:0]};
              3'b101  : LSD_2 = {16'h0000, SRC2[15:0]};
              default : LSD_2 = SRC2;
            endcase
        end

    // Load strobes. MEI loads MSD and LSD1 with START[0] and LSD2 with START[1].
    // DFLOAT loads MSD in the first step (START[0]) and both LSD registers in
    // the second step (START[1]).
    assign LOAD_MSD  =  START[0] & (MEI | DFLOAT);
    assign LOAD_LSD1 = (START[0] & MEI) | (START[1] & DFLOAT);
    assign LOAD_LSD2 =  START[1] & (MEI | DFLOAT);

endmodule
|
93 |
|
|
|
94 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
95 |
9 |
ns32kum |
//
|
96 |
|
|
// 2. BCDADDER 4 bit BCD adder
|
97 |
|
|
//
|
98 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
99 |
9 |
ns32kum |
module BCDADDER (
    input   [3:0]   A_IN, B_IN, // BCD digits ; computes A_IN + B_IN or A_IN - B_IN
    input           CY_IN,      // carry in (ADD) or borrow in (SUB)
    input           SUBP,       // 1 : subtract , 0 : add
    output  [3:0]   OUT,        // BCD result digit
    output          CY_OUT );   // carry out (ADD) or borrow out (SUB)

    // One digit BCD adder/subtractor. The raw binary sum is built in 5 bit
    // two's complement and then corrected back into the range 0..9.

    reg     [4:0]   neg_b;      // -B_IN in 5 bit two's complement
    wire    [4:0]   op_b;       // second operand : +B_IN or -B_IN
    wire    [4:0]   op_cy;      // +CY_IN for ADD , -CY_IN for SUB
    wire    [4:0]   sum;        // uncorrected binary result
    wire            gt9;        // ADD : binary result is bigger than 9
    wire    [3:0]   adjust;     // correction value that is subtracted from sum

    // Negation table ; digits above 9 are no valid BCD and give "x"
    always @(*)
        case (B_IN)
          4'h0    : neg_b = 5'h00;
          4'h1    : neg_b = 5'h1F;
          4'h2    : neg_b = 5'h1E;
          4'h3    : neg_b = 5'h1D;
          4'h4    : neg_b = 5'h1C;
          4'h5    : neg_b = 5'h1B;
          4'h6    : neg_b = 5'h1A;
          4'h7    : neg_b = 5'h19;
          4'h8    : neg_b = 5'h18;
          4'h9    : neg_b = 5'h17;
          default : neg_b = 5'hxx;
        endcase

    assign op_b  = SUBP ? neg_b : {1'b0, B_IN};
    assign op_cy = {{4{SUBP & CY_IN}}, CY_IN};      // 5'h1F = -1 if a borrow comes in

    assign sum = {1'b0, A_IN} + op_b + op_cy;

    assign gt9 = sum[4] | (sum[3] & (sum[2] | sum[1]));

    // SUB : a negative sum (sum[4] set) gets -6, which is +10 modulo 16
    // ADD : a sum bigger than 9 gets -10
    assign adjust = SUBP ? {1'b0, sum[4], sum[4], 1'b0} : {gt9, 1'b0, gt9, 1'b0};

    assign OUT    = sum[3:0] - adjust;
    assign CY_OUT = SUBP ? sum[4] : gt9;

endmodule
|
137 |
|
|
|
138 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
139 |
9 |
ns32kum |
//
|
140 |
|
|
// 3. DFPU_BCD Binary coded decimal (BCD) adder and subtractor
|
141 |
|
|
//
|
142 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
143 |
|
|
module DFPU_BCD (
    input               BCLK,
    input               BRESET,     // asynchronous reset , active low
    input               START,      // START[1] of the datapath
    input               DO_BCD,     // BCD opcode is valid
    input        [1:0]  BWD,        // operand size ; compared against the byte counter
    input       [31:0]  SRC1, SRC2, // source , destination ; data is stable during operation
    input               CY_IN,      // comes from PSR
    input               SUBP,       // 1 : SUBP , 0 : ADDP
    output reg  [31:0]  BCD_Q,      // packed BCD result
    output reg          CY_OUT,     // goes to PSR if BCD_DONE is valid
    output              BCD_DONE );

    // Packed BCD adder/subtractor that processes one byte (two digits) per
    // clock with two chained BCDADDER stages.
    // Original timing note : Byte 3 cycles in the shortest case REG-REG,
    // Word 4 cycles and Double 6 cycles.
    //
    // START    : _/---\________________
    // byte_cou : xxxxxx 0 x 1 x 2 x 3 x
    // BCD_DONE : _____/---\____________  if BWD = Byte

    reg             run_bcd;        // set while further bytes have to be processed
    reg      [1:0]  byte_cou;       // index of the byte that is processed now
    reg     [15:0]  byte_pair;      // {byte of SRC1 , byte of SRC2}

    wire     [7:0]  result;         // two corrected BCD digits
    wire            carry, carry_lsd, carry_msd;
    wire            begin_op;       // a BCD operation starts in this cycle
    wire            one_byte;       // operand is a single byte
    wire            last_byte;      // counter has reached the operand size

    assign begin_op  = START & DO_BCD;
    assign one_byte  = (BWD == 2'd0);
    assign last_byte = (BWD == byte_cou);

    // Sequencer : runs after START for all sizes except Byte until the last byte
    always @(posedge BCLK or negedge BRESET)
        if (!BRESET) run_bcd <= 1'b0;
          else
            run_bcd <= (begin_op & ~one_byte) | (run_bcd & ~last_byte);

    // Byte select : START always takes byte 0, afterwards the counter decides
    always @(*)
        casex ({START, byte_cou})
          3'b1_xx : byte_pair = {SRC1[7:0],   SRC2[7:0]};
          3'b0_0x : byte_pair = {SRC1[15:8],  SRC2[15:8]};
          3'b0_10 : byte_pair = {SRC1[23:16], SRC2[23:16]};
          3'b0_11 : byte_pair = {SRC1[31:24], SRC2[31:24]};
        endcase

    // First byte uses the carry of the PSR, later bytes the stored carry
    assign carry = START ? CY_IN : CY_OUT;

    // SRC2 digit is operand A, SRC1 digit is operand B : SUBP gives SRC2 - SRC1
    BCDADDER lsd_inst ( .A_IN(byte_pair[3:0]), .B_IN(byte_pair[11:8]), .CY_IN(carry), .SUBP(SUBP),
                        .OUT(result[3:0]), .CY_OUT(carry_lsd) );

    BCDADDER msd_inst ( .A_IN(byte_pair[7:4]), .B_IN(byte_pair[15:12]), .CY_IN(carry_lsd), .SUBP(SUBP),
                        .OUT(result[7:4]), .CY_OUT(carry_msd) );

    // Registers without reset : byte counter, carry and the four result bytes
    always @(posedge BCLK)
        begin
            byte_cou <= START ? 2'd1 : byte_cou + {1'b0, run_bcd};
            CY_OUT   <= carry_msd;

            if (START)            BCD_Q[7:0]   <= result;
            if (~byte_cou[1])     BCD_Q[15:8]  <= result;
            if (byte_cou == 2'd2) BCD_Q[23:16] <= result;
            if (byte_cou == 2'd3) BCD_Q[31:24] <= result;
        end

    // Byte operand : done together with START ; else with the last byte
    assign BCD_DONE = (begin_op & one_byte) | (run_bcd & last_byte);

endmodule
|
203 |
|
|
|
204 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
205 |
9 |
ns32kum |
//
|
206 |
|
|
// 4. DFPU_ADDSUB Double precision floating point adder and subtractor
|
207 |
|
|
//
|
208 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
209 |
|
|
module DFPU_ADDSUB (
    input               BCLK,
    input        [1:0]  START,
    input       [31:0]  SRC1, SRC2, // the input data
    input       [20:0]  MAN1, MAN2, // upper mantissa bits of the operands, used above SRC1/SRC2
    input        [5:0]  SRCFLAGS,   // NAN, ZERO and SIGN of operands ; [4] and [5] are used as signs of operand 1 and 2
    input        [1:0]  BWD,        // size of integer
    input        [3:0]  SELECT,     // upper 2 bits : R.T.F. code , lower 2 bits : 0x ADD/SUB , 10 MOViL , 11 R.T.F.
    output      [69:0]  OUT,
    output reg  [31:0]  IOUT,       // result of ROUNDLi/TRUNCLi/FLOORLi = R.T.F.
    output       [1:0]  CMPRES );

    // Double precision adder/subtractor. The same datapath also serves MOViL
    // (integer to float), ROUNDLi/TRUNCLi/FLOORLi (float to integer) and the
    // first result of the compare.
    //
    // Cycle counts noted by the original author :
    // MOViL    : 2 cycles
    // ROUNDLi  : 3 cycles (+TRUNC & FLOOR)
    // ADD/SUB  : 4 cycles
    // CMP      : 2 cycles

    reg     [69:0]  outreg;

    // ++++++++++++++++++++++++++++++++++
    // MOViL : 1. Pipeline stage : needs 3 cycles

    reg     [31:0]  movdat;
    reg     [31:0]  movif;
    reg             sign_movif;

    // Sign extension of the integer operand to 32 bits
    always @(*)
        casex (BWD)
          2'b00   : movdat = {{24{SRC1[7]}}, SRC1[7:0]};    // Byte
          2'b01   : movdat = {{16{SRC1[15]}},SRC1[15:0]};   // Word
          default : movdat = SRC1[31:0];                    // Double
        endcase

    // Absolute value, registered for better timing. -2^31 is kept !
    always @(posedge BCLK) movif <= ({32{movdat[31]}} ^ movdat) + {31'h0,movdat[31]};

    always @(posedge BCLK) sign_movif <= movdat[31];

    // ++++++++++++++++++++++++++++++++++
    // ROUNDLi/TRUNCLi/FLOORLi : 1. pipeline stage
    // (original note : can the opcode decoder deliver the 64 bit operand directly ? From register "yes")

    reg             ovflag,ovflag2;
    reg             rovfl;
    reg             minint;
    wire    [11:0]  rexdiff,rexo;
    wire            ganzklein;  // flag for 0 : number is too small for any integer bit

    // Bits 4..0 are the right shift value : same value space as Single FP
    assign rexdiff = 12'h41D - {1'b0,SRC1[30:20]};

    // ovflag2 comes from the end of rounding : check for overflow
    always @(posedge BCLK) rovfl <= (ovflag | ovflag2) & (SELECT[1:0] == 2'b11) & ~minint;

    // A large positive difference is a very small number :
    assign ganzklein = (~rexdiff[11] & (rexdiff[10:5] != 6'b0));    // 0 is implicit via SRC1[30:20]=0

    // Detection of overflow
    assign rexo = ({1'b0,SRC1[30:20]} - {11'h1FF,~BWD[1]});    // subtract B/W = 3FF , D = 3FE

    always @(*)     // 0 is included implicitly
        casex (BWD)
          2'b00   : ovflag = (~rexo[11] & (rexo[10:3] != 8'h0));    // Exponent 0..7 because -128.4 => -128
          2'b01   : ovflag = (~rexo[11] & (rexo[10:4] != 7'h0));    // Exponent 0..15 look above
          default : ovflag = (~rexo[11] & (rexo[10:5] != 6'h0));    // but Exponent only 0..30
        endcase

    always @(posedge BCLK)
        if (START[1]) minint <= (SRC1 == 32'hC1E0_0000) & (SRC2 == 32'h0) & BWD[1];    // detection of -2^31

    // ++++++++++++++++++++++++++++++++++++
    // ADD/SUB : 1. Pipeline Stage : which operand is bigger ? Exchange if necessary
    // SUB/CMP : SRC2 - SRC1

    reg             ex_null,ma_null,ex_msb,ma_msb;
    reg     [10:0]  expo1,expo2;
    wire    [11:0]  exdiff,exdiff12;
    wire    [20:0]  madiff;
    wire            switch,nan,sign,sign1,sign2;
    reg      [5:0]  shift1,shift2;

    // Pipeline register :
    reg     [63:0]  muxsrc2;
    wire    [55:3]  pipe1;      // numbers for the right shifter
    wire     [5:0]  shift;
    reg      [2:0]  pshift;
    reg             vorz,addflag;

    wire    [52:0]  muxsrc1;
    wire    [32:0]  lowdiff;

    // used at the end
    assign nan = (SELECT[1:0] == 2'b11) ? SRCFLAGS[1] : (~SELECT[1] & (SRCFLAGS[3] | SRCFLAGS[1]));

    assign exdiff   = {1'b0,SRC2[30:20]} - {1'b0,SRC1[30:20]};  // Difference of Exponents
    assign madiff   = {1'b0,SRC2[19:0]}  - {1'b0,SRC1[19:0]};   // Difference of Mantissa
    assign exdiff12 = {1'b0,SRC1[30:20]} - {1'b0,SRC2[30:20]};  // Diff. Exponents exchanged

    always @(posedge BCLK)
        if (START[0])
            begin
                ex_null <= (exdiff[10:0] == 11'h0);
                ma_null <= (madiff[19:0] == 20'h0);
                ex_msb  <= exdiff[11];
                ma_msb  <= madiff[20];
                shift1  <= (exdiff[10:6]   != 5'h0) ? 6'h3F : exdiff[5:0];      // saturate at 63
                shift2  <= (exdiff12[10:6] != 5'h0) ? 6'h3F : exdiff12[5:0];
                expo1   <= SRC1[30:20];
                expo2   <= SRC2[30:20];
            end

    assign lowdiff = {1'b0,SRC2} - {1'b0,SRC1};     // LSD compare

    assign switch = ex_msb | (ex_null & (ma_msb | (ma_null & lowdiff[32])));    // exchange ?

    assign muxsrc1 = switch ? {MAN2,SRC2} : {MAN1,SRC1};

    assign pipe1 = SELECT[1] ? (ganzklein ? 53'd0 : {1'b1,SRC1[19:0],SRC2}) : muxsrc1;  // feeding of R.T.F.
    assign shift = SELECT[1] ? {1'b0,rexdiff[4:0]} : (switch ? shift2 : shift1);

    always @(posedge BCLK)  // Pipeline Reg
        begin
            muxsrc2 <= switch ? {expo1,MAN1,SRC1} : {expo2,MAN2,SRC2};  // Incl. Exponent & "1" of mantissa
            pshift  <= shift[2:0];
        end

    //  SRC2   SRC1     : switch = 0        SRC2   SRC1 : switch = 1
    //    5  +   3  : +(5 + 3) =  8       3  +   5  : +(5 + 3) =  8      SELECT[0] = 0
    //    5  + (-3) : +(5 - 3) =  2       3  + (-5) : -(5 - 3) = -2
    //  (-5) +   3  : -(5 - 3) = -2     (-3) +   5  : +(5 - 3) =  2
    //  (-5) + (-3) : -(5 + 3) = -8     (-3) + (-5) : -(5 + 3) = -8
    //    5  -   3  : +(5 - 3) =  2       3  -   5  : -(5 - 3) = -2      SELECT[0] = 1
    //    5  - (-3) : +(5 + 3) =  8       3  - (-5) : +(5 + 3) =  8
    //  (-5) -   3  : -(5 + 3) = -8     (-3) -   5  : -(5 + 3) = -8
    //  (-5) - (-3) : -(5 - 3) = -2     (-3) - (-5) : +(5 - 3) =  2

    assign sign1 = SRCFLAGS[4];
    assign sign2 = SRCFLAGS[5];

    always @(posedge BCLK)  // Pipeline Reg
        begin
            vorz    <= switch ? (SELECT[0] ^ sign1) : sign2;
            addflag <= ~(SELECT[0] ^ (sign1 ^ sign2));
        end

    // CMPF : 1. Pipeline Stage : first result ; it is stored in a register one level higher

    assign CMPRES[1] = ~CMPRES[0] & (switch ? ~sign1 : sign2);  // look table above
    assign CMPRES[0] = (ex_null & ma_null & (sign1 == sign2) & (lowdiff == 33'h0)) | (SRCFLAGS[2] & SRCFLAGS[0]);

    // ++++++++++++++++++++++++++++++++++
    // ADD/SUB + ROUND/TRUNC : 2. Step : Barrelshifter to the right -->
    // Every stage collects the bits shifted out into the lowest bit (sticky bit).
    // The stage for shift[3] is the pipeline register.

    wire    [55:0]  brshifta,brshiftb,brshiftd,brshifte,brshiftf;
    reg     [55:0]  brshiftc;

    assign brshifta = shift[5]  ? {32'h0, pipe1[55:33],  (pipe1[32:3]    != 30'h0)} : {pipe1,3'h0};
    assign brshiftb = shift[4]  ? {16'h0,brshifta[55:17],(brshifta[16:0] != 17'h0)} : brshifta;
    always @(posedge BCLK)
        brshiftc   <= shift[3]  ? { 8'h0, brshiftb[55:9], (brshiftb[8:0] !=  9'h0)} : brshiftb;
    assign brshiftd = pshift[2] ? { 4'h0, brshiftc[55:5], (brshiftc[4:0] !=  5'h0)} : brshiftc;
    assign brshifte = pshift[1] ? { 2'h0, brshiftd[55:3], (brshiftd[2:0] !=  3'h0)} : brshiftd;
    assign brshiftf = pshift[0] ? { 1'b0, brshifte[55:2], (brshifte[1:0] !=  2'h0)} : brshifte;

    // ++++++++++++++++++++++++++++++++++
    // ROUNDLi/TRUNCLi/FLOORLi : 3. Step : round to Integer
    // Integer bits are brshiftf[55:25], brshiftf[24] is the first fraction bit.

    reg             car_ry;
    wire     [1:0]  inex;
    wire    [32:0]  iadder;
    wire            restbits;

    assign restbits = (brshiftf[23:0] != 24'h0);
    assign inex     = {brshiftf[24],restbits};  // Inexact-Flag-Data transferred to multiplexer at the end

    always @(*)
        casex (SELECT[3:2])
            2'b00 : car_ry = sign1 ^ (((brshiftf[25:24] == 2'b11) & ~restbits) | (inex == 2'b11));  // ROUNDLi
            2'b1x : car_ry = sign1 ? (~ganzklein & (inex == 2'b00)) : 1'b0; // +numbers like TRUNCLi, -numbers round to "-infinity"
          default : car_ry = sign1; // TRUNCLi , simple cut off
        endcase

    assign iadder = (sign1 ? {2'b11,~brshiftf[55:25]} : {2'b0,brshiftf[55:25]}) + {32'h0,car_ry};

    always @(posedge BCLK) IOUT <= minint ? 32'h8000_0000 : iadder[31:0];

    always @(*)     // special overflow detection i.e. -129 to -255 at Byte
        casex (BWD) // or 127.9 -> 128 = error !
          2'b00   : ovflag2 = (iadder[8]  != iadder[7]);    // Byte
          2'b01   : ovflag2 = (iadder[16] != iadder[15]);   // Word
          default : ovflag2 = (iadder[32] != iadder[31]);   // Double
        endcase

    // ++++++++++++++++++++++++++++++++++
    // ADD/SUB : 3. Step : Addition or Subtraction

    wire    [67:0]  result;
    wire    [55:0]  blshifti;
    wire    [12:0]  shiftl;
    wire            shift_32;
    wire    [65:0]  add_q;

    // The central adder : the subtraction needs 3 Guard-Bits after LSB for correct rounding
    assign result = {1'b0,muxsrc2,3'b000} + (addflag ? {12'h0,brshiftf} : {12'hFFF,~brshiftf}) + {67'd0,~addflag};

    assign blshifti = SELECT[1] ? {movif,24'h0} : result[55:0];     // Feeding of MOViL, comes from Register

    assign shiftl = SELECT[1] ? 13'h041E : {1'b0,result[67:56]};    // MOViL

    assign shift_32 = (blshifti[55:24] == 32'h0);

    // In case of ADD the result bypasses the barrelshifter : LSB of exponent has changed
    assign add_q = (muxsrc2[53] != result[56]) ? {result[67:3],(result[2:0] != 3'b000)}
                                               : {result[67:56],result[54:2],(result[1:0] != 2'b00)} ;

    // ++++++++++++++++++++++++++++++++++
    // ADD/SUB : 4. Step : Barrelshifter left for SUB and MOViF :

    wire            shift_16,shift_8,shift_4,shift_2,shift_1,zero;
    wire     [1:0]  lsb_bl;
    wire    [55:0]  blshifta,blshiftb,blshiftc,blshiftd,blshifte,blshiftf;
    wire    [12:0]  expol;

    assign blshifta = shift_32 ? {blshifti[23:0],32'h0} : blshifti;
    assign shift_16 = (blshifta[55:40] == 16'h0);
    assign blshiftb = shift_16 ? {blshifta[39:0],16'h0} : blshifta;
    assign shift_8  = (blshiftb[55:48] == 8'h00);
    assign blshiftc = shift_8  ? {blshiftb[47:0],8'h0}  : blshiftb;
    assign shift_4  = (blshiftc[55:52] == 4'h0);
    assign blshiftd = shift_4  ? {blshiftc[51:0],4'h0}  : blshiftc;
    assign shift_2  = (blshiftd[55:54] == 2'b00);
    assign blshifte = shift_2  ? {blshiftd[53:0],2'b0}  : blshiftd;
    assign shift_1  = ~blshifte[55];
    assign blshiftf = shift_1  ? {blshifte[54:0],1'b0}  : blshifte;

    // Overflow at ROUNDLi/TRUNCLi/FLOORLi is shown in overflow of exponent , SELECT[1] is then 1
    assign expol = shiftl - {7'h00,shift_32,shift_16,shift_8,shift_4,shift_2,shift_1};

    // Inexact at ROUNDLi/TRUNCLi/FLOORLi : evaluation for all one level higher.
    // FIX : the compare is restricted to SELECT[1:0]. The former "SELECT == 2'b11"
    // compared all 4 bits against the zero extended constant, so it was only true
    // for R.T.F. code 00 (ROUNDLi). TRUNCLi and FLOORLi then delivered bits of the
    // MOViL left shifter instead of "inex".
    assign lsb_bl = (SELECT[1:0] == 2'b11) ? inex : {blshiftf[2],(blshiftf[1:0] != 2'b0)};

    assign zero =  (~SELECT[1] & SRCFLAGS[2] & SRCFLAGS[0])
                 | ((blshifti == 56'h0) & ((~addflag & ~SELECT[1]) | (SELECT[1:0] == 2'b10)));

    assign sign = SELECT[1] ? sign_movif : (vorz & ~zero);  // sign for MOViL

    // 2. Pipeline register for ADD , SUB and MOViL
    always @(posedge BCLK)
        outreg <= (addflag & ~SELECT[1]) ? {nan,zero,sign,1'b0,add_q}
                                         : {nan,zero,sign,expol,blshiftf[54:3],lsb_bl};

    // ++++++++++++++++++++++++++++++++++

    // An overflow of R.T.F. replaces the two upper exponent bits by 01
    assign OUT = {outreg[69:67],(rovfl ? 2'b01 : outreg[66:65]),outreg[64:0]};

endmodule
|
468 |
|
|
|
469 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
470 |
9 |
ns32kum |
//
|
471 |
|
|
// 5. DFPU_MISC Double precision floating point miscellaneous operations
|
472 |
|
|
//
|
473 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
474 |
23 |
ns32kum |
module DFPU_MISC (
    input               BCLK,
    input        [1:0]  START,      // [0] loads the SCALB registers , [1] loads the operand register
    input       [31:0]  SRC1, SRC2,
    input       [19:0]  MAN2,       // upper mantissa bits placed above the low word in the long SCALB result
    input        [5:0]  SRCFLAGS,
    input        [3:0]  MODE,       // [3] : 1 = LOGB/SCALB , 0 = MOVLF/MOVFL ; [2] : LOGB ; [1] : short format ; [0] : MOVFL
    output reg  [69:0]  OUT );

    // Miscellaneous floating point operations : MOVFL, MOVLF, LOGBf and SCALBf.
    // All results are built in parallel, MODE selects the one that is registered.

    reg     [63:0]  daten;      // operand register : {SRC1,SRC2}
    wire            sign;

    always @(posedge BCLK) if (START[1]) daten <= {SRC1,SRC2};

    assign sign = daten[63];

    // +++++++++++++++++++++++++++  MOVFL and MOVLF  +++++++++++++++++++++++++++++++++++

    wire    [12:0]  lexpo,sexpo;
    wire    [69:0]  movfl_res,movlf_res,fl_lf;

    // 8 bit exponent is moved to the bias of the long format
    assign lexpo = {5'b0,daten[62:55]} + 13'h0380;  // -7F + 3FF

    // 11 bit exponent : above 47E the value FFF is delivered, below 381 the value 0
    assign sexpo =  (daten[62:52] > 11'h47E) ? 13'h0FFF
                 : ((daten[62:52] < 11'h381) ? 13'h0 : {2'b0,{4{daten[62]}},daten[58:52]});

    assign movfl_res = {SRCFLAGS[1:0],sign,lexpo,daten[54:32],31'h0};
    // the two last mantissa bits are repeated at the end together with the "or" of all lower bits
    assign movlf_res = {SRCFLAGS[1:0],sign,sexpo,daten[51:29],28'h0,daten[29:28],(daten[27:0] != 28'h0)};

    assign fl_lf = MODE[0] ? movfl_res : movlf_res;     // MOVFL : MOVLF

    // +++++++++++++++++++++++++++  LOGBf  +++++++++++++++++++++++++++++++++++

    wire     [9:0]  sel_data,unbiased,shift_l8,shift_l4,shift_l2;
    wire     [8:0]  shift_l;
    wire            posi_8,posi_4,posi_2,posi_1;
    wire     [4:0]  calc_exp;
    wire     [6:0]  logb_exp;
    wire    [69:0]  logb_res;

    // Exponent without its top bit ; the short format is extended to 10 bits
    assign sel_data = MODE[1] ? {{3{~daten[62]}},daten[61:55]} : daten[61:52];
    // Magnitude of the exponent without bias
    assign unbiased = daten[62] ? (sel_data + 10'h001) : ~sel_data;

    // detection of leading "1" : normalize to the left in steps of 8, 4, 2 and 1
    assign posi_8   = (unbiased[9:2] == 8'h00);
    assign shift_l8 = posi_8 ? {unbiased[1:0],8'h00} : unbiased;
    assign posi_4   = (shift_l8[9:6] == 4'h0);
    assign shift_l4 = posi_4 ? {shift_l8[5:0],4'h0} : shift_l8;
    assign posi_2   = (shift_l4[9:8] == 2'b00);
    assign shift_l2 = posi_2 ? {shift_l4[7:0],2'b0} : shift_l4;
    assign posi_1   = ~shift_l2[9];
    assign shift_l  = posi_1 ? {shift_l2[7:0],1'b0} : shift_l2[8:0];    // top bit is hidden "1"

    // original note : Minimum is "F" = for exponent +/-1 <=> 2^0
    assign calc_exp = 5'h08 - {1'b0,posi_8,posi_4,posi_2,posi_1};

    // exponent is set one level higher for F and L
    assign logb_exp = MODE[1] ? {{4{~calc_exp[4]}},{3{calc_exp[4]}}} : {~calc_exp[4],{6{calc_exp[4]}}};

    assign logb_res = {SRCFLAGS[1],1'b0,~daten[62],2'b00,logb_exp,calc_exp[3:0],shift_l,45'h0};

    // ++++++++++++++++++++++++  SCALBf  ++++++++++++++++++++++++++++++++++

    reg      [3:0]  rshift;
    reg     [10:0]  shf_r0,dexpo;   // dexpo = Exponent Destination
    reg             huge;
    reg             svorz,dvorz;    // sign of SRC1 , sign of SRC2

    wire    [10:0]  shf_r1,shf_r2,shf_r4,shf_r8;
    wire    [12:0]  addexp,newexp,finexp;
    wire            nan;
    wire    [69:0]  scalb_res;

    always @(posedge BCLK)  // 2**0,9.. is transformed to 2**0 = 1 -> no change at SRC2
        if (START[0])
            begin
                shf_r0 <= ( SRC1[30] | ((SRC1[29:23] == 7'h7F) & (MODE[1] | (SRC1[22:20] == 3'd7))) ) ?
                                (MODE[1] ? {4'd1,SRC1[22:16]} : {1'b1,SRC1[19:10]}) : 11'd0;
                rshift <= MODE[1] ? 4'd6 - SRC1[26:23] : 4'd9 - SRC1[23:20];
                huge   <= MODE[1] ? ( SRC1[30] & ((SRC1[29:26] != 4'd0) | (SRC1[25:23] == 3'h7)) )  // >406 in Double Style
                                  : ( SRC1[30] & ((SRC1[29:24] != 6'd0) | (SRC1[23] & (SRC1[22] | SRC1[21]))) );   // >409
                svorz  <= SRC1[31];
                dvorz  <= SRC2[31];
                dexpo  <= MODE[1] ? {3'd0,SRC2[30:23]} : SRC2[30:20];
            end

    // a mini-TRUNC of 11 Bits : right shift by rshift
    assign shf_r1 = rshift[0] ? {1'b0,shf_r0[10:1]} : shf_r0;
    assign shf_r2 = rshift[1] ? {2'd0,shf_r1[10:2]} : shf_r1;
    assign shf_r4 = rshift[2] ? {4'd0,shf_r2[10:4]} : shf_r2;
    assign shf_r8 = rshift[3] ? {8'd0,shf_r4[10:8]} : shf_r4;

    // Scale value is subtracted if SRC1 is negative, else it is added
    assign addexp = svorz ? {2'd0,dexpo} - {2'd0,shf_r8} : {2'd0,dexpo} + {2'd0,shf_r8};

    assign newexp = MODE[1] ? {addexp[9:8],{3{addexp[7]}},addexp[7:0]} : addexp[12:0];

    assign finexp = SRCFLAGS[2] ? {3'd0,newexp[9:0]}    // never an Overflow if SRC2 = 0.0 !
                                : {(huge ? {svorz,1'b1} : newexp[12:11]),newexp[10:0]};    // Overflow or Underflow

    assign nan = SRCFLAGS[3] | SRCFLAGS[1];

    assign scalb_res = MODE[1] ?    // Mantissa doesn't change !
                          {nan,SRCFLAGS[2],daten[31],finexp,daten[22:0],daten[28:0],2'b00}
                        : {nan,SRCFLAGS[2],dvorz,finexp,MAN2,daten[31:0],2'b00};

    // ++++++++++++++++++++++++  Output  ++++++++++++++++++++++++++++++++++++++++++++++++++++++

    always @(posedge BCLK) OUT <= MODE[3] ? (MODE[2] ? logb_res : scalb_res) : fl_lf ;  // LOGB/SCALB : MOVLF/MOVFL

endmodule
|
579 |
|
|
|
580 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
581 |
9 |
ns32kum |
//
|
582 |
|
|
// 6. DFPU_MUL Double precision floating point multiplier
|
583 |
|
|
//
|
584 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
585 |
9 |
ns32kum |
module DFPU_MUL (
    input               BCLK,
    input       [31:0]  SRC1, SRC2, // only the exponent fields [30:20] are used here
    input               START,      // that is START[0]
    input      [105:0]  MRESULT,    // product of the mantissas
    input        [5:0]  SRCFLAGS,   // NAN and ZERO flags, [5] and [4] are the signs
    output reg  [69:0]  OUT );      // The result

    // Result stage of the double precision multiplier : adds the exponents,
    // removes the bias and picks the 52 bit mantissa out of the product.

    reg     [12:0]  exponent;       // sum of both exponents, still with double bias
    wire    [12:0]  expoh,expol;
    wire            orlow;
    wire     [1:0]  restlow,resthigh;
    wire            zero,nan,sign;
    wire    [69:0]  res_high,res_low;

    assign nan  = SRCFLAGS[3] | SRCFLAGS[1];    // one is NAN -> error
    assign zero = SRCFLAGS[2] | SRCFLAGS[0];    // one is NULL -> NULL is the result
    assign sign = (SRCFLAGS[5] ^ SRCFLAGS[4]) & ~zero;

    always @(posedge BCLK) if (START) exponent <= {2'b00,SRC1[30:20]} + {2'b00,SRC2[30:20]};

    assign expoh = exponent - 13'h03FE;     // top bit of MRESULT is set : exponent is one higher
    assign expol = exponent - 13'h03FF;     // top bit of MRESULT is 0

    // The two lowest result bits : first bit below the mantissa and "or" of the rest
    assign orlow    = (MRESULT[50:0] != 51'b0);
    assign restlow  = {MRESULT[51],orlow};
    assign resthigh = {MRESULT[52],(MRESULT[51] | orlow)};

    // 52 Bit Mantissa
    assign res_high = {nan,zero,sign,expoh,MRESULT[104:53],resthigh};
    assign res_low  = {nan,zero,sign,expol,MRESULT[103:52],restlow};

    always @(posedge BCLK) OUT <= MRESULT[105] ? res_high : res_low;

endmodule
|
620 |
|
|
|
621 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
622 |
9 |
ns32kum |
//
|
623 |
23 |
ns32kum |
// 7. SCANDIG Scan digit for leading one
|
624 |
9 |
ns32kum |
//
|
625 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
626 |
23 |
ns32kum |
module SCANDIG (
    input   [3:0]   DIN,        // one digit of 4 bits
    output          MBIT,       // upper bit of the position of the leading one
    output          LBIT,       // lower bit of the position of the leading one
    output          NONZ );     // digit is not zero

    // Scans one digit for its leading one. {MBIT,LBIT} is the bit number :
    //   1xxx = 11 , 01xx = 10 , 001x = 01 , 0001 = 00
    // A digit of 0000 also gives 00, it is marked by NONZ = 0.

    assign NONZ = (DIN != 4'd0);
    assign MBIT = DIN[3] | DIN[2];
    assign LBIT = DIN[3] | (DIN[3:1] == 3'b001);

endmodule
|
636 |
|
|
|
637 |
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
638 |
|
|
//
|
639 |
|
|
// 8. DIVI_PREP Prepare data for the divider
|
640 |
|
|
//
|
641 |
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
642 |
9 |
ns32kum |
module DIVI_PREP (SRC, BWD, NOT_DEI, EXTDATA, DOUT, MSB, NULL, MINUS);

	// Operand preparation for the integer divider :
	//   1. extend SRC from the selected operand size to 32 bits (sign extension only if NOT_DEI is set),
	//   2. deliver the magnitude in DOUT and the sign in MINUS (MINUS is always 0 if NOT_DEI is clear),
	//   3. report the bit position of the most significant set bit of DOUT in MSB,
	//   4. report in NULL that DOUT is zero.

	input	[31:0]	SRC;
	input	 [1:0]	BWD;
	input			NOT_DEI;
	input			EXTDATA;

	output	[31:0]	DOUT;
	output	 [4:0]	MSB;
	output			NULL;
	output			MINUS;

	reg		[31:0]	widened;	// SRC extended to 32 bits
	reg		 [2:0]	top_digit;	// index of the highest 4-bit digit of DOUT that is not zero

	wire	 [1:0]	op_size;	// effective size selection : 00 = 8 bit, 01 = 16 bit, 1x = 32 bit
	wire			force_size;	// neither NOT_DEI nor EXTDATA is set
	wire	[31:0]	flipped;
	wire	 [7:0]	digit_hi, digit_lo, digit_nz;

	// With NOT_DEI or EXTDATA set BWD is used directly, otherwise the size is raised :
	// BWD = 00 selects 16 bit, every other BWD selects 32 bit.
	assign force_size = ~(NOT_DEI | EXTDATA);
	assign op_size    = force_size ? {|BWD, 1'b1} : BWD;

	always @(*)
		if (op_size[1])
			widened = SRC;
		else if (op_size[0])
			widened = {{16{SRC[15] & NOT_DEI}}, SRC[15:0]};
		else
			widened = {{24{SRC[7] & NOT_DEI}}, SRC[7:0]};

	assign MINUS = NOT_DEI & widened[31];

	// Two's complement negation if MINUS is set : invert all bits, then add 1.
	assign flipped = widened ^ {32{MINUS}};
	assign DOUT    = flipped + {31'h0, MINUS};

	// now find most significant set bit : one scanner per 4-bit digit of DOUT

	SCANDIG digit_0 (.DIN(DOUT[3:0]),   .MBIT(digit_hi[0]), .LBIT(digit_lo[0]), .NONZ(digit_nz[0]) );
	SCANDIG digit_1 (.DIN(DOUT[7:4]),   .MBIT(digit_hi[1]), .LBIT(digit_lo[1]), .NONZ(digit_nz[1]) );
	SCANDIG digit_2 (.DIN(DOUT[11:8]),  .MBIT(digit_hi[2]), .LBIT(digit_lo[2]), .NONZ(digit_nz[2]) );
	SCANDIG digit_3 (.DIN(DOUT[15:12]), .MBIT(digit_hi[3]), .LBIT(digit_lo[3]), .NONZ(digit_nz[3]) );
	SCANDIG digit_4 (.DIN(DOUT[19:16]), .MBIT(digit_hi[4]), .LBIT(digit_lo[4]), .NONZ(digit_nz[4]) );
	SCANDIG digit_5 (.DIN(DOUT[23:20]), .MBIT(digit_hi[5]), .LBIT(digit_lo[5]), .NONZ(digit_nz[5]) );
	SCANDIG digit_6 (.DIN(DOUT[27:24]), .MBIT(digit_hi[6]), .LBIT(digit_lo[6]), .NONZ(digit_nz[6]) );
	SCANDIG digit_7 (.DIN(DOUT[31:28]), .MBIT(digit_hi[7]), .LBIT(digit_lo[7]), .NONZ(digit_nz[7]) );

	// Priority encoder : the highest digit that is not zero wins, digit 0 is the fallback.
	always @(*)
		if      (digit_nz[7]) top_digit = 3'd7;
		else if (digit_nz[6]) top_digit = 3'd6;
		else if (digit_nz[5]) top_digit = 3'd5;
		else if (digit_nz[4]) top_digit = 3'd4;
		else if (digit_nz[3]) top_digit = 3'd3;
		else if (digit_nz[2]) top_digit = 3'd2;
		else if (digit_nz[1]) top_digit = 3'd1;
		else                  top_digit = 3'd0;

	assign NULL = ~|digit_nz;

	// Digit index supplies the upper 3 bits, the scanner of that digit the lower 2 bits.
	assign MSB = {top_digit, digit_hi[top_digit], digit_lo[top_digit]};

endmodule
|
701 |
|
|
|
702 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
703 |
9 |
ns32kum |
//
|
704 |
23 |
ns32kum |
// 9. DFPU_DIV The divider for all divide opcodes : double, single and integer
|
705 |
9 |
ns32kum |
//
|
706 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
707 |
|
|
module DFPU_DIV ( BCLK, BRST, START, SRC1, SRC2, MAN1, MAN2, SRCFLAGS, FL, BWD, OPCODE, OUT, DONE, DIVI_OUT, DVZ_TRAP, DEI_OVF );

	// Shared radix-4 divider : every clock cycle 1x, 2x and 3x the divisor are subtracted from the
	// working register "divreg" in parallel and two quotient bits are shifted into "divsr".
	// The divisor comes from SRC1 (kept in save1/dreimal), the dividend from SRC2 (loaded into divreg).
	//
	// This version needs for Double 28+1 cycles if MAN1<MAN2 otherwise 28+2.
	// For Single it needs 13+1 cycles or 13+2.

	input			BCLK,BRST;
	input	 [3:0]	START;		// START & recognized Divider Operation :
								// [3] starts an integer divide, [2] starts a floating point divide
								// (driven as go_divi/go_divf by DP_LOGIK), [1:0] is the plain START
	input	[31:0]	SRC1,SRC2;	// input data
	input	[20:0]	MAN1,MAN2;
	input	 [5:0]	SRCFLAGS;	// NAN and ZERO
	input			FL;
	input	 [1:0]	BWD;
	input	 [2:0]	OPCODE;		// for all DIVi variants

	output	[69:0]	OUT;		// the result
	output reg		DONE;		// Pipeline-Flag
	output	[63:0]	DIVI_OUT;	// for Integer Division
	output			DVZ_TRAP;	// Divide by Zero Trap
	output	 [1:0]	DEI_OVF;	// DEI Overflow

	// ++++++++++++++ for Integer Division ++++++++++++++
	reg				run_divi;
	reg				divi_pipe1,divi_pipe2,divi_pipe3,divi_pipe4;
	reg				neg_src1,neg_src2,nul_src2;
	reg		 [4:0]	msb_src1;
	reg		 [5:0]	msb_src2;
	reg		[31:0]	ivalue,src2_reg,pipe_reg;
	reg		 [4:0]	divi_counter;
	reg				sub_case;
	reg				negativ;
	reg		[32:0]	divi_result;
	reg		[63:0]	DIVI_OUT;
	reg				DVZ_TRAP,dvz_pipe;
	reg				sel_in;
	reg		[62:0]	din_mux;
	reg				dei_pipe;
	reg				extdata;	// extended data : 2 data packets, only apply to DEI
	reg		 [2:0]	addoff;
	reg				next_msb2;
	reg		[31:0]	dei_result;
	reg		 [1:0]	DEI_OVF;

	wire	[31:0]	i_in;
	wire	[37:0]	i_out;		// from DIVI_PREP : [37] = MINUS, [36:32] = MSB, [31:0] = DOUT
	wire	 [6:0]	diff_msb;
	wire	 [5:1]	shift_r;
	wire	[62:0]	shift_2;
	wire	[62:0]	shift_4;
	wire	[62:0]	shift_8;
	wire	[62:0]	shift_16;
	wire	[64:0]	shift_32;
	wire			stop_divi,neg_flag;
	wire			rest_null,plus_1,ist_null;
	wire			not_dei;
	wire			valdata;	// Data <> 0 at DEI

	// ++++++++++++++ Floating Point & calculation path ++++++++
	reg		[69:0]	OUT;
	reg		[32:0]	save1;
	reg				runflag;
	reg		[55:0]	dreimal;
	reg		[56:0]	divreg,divsr;
	reg		[31:0]	divreg_ext;
	reg		[12:0]	exponent;

	wire			load_src1,load_src2;
	wire	[56:0]	sub1,sub2,sub3;
	wire	[32:0]	src_1;
	wire	[20:0]	man_1;
	wire	[12:0]	expoh,expol,offset;
	wire			restlsb,restlow,resthigh;
	wire			zero,nan,sign,ende;
	wire			orlow_s,orlow_d;
	wire			short;

	// +++++++++++++++++++++++++++  Integer Division, DEI  +++++++++++++++++++++++++++

	assign not_dei = OPCODE[2];	// 0 = DEI
	always @(posedge BCLK) if (START[3]) extdata <= ~START[1];	// during START[0] for SRC1 not valid

	always @(posedge BCLK or negedge BRST)
		if (!BRST) run_divi <= 1'b0;
		  else
			run_divi <= (START[3] & ~ist_null) | (~divi_pipe4 & run_divi);	// Abort at DVZ Trap

	always @(posedge BCLK) divi_pipe1 <= START[3] & ~ist_null;	// no start if SRC1 = 0 : DVZ Trap
	always @(posedge BCLK) dei_pipe   <= divi_pipe1 & extdata;
	always @(posedge BCLK) divi_pipe2 <= extdata ? dei_pipe : divi_pipe1;

	always @(posedge BCLK) src2_reg <= SRC2;

	always @(posedge BCLK) sel_in <= START[3] | divi_pipe1;	// two times data for DEI
	assign i_in = sel_in ? src2_reg : SRC1;	// SRC1 goes through DIVI_PREP first, then the registered SRC2

	DIVI_PREP prep_inst ( .SRC(i_in), .BWD(BWD), .NOT_DEI(not_dei), .EXTDATA(extdata | START[0]),
						  .DOUT(i_out[31:0]), .MSB(i_out[36:32]), .NULL(ist_null), .MINUS(i_out[37]) );

	always @(posedge BCLK) dvz_pipe <= START[3] & ist_null;	// Pulse 1 cycle long
	always @(posedge BCLK) DVZ_TRAP <= dvz_pipe;	// one cycle later if DEI with extdata

	// Sign and MSB position of SRC1 are captured in the start cycle ...
	always @(posedge BCLK)
		if (START[3])
			begin
				neg_src1 <= i_out[37];
				msb_src1 <= i_out[36:32];
			end

	// ... zero flag and sign of SRC2 one cycle later.
	always @(posedge BCLK)
		if (divi_pipe1)
			begin
				nul_src2 <= ist_null;
				neg_src2 <= i_out[37];
			end

	always @(posedge BCLK) ivalue <= i_out[31:0];

	// The following is only for DEI :
	always @(posedge BCLK) pipe_reg <= {32{extdata}} & ivalue;	// Register must be 0 if not used

	assign valdata = extdata & ~ist_null;
	always @(*)
		casex (BWD)
		  2'b00   : addoff = {   1'b0,   1'b0,valdata};
		  2'b01   : addoff = {   1'b0,valdata,   1'b0};
		  default : addoff = {valdata,   1'b0,   1'b0};
		endcase

	always @(posedge BCLK) next_msb2 <= extdata & ist_null & divi_pipe1;	// Special case at DEI : MSD = 0

	always @(posedge BCLK)
		if (divi_pipe1) msb_src2 <= {addoff[2],(addoff[1:0] | i_out[36:35]),i_out[34:32]};
		  else
			if (next_msb2) msb_src2 <= {1'b0,i_out[36:32]};

	// Shifter for Source2

	assign diff_msb = {1'b0,msb_src2} - {2'b0,msb_src1};

	// negativ shift limited to 0 : Source2=0 calculated without special handling, result always 0
	assign shift_r = diff_msb[6] ? 5'd0 : diff_msb[5:1];	// LSB does not count

	always @(*)
		casex ({BWD,extdata})
		  3'b0x0  : din_mux = {31'b0,ivalue};	// the normal case for all except DEI
		  3'b001  : din_mux = {23'b0,pipe_reg,ivalue[7:0]};
		  3'b011  : din_mux = {15'b0,pipe_reg,ivalue[15:0]};
		  default : din_mux = {pipe_reg[30:0],ivalue};	// 63 Bit wide
		endcase

	// Each stage shifts left by its weight if the corresponding bit of shift_r is 0.
	assign shift_2  = shift_r[1] ? din_mux : {din_mux[60:0], 2'b0};
	assign shift_4  = shift_r[2] ? shift_2 : {shift_2[58:0], 4'b0};
	assign shift_8  = shift_r[3] ? shift_4 : {shift_4[54:0], 8'b0};
	assign shift_16 = shift_r[4] ? shift_8 : {shift_8[46:0],16'b0};	// Result is 63 Bit wide

	// 65 Bit result because of DEI
	assign shift_32 = shift_r[5] ? {1'b0,pipe_reg,ivalue} : {shift_16,2'b00};	// special case DEI : 32 times shift

	always @(posedge BCLK or negedge BRST)	// Flag for rounding, only if DEST <>0
		if (!BRST) divi_pipe3 <= 1'b0;
		  else
			divi_pipe3 <= divi_pipe2 | (divi_pipe3 & ~stop_divi);

	always @(posedge BCLK)
		if (divi_pipe2) divi_counter <= shift_r;
		  else divi_counter <= divi_counter - {4'b000,~stop_divi};	// should stop at 0

	assign stop_divi = (divi_counter == 5'h0);	// calculation ready

	always @(posedge BCLK) divi_pipe4 <= divi_pipe3 & stop_divi;

	assign neg_flag = neg_src1 ^ neg_src2;
	assign rest_null = (divreg[33:2] == 32'h0);

	always @(posedge BCLK) sub_case <= neg_flag & ~nul_src2;	// little help for MODi opcode

	// Result preparation :
	//		DEST SRC		QUO  REM /  DIV  MOD
	//		+33  +13 :		 2    7  /   2    7
	//		+33  -13 :		-2    7  /  -3   -6
	//		-33  +13 :		-2   -7  /  -3    6
	//		-33  -13 :		 2   -7  /   2   -7
	// divi_result[32] is the sign which is applied to the magnitude in [31:0] afterwards.
	always @(*)
		case (OPCODE[1:0])
		  2'b00 : divi_result = {neg_flag,divsr[31:0]};		// QUO
		  2'b01 : divi_result = {neg_src2,divreg[33:2]};	// REM
		  2'b10 : divi_result = {neg_src1,((sub_case & ~rest_null) ? (save1[31:0] - divreg[33:2]) : divreg[33:2])};	// MOD
		  2'b11 : divi_result = {neg_flag,divsr[31:0]};		// DIV
		endcase

	always @(posedge BCLK) negativ <= divi_result[32];

	assign plus_1 = (OPCODE[1:0] == 2'b11) ? (negativ & rest_null) : negativ;	// Special case Rest=0 at DIV

	always @(posedge BCLK)
		if (divi_pipe4) DIVI_OUT[63:32] <= not_dei ? (({32{negativ}} ^ divi_result[31:0]) + {31'd0,plus_1}) : dei_result;

	always @(posedge BCLK) if (divi_pipe4) DIVI_OUT[31:0] <= divreg[33:2];

	always @(*)
		casex ({extdata,BWD})
		  3'b000  : dei_result = {16'hxxxx,divsr[7:0],divreg[9:2]};
		  3'b001  : dei_result = {divsr[15:0],divreg[17:2]};
		  default : dei_result = divsr[31:0];
		endcase

	// +++++++++++++++++++++++++++  Calculation path for Division  ++++++++++++++++++++++++++++

	always @(posedge BCLK or negedge BRST)
		if (!BRST) runflag <= 1'b0;
		  else
			runflag <= START[2] | (~ende & runflag);

	always @(posedge BCLK) DONE <= (ende & runflag) | divi_pipe4;

	assign man_1 = (FL | run_divi) ? 21'h0 : MAN1;
	assign src_1 = run_divi ? {1'b0,ivalue} : ( FL ? {10'h001,SRC1[22:0]} : {SRC1,1'b0});

	assign load_src1 = START[2] | divi_pipe1;

	// 3 x divisor = 2 x divisor + 1 x divisor. The divisor {man_1,src_1} is 54 bits wide, the sum needs 56 bits.
	//											*2				+			*1
	always @(posedge BCLK) if (load_src1) dreimal <= {1'b0,man_1,src_1,1'b0} + {2'b00,man_1,src_1};

	always @(posedge BCLK) if (load_src1) save1 <= src_1;

	// Trial subtractions of 1x, 2x and 3x the divisor. Bit 56 set means the subtraction went negative.
	assign sub1 = divreg - {3'b000, man_1,save1     };
	assign sub2 = divreg - {2'b00 ,man_1,save1,1'b0};
	assign sub3 = divreg - {1'b0, dreimal           };

	assign load_src2 = START[2] | divi_pipe2;

	// Working register : takes the largest trial subtraction that did not go negative, shifted left
	// by two bits, with the next two bits of divreg_ext shifted in.
	// The integer load value is padded with 24 zero bits to the full 57 bit width of divreg.
	always @(posedge BCLK)
		if (load_src2) divreg <= divi_pipe2 ? {24'h0,shift_32[64:32]} : ( FL ? {34'h0_0000_0001,SRC2[22:0]} : {3'b0,MAN2,SRC2,1'b0});
		else
			begin
				casex ({sub3[56],sub2[56],sub1[56]})
				  3'b0xx : divreg <= {sub3[54:0],divreg_ext[31:30]};
				  3'b10x : divreg <= {sub2[54:0],divreg_ext[31:30]};
				  3'b110 : divreg <= {sub1[54:0],divreg_ext[31:30]};
				 default : divreg <= {divreg[54:0],divreg_ext[31:30]};
				endcase
			end

	always @(posedge BCLK)	// Extension Register for Integer Division
		if (load_src2) divreg_ext <= divi_pipe2 ? shift_32[31:0] : 32'd0;
		else
			divreg_ext <= {divreg_ext[29:0],2'b0};

	// Quotient shift register : two new quotient bits per cycle, same selection as for divreg.
	always @(posedge BCLK)
		if (load_src2) divsr <= 57'h0;
		else
			begin
				casex ({sub3[56],sub2[56],sub1[56]})
				  3'b0xx : divsr <= {divsr[54:0],2'b11};
				  3'b10x : divsr <= {divsr[54:0],2'b10};
				  3'b110 : divsr <= {divsr[54:0],2'b01};
				 default : divsr <= {divsr[54:0],2'b00};
				endcase
			end

	// Overflow Detection for DEI : serial calculation
	always @(posedge BCLK)
		if (load_src2) DEI_OVF[0] <= 1'b0;
		  else DEI_OVF[0] <= DEI_OVF[0] | (BWD[1] ? |divsr[33:32] : (BWD[0] ? |divsr[17:16] : |divsr[9:8]));

	always @(posedge BCLK) DEI_OVF[1] <= divi_pipe4;	// Timing pulse for OVF inclusiv for DIV and QUO

	assign short = (SRCFLAGS[3:0] != 4'h0) & runflag;	// any NAN/ZERO flag set : finish immediately

	assign ende = ((FL ? (divsr[26] | divsr[25]) : (divsr[56] | divsr[55])) & runflag) | short;

	assign sign = (SRCFLAGS[4] ^ SRCFLAGS[5]) & ~zero;
	assign zero =  SRCFLAGS[2] & ~SRCFLAGS[0];		// SRC2 = NULL -> NULL as result
	assign nan  =  SRCFLAGS[3] | SRCFLAGS[1] | (SRCFLAGS[2] & SRCFLAGS[0]);
		// one of both NAN or both 0 -> invalid Operation

	assign orlow_d = (divreg[56:27] != 30'b0) & ~zero & ~FL;	// is there Rest ? [1:0] are always 0.
	assign orlow_s = (divreg[26:2]  != 25'b0) & ~zero;

	assign restlsb  = divsr[0] | orlow_s;
	assign restlow  = (divsr[1:0] != 2'b00) | orlow_s | orlow_d;
	assign resthigh = divsr[2] | restlow;

	// Exponent difference : dividend (SRC2) minus divisor (SRC1), the bias is added again below.
	always @(posedge BCLK) if (START[0]) exponent <= FL ? ({5'b00,SRC2[30:23]} - {5'b00,SRC1[30:23]})
														: ({2'b00,SRC2[30:20]} - {2'b00,SRC1[30:20]});
	assign offset = FL ? 13'h007E : 13'h03FE;
	assign expoh  = exponent + {offset[12:1],1'b1};	// Double = 3FF/3FE  Single = 7F/7E
	assign expol  = exponent + offset;					// in case of normalizing

	// Result word : {nan, zero, sign, exponent, mantissa, 2 rounding bits}, 70 bits in total.
	// The position of the leading quotient bit selects between the "high" and the normalized "low" variant.
	always @(posedge BCLK)
		if (ende && runflag)
			casex ({FL,divsr[26],divsr[56]})
			  3'b11x : OUT <= {nan,zero,sign,expoh[9:8],expoh[7],expoh[7],expoh[7],expoh[7:0],divsr[25:3],28'b0,divsr[3:2],restlow};
			  3'b10x : OUT <= {nan,zero,sign,expol[9:8],expol[7],expol[7],expol[7],expol[7:0],divsr[24:2],28'b0,divsr[2:1],restlsb};
			  3'b0x1 : OUT <= {nan,zero,sign,expoh,divsr[55:3],resthigh};
			  3'b0x0 : OUT <= {nan,zero,sign,expol,divsr[54:2],restlow};
			endcase

endmodule
|
1005 |
|
|
|
1006 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
1007 |
9 |
ns32kum |
//
|
1008 |
23 |
ns32kum |
// 10. DP_LOGIK Control logic and result path for different functions
|
1009 |
9 |
ns32kum |
//
|
1010 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
1011 |
|
|
module DP_LOGIK ( BCLK, BRESET, OPCODE, SRC1, SRC2, FSR, START, MRESULT, BWD, FL, MAN1, MAN2, WR_REG, CY_IN,
|
1012 |
9 |
ns32kum |
COP_DONE, COP_OP, COP_IN,
|
1013 |
23 |
ns32kum |
DOUT, TT_DP, DP_CMP, OVF_BCD, MEI, DFLOAT, DONE, UP_DP, CLR_LSB, WREN_L, DVZ_TRAP, COP_GO );
|
1014 |
9 |
ns32kum |
|
1015 |
|
|
// Definition of the output word OUT of the sub-modules : the hidden bit of the mantissa is already removed
|
1016 |
|
|
//
|
1017 |
|
|
// N Z S Exponent Mantissa Round
|
1018 |
|
|
// A E I Double : 13 Bit 52 Bit 2 Bit
|
1019 |
|
|
// N R G Single : 10 Bit 23 Bit 2 Bit
|
1020 |
|
|
// O N -mmmm.mmmm.mmmm.mmmm.mmmm.mmm-.-- -m.
|
1021 |
|
|
// -F-F-F-E.EEEE.EEEE.EEEE-MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.MMMM.RR
|
1022 |
|
|
//
|
1023 |
|
|
// 6 6 6 6 6666 6655 5555 5555 4444 4444 4433 3333 3333 2222 2222 2211 1111 1111 0000 0000 00
|
1024 |
|
|
// 9 8 7 6 5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 10
|
1025 |
|
|
//
|
1026 |
|
|
// Single FP delivers the exponent in a way, that it is identical for rounding :
|
1027 |
|
|
//
|
1028 |
|
|
// Exponent 61 - 54 => kept
|
1029 |
|
|
// Bits 64 - 62 are filled with bit 61 , carry should come through
|
1030 |
|
|
// Exponent 62 => Bit 65 , Overflow
|
1031 |
|
|
// Exponent 63 => Bit 66 , Underflow
|
1032 |
|
|
|
1033 |
|
|
input BCLK,BRESET;
|
1034 |
|
|
input [7:0] OPCODE;
|
1035 |
|
|
input [31:0] SRC1,SRC2; // the input data
|
1036 |
|
|
input [20:0] MAN1,MAN2; // the MSB of mantissa
|
1037 |
|
|
input [8:3] FSR; // Floating Point Status Register
|
1038 |
|
|
input [1:0] START;
|
1039 |
|
|
input [105:0] MRESULT; // Result of multiplier
|
1040 |
|
|
input [1:0] BWD; // Size of integer
|
1041 |
|
|
input FL;
|
1042 |
|
|
input WR_REG; // from DECODER
|
1043 |
|
|
input CY_IN;
|
1044 |
|
|
input COP_DONE; // Coprozessor Interface
|
1045 |
|
|
input [23:0] COP_OP;
|
1046 |
|
|
input [63:0] COP_IN;
|
1047 |
|
|
|
1048 |
|
|
output [63:0] DOUT;
|
1049 |
|
|
output [4:0] TT_DP; // Trap-Info to FSR
|
1050 |
|
|
output [2:0] DP_CMP; // CMPL result
|
1051 |
|
|
output [3:0] OVF_BCD; // Integer Division Overflow + BCD Carry update
|
1052 |
|
|
output MEI,DFLOAT;
|
1053 |
23 |
ns32kum |
output DONE,UP_DP,WREN_L;
|
1054 |
|
|
output reg CLR_LSB;
|
1055 |
9 |
ns32kum |
output DVZ_TRAP;
|
1056 |
|
|
output reg COP_GO;
|
1057 |
|
|
|
1058 |
|
|
reg [63:0] DOUT;
|
1059 |
|
|
reg [2:0] DP_CMP;
|
1060 |
|
|
reg [5:0] preflags;
|
1061 |
|
|
reg [5:0] srcflags;
|
1062 |
|
|
reg [69:0] fpout;
|
1063 |
|
|
reg [2:0] tt;
|
1064 |
23 |
ns32kum |
reg [7:0] select;
|
1065 |
|
|
reg [5:0] wctrl;
|
1066 |
9 |
ns32kum |
reg [2:1] sequ;
|
1067 |
|
|
reg misc_op;
|
1068 |
|
|
reg car_ry;
|
1069 |
|
|
reg wr_part2;
|
1070 |
|
|
reg up_flag;
|
1071 |
|
|
reg ovf_div;
|
1072 |
23 |
ns32kum |
reg late_bcd_done;
|
1073 |
9 |
ns32kum |
|
1074 |
|
|
wire zexp2,zman2,zexp1,zman1,znan1;
|
1075 |
|
|
wire make_i;
|
1076 |
|
|
wire op_cmp;
|
1077 |
|
|
wire [69:0] mulout,addout,divout,miscout;
|
1078 |
|
|
wire go_divf,go_divi,divi_ops,div_done;
|
1079 |
|
|
wire bcd_ops,man_ops;
|
1080 |
|
|
wire [31:0] i_out;
|
1081 |
|
|
wire [63:0] divi_out;
|
1082 |
|
|
wire [66:2] rund,cy_val; // Indexnumber like in xxxout
|
1083 |
|
|
wire div_zero,overflow,underflow,inexact;
|
1084 |
|
|
wire [1:0] cmpres;
|
1085 |
|
|
wire [63:0] fp_out,fp_res;
|
1086 |
|
|
wire wr_part1;
|
1087 |
|
|
wire done_i;
|
1088 |
23 |
ns32kum |
wire later;
|
1089 |
9 |
ns32kum |
wire [31:0] bcd_q;
|
1090 |
|
|
wire bcd_done;
|
1091 |
|
|
wire bcd_carry;
|
1092 |
|
|
wire [1:0] dei_ovf;
|
1093 |
|
|
wire quo_div;
|
1094 |
|
|
wire copop;
|
1095 |
|
|
wire copwr;
|
1096 |
|
|
|
1097 |
|
|
// Control of datapath : together with START the Double Unit becomes active
|
1098 |
|
|
|
1099 |
|
|
always @(OPCODE or FL)
|
1100 |
|
|
casex (OPCODE)
|
1101 |
23 |
ns32kum |
8'b1001_000x : select = {5'b0000_0, ~FL ,2'b10}; // 0 1 0 : MOViL
|
1102 |
|
|
8'b1001_010x : select = {5'b0001_1, 1'b1,2'b00}; // MOVLF
|
1103 |
|
|
8'b1001_011x : select = {5'b0011_1, 1'b1,2'b00}; // MOVFL
|
1104 |
|
|
8'b1001_100x : select = {5'b0001_0, ~FL ,2'b11}; // 0 1 1 : ROUNDLi
|
1105 |
|
|
8'b1001_101x : select = {5'b0001_0, ~FL ,2'b11}; // 0 1 1 : TRUNCLi
|
1106 |
|
|
8'b1001_111x : select = {5'b0001_0, ~FL ,2'b11}; // 0 1 1 : FLOORLi
|
1107 |
|
|
8'b1011_0000 : select = {5'b1010_0, ~FL ,2'b00}; // 0 0 0 : ADDL Es werden Shifter wiederverwendet...
|
1108 |
|
|
8'b1011_0010 : select = {5'b1010_0, ~FL ,2'b01}; // 0 0 1 : CMPL
|
1109 |
|
|
8'b1011_0100 : select = {5'b1010_0, ~FL ,2'b01}; // 0 0 1 : SUBL
|
1110 |
|
|
8'b1011_1000 : select = {1'b1,FL,1'b1,FL,1'b0,1'b1,2'b00}; // 1 0 1 : DIVf , Default Float fuer srcflags
|
1111 |
|
|
8'b1011_1100 : select = {5'b1010_0, ~FL ,2'b00}; // 1 0 0 : MULL
|
1112 |
|
|
8'b1011_0110 : select = {1'b1,FL,1'b1,FL,1'b1,1'b1,2'b00}; // SCALBf
|
1113 |
|
|
8'b1011_0111 : select = {2'b00,FL,2'b1_1, 1'b1,2'b00}; // LOGBf
|
1114 |
|
|
default : select = 8'b0;
|
1115 |
9 |
ns32kum |
endcase
|
1116 |
|
|
|
1117 |
|
|
assign MEI = (OPCODE == 8'h79);
|
1118 |
|
|
assign divi_ops = (OPCODE[7:2] == 6'b0111_11) | (OPCODE == 8'h7B); // QUO/REM/MOD/DIV & DEI
|
1119 |
|
|
assign go_divf = (OPCODE == 8'hB8) & START[1]; // because of runflag in DIV Unit
|
1120 |
|
|
assign go_divi = divi_ops & (OPCODE[2] ? START[1] : START[0]); // DEI starts with START[0]
|
1121 |
23 |
ns32kum |
assign bcd_ops = (OPCODE == 8'h71) | (OPCODE == 8'h70); // ADDP , SUBP
|
1122 |
9 |
ns32kum |
|
1123 |
11 |
ns32kum |
assign man_ops = (OPCODE == 8'hB1) | (OPCODE == 8'hB5) | (OPCODE == 8'hB9) | (OPCODE == 8'hBD); // MOVf,NEGf,XXXf,ABSf
|
1124 |
9 |
ns32kum |
|
1125 |
23 |
ns32kum |
assign DFLOAT = (select[2] | copop) & ~FL; // all Double Floating Point Operations for PREPDATA
|
1126 |
|
|
assign make_i = (select[1:0] == 2'b11) | divi_ops | bcd_ops; // ROUND/TRUNC/FLOOR for output multiplexer
|
1127 |
9 |
ns32kum |
assign op_cmp = (OPCODE == 8'hB2) & ~FL;
|
1128 |
23 |
ns32kum |
always @(posedge BCLK) misc_op <= select[3]; // for OUT-Multiplexer
|
1129 |
9 |
ns32kum |
|
1130 |
|
|
assign copop = (OPCODE == 8'hDD);
|
1131 |
11 |
ns32kum |
assign copwr = (COP_OP[18:17] == 2'd0) & (COP_OP[13:11] == 3'b111) & (COP_OP[7:5] == 3'b001); // Custom Convert
|
1132 |
9 |
ns32kum |
|
1133 |
|
|
// SRCFLAGS : special handling for operands is done locally
|
1134 |
|
|
|
1135 |
|
|
assign zexp2 = (SRC2[30:20] == 11'd0);
|
1136 |
|
|
assign zman2 = (SRC2[19:0] == 20'd0);
|
1137 |
|
|
assign zexp1 = (SRC1[30:20] == 11'd0);
|
1138 |
|
|
assign zman1 = (SRC1[19:0] == 20'd0);
|
1139 |
|
|
assign znan1 = (SRC1[30:20] == 11'h7FF);
|
1140 |
|
|
|
1141 |
|
|
always @(posedge BCLK)
|
1142 |
|
|
if (START[0])
|
1143 |
|
|
begin
|
1144 |
|
|
srcflags[5] <= SRC2[31];
|
1145 |
|
|
srcflags[4] <= SRC1[31];
|
1146 |
|
|
preflags <= {(SRC2[30:20] == 11'h7FF),zexp2,zman2,znan1,zexp1,zman1};
|
1147 |
|
|
end
|
1148 |
|
|
|
1149 |
|
|
// case Definition : 00 : 0 , if START[i]=0 then there are always 2 long operands
|
1150 |
|
|
// 01 : 1 Float Operand SCR1
|
1151 |
|
|
// 10 : 1 Long Operand SRC1+SRC2
|
1152 |
|
|
// 11 : 2 Float Operands SRC1 , SRC2
|
1153 |
|
|
|
1154 |
|
|
always @(posedge BCLK) // NaN
|
1155 |
|
|
if (START[1])
|
1156 |
23 |
ns32kum |
case (select[7:6])
|
1157 |
|
|
2'b10 : srcflags[3] <= preflags[5] | (preflags[4] & (~preflags[3] | SRC2[31] | ~zexp2 | ~zman2));
|
1158 |
|
|
2'b11 : srcflags[3] <= (SRC2[30:23] == 8'hFF) | ((SRC2[30:23] == 8'd0) & ((SRC2[22:20] != 3'd0) | ~zman2)); // F:SRC2 = NaN
|
1159 |
|
|
default : srcflags[3] <= 1'b0;
|
1160 |
9 |
ns32kum |
endcase
|
1161 |
|
|
|
1162 |
|
|
always @(posedge BCLK) // Zero : only exponent ! If denormalized => NaN !
|
1163 |
23 |
ns32kum |
if (START[1])
|
1164 |
|
|
case (select[7:6])
|
1165 |
|
|
2'b10 : srcflags[2] <= preflags[4]; // L:SRC2 = Zero , 2*SRC2
|
1166 |
|
|
2'b11 : srcflags[2] <= (SRC2[30:23] == 8'd0); // F:SRC2 = Zero
|
1167 |
|
|
default : srcflags[2] <= 1'b0;
|
1168 |
9 |
ns32kum |
endcase
|
1169 |
|
|
|
1170 |
|
|
always @(posedge BCLK) // NaN
|
1171 |
|
|
if (START[1])
|
1172 |
23 |
ns32kum |
case (select[5:4])
|
1173 |
|
|
2'b01 : srcflags[1] <= znan1 | (zexp1 & (~zman1 | SRC2[31] | ~zexp2 | ~zman2)); // L:(SRC1,SRC2) = NaN , SRC1 = MSB
|
1174 |
|
|
2'b10 : srcflags[1] <= preflags[2] | (preflags[1] & (~preflags[0] | SRC1[31] | ~zexp1 | ~zman1));
|
1175 |
|
|
2'b11 : srcflags[1] <= (SRC1[30:23] == 8'hFF) | ((SRC1[30:23] == 8'd0) & ((SRC1[22:20] != 3'd0) | ~zman1)); // F:SRC1 = NaN
|
1176 |
|
|
default : srcflags[1] <= 1'b0;
|
1177 |
9 |
ns32kum |
endcase
|
1178 |
|
|
|
1179 |
|
|
always @(posedge BCLK) // Zero : only exponent ! If denormalized => NaN !
|
1180 |
23 |
ns32kum |
if (START[1])
|
1181 |
|
|
case (select[5:4])
|
1182 |
|
|
2'b01 : srcflags[0] <= zexp1; // L:(SRC1,SRC2) = Zero , SRC1 = MSB, Special Case ROUNDL,etc.
|
1183 |
|
|
2'b10 : srcflags[0] <= preflags[1]; // L:SRC1 = Zero , 2*SRC1
|
1184 |
|
|
2'b11 : srcflags[0] <= (SRC1[30:23] == 8'd0); // F:SRC1 = Zero
|
1185 |
|
|
default : srcflags[0] <= 1'b0;
|
1186 |
9 |
ns32kum |
endcase
|
1187 |
|
|
|
1188 |
|
|
// The Sub-moduls :
|
1189 |
|
|
|
1190 |
|
|
DFPU_ADDSUB as_inst ( .BCLK(BCLK), .START(START), .SRC1(SRC1), .SRC2(SRC2),
|
1191 |
23 |
ns32kum |
.MAN1({~preflags[1],MAN1[19:0]}), .MAN2({~preflags[4],MAN2[19:0]}),
|
1192 |
9 |
ns32kum |
.SRCFLAGS(srcflags), .BWD(BWD), .SELECT({OPCODE[2:1],select[1:0]}),
|
1193 |
|
|
.OUT(addout), .IOUT(i_out), .CMPRES(cmpres) );
|
1194 |
|
|
|
1195 |
|
|
DFPU_MUL mul_inst ( .BCLK(BCLK), .SRC1(SRC1), .SRC2(SRC2), .START(START[0]), .MRESULT(MRESULT),
|
1196 |
|
|
.OUT(mulout), .SRCFLAGS(srcflags) );
|
1197 |
|
|
|
1198 |
11 |
ns32kum |
DFPU_DIV div_inst ( .BCLK(BCLK), .BRST(BRESET), .START({go_divi,go_divf,START}), .SRC1(SRC1), .SRC2(SRC2),
|
1199 |
9 |
ns32kum |
.MAN1(MAN1), .MAN2(MAN2), .SRCFLAGS(srcflags), .FL(FL), .OUT(divout), .DONE(div_done),
|
1200 |
11 |
ns32kum |
.BWD(BWD), .OPCODE(OPCODE[2:0]), .DIVI_OUT(divi_out), .DVZ_TRAP(DVZ_TRAP), .DEI_OVF(dei_ovf) );
|
1201 |
9 |
ns32kum |
|
1202 |
23 |
ns32kum |
DFPU_MISC misc_inst ( .BCLK(BCLK), .START(START), .SRC1(SRC1), .SRC2(SRC2), .MAN2(MAN2[19:0]), .SRCFLAGS(srcflags),
|
1203 |
|
|
.MODE({OPCODE[5],OPCODE[0],FL,OPCODE[1]}), .OUT(miscout) );
|
1204 |
9 |
ns32kum |
|
1205 |
11 |
ns32kum |
DFPU_BCD bcd_inst ( .BCLK(BCLK), .BRESET(BRESET), .START(START[1]), .DO_BCD(bcd_ops), .BWD(BWD), .SRC1(SRC1), .SRC2(SRC2),
|
1206 |
23 |
ns32kum |
.CY_IN(CY_IN), .SUBP(~OPCODE[0]), .BCD_Q(bcd_q), .CY_OUT(bcd_carry), .BCD_DONE(bcd_done) );
|
1207 |
9 |
ns32kum |
|
1208 |
|
|
// FP - path : selection of result and rounding :
|
1209 |
|
|
|
1210 |
|
|
always @(misc_op or OPCODE or mulout or addout or divout or miscout)
|
1211 |
|
|
casex ({misc_op,OPCODE[5],OPCODE[3:2]}) //OPCODE[5] only for Flags i.e. NAN
|
1212 |
|
|
4'b1xxx : fpout = miscout; // for MOVLF,MOVFL,SCALB & LOGB
|
1213 |
|
|
4'b0110 : fpout = divout;
|
1214 |
|
|
4'b0111 : fpout = mulout;
|
1215 |
|
|
default : fpout = addout;
|
1216 |
|
|
endcase
|
1217 |
|
|
|
1218 |
|
|
always @(FSR or fpout) // Calculation of Carry according to rounding mode, fpout[67] = sign bit
|
1219 |
|
|
casex (FSR[8:7])
|
1220 |
|
|
2'b00 : car_ry = ((fpout[1:0] == 2'b10) & fpout[2]) | (fpout[1:0] == 2'b11); // round to nearest
|
1221 |
|
|
2'b10 : car_ry = ~fpout[67] & (fpout[1:0] != 2'b00); // round to positiv infinity
|
1222 |
|
|
2'b11 : car_ry = fpout[67] & (fpout[1:0] != 2'b00); // round to negativ infinity
|
1223 |
|
|
default : car_ry = 1'b0; // round to zero
|
1224 |
|
|
endcase
|
1225 |
|
|
|
1226 |
|
|
assign cy_val = {35'h0,(FL & car_ry),28'h0,(~FL & car_ry)};
|
1227 |
|
|
|
1228 |
|
|
assign rund = {fpout[66:2]} + cy_val;
|
1229 |
|
|
|
1230 |
|
|
// Detection of Div-by-0, Overflow, Underflow and Inexact : Epxonent from [66:54] = 13 Bits
|
1231 |
23 |
ns32kum |
assign div_zero = (srcflags[3:0] == 4'h1) & ((OPCODE == 8'hB8) | (OPCODE == 8'hB7)); // true FPU Divide by Zero also for LOGBf
|
1232 |
9 |
ns32kum |
assign overflow = ~rund[66] & (rund[65] | (rund[64:54] == 11'h7FF));
|
1233 |
|
|
assign underflow = (rund[66] | (rund[65:54] == 12'h0)) & ~fpout[68]; // Zero-Flag
|
1234 |
|
|
assign inexact = (fpout[1:0] != 2'b00);
|
1235 |
|
|
|
1236 |
|
|
always @(fpout or op_cmp or div_zero or overflow or underflow or inexact or FSR)
|
1237 |
|
|
casex ({fpout[69],op_cmp,div_zero,overflow,FSR[3],underflow,FSR[5],inexact}) // [69] = NAN
|
1238 |
|
|
8'b1xxxxxxx : tt = 3'b101; // Invalid operation
|
1239 |
|
|
8'b001xxxxx : tt = 3'b011; // Divide by Zero
|
1240 |
|
|
8'b0001xxxx : tt = 3'b010; // Overflow
|
1241 |
|
|
8'b000011xx : tt = 3'b001; // Underflow
|
1242 |
|
|
8'b00000011 : tt = 3'b110; // Inexact Result
|
1243 |
|
|
default : tt = 3'b000; // no error
|
1244 |
|
|
endcase
|
1245 |
|
|
|
1246 |
11 |
ns32kum |
assign TT_DP = man_ops ? 5'd0 : {(inexact & ~op_cmp),(underflow & ~op_cmp),tt}; // at ABSf/NEGf no error : different to NS32381 !
|
1247 |
9 |
ns32kum |
|
1248 |
|
|
assign fp_res = FL ? {fpout[67],rund[61:31],rund[33:2]}
|
1249 |
|
|
: {fpout[67],rund[64:2]}; // lower 32 bits identical
|
1250 |
|
|
|
1251 |
|
|
// Underflow special case and get ZERO
|
1252 |
|
|
assign fp_out = (underflow | fpout[68]) ? 64'h0 : fp_res;
|
1253 |
|
|
|
1254 |
|
|
// 63..32 goes to memory if Word or Byte ! Also in ODD Register , 31..0 goes in EVEN Register
|
1255 |
|
|
// DEI comes without WR_REG information
|
1256 |
11 |
ns32kum |
always @(make_i or copop or MEI or BWD or WR_REG or MRESULT or COP_IN or i_out or fp_out or divi_ops or divi_out or bcd_ops or bcd_q)
|
1257 |
9 |
ns32kum |
casex ({make_i,copop,MEI,BWD})
|
1258 |
11 |
ns32kum |
5'b00100 : DOUT = {MRESULT[31:8], (WR_REG ? MRESULT[15:8] : MRESULT[7:0]), MRESULT[31:0]}; // LSD always the same
|
1259 |
9 |
ns32kum |
5'b00101 : DOUT = {MRESULT[31:16],(WR_REG ? MRESULT[31:16] : MRESULT[15:0]),MRESULT[31:0]};
|
1260 |
|
|
5'b0011x : DOUT = MRESULT[63:0];
|
1261 |
|
|
5'b01xxx : DOUT = COP_IN; // true alignment in Coprocessor
|
1262 |
11 |
ns32kum |
5'b1xxxx : DOUT = divi_ops ? divi_out : {(bcd_ops ? bcd_q : i_out),fp_out[31:0]}; // MSD is written first
|
1263 |
9 |
ns32kum |
default : DOUT = fp_out;
|
1264 |
|
|
endcase
|
1265 |
|
|
|
1266 |
|
|
always @(posedge BCLK) DP_CMP <= {(srcflags[3] | srcflags[1]),cmpres}; // Only valid if not NaN
|
1267 |
|
|
|
1268 |
|
|
// Pipeline Control + Registerfile write control

// Sequencer register, cleared asynchronously while BRESET is low.
//   lower bit : START[1] delayed by one clock, suppressed if wctrl[5] is set
//   upper bit : lower bit delayed by one more clock, suppressed if DONE is active
always @(posedge BCLK or negedge BRESET)
	begin
		if (!BRESET)
			sequ <= 2'b00;
		else
			sequ <= { (sequ[1] & ~DONE), (START[1] & ~wctrl[5]) };
	end
|
1274 |
9 |
ns32kum |
|
1275 |
|
|
// Decoder for the control word wctrl, selected by {FL,OPCODE}.
//   WRITE Control : [2] = clr_lsb, [1] = wr_part2, [0] = wr_part1
//   [3] : together with WR_REG it forms "later", DONE is then taken one sequencer step later
//   [4] : DONE is taken from div_done or COP_DONE instead of the sequencer
//   [5] : DONE is taken directly from START[1], or from bcd_done for BCD operations
// The order of the case items is kept as it was, items with the same control word
// that follow each other are written as one list.
always @(*)
	begin
		casex ({FL,OPCODE})
		  9'b0_1001_000x : wctrl = 6'b001_111;	// MOViL
		  9'b1_1001_000x : wctrl = 6'b100_010;	// MOViF	<= SFPU
		  9'bx_1001_010x : wctrl = 6'b000_010;	// MOVLF
		  9'bx_1001_011x : wctrl = 6'b001_111;	// MOVFL
		  9'b0_1001_100x,						// ROUNDLi
		  9'b0_1001_101x,						// TRUNCLi
		  9'b0_1001_111x : wctrl = 6'b000_010;	// FLOORLi
		  9'b1_1001_100x,						// ROUNDFi
		  9'b1_1001_101x,						// TRUNCFi
		  9'b1_1001_111x : wctrl = 6'b100_010;	// FLOORFi
		  9'b0_1011_0000 : wctrl = 6'b001_111;	// ADDL
		  9'b1_1011_0000 : wctrl = 6'b100_010;	// ADDF		<= SFPU
		  9'b0_1011_0010 : wctrl = 6'b000_000;	// CMPL
		  9'b1_1011_0010 : wctrl = 6'b100_000;	// CMPF
		  9'b0_1011_0100 : wctrl = 6'b001_111;	// SUBL
		  9'b1_1011_0100 : wctrl = 6'b100_010;	// SUBF		<= SFPU
		  9'b1_1011_1000 : wctrl = 6'b010_001;	// DIVF - measured 18 clocks Reg-Reg
		  9'b0_1011_1000 : wctrl = 6'b010_111;	// DIVL - measured 34 clocks Reg-Reg
		  9'b0_1011_1100 : wctrl = 6'b001_111;	// MULL
		  9'b1_1011_1100 : wctrl = 6'b100_010;	// MULF		<= SFPU
		  9'bx_0111_000x : wctrl = 6'b100_010;	// ADDP,SUBP
		  9'bx_0111_1001 : wctrl = 6'b000_111;	// MEIi
		  9'bx_0111_1011 : wctrl = 6'b010_111;	// DEIi
		  9'bx_0111_11xx : wctrl = 6'b010_001;	// QUOi,REMi,MODi,DIVi
		  9'b1_1011_011x : wctrl = 6'b000_010;	// SCALBF/LOGBF
		  9'b0_1011_011x : wctrl = 6'b001_111;	// SCALBL/LOGBL
		  9'bx_1101_1101 : wctrl = {5'b010_00,copwr};	// Coprocessor opcode
		  default		 : wctrl = 6'b000_000;	// no write, no special DONE handling
		endcase
	end
|
1306 |
|
|
|
1307 |
23 |
ns32kum |
// Generation of DONE.
// if DEST = Reg and 64 bit of data then DONE comes 1 clock later
assign later  = wctrl[3] & WR_REG;

// wctrl[4] hands the decision to the divider or the coprocessor, otherwise the sequencer decides
assign done_i = wctrl[4] ? (div_done | COP_DONE)
						 : (later ? sequ[2] : sequ[1]);

// DONE is valid for all opcodes : with wctrl[5] it follows START[1] (bcd_done for BCD operations),
// without wctrl[5] it is done_i, blocked while START[1] is active
assign DONE   = wctrl[5] ? (bcd_ops ? bcd_done : START[1])
						 : (~START[1] & done_i);
|
1310 |
9 |
ns32kum |
|
1311 |
|
|
// Register write strobes. All of them require DONE and WR_REG, wctrl[2:0] selects which are used.
// The first part is written in the DONE cycle itself
assign wr_part1 = DONE & WR_REG & wctrl[0];

// The second part and CLR_LSB are registered and therefore appear one clock after DONE
always @(posedge BCLK)
	begin
		CLR_LSB  <= DONE & WR_REG & wctrl[2];
		wr_part2 <= DONE & WR_REG & wctrl[1];
	end

// Write enable is active for both parts
assign WREN_L = wr_part1 | wr_part2;
|
1317 |
|
|
|
1318 |
23 |
ns32kum |
// Update FSR Trap etc. : all FPU opcodes of DP_FPU
// up_flag is DONE one cycle later, it is only set if "later" is not active
always @(posedge BCLK)
	up_flag <= DONE & ~later;

// With "later" the update uses DONE directly, otherwise the delayed up_flag.
// The update is gated with select[2] ; man_ops always requests an update.
assign UP_DP = ( select[2] & (later ? DONE : up_flag) ) | man_ops;
|
1320 |
9 |
ns32kum |
|
1321 |
|
|
// Overflow Trap for Division : DEI, QUO, DIV
// quo_div is active for the opcodes 8'h7C and 8'h7F
assign quo_div = (OPCODE == 8'h7C) | (OPCODE == 8'h7F);

// For OPCODE[2] = 1 the overflow is flagged if the sign bits of both sources and the bit of
// divi_out at the matching position are set ; BWD selects bit 7, 15 or 31 of the operands.
// All other opcodes use dei_ovf[0], which is only passed on for opcode 8'h7B.
always @(*)
	begin
		casex ({OPCODE[2],BWD})
		  3'b100  : ovf_div = (divi_out[39] & SRC1[7]  & SRC2[7] ) & quo_div;
		  3'b101  : ovf_div = (divi_out[47] & SRC1[15] & SRC2[15]) & quo_div;
		  3'b11x  : ovf_div = (divi_out[63] & SRC1[31] & SRC2[31]) & quo_div;
		  default : ovf_div = dei_ovf[0] & (OPCODE == 8'h7B);	// DEI
		endcase
	end
|
1330 |
|
|
|
1331 |
23 |
ns32kum |
// bcd_done delayed by one clock : parallel to data write
always @(posedge BCLK)
	late_bcd_done <= bcd_done;

// Status bundle to I_PFAD : [3] = dei_ovf[1], [2] = ovf_div, [1] = late_bcd_done, [0] = bcd_carry
assign OVF_BCD = { dei_ovf[1], ovf_div, late_bcd_done, bcd_carry };
|
1334 |
9 |
ns32kum |
|
1335 |
|
|
// COP_GO is registered : it is set in the clock after START[1] was active together with copop.
always @(posedge BCLK) COP_GO <= START[1] & copop;
|
1336 |
|
|
|
1337 |
|
|
endmodule
|
1338 |
|
|
|
1339 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
1340 |
9 |
ns32kum |
//
|
1341 |
23 |
ns32kum |
// 11. DP_FPU Top level of long operations datapath
|
1342 |
9 |
ns32kum |
//
|
1343 |
11 |
ns32kum |
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
1344 |
23 |
ns32kum |
module DP_FPU
(
	// ----- Inputs -----
	input				BCLK,		// clock for all registers of this module
	input				FL,			// passed on to DP_LOGIK
	input				BRESET,		// passed on to DP_LOGIK
	input		 [1:0]	LD_OUT,		// load control of DP_Q : [0] selects SRC2, [1] selects DOUT[31:0]
	input				WR_REG,
	input		 [1:0]	BWD,
	input		 [8:3]	FSR,
	input		 [7:0]	OPCODE,
	input		[31:0]	SRC1,
	input		[31:0]	SRC2,
	input		 [1:0]	START,
	input				CY_IN,
	input				COP_DONE,
	input		[23:0]	COP_OP,
	input		[63:0]	COP_IN,
	// ----- Outputs -----
	output				DONE,
	output				UP_DP,
	output				WREN_L,
	output				CLR_LSB,
	output				DVZ_TRAP,
	output		 [2:0]	DP_CMP,
	output		[31:0]	DP_OUT,		// DP_Q if CLR_LSB is active, otherwise DOUT[63:32]
	output reg	[31:0]	DP_Q,		// holding register for SRC2 or DOUT[31:0]
	output		 [4:0]	TT_DP,
	output		 [3:0]	OVF_BCD,
	output				COP_GO,
	output	   [127:0]	COP_OUT		// both operands for the coprocessor
);

	// Operand registers of the multiplier
	reg		[52:0]	MDA;
	reg		[52:0]	MDB;
	// Copies of SRC1[31:20] and SRC2[31:20], taken together with the MSD part of the operands
	reg	   [31:20]	RCOPA,RCOPB;

	wire	[63:0]	DOUT;
	wire   [105:0]	MRESULT;
	wire			MEI;
	wire			DFLOAT;
	wire			LOAD_MSD;
	wire			LOAD_LSD1;
	wire			LOAD_LSD2;
	wire	[31:0]	LSD_1;
	wire	[31:0]	LSD_2;
	wire   [52:32]	MSD_1;
	wire   [52:32]	MSD_2;

	// ----- Operand registers : loaded in parts under control of PREPDATA -----
	always @(posedge BCLK)
		begin
			if (LOAD_LSD1) MDA[31:0] <= LSD_1;
			if (LOAD_LSD2) MDB[31:0] <= LSD_2;
			if (LOAD_MSD)
				begin
					MDA[52:32] <= MSD_1;
					MDB[52:32] <= MSD_2;
					RCOPA	   <= SRC1[31:20];
					RCOPB	   <= SRC2[31:20];
				end
		end

	// ----- unsigned multiplier 53 * 53 bits = 106 bits -----
	assign MRESULT = MDA * MDB;

	// ----- Output data path -----
	// DP_Q is written if one of LD_OUT[1], LD_OUT[0] or WREN_L is active ; LD_OUT[0] selects SRC2
	always @(posedge BCLK)
		if ((|LD_OUT) || WREN_L)
			DP_Q <= LD_OUT[0] ? SRC2 : DOUT[31:0];

	assign DP_OUT = CLR_LSB ? DP_Q : DOUT[63:32];

	// Operand 1 = {RCOPA,MDA[51:32],SRC1} in the upper half, operand 2 = {RCOPB,MDB[51:32],SRC2} in the lower half
	assign COP_OUT = { RCOPA, MDA[51:32], SRC1,
					   RCOPB, MDB[51:32], SRC2 };

	// ----- Control and result logic -----
	DP_LOGIK DOUBLE_U(
		.BCLK(BCLK),
		.BRESET(BRESET),
		.FL(FL),
		.WR_REG(WR_REG),
		.BWD(BWD),
		.FSR(FSR),
		.OPCODE(OPCODE),
		.SRC1(SRC1),
		.SRC2(SRC2),
		.START(START),
		.CY_IN(CY_IN),
		.MAN1(MDA[52:32]),
		.MAN2(MDB[52:32]),
		.MRESULT(MRESULT),
		.COP_DONE(COP_DONE),
		.COP_OP(COP_OP),
		.COP_IN(COP_IN),
		.MEI(MEI),
		.DFLOAT(DFLOAT),
		.DONE(DONE),
		.UP_DP(UP_DP),
		.CLR_LSB(CLR_LSB),
		.WREN_L(WREN_L),
		.DVZ_TRAP(DVZ_TRAP),
		.DOUT(DOUT),
		.DP_CMP(DP_CMP),
		.TT_DP(TT_DP),
		.OVF_BCD(OVF_BCD),
		.COP_GO(COP_GO));

	// ----- Preparation of the operands for the multiplier -----
	PREPDATA DP_PREP(
		.START(START),
		.MEI(MEI),
		.DFLOAT(DFLOAT),
		.BWD(BWD),
		.SRC1(SRC1),
		.SRC2(SRC2),
		.LOAD_MSD(LOAD_MSD),
		.LOAD_LSD1(LOAD_LSD1),
		.LOAD_LSD2(LOAD_LSD2),
		.MSD_1(MSD_1),
		.MSD_2(MSD_2),
		.LSD_1(LSD_1),
		.LSD_2(LSD_2));

endmodule
|