OpenCores

Rev 48	Rev 49
Line 5...	Line 5...
`// \/_// robfinch@finitron.ca`	`// \/_// robfinch@finitron.ca`
`// \|\|`	`// \|\|`
`//`	`//`
`// fpMultiply.v`	`// fpMultiply.v`
`// - floating point multiplier`	`// - floating point multiplier`
`// - two cycle latency`	`// - two cycle latency minimum (latency depends on precision)`
`// - can issue every clock cycle`	`// - can issue every clock cycle`
`// - parameterized width`	`// - parameterized width`
`// - IEEE 754 representation`	`// - IEEE 754 representation`
`//`	`//`
`//`	`//`
`// This source file is free software: you can redistribute it and/or modify`	`// BSD 3-Clause License`
`// it under the terms of the GNU Lesser General Public License as published`	`// Redistribution and use in source and binary forms, with or without`
`// by the Free Software Foundation, either version 3 of the License, or`	`// modification, are permitted provided that the following conditions are met:`
`// (at your option) any later version.`	`//`
`//`	`// 1. Redistributions of source code must retain the above copyright notice, this`
`// This source file is distributed in the hope that it will be useful,`	`// list of conditions and the following disclaimer.`
`// but WITHOUT ANY WARRANTY; without even the implied warranty of`	`//`
`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`	`// 2. Redistributions in binary form must reproduce the above copyright notice,`
`// GNU General Public License for more details.`	`// this list of conditions and the following disclaimer in the documentation`
	`// and/or other materials provided with the distribution.`
	`//`
	`// 3. Neither the name of the copyright holder nor the names of its`
	`// contributors may be used to endorse or promote products derived from`
	`// this software without specific prior written permission.`
	`//`
	`// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
	`// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
	`// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE`
	`// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE`
	`// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL`
	`// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR`
	`// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER`
	`// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,`
	`// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
	`// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
`//`	`//`
`// You should have received a copy of the GNU General Public License`
`// along with this program. If not, see <http://www.gnu.org/licenses/>.`
`//`	`//`
`// Floating Point Multiplier / Divider`	`// Floating Point Multiplier`
`//`	`//`
`// This multiplier/divider handles denormalized numbers.`	`// This multiplier handles denormalized numbers.`
`// The output format is of an internal expanded representation`	`// The output format is of an internal expanded representation`
`// in preparation to be fed into a normalization unit, then`	`// in preparation to be fed into a normalization unit, then`
`// rounding. Basically, it's the same as the regular format`	`// rounding. Basically, it's the same as the regular format`
`// except the mantissa is doubled in size, the leading two`	`// except the mantissa is doubled in size, the leading two`
`// bits of which are assumed to be whole bits.`	`// bits of which are assumed to be whole bits.`
Line 40...	Line 54...
`//`	`//`
`// Properties:`	`// Properties:`
`// +-inf * +-inf = -+inf (this is handled by exOver)`	`// +-inf * +-inf = -+inf (this is handled by exOver)`
`// +-inf * 0 = QNaN`	`// +-inf * 0 = QNaN`
`//`	`//`
`// 1 sign number`
`// 8 exponent`
`// 48 mantissa`
`//`
`// ============================================================================`	`// ============================================================================`

`import fp::*;`	`import fp::*;`

`module fpMultiply(clk, ce, a, b, o, sign_exe, inf, overflow, underflow);`	`module fpMultiply(clk, ce, a, b, o, sign_exe, inf, overflow, underflow);`
Line 57...	Line 67...
`output [EX:0] o;`	`output [EX:0] o;`
`output sign_exe;`	`output sign_exe;`
`output inf;`	`output inf;`
`output overflow;`	`output overflow;`
`output underflow;`	`output underflow;`
	`parameter DELAY =`
	`(FPWID == 128 ? 17 :`
	`FPWID == 80 ? 17 :`
	`FPWID == 64 ? 13 :`
	`FPWID == 40 ? 8 :`
	`FPWID == 32 ? 2 :`
	`FPWID == 16 ? 2 : 2);`

`reg [EMSB:0] xo1; // extra bit for sign`	`reg [EMSB:0] xo1; // extra bit for sign`
`reg [FX:0] mo1;`	`reg [FX:0] mo1;`

`// constants`	`// constants`
Line 85...	Line 102...
`wire aNan, bNan, aNan1, bNan1;`	`wire aNan, bNan, aNan1, bNan1;`
`wire az, bz;`	`wire az, bz;`
`wire aInf, bInf, aInf1, bInf1;`	`wire aInf, bInf, aInf1, bInf1;`


`// -----------------------------------------------------------`	`// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
`// First clock`	`// Clock #1`
`// - decode the input operands`	`// - decode the input operands`
`// - derive basic information`	`// - derive basic information`
`// - calculate exponent`	`// - calculate exponent`
`// - calculate fraction`	`// - calculate fraction`
	`// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

	`// -----------------------------------------------------------`
	`// First clock`
`// -----------------------------------------------------------`	`// -----------------------------------------------------------`

`fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );`	`fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );`
`fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );`	`fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );`

`// Compute the sum of the exponents.`	`// Compute the sum of the exponents.`
`// correct the exponent for denormalized operands`	`// correct the exponent for denormalized operands`
`// adjust the sum by the exponent offset (subtract 127)`	`// adjust the sum by the exponent offset (subtract 127)`
`// mul: ex1 = xa + xb, result should always be < 1ffh`	`// mul: ex1 = xa + xb, result should always be < 1ffh`
	`ifdef SUPPORT_DENORMALS
`assign ex1 = (az\|bz) ? 0 : (xa\|a_dn) + (xb\|b_dn) - bias;`	`assign ex1 = (az\|bz) ? 0 : (xa\|a_dn) + (xb\|b_dn) - bias;`
	`else
	`assign ex1 = (az\|bz) ? 0 : xa + xb - bias;`
	`endif

`generate`	`generate`
`if (FPWID==80) begin`	`if (FPWID==128) begin`
`reg [31:0] p00,p01,p02,p03;`	`wire [255:0] fractoo;`
`reg [31:0] p10,p11,p12,p13;`	`mult128x128 umul1 (.clk(clk), .ce(ce), .a({16'b0,fracta}), .b({16'b0,fractb}), .o(fractoo));`
`reg [31:0] p20,p21,p22,p23;`
`reg [31:0] p30,p31,p32,p33;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) begin`	`if (ce) fract1 <= fractoo[224:0];`
`p00 <= fracta[15: 0] * fractb[15: 0];`
`p01 <= fracta[31:16] * fractb[15: 0];`
`p02 <= fracta[47:32] * fractb[15: 0];`
`p03 <= fracta[63:48] * fractb[15: 0];`

`p10 <= fracta[15: 0] * fractb[31:16];`
`p11 <= fracta[31:16] * fractb[31:16];`
`p12 <= fracta[47:32] * fractb[31:16];`
`p13 <= fracta[63:48] * fractb[31:16];`

`p20 <= fracta[15: 0] * fractb[47:32];`
`p21 <= fracta[31:16] * fractb[47:32];`
`p22 <= fracta[47:32] * fractb[47:32];`
`p23 <= fracta[63:48] * fractb[47:32];`

`p30 <= fracta[15: 0] * fractb[63:48];`
`p31 <= fracta[31:16] * fractb[63:48];`
`p32 <= fracta[47:32] * fractb[63:48];`
`p33 <= fracta[63:48] * fractb[63:48];`

`fract1 <= {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 +`
`{p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} +`
`{p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} +`
`{p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0}`
`;`
`end`	`end`
	`else if (FPWID==80) begin`
	`wire [255:0] fractoo;`
	`mult128x128 umul1 (.clk(clk), .ce(ce), .a({63'd0,fracta}), .b({63'd0,fractb}), .o(fractoo));`
	`always @(posedge clk)`
	`if (ce) fract1 <= fractoo[130:0];`
`end`	`end`
`else if (FPWID==64) begin`	`else if (FPWID==64) begin`
`reg [35:0] p00,p01,p02;`	`wire [127:0] fractoo;`
`reg [35:0] p10,p11,p12;`	`mult64x64 umul1 (.clk(clk), .ce(ce), .a({11'd0,fracta}), .b({11'd0,fractb}), .o(fractoo));`
`reg [35:0] p20,p21,p22;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) begin`	`if (ce) fract1 <= fractoo[106:0];`
`p00 <= fracta[17: 0] * fractb[17: 0];`
`p01 <= fracta[35:18] * fractb[17: 0];`
`p02 <= fracta[52:36] * fractb[17: 0];`
`p10 <= fracta[17: 0] * fractb[35:18];`
`p11 <= fracta[35:18] * fractb[35:18];`
`p12 <= fracta[52:36] * fractb[35:18];`
`p20 <= fracta[17: 0] * fractb[52:36];`
`p21 <= fracta[35:18] * fractb[52:36];`
`p22 <= fracta[52:36] * fractb[52:36];`
`fract1 <= {p02,36'b0} + {p01,18'b0} + p00 +`
`{p12,54'b0} + {p11,36'b0} + {p10,18'b0} +`
`{p22,72'b0} + {p21,54'b0} + {p20,36'b0}`
`;`
`end`	`end`
	`else if (FPWID==40) begin`
	`wire [63:0] fractoo;`
	`mult32x32 umul1 (.clk(clk), .ce(ce), .a({3'd0,fracta}), .b({3'd0,fractb}), .o(fractoo));`
	`always @(posedge clk)`
	`if (ce) fract1 <= fractoo[58:0];`
`end`	`end`
`else if (FPWID==32) begin`	`else if (FPWID==32) begin`
`reg [23:0] p00,p01,p02;`	`reg [23:0] p00,p11;`
`reg [23:0] p10,p11,p12;`
`reg [23:0] p20,p21,p22;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) begin`	`if (ce) begin`
`p00 <= fracta[11: 0] * fractb[11: 0];`	`p00 <= fracta[23: 0] * fractb[11: 0];`
`p01 <= fracta[23:12] * fractb[11: 0];`	`p11 <= fracta[23: 0] * fractb[23:12];`
`p10 <= fracta[11: 0] * fractb[23:12];`	`fract1 <= {p11,12'b0} + p00;`
`p11 <= fracta[23:12] * fractb[23:12];`
`fract1 <= {p11,p00} + {p01,12'b0} + {p10,12'b0};`
`end`	`end`
`end`	`end`
`else begin`	`else begin`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) begin`	`if (ce) begin`
Line 185...	Line 175...
`// Status`	`// Status`
`wire under1, over1;`	`wire under1, over1;`
`wire under = ex1[EMSB+2]; // exponent underflow`	`wire under = ex1[EMSB+2]; // exponent underflow`
`wire over = (&ex1[EMSB:0] \| ex1[EMSB+1]) & !ex1[EMSB+2];`	`wire over = (&ex1[EMSB:0] \| ex1[EMSB+1]) & !ex1[EMSB+2];`

`delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );`	`delay #(.WID(EMSB+1),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );`
`delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );`	`delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );`
`delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );`	`delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );`
`delay2 u6 (.clk(clk), .ce(ce), .i(under), .o(under1) );`	`delay #(.WID(1),.DEP(DELAY)) u6 (.clk(clk), .ce(ce), .i(under), .o(under1) );`
`delay2 u7 (.clk(clk), .ce(ce), .i(over), .o(over1) );`	`delay #(.WID(1),.DEP(DELAY)) u7 (.clk(clk), .ce(ce), .i(over), .o(over1) );`

`// determine when a NaN is output`	`// determine when a NaN is output`
`wire qNaNOut;`	`wire qNaNOut;`
`wire [FPWID-1:0] a1,b1;`	`wire [FPWID-1:0] a1,b1;`
`delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)\|(bInf&az)), .o(qNaNOut) );`	`delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((aInf&bz)\|(bInf&az)), .o(qNaNOut) );`
`delay2 u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );`	`delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );`
`delay2 u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );`	`delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );`
`delay2 #(FPWID) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );`	`delay #(.WID(FPWID),.DEP(DELAY)) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );`
`delay2 #(FPWID) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );`	`delay #(.WID(FPWID),.DEP(DELAY)) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );`

`// -----------------------------------------------------------`	`// -----------------------------------------------------------`
`// Second clock`	`// Second clock`
`// - correct exponent and mantissa for exceptional conditions`	`// - correct exponent and mantissa for exceptional conditions`
`// -----------------------------------------------------------`	`// -----------------------------------------------------------`

`wire so1;`	`wire so1;`
`delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay!`	`delay #(.WID(1),.DEP(DELAY+1)) u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay!`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce)`	`if (ce)`
`casez({qNaNOut\|aNan1\|bNan1,aInf1,bInf1,over1,under1})`	`casez({qNaNOut\|aNan1\|bNan1,aInf1,bInf1,over1,under1})`
`5'b1????: xo1 = infXp; // qNaN - infinity * zero`	`5'b1????: xo1 = infXp; // qNaN - infinity * zero`
Line 219...	Line 209...
`5'b0001?: xo1 = infXp; // result overflow`	`5'b0001?: xo1 = infXp; // result overflow`
`5'b00001: xo1 = ex2[EMSB:0];//0; // underflow`	`5'b00001: xo1 = ex2[EMSB:0];//0; // underflow`
`default: xo1 = ex2[EMSB:0]; // situation normal`	`default: xo1 = ex2[EMSB:0]; // situation normal`
`endcase`	`endcase`

	`// Force mantissa to zero when underflow or zero exponent when not supporting denormals.`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce)`	`if (ce)`
	`ifdef SUPPORT_DENORMALS
`casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1})`	`casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1})`
	`else
	`casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1\|under1})`
	`endif
`6'b1?????: mo1 = {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}};`	`6'b1?????: mo1 = {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}};`
`6'b01????: mo1 = {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}};`	`6'b01????: mo1 = {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}};`
`6'b001???: mo1 = {1'b1,qNaN\|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero`	`6'b001???: mo1 = {1'b1,qNaN\|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero`
`6'b0001??: mo1 = 0; // mul inf's`	`6'b0001??: mo1 = 0; // mul inf's`
`6'b00001?: mo1 = 0; // mul inf's`	`6'b00001?: mo1 = 0; // mul inf's`
`6'b000001: mo1 = 0; // mul overflow`	`6'b000001: mo1 = 0; // mul overflow`
`default: mo1 = fract1;`	`default: mo1 = fract1;`
`endcase`	`endcase`

`delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );`	`delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );`
`delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) );`	`delay1 u11 (.clk(clk), .ce(ce), .i(over1), .o(overflow) );`
`delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) );`	`delay1 u12 (.clk(clk), .ce(ce), .i(over1), .o(inf) );`
`delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );`	`delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );`

`assign o = {so1,xo1,mo1};`	`assign o = {so1,xo1,mo1};`
Line 243...	Line 238...
`endmodule`	`endmodule`


`// Multiplier with normalization and rounding.`	`// Multiplier with normalization and rounding.`

`module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);`	`module fpMultiplynr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);`
`input clk;`	`input clk;`
`input ce;`	`input ce;`
`input [MSB:0] a, b;`	`input [MSB:0] a, b;`
`output [MSB:0] o;`	`output [MSB:0] o;`
`input [2:0] rm;`	`input [2:0] rm;`
Line 258...	Line 253...

`wire [EX:0] o1;`	`wire [EX:0] o1;`
`wire sign_exe1, inf1, overflow1, underflow1;`	`wire sign_exe1, inf1, overflow1, underflow1;`
`wire [MSB+3:0] fpn0;`	`wire [MSB+3:0] fpn0;`

`fpMul #(FPWID) u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1);`	`fpMultiply u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1);`
`fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );`	`fpNormalize u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );`
`fpRound #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );`	`fpRound u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );`
`delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));`	`delay2 #(1) u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));`
`delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));`	`delay2 #(1) u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));`
`delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));`	`delay2 #(1) u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));`
`delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));`	`delay2 #(1) u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));`
`endmodule`	`endmodule`

Line 5...

//     \/_//     robfinch@finitron.ca

//     \/_//     robfinch@finitron.ca

//       ||

//       ||

//

//

//      fpMultiply.v

//      fpMultiply.v

//              - floating point multiplier

//              - floating point multiplier

//              - two cycle latency

//              - two cycle latency minimum (latency depends on precision)

//              - can issue every clock cycle

//              - can issue every clock cycle

//              - parameterized width

//              - parameterized width

//              - IEEE 754 representation

//              - IEEE 754 representation

//

//

//

//

// This source file is free software: you can redistribute it and/or modify

// BSD 3-Clause License

// it under the terms of the GNU Lesser General Public License as published

// Redistribution and use in source and binary forms, with or without

// by the Free Software Foundation, either version 3 of the License, or

// modification, are permitted provided that the following conditions are met:

// (at your option) any later version.

//

//

// 1. Redistributions of source code must retain the above copyright notice, this

// This source file is distributed in the hope that it will be useful,

//    list of conditions and the following disclaimer.

// but WITHOUT ANY WARRANTY; without even the implied warranty of

//

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

// 2. Redistributions in binary form must reproduce the above copyright notice,

// GNU General Public License for more details.

//    this list of conditions and the following disclaimer in the documentation

//    and/or other materials provided with the distribution.

//

// 3. Neither the name of the copyright holder nor the names of its

//    contributors may be used to endorse or promote products derived from

//    this software without specific prior written permission.

//

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE

// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR

// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

//

//

// You should have received a copy of the GNU General Public License

// along with this program.  If not, see <http://www.gnu.org/licenses/>.

//

//

//      Floating Point Multiplier / Divider

//      Floating Point Multiplier

//

//

//      This multiplier/divider handles denormalized numbers.

//      This multiplier handles denormalized numbers.

//      The output format is of an internal expanded representation

//      The output format is of an internal expanded representation

//      in preparation to be fed into a normalization unit, then

//      in preparation to be fed into a normalization unit, then

//      rounding. Basically, it's the same as the regular format

//      rounding. Basically, it's the same as the regular format

//      except the mantissa is doubled in size, the leading two

//      except the mantissa is doubled in size, the leading two

//      bits of which are assumed to be whole bits.

//      bits of which are assumed to be whole bits.

Line 40...

Line 54...

//

//

//      Properties:

//      Properties:

//      +-inf * +-inf = -+inf   (this is handled by exOver)

//      +-inf * +-inf = -+inf   (this is handled by exOver)

//      +-inf * 0     = QNaN

//      +-inf * 0     = QNaN

//

//

//      1 sign number

//      8 exponent

//      48 mantissa

//

// ============================================================================

// ============================================================================

import fp::*;

import fp::*;

module fpMultiply(clk, ce, a, b, o, sign_exe, inf, overflow, underflow);

module fpMultiply(clk, ce, a, b, o, sign_exe, inf, overflow, underflow);

Line 57...

Line 67...

output [EX:0] o;

output [EX:0] o;

output sign_exe;

output sign_exe;

output inf;

output inf;

output overflow;

output overflow;

output underflow;

output underflow;

parameter DELAY =

  (FPWID == 128 ? 17 :

  FPWID == 80 ? 17 :

  FPWID == 64 ? 13 :

  FPWID == 40 ? 8 :

  FPWID == 32 ? 2 :

  FPWID == 16 ? 2 : 2);

reg [EMSB:0] xo1;               // extra bit for sign

reg [EMSB:0] xo1;               // extra bit for sign

reg [FX:0] mo1;

reg [FX:0] mo1;

// constants

// constants

Line 85...

Line 102...

wire aNan, bNan, aNan1, bNan1;

wire aNan, bNan, aNan1, bNan1;

wire az, bz;

wire az, bz;

wire aInf, bInf, aInf1, bInf1;

wire aInf, bInf, aInf1, bInf1;

// -----------------------------------------------------------

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// First clock

// Clock #1

// - decode the input operands

// - decode the input operands

// - derive basic information

// - derive basic information

// - calculate exponent

// - calculate exponent

// - calculate fraction

// - calculate fraction

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// -----------------------------------------------------------

// First clock

// -----------------------------------------------------------

// -----------------------------------------------------------

fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );

fpDecomp u1a (.i(a), .sgn(sa), .exp(xa), .fract(fracta), .xz(a_dn), .vz(az), .inf(aInf), .nan(aNan) );

fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );

fpDecomp u1b (.i(b), .sgn(sb), .exp(xb), .fract(fractb), .xz(b_dn), .vz(bz), .inf(bInf), .nan(bNan) );

// Compute the sum of the exponents.

// Compute the sum of the exponents.

// correct the exponent for denormalized operands

// correct the exponent for denormalized operands

// adjust the sum by the exponent offset (subtract 127)

// adjust the sum by the exponent offset (subtract 127)

// mul: ex1 = xa + xb,  result should always be < 1ffh

// mul: ex1 = xa + xb,  result should always be < 1ffh

`ifdef SUPPORT_DENORMALS

assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;

assign ex1 = (az|bz) ? 0 : (xa|a_dn) + (xb|b_dn) - bias;

`else

assign ex1 = (az|bz) ? 0 : xa + xb - bias;

`endif

generate

generate

if (FPWID==80) begin

if (FPWID==128) begin

reg [31:0] p00,p01,p02,p03;

  wire [255:0] fractoo;

reg [31:0] p10,p11,p12,p13;

  mult128x128 umul1 (.clk(clk), .ce(ce), .a({16'b0,fracta}), .b({16'b0,fractb}), .o(fractoo));

reg [31:0] p20,p21,p22,p23;

reg [31:0] p30,p31,p32,p33;

        always @(posedge clk)

        always @(posedge clk)

        if (ce) begin

    if (ce) fract1 <= fractoo[224:0];

                p00 <= fracta[15: 0] * fractb[15: 0];

                p01 <= fracta[31:16] * fractb[15: 0];

                p02 <= fracta[47:32] * fractb[15: 0];

                p03 <= fracta[63:48] * fractb[15: 0];

                p10 <= fracta[15: 0] * fractb[31:16];

                p11 <= fracta[31:16] * fractb[31:16];

                p12 <= fracta[47:32] * fractb[31:16];

                p13 <= fracta[63:48] * fractb[31:16];

                p20 <= fracta[15: 0] * fractb[47:32];

                p21 <= fracta[31:16] * fractb[47:32];

                p22 <= fracta[47:32] * fractb[47:32];

                p23 <= fracta[63:48] * fractb[47:32];

                p30 <= fracta[15: 0] * fractb[63:48];

                p31 <= fracta[31:16] * fractb[63:48];

                p32 <= fracta[47:32] * fractb[63:48];

                p33 <= fracta[63:48] * fractb[63:48];

                fract1 <=                                               {p03,48'b0} + {p02,32'b0} + {p01,16'b0} + p00 +

                                                                  {p13,64'b0} + {p12,48'b0} + {p11,32'b0} + {p10,16'b0} +

                                        {p23,80'b0} + {p22,64'b0} + {p21,48'b0} + {p20,32'b0} +

      {p33,96'b0} + {p32,80'b0} + {p31,64'b0} + {p30,48'b0}

end

end

else if (FPWID==80) begin

  wire [255:0] fractoo;

  mult128x128 umul1 (.clk(clk), .ce(ce), .a({63'd0,fracta}), .b({63'd0,fractb}), .o(fractoo));

  always @(posedge clk)

    if (ce) fract1 <= fractoo[130:0];

end

end

else if (FPWID==64) begin

else if (FPWID==64) begin

reg [35:0] p00,p01,p02;

  wire [127:0] fractoo;

reg [35:0] p10,p11,p12;

  mult64x64 umul1 (.clk(clk), .ce(ce), .a({11'd0,fracta}), .b({11'd0,fractb}), .o(fractoo));

reg [35:0] p20,p21,p22;

        always @(posedge clk)

        always @(posedge clk)

        if (ce) begin

    if (ce) fract1 <= fractoo[106:0];

                p00 <= fracta[17: 0] * fractb[17: 0];

                p01 <= fracta[35:18] * fractb[17: 0];

                p02 <= fracta[52:36] * fractb[17: 0];

                p10 <= fracta[17: 0] * fractb[35:18];

                p11 <= fracta[35:18] * fractb[35:18];

                p12 <= fracta[52:36] * fractb[35:18];

                p20 <= fracta[17: 0] * fractb[52:36];

                p21 <= fracta[35:18] * fractb[52:36];

                p22 <= fracta[52:36] * fractb[52:36];

                fract1 <=                                   {p02,36'b0} + {p01,18'b0} + p00 +

                                                                  {p12,54'b0} + {p11,36'b0} + {p10,18'b0} +

                                        {p22,72'b0} + {p21,54'b0} + {p20,36'b0}

end

end

else if (FPWID==40) begin

  wire [63:0] fractoo;

  mult32x32 umul1 (.clk(clk), .ce(ce), .a({3'd0,fracta}), .b({3'd0,fractb}), .o(fractoo));

  always @(posedge clk)

    if (ce) fract1 <= fractoo[58:0];

end

end

else if (FPWID==32) begin

else if (FPWID==32) begin

reg [23:0] p00,p01,p02;

  reg [23:0] p00,p11;

reg [23:0] p10,p11,p12;

reg [23:0] p20,p21,p22;

        always @(posedge clk)

        always @(posedge clk)

        if (ce) begin

        if (ce) begin

                p00 <= fracta[11: 0] * fractb[11: 0];

          p00 <= fracta[23: 0] * fractb[11: 0];

                p01 <= fracta[23:12] * fractb[11: 0];

          p11 <= fracta[23: 0] * fractb[23:12];

                p10 <= fracta[11: 0] * fractb[23:12];

                fract1 <= {p11,12'b0} + p00;

                p11 <= fracta[23:12] * fractb[23:12];

                fract1 <= {p11,p00} + {p01,12'b0} + {p10,12'b0};

end

end

end

end

else begin

else begin

        always @(posedge clk)

        always @(posedge clk)

    if (ce) begin

    if (ce) begin

Line 185...

Line 175...

// Status

// Status

wire under1, over1;

wire under1, over1;

wire under = ex1[EMSB+2];       // exponent underflow

wire under = ex1[EMSB+2];       // exponent underflow

wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

wire over = (&ex1[EMSB:0] | ex1[EMSB+1]) & !ex1[EMSB+2];

delay2 #(EMSB+1) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );

delay #(.WID(EMSB+1),.DEP(DELAY)) u3 (.clk(clk), .ce(ce), .i(ex1[EMSB:0]), .o(ex2) );

delay2 u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );

delay #(.WID(1),.DEP(DELAY)) u2a (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );

delay2 u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );

delay #(.WID(1),.DEP(DELAY)) u2b (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );

delay2 u6  (.clk(clk), .ce(ce), .i(under), .o(under1) );

delay #(.WID(1),.DEP(DELAY)) u6  (.clk(clk), .ce(ce), .i(under), .o(under1) );

delay2 u7  (.clk(clk), .ce(ce), .i(over), .o(over1) );

delay #(.WID(1),.DEP(DELAY)) u7  (.clk(clk), .ce(ce), .i(over), .o(over1) );

// determine when a NaN is output

// determine when a NaN is output

wire qNaNOut;

wire qNaNOut;

wire [FPWID-1:0] a1,b1;

wire [FPWID-1:0] a1,b1;

delay2 u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) );

delay #(.WID(1),.DEP(DELAY)) u5 (.clk(clk), .ce(ce), .i((aInf&bz)|(bInf&az)), .o(qNaNOut) );

delay2 u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );

delay #(.WID(1),.DEP(DELAY)) u14 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );

delay2 u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );

delay #(.WID(1),.DEP(DELAY)) u15 (.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );

delay2 #(FPWID) u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );

delay #(.WID(FPWID),.DEP(DELAY))  u16 (.clk(clk), .ce(ce), .i(a), .o(a1) );

delay2 #(FPWID) u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );

delay #(.WID(FPWID),.DEP(DELAY))  u17 (.clk(clk), .ce(ce), .i(b), .o(b1) );

// -----------------------------------------------------------

// -----------------------------------------------------------

// Second clock

// Second clock

// - correct exponent and mantissa for exceptional conditions

// - correct exponent and mantissa for exceptional conditions

// -----------------------------------------------------------

// -----------------------------------------------------------

wire so1;

wire so1;

delay3 u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay!

delay #(.WID(1),.DEP(DELAY+1)) u8 (.clk(clk), .ce(ce), .i(sa ^ sb), .o(so1) );// two clock delay!

always @(posedge clk)

always @(posedge clk)

        if (ce)

        if (ce)

                casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1})

                casez({qNaNOut|aNan1|bNan1,aInf1,bInf1,over1,under1})

                5'b1????:       xo1 = infXp;    // qNaN - infinity * zero

                5'b1????:       xo1 = infXp;    // qNaN - infinity * zero

Line 219...

Line 209...

                5'b0001?:       xo1 = infXp;    // result overflow

                5'b0001?:       xo1 = infXp;    // result overflow

                5'b00001:       xo1 = ex2[EMSB:0];//0;          // underflow

                5'b00001:       xo1 = ex2[EMSB:0];//0;          // underflow

                default:        xo1 = ex2[EMSB:0];      // situation normal

                default:        xo1 = ex2[EMSB:0];      // situation normal

                endcase

                endcase

// Force mantissa to zero when underflow or zero exponent when not supporting denormals.

always @(posedge clk)

always @(posedge clk)

        if (ce)

        if (ce)

`ifdef SUPPORT_DENORMALS

                casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1})

                casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1})

`else

                casez({aNan1,bNan1,qNaNOut,aInf1,bInf1,over1|under1})

`endif

                6'b1?????:  mo1 = {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}};

                6'b1?????:  mo1 = {1'b1,a1[FMSB:0],{FMSB+1{1'b0}}};

    6'b01????:  mo1 = {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}};

    6'b01????:  mo1 = {1'b1,b1[FMSB:0],{FMSB+1{1'b0}}};

                6'b001???:      mo1 = {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}};  // multiply inf * zero

                6'b001???:      mo1 = {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}};  // multiply inf * zero

                6'b0001??:      mo1 = 0;        // mul inf's

                6'b0001??:      mo1 = 0;        // mul inf's

                6'b00001?:      mo1 = 0;        // mul inf's

                6'b00001?:      mo1 = 0;        // mul inf's

                6'b000001:      mo1 = 0;        // mul overflow

                6'b000001:      mo1 = 0;        // mul overflow

                default:        mo1 = fract1;

                default:        mo1 = fract1;

                endcase

                endcase

delay3 u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );

delay #(.WID(1),.DEP(DELAY+1)) u10 (.clk(clk), .ce(ce), .i(sa & sb), .o(sign_exe) );

delay1 u11 (.clk(clk), .ce(ce), .i(over1),  .o(overflow) );

delay1 u11 (.clk(clk), .ce(ce), .i(over1),  .o(overflow) );

delay1 u12 (.clk(clk), .ce(ce), .i(over1),  .o(inf) );

delay1 u12 (.clk(clk), .ce(ce), .i(over1),  .o(inf) );

delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );

delay1 u13 (.clk(clk), .ce(ce), .i(under1), .o(underflow) );

assign o = {so1,xo1,mo1};

assign o = {so1,xo1,mo1};

Line 243...

Line 238...

endmodule

endmodule

// Multiplier with normalization and rounding.

// Multiplier with normalization and rounding.

module fpMulnr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);

module fpMultiplynr(clk, ce, a, b, o, rm, sign_exe, inf, overflow, underflow);

input clk;

input clk;

input ce;

input ce;

input  [MSB:0] a, b;

input  [MSB:0] a, b;

output [MSB:0] o;

output [MSB:0] o;

input [2:0] rm;

input [2:0] rm;

Line 258...

Line 253...

wire [EX:0] o1;

wire [EX:0] o1;

wire sign_exe1, inf1, overflow1, underflow1;

wire sign_exe1, inf1, overflow1, underflow1;

wire [MSB+3:0] fpn0;

wire [MSB+3:0] fpn0;

fpMul       #(FPWID) u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1);

fpMultiply  u1 (clk, ce, a, b, o1, sign_exe1, inf1, overflow1, underflow1);

fpNormalize #(FPWID) u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );

fpNormalize u2(.clk(clk), .ce(ce), .under_i(underflow1), .i(o1), .o(fpn0) );

fpRound     #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );

fpRound     u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );

delay2      #(1)   u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));

delay2      #(1)   u4(.clk(clk), .ce(ce), .i(sign_exe1), .o(sign_exe));

delay2      #(1)   u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));

delay2      #(1)   u5(.clk(clk), .ce(ce), .i(inf1), .o(inf));

delay2      #(1)   u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));

delay2      #(1)   u6(.clk(clk), .ce(ce), .i(overflow1), .o(overflow));

delay2      #(1)   u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));

delay2      #(1)   u7(.clk(clk), .ce(ce), .i(underflow1), .o(underflow));

endmodule

endmodule

Browse

Tools

Subversion Repositories ft816float

[/] [ft816float/] [trunk/] [rtl/] [verilog2/] [fpMultiply.sv] - Diff between revs 48 and 49