OpenCores

Rev 48	Rev 49
`// ============================================================================`	`// ============================================================================`
`// __`	`// __`
`// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo`	`// \\__/ o\ (C) 2006-2020 Robert Finch, Waterloo`
`// \ __ / All rights reserved.`	`// \ __ / All rights reserved.`
`// \/_// robfinch@finitron.ca`	`// \/_// robfinch@finitron.ca`
`// \|\|`	`// \|\|`
`//`	`//`
`// fpNormalize.sv`	`// fpNormalize.sv`
`// - floating point normalization unit`	`// - floating point normalization unit`
`// - eight cycle latency`	`// - eight cycle latency`
`// - parameterized width`	`// - parameterized width`
`// - IEEE 754 representation`	`// - IEEE 754 representation`
`//`	`//`
`//`	`//`
`// This source file is free software: you can redistribute it and/or modify`	`// This source file is free software: you can redistribute it and/or modify`
`// it under the terms of the GNU Lesser General Public License as published`	`// it under the terms of the GNU Lesser General Public License as published`
`// by the Free Software Foundation, either version 3 of the License, or`	`// by the Free Software Foundation, either version 3 of the License, or`
`// (at your option) any later version.`	`// (at your option) any later version.`
`//`	`//`
`// This source file is distributed in the hope that it will be useful,`	`// This source file is distributed in the hope that it will be useful,`
`// but WITHOUT ANY WARRANTY; without even the implied warranty of`	`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`	`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
`// GNU General Public License for more details.`	`// GNU General Public License for more details.`
`//`	`//`
`// You should have received a copy of the GNU General Public License`	`// You should have received a copy of the GNU General Public License`
`// along with this program. If not, see <http://www.gnu.org/licenses/>.`	`// along with this program. If not, see <http://www.gnu.org/licenses/>.`
`//`	`//`
`// This unit takes a floating point number in an intermediate`	`// This unit takes a floating point number in an intermediate`
`// format and normalizes it. No normalization occurs`	`// format and normalizes it. No normalization occurs`
`// for NaN's or infinities. The unit has an eight cycle latency.`	`// for NaN's or infinities. The unit has an eight cycle latency.`
`//`	`//`
`// The mantissa is assumed to start with two whole bits on`	`// The mantissa is assumed to start with two whole bits on`
`// the left. The remaining bits are fractional.`	`// the left. The remaining bits are fractional.`
`//`	`//`
`// The width of the incoming format is reduced via a generation`	`// The width of the incoming format is reduced via a generation`
`// of sticky bit in place of the low order fractional bits.`	`// of sticky bit in place of the low order fractional bits.`
`//`	`//`
`// On an underflowed input, the incoming exponent is assumed`	`// On an underflowed input, the incoming exponent is assumed`
`// to be negative. A right shift is needed.`	`// to be negative. A right shift is needed.`
`// ============================================================================`	`// ============================================================================`

`import fp::*;`	`import fp::*;`

`module fpNormalize(clk, ce, i, o, under_i, under_o, inexact_o);`	`module fpNormalize(clk, ce, i, o, under_i, under_o, inexact_o);`
`input clk;`	`input clk;`
`input ce;`	`input ce;`
`input [EX:0] i; // expanded format input`	`input [EX:0] i; // expanded format input`
`output [MSB+3:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit`	`output [MSB+3:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit`
`input under_i;`	`input under_i;`
`output under_o;`	`output under_o;`
`output inexact_o;`	`output inexact_o;`

	`integer n;`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// No Clock required`	`// No Clock required`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`reg [EMSB:0] xo0;`	`reg [EMSB:0] xo0;`
`reg so0;`	`reg so0;`

`always @*`	`always @*`
`xo0 <= i[EX-1:FX+1];`	`xo0 <= i[EX-1:FX+1];`
`always @*`	`always @*`
`so0 <= i[EX]; // sign doesn't change`	`so0 <= i[EX]; // sign doesn't change`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock #1`	`// Clock #1`
`// - Capture exponent information`	`// - Capture exponent information`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`reg xInf1a, xInf1b, xInf1c;`	`reg xInf1a, xInf1b, xInf1c;`
`wire [FX:0] i1;`	`wire [FX:0] i1;`
`delay1 #(FX+1) u11 (.clk(clk), .ce(ce), .i(i), .o(i1));`	`delay1 #(FX+1) u11 (.clk(clk), .ce(ce), .i(i), .o(i1));`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) xInf1a <= &xo0 & !under_i;`	`if (ce) xInf1a <= &xo0 & !under_i;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) xInf1b <= &xo0[EMSB:1] & !under_i;`	`if (ce) xInf1b <= &xo0[EMSB:1] & !under_i;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) xInf1c = &xo0;`	`if (ce) xInf1c = &xo0;`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock #2`	`// Clock #2`
`// - determine exponent increment`	`// - determine exponent increment`
`// Since there are three whole digits in the incoming format`	`// Since there are three whole digits in the incoming format`
`// the number of whole digits needs to be reduced. If the MSB is`	`// the number of whole digits needs to be reduced. If the MSB is`
`// set, then increment the exponent and no shift is needed.`	`// set, then increment the exponent and no shift is needed.`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`wire xInf2c, xInf2b;`	`wire xInf2c, xInf2b;`
`wire [EMSB:0] xo2;`	`wire [EMSB:0] xo2;`
`reg incExpByOne2, incExpByTwo2;`	`reg incExpByOne2, incExpByTwo2;`
`delay1 u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));`	`delay1 u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));`
`delay1 u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));`	`delay1 u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));`
`delay2 #(EMSB+1) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));`	`delay2 #(EMSB+1) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));`
`delay2 u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));`	`delay2 u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) incExpByTwo2 <= !xInf1b & i1[FX];`	`if (ce) incExpByTwo2 <= !xInf1b & i1[FX];`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) incExpByOne2 <= !xInf1a & i1[FX-1];`	`if (ce) incExpByOne2 <= !xInf1a & i1[FX-1];`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock #3`	`// Clock #3`
`// - increment exponent`	`// - increment exponent`
`// - detect a zero mantissa`	`// - detect a zero mantissa`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`

`wire incExpByTwo3;`	`wire incExpByTwo3;`
`wire incExpByOne3;`	`wire incExpByOne3;`
`wire [FX:0] i3;`	`wire [FX:0] i3;`
`reg [EMSB:0] xo3;`	`reg [EMSB:0] xo3;`
`reg zeroMan3;`	`reg zeroMan3;`
`delay1 u31 (.clk(clk), .ce(ce), .i(incExpByTwo2), .o(incExpByTwo3));`	`delay1 u31 (.clk(clk), .ce(ce), .i(incExpByTwo2), .o(incExpByTwo3));`
`delay1 u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));`	`delay1 u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));`
`delay3 #(FX+1) u33 (.clk(clk), .ce(ce), .i(i[FX:0]), .o(i3));`	`delay3 #(FX+1) u33 (.clk(clk), .ce(ce), .i(i[FX:0]), .o(i3));`
`wire [EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};`	`wire [EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};`
`wire [EMSB+1:0] xv3b = xo2 + incExpByOne2;`	`wire [EMSB+1:0] xv3b = xo2 + incExpByOne2;`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) xo3 <= xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);`	`if (ce) xo3 <= xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);`

`always @(posedge clk)`	`always @(posedge clk)`
`if(ce) zeroMan3 <= ((xv3b[EMSB+1]\|\| &xv3b[EMSB:0])\|\|(xv3a[EMSB+1]\| &xv3a[EMSB:0]))`	`if(ce) zeroMan3 <= ((xv3b[EMSB+1]\|\| &xv3b[EMSB:0])\|\|(xv3a[EMSB+1]\| &xv3a[EMSB:0]))`
`&& !under2 && !xInf2c;`	`&& !under2 && !xInf2c;`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock #4`	`// Clock #4`
`// - Shift mantissa left`	`// - Shift mantissa left`
`// - If infinity is reached then set the mantissa to zero`	`// - If infinity is reached then set the mantissa to zero`
`// shift mantissa left to reduce to a single whole digit`	`// shift mantissa left to reduce to a single whole digit`
`// - create sticky bit`	`// - create sticky bit`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`

`reg [FMSB+4:0] mo4;`	`reg [FMSB+4:0] mo4;`
`reg inexact4;`	`reg inexact4;`

`always @(posedge clk)`	`always @(posedge clk)`
`if(ce)`	`if(ce)`
`casez({zeroMan3,incExpByTwo3,incExpByOne3})`	`casez({zeroMan3,incExpByTwo3,incExpByOne3})`
`3'b1??: mo4 <= 1'd0;`	`3'b1??: mo4 <= 1'd0;`
`3'b01?: mo4 <= {i3[FX:FMSB+1],\|i3[FMSB:0]};`	`3'b01?: mo4 <= {i3[FX:FMSB+1],\|i3[FMSB:0]};`
`3'b001: mo4 <= {i3[FX-1:FMSB],\|i3[FMSB-1:0]};`	`3'b001: mo4 <= {i3[FX-1:FMSB],\|i3[FMSB-1:0]};`
`default: mo4 <= {i3[FX-2:FMSB-1],\|i3[FMSB-2:0]};`	`default: mo4 <= {i3[FX-2:FMSB-1],\|i3[FMSB-2:0]};`
`endcase`	`endcase`

`always @(posedge clk)`	`always @(posedge clk)`
`if(ce)`	`if(ce)`
`casez({zeroMan3,incExpByTwo3,incExpByOne3})`	`casez({zeroMan3,incExpByTwo3,incExpByOne3})`
`3'b1??: inexact4 <= 1'd0;`	`3'b1??: inexact4 <= 1'd0;`
`3'b01?: inexact4 <= \|i3[FMSB:0];`	`3'b01?: inexact4 <= \|i3[FMSB:0];`
`3'b001: inexact4 <= \|i3[FMSB-1:0];`	`3'b001: inexact4 <= \|i3[FMSB-1:0];`
`default: inexact4 <= \|i3[FMSB-2:0];`	`default: inexact4 <= \|i3[FMSB-2:0];`
`endcase`	`endcase`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock edge #5`	`// Clock edge #5`
`// - count leading zeros`	`// - count leading zeros`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`wire [7:0] leadingZeros5;`	`wire [7:0] leadingZeros5;`
`wire [EMSB:0] xo5;`	`wire [EMSB:0] xo5;`
`wire xInf5;`	`wire xInf5;`
`delay2 #(EMSB+1) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));`	`delay2 #(EMSB+1) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));`
`delay3 #(1) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );`	`delay3 #(1) u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );`

`generate`	`generate`
`begin`	`begin`
`if (FPWID <= 32) begin`	`if (FPWID <= 32) begin`
`cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,5'b0}), .o(leadingZeros5) );`	`cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,5'b0}), .o(leadingZeros5) );`
`assign leadingZeros5[7:6] = 2'b00;`	`assign leadingZeros5[7:6] = 2'b00;`
`end`	`end`
`else if (FPWID<=64) begin`	`else if (FPWID<=64) begin`
`assign leadingZeros5[7] = 1'b0;`	`assign leadingZeros5[7] = 1'b0;`
`cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,8'h0}), .o(leadingZeros5) );`	`cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,8'h0}), .o(leadingZeros5) );`
`end`	`end`
`else if (FPWID<=80) begin`	`else if (FPWID<=80) begin`
`assign leadingZeros5[7] = 1'b0;`	`assign leadingZeros5[7] = 1'b0;`
`cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`	`cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`
`end`	`end`
`else if (FPWID<=84) begin`	`else if (FPWID<=84) begin`
`assign leadingZeros5[7] = 1'b0;`	`assign leadingZeros5[7] = 1'b0;`
`cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,24'b0}), .o(leadingZeros5) );`	`cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,24'b0}), .o(leadingZeros5) );`
`end`	`end`
`else if (FPWID<=96) begin`	`else if (FPWID<=96) begin`
`assign leadingZeros5[7] = 1'b0;`	`assign leadingZeros5[7] = 1'b0;`
`cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`	`cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`
`end`	`end`
`else if (FPWID<=128)`	`else if (FPWID<=128)`
`cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`	`cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );`
`end`	`end`
`endgenerate`	`endgenerate`


`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock edge #6`	`// Clock edge #6`
`// - Compute how much we want to decrement exponent by`	`// - Compute how much we want to decrement exponent by`
`// - compute amount to shift left and right`	`// - compute amount to shift left and right`
`// - at infinity the exponent can't be incremented, so we can't shift right`	`// - at infinity the exponent can't be incremented, so we can't shift right`
`// otherwise it was an underflow situation so the exponent was negative`	`// otherwise it was an underflow situation so the exponent was negative`
`// shift amount needs to be negated for shift register`	`// shift amount needs to be negated for shift register`
`// If the exponent underflowed, then the shift direction must be to the`	`// If the exponent underflowed, then the shift direction must be to the`
`// right regardless of mantissa bits; the number is denormalized.`	`// right regardless of mantissa bits; the number is denormalized.`
`// Otherwise the shift direction must be to the left.`	`// Otherwise the shift direction must be to the left.`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`reg [7:0] lshiftAmt6;`	`reg [7:0] lshiftAmt6;`
`reg [7:0] rshiftAmt6;`	`reg [7:0] rshiftAmt6;`
`wire rightOrLeft6; // 0=left,1=right`	`wire rightOrLeft6; // 0=left,1=right`
`wire xInf6;`	`wire xInf6;`
`wire [EMSB:0] xo6;`	`wire [EMSB:0] xo6;`
`wire [FMSB+4:0] mo6;`	`wire [FMSB+4:0] mo6;`
`wire zeroMan6;`	`wire zeroMan6;`
`vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );`	`vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );`
`delay1 #(EMSB+1) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));`	`delay1 #(EMSB+1) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));`
`delay2 #(FMSB+5) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );`	`delay2 #(FMSB+5) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );`
`delay1 #(1) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );`	`delay1 #(1) u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );`
`delay3 u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6));`	`delay3 u65 (.clk(clk), .ce(ce), .i(zeroMan3), .o(zeroMan6));`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) lshiftAmt6 <= leadingZeros5 > xo5 ? xo5 : leadingZeros5;`	`if (ce) lshiftAmt6 <= leadingZeros5 > xo5 ? xo5 : leadingZeros5;`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1; // xo2 is negative !`	`if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1; // xo2 is negative !`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock edge #7`	`// Clock edge #7`
`// - fogure exponent`	`// - figure exponent`
`// - shift mantissa`	`// - shift mantissa`
	`// - figure sticky bit`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`

`reg [EMSB:0] xo7;`	`reg [EMSB:0] xo7;`
`wire rightOrLeft7;`	`wire rightOrLeft7;`
`reg [FMSB+4:0] mo7l, mo7r;`	`reg [FMSB+4:0] mo7l, mo7r;`
	`reg St6,St7;`
`delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));`	`delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce)`	`if (ce)`
`xo7 <= zeroMan6 ? xo6 :`	`xo7 <= zeroMan6 ? xo6 :`
`xInf6 ? xo6 : // an infinite exponent is either a NaN or infinity; no need to change`	`xInf6 ? xo6 : // an infinite exponent is either a NaN or infinity; no need to change`
`rightOrLeft6 ? 1'd0 : // on a right shift, the exponent was negative, it's being made to zero`	`rightOrLeft6 ? 1'd0 : // on a right shift, the exponent was negative, it's being made to zero`
`xo6 - lshiftAmt6; // on a left shift, the exponent can't be decremented below zero`	`xo6 - lshiftAmt6; // on a left shift, the exponent can't be decremented below zero`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) mo7r <= mo6 >> rshiftAmt6;`	`if (ce) mo7r <= mo6 >> rshiftAmt6;`
`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) mo7l <= mo6 << lshiftAmt6;`	`if (ce) mo7l <= mo6 << lshiftAmt6;`

	`// The sticky bit is set if the bits shifted out on a right shift are set.`
	`always @*`
	`begin`
	`St6 = 1'b0;`
	`for (n = 0; n < FMSB+5; n = n + 1)`
	`if (n <= rshiftAmt6 + 1) St6 = St6\|mo6[n];`
	`end`
	`always @(posedge clk)`
	`if (ce) St7 <= St6;`

`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`
`// Clock edge #8`	`// Clock edge #8`
`// - select mantissa`	`// - select mantissa`
`// ----------------------------------------------------------------------------`	`// ----------------------------------------------------------------------------`

`wire so;`	`wire so;`
`wire [EMSB:0] xo;`	`wire [EMSB:0] xo;`
`reg [FMSB+4:0] mo;`	`reg [FMSB+4:0] mo;`
`vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );`	`vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );`
`delay1 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));`	`delay1 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));`
`vtdl u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));`	`vtdl u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));`
`delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));`	`delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));`

`always @(posedge clk)`	`always @(posedge clk)`
`if (ce) mo <= rightOrLeft7 ? mo7r : mo7l;`	`if (ce) mo <= rightOrLeft7 ? mo7r\|{St7,1'b0} : mo7l;`

`assign o = {so,xo,mo[FMSB+4:1]};`	`assign o = {so,xo,mo[FMSB+4:1]};`

`endmodule`	`endmodule`

// ============================================================================

// ============================================================================

//        __

//        __

//   \\__/ o\    (C) 2006-2020  Robert Finch, Waterloo

//   \\__/ o\    (C) 2006-2020  Robert Finch, Waterloo

//    \  __ /    All rights reserved.

//    \  __ /    All rights reserved.

//     \/_//     robfinch@finitron.ca

//     \/_//     robfinch@finitron.ca

//       ||

//       ||

//

//

//      fpNormalize.sv

//      fpNormalize.sv

//    - floating point normalization unit

//    - floating point normalization unit

//    - eight cycle latency

//    - eight cycle latency

//    - parameterized width

//    - parameterized width

//    - IEEE 754 representation

//    - IEEE 754 representation

//

//

//

//

// This source file is free software: you can redistribute it and/or modify

// This source file is free software: you can redistribute it and/or modify

// it under the terms of the GNU Lesser General Public License as published

// it under the terms of the GNU Lesser General Public License as published

// by the Free Software Foundation, either version 3 of the License, or

// by the Free Software Foundation, either version 3 of the License, or

// (at your option) any later version.

// (at your option) any later version.

//

//

// This source file is distributed in the hope that it will be useful,

// This source file is distributed in the hope that it will be useful,

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

// GNU General Public License for more details.

// GNU General Public License for more details.

//

//

// You should have received a copy of the GNU General Public License

// You should have received a copy of the GNU General Public License

// along with this program.  If not, see <http://www.gnu.org/licenses/>.

// along with this program.  If not, see <http://www.gnu.org/licenses/>.

//

//

//      This unit takes a floating point number in an intermediate

//      This unit takes a floating point number in an intermediate

// format and normalizes it. No normalization occurs

// format and normalizes it. No normalization occurs

// for NaN's or infinities. The unit has an eight cycle latency.

// for NaN's or infinities. The unit has an eight cycle latency.

//

//

// The mantissa is assumed to start with two whole bits on

// The mantissa is assumed to start with two whole bits on

// the left. The remaining bits are fractional.

// the left. The remaining bits are fractional.

//

//

// The width of the incoming format is reduced via a generation

// The width of the incoming format is reduced via a generation

// of sticky bit in place of the low order fractional bits.

// of sticky bit in place of the low order fractional bits.

//

//

// On an underflowed input, the incoming exponent is assumed

// On an underflowed input, the incoming exponent is assumed

// to be negative. A right shift is needed.

// to be negative. A right shift is needed.

// ============================================================================

// ============================================================================

import fp::*;

import fp::*;

module fpNormalize(clk, ce, i, o, under_i, under_o, inexact_o);

module fpNormalize(clk, ce, i, o, under_i, under_o, inexact_o);

input clk;

input clk;

input ce;

input ce;

input [EX:0] i;         // expanded format input

input [EX:0] i;         // expanded format input

output [MSB+3:0] o;             // normalized output + guard, sticky and round bits, + 1 whole digit

output [MSB+3:0] o;             // normalized output + guard, sticky and round bits, + 1 whole digit

input under_i;

input under_i;

output under_o;

output under_o;

output inexact_o;

output inexact_o;

integer n;

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// No Clock required

// No Clock required

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

reg [EMSB:0] xo0;

reg [EMSB:0] xo0;

reg so0;

reg so0;

always @*

always @*

        xo0 <= i[EX-1:FX+1];

        xo0 <= i[EX-1:FX+1];

always @*

always @*

        so0 <= i[EX];           // sign doesn't change

        so0 <= i[EX];           // sign doesn't change

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock #1

// Clock #1

// - Capture exponent information

// - Capture exponent information

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

reg xInf1a, xInf1b, xInf1c;

reg xInf1a, xInf1b, xInf1c;

wire [FX:0] i1;

wire [FX:0] i1;

delay1 #(FX+1) u11 (.clk(clk), .ce(ce), .i(i), .o(i1));

delay1 #(FX+1) u11 (.clk(clk), .ce(ce), .i(i), .o(i1));

always @(posedge clk)

always @(posedge clk)

        if (ce) xInf1a <= &xo0 & !under_i;

        if (ce) xInf1a <= &xo0 & !under_i;

always @(posedge clk)

always @(posedge clk)

        if (ce) xInf1b <= &xo0[EMSB:1] & !under_i;

        if (ce) xInf1b <= &xo0[EMSB:1] & !under_i;

always @(posedge clk)

always @(posedge clk)

        if (ce) xInf1c = &xo0;

        if (ce) xInf1c = &xo0;

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock #2

// Clock #2

// - determine exponent increment

// - determine exponent increment

// Since there are three whole digits in the incoming format

// Since there are three whole digits in the incoming format

// the number of whole digits needs to be reduced. If the MSB is

// the number of whole digits needs to be reduced. If the MSB is

// set, then increment the exponent and no shift is needed.

// set, then increment the exponent and no shift is needed.

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

wire xInf2c, xInf2b;

wire xInf2c, xInf2b;

wire [EMSB:0] xo2;

wire [EMSB:0] xo2;

reg incExpByOne2, incExpByTwo2;

reg incExpByOne2, incExpByTwo2;

delay1 u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));

delay1 u21 (.clk(clk), .ce(ce), .i(xInf1c), .o(xInf2c));

delay1 u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));

delay1 u22 (.clk(clk), .ce(ce), .i(xInf1b), .o(xInf2b));

delay2 #(EMSB+1) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));

delay2 #(EMSB+1) u23 (.clk(clk), .ce(ce), .i(xo0), .o(xo2));

delay2 u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));

delay2 u24 (.clk(clk), .ce(ce), .i(under_i), .o(under2));

always @(posedge clk)

always @(posedge clk)

        if (ce) incExpByTwo2 <= !xInf1b & i1[FX];

        if (ce) incExpByTwo2 <= !xInf1b & i1[FX];

always @(posedge clk)

always @(posedge clk)

        if (ce) incExpByOne2 <= !xInf1a & i1[FX-1];

        if (ce) incExpByOne2 <= !xInf1a & i1[FX-1];

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock #3

// Clock #3

// - increment exponent

// - increment exponent

// - detect a zero mantissa

// - detect a zero mantissa

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

wire incExpByTwo3;

wire incExpByTwo3;

wire incExpByOne3;

wire incExpByOne3;

wire [FX:0] i3;

wire [FX:0] i3;

reg [EMSB:0] xo3;

reg [EMSB:0] xo3;

reg zeroMan3;

reg zeroMan3;

delay1 u31 (.clk(clk), .ce(ce), .i(incExpByTwo2), .o(incExpByTwo3));

delay1 u31 (.clk(clk), .ce(ce), .i(incExpByTwo2), .o(incExpByTwo3));

delay1 u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));

delay1 u32 (.clk(clk), .ce(ce), .i(incExpByOne2), .o(incExpByOne3));

delay3 #(FX+1) u33 (.clk(clk), .ce(ce), .i(i[FX:0]), .o(i3));

delay3 #(FX+1) u33 (.clk(clk), .ce(ce), .i(i[FX:0]), .o(i3));

wire [EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};

wire [EMSB+1:0] xv3a = xo2 + {incExpByTwo2,1'b0};

wire [EMSB+1:0] xv3b = xo2 + incExpByOne2;

wire [EMSB+1:0] xv3b = xo2 + incExpByOne2;

always @(posedge clk)

always @(posedge clk)

        if (ce) xo3 <= xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);

        if (ce) xo3 <= xo2 + (incExpByTwo2 ? 2'd2 : incExpByOne2 ? 2'd1 : 2'd0);

always @(posedge clk)

always @(posedge clk)

        if(ce) zeroMan3 <= ((xv3b[EMSB+1]|| &xv3b[EMSB:0])||(xv3a[EMSB+1]| &xv3a[EMSB:0]))

        if(ce) zeroMan3 <= ((xv3b[EMSB+1]|| &xv3b[EMSB:0])||(xv3a[EMSB+1]| &xv3a[EMSB:0]))

                                                                                         && !under2 && !xInf2c;

                                                                                         && !under2 && !xInf2c;

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock #4

// Clock #4

// - Shift mantissa left

// - Shift mantissa left

// - If infinity is reached then set the mantissa to zero

// - If infinity is reached then set the mantissa to zero

//   shift mantissa left to reduce to a single whole digit

//   shift mantissa left to reduce to a single whole digit

// - create sticky bit

// - create sticky bit

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

reg [FMSB+4:0] mo4;

reg [FMSB+4:0] mo4;

reg inexact4;

reg inexact4;

always @(posedge clk)

always @(posedge clk)

if(ce)

if(ce)

casez({zeroMan3,incExpByTwo3,incExpByOne3})

casez({zeroMan3,incExpByTwo3,incExpByOne3})

3'b1??: mo4 <= 1'd0;

3'b1??: mo4 <= 1'd0;

3'b01?: mo4 <= {i3[FX:FMSB+1],|i3[FMSB:0]};

3'b01?: mo4 <= {i3[FX:FMSB+1],|i3[FMSB:0]};

3'b001: mo4 <= {i3[FX-1:FMSB],|i3[FMSB-1:0]};

3'b001: mo4 <= {i3[FX-1:FMSB],|i3[FMSB-1:0]};

default:        mo4 <= {i3[FX-2:FMSB-1],|i3[FMSB-2:0]};

default:        mo4 <= {i3[FX-2:FMSB-1],|i3[FMSB-2:0]};

endcase

endcase

always @(posedge clk)

always @(posedge clk)

if(ce)

if(ce)

casez({zeroMan3,incExpByTwo3,incExpByOne3})

casez({zeroMan3,incExpByTwo3,incExpByOne3})

3'b1??: inexact4 <= 1'd0;

3'b1??: inexact4 <= 1'd0;

3'b01?: inexact4 <= |i3[FMSB:0];

3'b01?: inexact4 <= |i3[FMSB:0];

3'b001: inexact4 <= |i3[FMSB-1:0];

3'b001: inexact4 <= |i3[FMSB-1:0];

default:        inexact4 <= |i3[FMSB-2:0];

default:        inexact4 <= |i3[FMSB-2:0];

endcase

endcase

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock edge #5

// Clock edge #5

// - count leading zeros

// - count leading zeros

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

wire [7:0] leadingZeros5;

wire [7:0] leadingZeros5;

wire [EMSB:0] xo5;

wire [EMSB:0] xo5;

wire xInf5;

wire xInf5;

delay2 #(EMSB+1) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));

delay2 #(EMSB+1) u51 (.clk(clk), .ce(ce), .i(xo3), .o(xo5));

delay3 #(1)      u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );

delay3 #(1)      u52 (.clk(clk), .ce(ce), .i(xInf2c), .o(xInf5) );

generate

generate

begin

begin

if (FPWID <= 32) begin

if (FPWID <= 32) begin

cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,5'b0}), .o(leadingZeros5) );

cntlz32Reg clz0 (.clk(clk), .ce(ce), .i({mo4,5'b0}), .o(leadingZeros5) );

assign leadingZeros5[7:6] = 2'b00;

assign leadingZeros5[7:6] = 2'b00;

end

end

else if (FPWID<=64) begin

else if (FPWID<=64) begin

assign leadingZeros5[7] = 1'b0;

assign leadingZeros5[7] = 1'b0;

cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,8'h0}), .o(leadingZeros5) );

cntlz64Reg clz0 (.clk(clk), .ce(ce), .i({mo4,8'h0}), .o(leadingZeros5) );

end

end

else if (FPWID<=80) begin

else if (FPWID<=80) begin

assign leadingZeros5[7] = 1'b0;

assign leadingZeros5[7] = 1'b0;

cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

cntlz80Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

end

end

else if (FPWID<=84) begin

else if (FPWID<=84) begin

assign leadingZeros5[7] = 1'b0;

assign leadingZeros5[7] = 1'b0;

cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,24'b0}), .o(leadingZeros5) );

cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,24'b0}), .o(leadingZeros5) );

end

end

else if (FPWID<=96) begin

else if (FPWID<=96) begin

assign leadingZeros5[7] = 1'b0;

assign leadingZeros5[7] = 1'b0;

cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

cntlz96Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

end

end

else if (FPWID<=128)

else if (FPWID<=128)

cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

cntlz128Reg clz0 (.clk(clk), .ce(ce), .i({mo4,12'b0}), .o(leadingZeros5) );

end

end

endgenerate

endgenerate

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock edge #6

// Clock edge #6

// - Compute how much we want to decrement exponent by

// - Compute how much we want to decrement exponent by

// - compute amount to shift left and right

// - compute amount to shift left and right

// - at infinity the exponent can't be incremented, so we can't shift right

// - at infinity the exponent can't be incremented, so we can't shift right

//   otherwise it was an underflow situation so the exponent was negative

//   otherwise it was an underflow situation so the exponent was negative

//   shift amount needs to be negated for shift register

//   shift amount needs to be negated for shift register

// If the exponent underflowed, then the shift direction must be to the

// If the exponent underflowed, then the shift direction must be to the

// right regardless of mantissa bits; the number is denormalized.

// right regardless of mantissa bits; the number is denormalized.

// Otherwise the shift direction must be to the left.

// Otherwise the shift direction must be to the left.

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Stage-6 signals. lshiftAmt6 and rshiftAmt6 are 8-bit registered shift
// amounts; the other signals are copies of earlier-stage values passed
// through delay modules that are defined elsewhere.
reg [7:0] lshiftAmt6;

reg [7:0] lshiftAmt6;

reg [7:0] rshiftAmt6;

reg [7:0] rshiftAmt6;

wire rightOrLeft6;      // 0=left,1=right

wire rightOrLeft6;      // 0=left,1=right

wire xInf6;

wire xInf6;

wire [EMSB:0] xo6;

wire [EMSB:0] xo6;

wire [FMSB+4:0] mo6;

wire [FMSB+4:0] mo6;

wire zeroMan6;

wire zeroMan6;

// Shift direction comes from the under_i input. vtdl is defined elsewhere;
// .a(4'd5) presumably selects its delay tap -- confirm the resulting latency.
vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );

vtdl #(1) u61 (.clk(clk), .ce(ce), .a(4'd5), .d(under_i), .q(rightOrLeft6) );

// Exponent: xo5 -> xo6.
delay1 #(EMSB+1) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));

delay1 #(EMSB+1) u62 (.clk(clk), .ce(ce), .i(xo5), .o(xo6));

// Mantissa: mo4 -> mo6 (FMSB+5 bits wide).
delay2 #(FMSB+5) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );

delay2 #(FMSB+5) u63 (.clk(clk), .ce(ce), .i(mo4), .o(mo6) );

// Infinity flag: xInf5 -> xInf6.
delay1 #(1)      u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );

delay1 #(1)      u64 (.clk(clk), .ce(ce), .i(xInf5), .o(xInf6) );

// Zero-mantissa flag: zeroMan3 -> zeroMan6 (no width parameter is passed).
delay3 u65 (.clk(clk), .ce(ce),  .i(zeroMan3), .o(zeroMan6));

delay3 u65 (.clk(clk), .ce(ce),  .i(zeroMan3), .o(zeroMan6));

// Left shift amount: the smaller of leadingZeros5 and xo5, so that the later
// subtraction of lshiftAmt6 from the exponent cannot take it below zero.
always @(posedge clk)

always @(posedge clk)

        if (ce) lshiftAmt6 <= leadingZeros5 > xo5 ? xo5 : leadingZeros5;

        if (ce) lshiftAmt6 <= leadingZeros5 > xo5 ? xo5 : leadingZeros5;

// Right shift amount: zero when xInf5 is set or when the exponent test is
// true, otherwise the two's complement of xo5 (~xo5 + 1).
// NOTE(review): 1'd0 is an unsigned literal, so Verilog evaluates
// $signed(xo5) > 1'd0 as an unsigned comparison, which is true for every
// non-zero xo5. The only remaining case is xo5 == 0, where ~xo5 + 1 is also
// zero, so rshiftAmt6 appears to always be zero as written. A signed test
// would need a signed zero (e.g. 0 or 1'sd0) -- confirm the intended behavior.
always @(posedge clk)

always @(posedge clk)

        if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1;    // xo5 is expected to be negative here

        if (ce) rshiftAmt6 <= xInf5 ? 1'd0 : $signed(xo5) > 1'd0 ? 1'd0 : ~xo5+2'd1;    // xo5 is expected to be negative here

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock edge #7

// Clock edge #7

// - figure exponent

// - figure exponent

// - shift mantissa

// - shift mantissa

// - figure sticky bit

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Stage-7 signals: adjusted exponent (xo7), the right- and left-shifted
// mantissas (mo7r, mo7l), and the sticky bit before and after registering
// (St6, St7).
reg [EMSB:0] xo7;

reg [EMSB:0] xo7;

wire rightOrLeft7;

wire rightOrLeft7;

reg [FMSB+4:0] mo7l, mo7r;

reg [FMSB+4:0] mo7l, mo7r;

reg St6,St7;

// Shift direction flag: rightOrLeft6 -> rightOrLeft7.
delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));

delay1 u71 (.clk(clk), .ce(ce), .i(rightOrLeft6), .o(rightOrLeft7));

// Stage-7 exponent, updated only when ce is asserted. Priority order:
//   1. zeroMan6 set     -> xo6 passes through unchanged
//   2. xInf6 set        -> xo6 passes through unchanged
//   3. rightOrLeft6 set -> exponent forced to zero
//   4. otherwise        -> xo6 minus the left shift amount
always @(posedge clk)

always @(posedge clk)

if (ce)

if (ce)

        xo7 <= zeroMan6 ? xo6 :

        xo7 <= zeroMan6 ? xo6 :

                xInf6 ? xo6 :                                   // an infinite exponent is either a NaN or infinity; no need to change

                xInf6 ? xo6 :                                   // an infinite exponent is either a NaN or infinity; no need to change

                rightOrLeft6 ? 1'd0 :   // on a right shift, the exponent was negative, it's being made to zero

                rightOrLeft6 ? 1'd0 :   // on a right shift, the exponent was negative, it's being made to zero

                xo6 - lshiftAmt6;                       // on a left shift, the exponent can't be decremented below zero

                xo6 - lshiftAmt6;                       // on a left shift, the exponent can't be decremented below zero

// Right-shifted mantissa: mo6 shifted right by rshiftAmt6, registered when
// ce is asserted. Bits shifted out are discarded by this shift.
always @(posedge clk)

always @(posedge clk)

        if (ce) mo7r <= mo6 >> rshiftAmt6;

        if (ce) mo7r <= mo6 >> rshiftAmt6;

// Left-shifted mantissa: mo6 shifted left by lshiftAmt6, registered when
// ce is asserted. Both shifted values are computed; the choice between them
// is made later using rightOrLeft7.
always @(posedge clk)

always @(posedge clk)

        if (ce) mo7l <= mo6 << lshiftAmt6;

        if (ce) mo7l <= mo6 << lshiftAmt6;

// The sticky bit is set if the bits shifted out on a right shift are set.
// St6 is the combinational OR of mo6[0] through mo6[rshiftAmt6+1], limited
// to the FMSB+5 bits of mo6. The loop index n is not declared in this
// section of the file.
always @*

begin

  St6 = 1'b0;

  for (n = 0; n < FMSB+5; n = n + 1)

    if (n <= rshiftAmt6 + 1) St6 = St6|mo6[n];

end

// St7 is St6 registered on the clock when ce is asserted.
always @(posedge clk)

  if (ce) St7 <= St6;

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Clock edge #8

// Clock edge #8

// - select mantissa

// - select mantissa

// ----------------------------------------------------------------------------

// ----------------------------------------------------------------------------

// Stage-8 (output) signals: sign (so), exponent (xo) and mantissa (mo).
// vtdl and delay1 are defined elsewhere; the .a() values on the vtdl
// instances presumably select the delay tap -- confirm the latencies.
wire so;

wire so;

wire [EMSB:0] xo;

wire [EMSB:0] xo;

reg [FMSB+4:0] mo;

reg [FMSB+4:0] mo;

// Sign: so0 -> so.
vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );

vtdl #(1) u81 (.clk(clk), .ce(ce), .a(4'd7), .d(so0), .q(so) );

// Exponent: xo7 -> xo.
delay1 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));

delay1 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xo7), .o(xo));

// Inexact flag: inexact4 -> inexact_o.
vtdl u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));

vtdl u83 (.clk(clk), .ce(ce), .a(4'd3), .d(inexact4), .q(inexact_o));

// Underflow output is the delayed shift-direction flag rightOrLeft7.
delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));

delay1 u84 (.clk(clk), .ce(ce), .i(rightOrLeft7), .o(under_o));

// Final mantissa select, registered when ce is asserted: the right-shifted
// value when rightOrLeft7 is set, otherwise the left-shifted value.
always @(posedge clk)

always @(posedge clk)

        if (ce) mo <= rightOrLeft7 ? mo7r : mo7l;

        // This form additionally ORs the sticky bit St7 into bit 1 of mo7r.
        if (ce) mo <= rightOrLeft7 ? mo7r|{St7,1'b0} : mo7l;

// Output packs sign, exponent and mantissa bits [FMSB+4:1]; bit 0 of mo is
// not part of the output.
assign o = {so,xo,mo[FMSB+4:1]};

assign o = {so,xo,mo[FMSB+4:1]};

endmodule

endmodule

Browse

Tools

Subversion Repositories ft816float

[/] [ft816float/] [trunk/] [rtl/] [verilog2/] [fpNormalize.sv] - Diff between revs 48 and 49