tags/arelease/Double_FPU.PDF Property changes : Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: tags/arelease/fpu_exceptions.v =================================================================== --- tags/arelease/fpu_exceptions.v (nonexistent) +++ tags/arelease/fpu_exceptions.v (revision 12) @@ -0,0 +1,280 @@ +///////////////////////////////////////////////////////////////////// +//// //// +//// FPU //// +//// Floating Point Unit (Double precision) //// +//// //// +//// Author: David Lundgren //// +//// davidklun@gmail.com //// +//// //// +///////////////////////////////////////////////////////////////////// +//// //// +//// Copyright (C) 2009 David Lundgren //// +//// davidklun@gmail.com //// +//// //// +//// This source file may be used and distributed without //// +//// restriction provided that this copyright statement is not //// +//// removed from the file and that any derivative work contains //// +//// the original copyright notice and the associated disclaimer.//// +//// //// +//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// +//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// +//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// +//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //// +//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// +//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// +//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// +//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// +//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// +//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// +//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// +//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// +//// POSSIBILITY OF SUCH DAMAGE. 
////
+//// ////
+/////////////////////////////////////////////////////////////////////
+
+
+`timescale 1ns / 100ps
+
+// fpu_exceptions
+//
+// Classifies the operands (zero, infinity, quiet/signalling NaN) and the
+// incoming result word for add / subtract / multiply / divide, derives the
+// underflow, overflow, inexact, invalid and exception flags, and selects the
+// 64-bit word driven on `out`: `in_except` unchanged, a signed zero, a signed
+// infinity (or the largest finite magnitude, depending on rounding mode), or
+// a NaN pattern.
+//
+// Every internal signal is a register written with a nonblocking assignment
+// inside one clocked block, so each derived signal is computed from the values
+// its source registers held before the clock edge. A change on the inputs
+// therefore needs several enabled clock edges to reach `out` and the flags.
+// NOTE(review): presumably the instantiating module keeps the inputs stable
+// and `enable` asserted long enough for that to happen - confirm there.
+//
+// Ports:
+//   clk          clock; all registers update on its rising edge
+//   rst          synchronous reset, active high; clears every register
+//   enable       registers update only while this is high (and rst is low)
+//   rmode        rounding mode: 00 nearest, 01 to zero, 10 to +inf, 11 to -inf
+//   opa, opb     operands; bit 63 sign, bits 62:52 exponent, bits 51:0 mantissa
+//   in_except    result word passed to `out` when no special case applies;
+//                its bit 63 supplies the sign of every special-case output
+//                (presumably the datapath's rounded result - confirm)
+//   exponent_in  12-bit exponent; a value above 2046 triggers the infinity path
+//   mantissa_in  2 bits; any bit set marks the result inexact
+//                (presumably discarded rounding bits - confirm)
+//   fpu_op       000 add, 001 subtract, 010 multiply, 011 divide
+//   out          selected result word (registered)
+//   ex_enable    registered enable_trigger (any exception, infinity result or
+//                NaN operand)
+//   underflow, overflow, inexact, exception, invalid   registered flags
+module fpu_exceptions( clk, rst, enable, rmode, opa, opb, in_except,
+exponent_in, mantissa_in, fpu_op, out, ex_enable, underflow, overflow,
+inexact, exception, invalid);
+input clk;
+input rst;
+input enable;
+input [1:0] rmode;
+input [63:0] opa;
+input [63:0] opb;
+input [63:0] in_except;
+input [11:0] exponent_in;
+input [1:0] mantissa_in;
+input [2:0] fpu_op;
+output [63:0] out;
+output ex_enable;
+output underflow;
+output overflow;
+output inexact;
+output exception;
+output invalid;
+
+reg [63:0] out;
+reg ex_enable;
+reg underflow;
+reg overflow;
+reg inexact;
+reg exception;
+reg invalid;
+
+// Internal one-bit registers; see the clocked block below for how each is derived.
+reg in_et_zero;
+reg opa_et_zero;
+reg opb_et_zero;
+reg input_et_zero;
+reg add;
+reg subtract;
+reg multiply;
+reg divide;
+reg opa_QNaN;
+reg opb_QNaN;
+reg opa_SNaN;
+reg opb_SNaN;
+reg opa_pos_inf;
+reg opb_pos_inf;
+reg opa_neg_inf;
+reg opb_neg_inf;
+reg opa_inf;
+reg opb_inf;
+reg NaN_input;
+reg SNaN_input;
+reg a_NaN;
+reg div_by_0;
+reg div_0_by_0;
+reg div_inf_by_inf;
+reg div_by_inf;
+reg mul_0_by_inf;
+reg mul_inf;
+reg div_inf;
+reg add_inf;
+reg sub_inf;
+reg addsub_inf_invalid;
+reg addsub_inf;
+reg out_inf_trigger;
+reg out_pos_inf;
+reg out_neg_inf;
+reg round_nearest;
+reg round_to_zero;
+reg round_to_pos_inf;
+reg round_to_neg_inf;
+reg inf_round_down_trigger;
+reg mul_uf;
+reg div_uf;
+reg underflow_trigger;
+reg invalid_trigger;
+reg overflow_trigger;
+reg inexact_trigger;
+reg except_trigger;
+reg enable_trigger;
+reg NaN_out_trigger;
+reg SNaN_trigger;
+
+
+// Constant exponent / mantissa fields used to build the special outputs.
+wire [10:0] exp_2047 = 11'b11111111111;   // all-ones exponent: infinity and NaN
+wire [10:0] exp_2046 = 11'b11111111110;   // largest exponent of a finite value
+reg [62:0] NaN_output_0;
+reg [62:0] NaN_output;
+wire [51:0] mantissa_max = 52'b1111111111111111111111111111111111111111111111111111;
+reg [62:0] inf_round_down;
+reg [62:0] out_inf;
+reg [63:0] out_0;
+reg [63:0] out_1;
+reg [63:0] out_2;
+
+// Detection and output-selection logic.
+always @(posedge clk)
+begin
+    if (rst) begin
+        // Synchronous reset: clear every internal register.
+        in_et_zero <= 0;
+        opa_et_zero <= 0;
+        opb_et_zero <= 0;
+        input_et_zero <= 0;
+        add <= 0;
+        subtract <= 0;
+        multiply <= 0;
+        divide <= 0;
+        opa_QNaN <= 0;
+        opb_QNaN <= 0;
+        opa_SNaN <= 0;
+        opb_SNaN <= 0;
+        opa_pos_inf <= 0;
+        opb_pos_inf <= 0;
+        opa_neg_inf <= 0;
+        opb_neg_inf <= 0;
+        opa_inf <= 0;
+        opb_inf <= 0;
+        NaN_input <= 0;
+        SNaN_input <= 0;
+        a_NaN <= 0;
+        div_by_0 <= 0;
+        div_0_by_0 <= 0;
+        div_inf_by_inf <= 0;
+        div_by_inf <= 0;
+        mul_0_by_inf <= 0;
+        mul_inf <= 0;
+        div_inf <= 0;
+        add_inf <= 0;
+        sub_inf <= 0;
+        addsub_inf_invalid <= 0;
+        addsub_inf <= 0;
+        out_inf_trigger <= 0;
+        out_pos_inf <= 0;
+        out_neg_inf <= 0;
+        round_nearest <= 0;
+        round_to_zero <= 0;
+        round_to_pos_inf <= 0;
+        round_to_neg_inf <= 0;
+        inf_round_down_trigger <= 0;
+        mul_uf <= 0;
+        div_uf <= 0;
+        underflow_trigger <= 0;
+        invalid_trigger <= 0;
+        overflow_trigger <= 0;
+        inexact_trigger <= 0;
+        except_trigger <= 0;
+        enable_trigger <= 0;
+        NaN_out_trigger <= 0;
+        SNaN_trigger <= 0;
+        NaN_output_0 <= 0;
+        NaN_output <= 0;
+        inf_round_down <= 0;
+        out_inf <= 0;
+        out_0 <= 0;
+        out_1 <= 0;
+        out_2 <= 0;
+    end
+    else if (enable) begin
+        // Zero detection on the magnitude bits only (sign bit 63 is ignored).
+        in_et_zero <= !(|in_except[62:0]);
+        opa_et_zero <= !(|opa[62:0]);
+        opb_et_zero <= !(|opb[62:0]);
+        // Same expression as in_et_zero; not read anywhere in this module.
+        input_et_zero <= !(|in_except[62:0]);
+        // Operation decode.
+        add <= fpu_op == 3'b000;
+        subtract <= fpu_op == 3'b001;
+        multiply <= fpu_op == 3'b010;
+        divide <= fpu_op == 3'b011;
+        // NaN: exponent all ones and mantissa non-zero. Mantissa bit 51 set
+        // classifies it as quiet, clear as signalling.
+        opa_QNaN <= (opa[62:52] == 2047) & |opa[51:0] & opa[51];
+        opb_QNaN <= (opb[62:52] == 2047) & |opb[51:0] & opb[51];
+        opa_SNaN <= (opa[62:52] == 2047) & |opa[51:0] & !opa[51];
+        opb_SNaN <= (opb[62:52] == 2047) & |opb[51:0] & !opb[51];
+        // Infinity: exponent all ones and mantissa zero; sign taken from bit 63.
+        opa_pos_inf <= !opa[63] & (opa[62:52] == 2047) & !(|opa[51:0]);
+        opb_pos_inf <= !opb[63] & (opb[62:52] == 2047) & !(|opb[51:0]);
+        opa_neg_inf <= opa[63] & (opa[62:52] == 2047) & !(|opa[51:0]);
+        opb_neg_inf <= opb[63] & (opb[62:52] == 2047) & !(|opb[51:0]);
+        opa_inf <= (opa[62:52] == 2047) & !(|opa[51:0]);
+        opb_inf <= (opb[62:52] == 2047) & !(|opb[51:0]);
+        NaN_input <= opa_QNaN | opb_QNaN | opa_SNaN | opb_SNaN;
+        SNaN_input <= opa_SNaN | opb_SNaN;
+        a_NaN <= opa_QNaN | opa_SNaN;   // opa (rather than opb) is the NaN operand
+        // Special operand combinations per operation.
+        div_by_0 <= divide & opb_et_zero & !opa_et_zero;
+        div_0_by_0 <= divide & opb_et_zero & opa_et_zero;
+        div_inf_by_inf <= divide & opa_inf & opb_inf;
+        div_by_inf <= divide & !opa_inf & opb_inf;
+        mul_0_by_inf <= multiply & ((opa_inf & opb_et_zero) | (opa_et_zero & opb_inf));
+        mul_inf <= multiply & (opa_inf | opb_inf) & !mul_0_by_inf;
+        div_inf <= divide & opa_inf & !opb_inf;
+        add_inf <= (add & (opa_inf | opb_inf));
+        sub_inf <= (subtract & (opa_inf | opb_inf));
+        // Adding infinities of opposite sign, or subtracting infinities of the
+        // same sign, is treated as an invalid operation.
+        addsub_inf_invalid <= (add & opa_pos_inf & opb_neg_inf) | (add & opa_neg_inf & opb_pos_inf) |
+            (subtract & opa_pos_inf & opb_pos_inf) | (subtract & opa_neg_inf & opb_neg_inf);
+        addsub_inf <= (add_inf | sub_inf) & !addsub_inf_invalid;
+        // Result takes the infinity path: infinite operand, divide by zero,
+        // or incoming exponent above 2046.
+        out_inf_trigger <= addsub_inf | mul_inf | div_inf | div_by_0 | (exponent_in > 2046);
+        out_pos_inf <= out_inf_trigger & !in_except[63];
+        out_neg_inf <= out_inf_trigger & in_except[63];
+        // Rounding-mode decode.
+        round_nearest <= (rmode == 2'b00);
+        round_to_zero <= (rmode == 2'b01);
+        round_to_pos_inf <= (rmode == 2'b10);
+        round_to_neg_inf <= (rmode == 2'b11);
+        // When the rounding direction points away from the infinity that would
+        // be produced, the largest finite magnitude is output instead.
+        inf_round_down_trigger <= (out_pos_inf & round_to_neg_inf) |
+            (out_neg_inf & round_to_pos_inf) |
+            (out_inf_trigger & round_to_zero);
+        // Underflow: non-zero operand(s) but the incoming result magnitude is zero.
+        mul_uf <= multiply & !opa_et_zero & !opb_et_zero & in_et_zero;
+        div_uf <= divide & !opa_et_zero & in_et_zero;
+        // NOTE(review): unlike overflow_trigger and inexact_trigger below, this
+        // is not gated by !NaN_input - confirm that is intended.
+        underflow_trigger <= div_by_inf | mul_uf | div_uf;
+        invalid_trigger <= SNaN_input | addsub_inf_invalid | mul_0_by_inf |
+            div_0_by_0 | div_inf_by_inf;
+        overflow_trigger <= out_inf_trigger & !NaN_input;
+        inexact_trigger <= (|mantissa_in[1:0] | out_inf_trigger | underflow_trigger) &
+            !NaN_input;
+        except_trigger <= invalid_trigger | overflow_trigger | underflow_trigger |
+            inexact_trigger;
+        enable_trigger <= except_trigger | out_inf_trigger | NaN_input;
+        NaN_out_trigger <= NaN_input | invalid_trigger;
+        // Invalid operation that was not caused by a signalling-NaN operand.
+        SNaN_trigger <= invalid_trigger & !SNaN_input;
+        // Propagate the NaN operand's low mantissa bits (opa preferred) with
+        // mantissa bit 51 forced to 1.
+        NaN_output_0 <= a_NaN ? { exp_2047, 1'b1, opa[50:0]} : { exp_2047, 1'b1, opb[50:0]};
+        // NOTE(review): this generated pattern has mantissa bit 51 clear, which
+        // is the encoding the operand checks above classify as SNaN - confirm
+        // that is intended.
+        NaN_output <= SNaN_trigger ? { exp_2047, 2'b01, opa[49:0]} : NaN_output_0;
+        // Exponent 2046 with an all-ones mantissa: largest finite magnitude.
+        inf_round_down <= { exp_2046, mantissa_max };
+        out_inf <= inf_round_down_trigger ? inf_round_down : { exp_2047, 52'b0 };
+        // Output selection; later stages take priority: NaN over infinity over
+        // underflow (signed zero) over the unmodified in_except.
+        out_0 <= underflow_trigger ? { in_except[63], 63'b0 } : in_except;
+        out_1 <= out_inf_trigger ? { in_except[63], out_inf } : out_0;
+        out_2 <= NaN_out_trigger ? { in_except[63], NaN_output} : out_1;
+    end
+end
+
+// Output stage: register the trigger signals and the selected word onto the
+// module outputs.
+always @(posedge clk)
+begin
+    if (rst) begin
+        ex_enable <= 0;
+        underflow <= 0;
+        overflow <= 0;
+        inexact <= 0;
+        exception <= 0;
+        invalid <= 0;
+        out <= 0;
+    end
+    else if (enable) begin
+        ex_enable <= enable_trigger;
+        underflow <= underflow_trigger;
+        overflow <= overflow_trigger;
+        inexact <= inexact_trigger;
+        exception <= except_trigger;
+        invalid <= invalid_trigger;
+        out <= out_2;
+    end
+end
+
+endmodule
Index: tags/arelease/fpu_TB.v
===================================================================
--- tags/arelease/fpu_TB.v (nonexistent)
+++ tags/arelease/fpu_TB.v (revision 12)
@@ -0,0 +1,845 @@
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// FPU ////
+//// Floating Point Unit (Double precision) ////
+//// ////
+//// Author: David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// Copyright (C) 2009 David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+//// This source file may be used and distributed without ////
+//// restriction provided that this copyright statement is not ////
+//// removed from the file and that any derivative work contains ////
+//// the original copyright notice and the associated disclaimer.////
+//// ////
+//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY ////
+//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ////
+//// TO, THE IMPLIED WARRANTIES OF 
MERCHANTABILITY AND FITNESS ////
+//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR ////
+//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ////
+//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ////
+//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ////
+//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ////
+//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ////
+//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ////
+//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ////
+//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ////
+//// POSSIBILITY OF SUCH DAMAGE. ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+
+`timescale 1ps / 1ps
+
+// fpu_tb
+//
+// Directed testbench for the `fpu` module (defined outside this file).
+// After a reset of 20000 ps, each test case:
+//   1. drives opa / opb / fpu_op / rmode and raises `enable`,
+//   2. waits 20000 ps (two periods of the 10000 ps clock generated below),
+//   3. lowers `enable` and waits a further 800000 ps,
+//   4. compares `out` with the expected 64-bit pattern and prints the
+//      outcome with $display.
+// The `ready` and flag outputs are wired up but never checked, and a mismatch
+// only prints a message - the simulation always runs through to $finish.
+// The expected values are consistent with fpu_op 000 = add, 001 = subtract,
+// 010 = multiply, 011 = divide (e.g. 465 and 65 with 3'b001 expect 400).
+module fpu_tb;
+
+reg clk;
+reg rst;
+reg enable;
+reg [1:0]rmode;
+reg [2:0]fpu_op;
+reg [63:0]opa;
+reg [63:0]opb;
+wire [63:0]out;
+wire ready;
+wire underflow;
+wire overflow;
+wire inexact;
+wire exception;
+wire invalid;
+
+reg [6:0] count;   // set to 0 at start-up; not otherwise used in this module
+
+
+    // Unit under test.
+    fpu UUT (
+        .clk(clk),
+        .rst(rst),
+        .enable(enable),
+        .rmode(rmode),
+        .fpu_op(fpu_op),
+        .opa(opa),
+        .opb(opb),
+        .out(out),
+        .ready(ready),
+        .underflow(underflow),
+        .overflow(overflow),
+        .inexact(inexact),
+        .exception(exception),
+        .invalid(invalid));
+
+
+// Stimulus and checking. The cases between "paste after this" and
+// "end of paste" all follow the same pattern described in the module header.
+initial
+begin : STIMUL
+    #0
+    count = 0;
+    rst = 1'b1;
+    #20000;
+    rst = 1'b0; // paste after this
+//inputA:1.6999999999e-314
+//inputB:4.0000000000e-300
+enable = 1'b1;
+opa = 64'b0000000000000000000000000000000011001101000101110000011010100010;
+opb = 64'b0000000111000101011011100001111111000010111110001111001101011001;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.249999999722977e-015
+if (out==64'h3CF323EA98D06FB6)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.0000000000e-290
+//inputB:3.0000000000e-021
+enable = 1'b1;
+opa = 64'b0000001111010010101101100000010001001001010000101111100001010101;
+opb = 64'b0011101110101100010101011000111000001111000101011110100011110111;
+fpu_op = 3'b010;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:9.000000000000022e-311
+if (out==64'h000010914A4C025A)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.6500000000e+002
+//inputB:6.5000000000e+001
+enable = 1'b1;
+opa = 64'b0100000001111101000100000000000000000000000000000000000000000000;
+opb = 64'b0100000001010000010000000000000000000000000000000000000000000000;
+fpu_op = 3'b001;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.000000000000000e+002
+if (out==64'h4079000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.2700000000e-001
+//inputB:3.4000000000e+001
+enable = 1'b1;
+opa = 64'b0011111111001101000011100101011000000100000110001001001101110101;
+opb = 64'b0100000001000001000000000000000000000000000000000000000000000000;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:3.422700000000000e+001
+if (out==64'h40411D0E56041894)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.2300000000e+002
+//inputB:5.6000000000e+001
+enable = 1'b1;
+opa = 64'b0100000001101011111000000000000000000000000000000000000000000000;
+opb = 64'b0100000001001100000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:3.982142857142857e+000
+if (out==64'h400FDB6DB6DB6DB7)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-9.5000000000e+001
+//inputB:2.0000000000e+002
+enable = 1'b1;
+opa = 64'b1100000001010111110000000000000000000000000000000000000000000000;
+opb = 64'b0100000001101001000000000000000000000000000000000000000000000000;
+fpu_op = 3'b010;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-1.900000000000000e+004
+if (out==64'hC0D28E0000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.5000000000e+001
+//inputB:-3.2000000000e+001
+enable = 1'b1;
+opa = 64'b1100000001000110100000000000000000000000000000000000000000000000;
+opb = 64'b1100000001000000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b001;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-1.300000000000000e+001
+if (out==64'hC02A000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-9.0300000000e+002
+//inputB:2.1000000000e+001
+enable = 1'b1;
+opa = 64'b1100000010001100001110000000000000000000000000000000000000000000;
+opb = 64'b0100000000110101000000000000000000000000000000000000000000000000;
+fpu_op = 3'b000;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-8.820000000000000e+002
+if (out==64'hC08B900000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.5500000000e+002
+//inputB:-4.5900000000e+002
+enable = 1'b1;
+opa = 64'b0100000001111100011100000000000000000000000000000000000000000000;
+opb = 64'b1100000001111100101100000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-9.912854030501089e-001
+if (out==64'hBFEFB89C2A6346D5)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.3577000000e+002
+//inputB:2.0000000000e-002
+enable = 1'b1;
+opa = 64'b0100000001101101011110001010001111010111000010100011110101110001;
+opb = 64'b0011111110010100011110101110000101000111101011100001010001111011;
+fpu_op = 3'b010;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.715400000000001e+000
+if (out==64'h4012DC91D14E3BCE)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.0195000000e+002
+//inputB:-3.3600000000e+001
+enable = 1'b1;
+opa = 64'b0100000001111001000111110011001100110011001100110011001100110011;
+opb = 64'b1100000001000000110011001100110011001100110011001100110011001101;
+fpu_op = 3'b001;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.355500000000000e+002
+if (out==64'h407B38CCCCCCCCCC)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-1.0000000000e-309
+//inputB:1.1000000000e-309
+enable = 1'b1;
+opa = 64'b1000000000000000101110000001010101110010011010001111110110101110;
+opb = 64'b0000000000000000110010100111110111111101110110011110001111011001;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:9.999999999999969e-311
+if (out==64'h000012688B70E62B)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.0000000000e-200
+//inputB:2.0000000000e+002
+enable = 1'b1;
+opa = 64'b0001011010001000011111101001001000010101010011101111011110101100;
+opb = 64'b0100000001101001000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.000000000000000e-202
+if (out==64'h160F5A549627A36C)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.0000000000e+020
+//inputB:2.0000000000e+002
+enable = 1'b1;
+opa = 64'b0100010000110101101011110001110101111000101101011000110001000000;
+opb = 64'b0100000001101001000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.000000000000000e+018
+if (out==64'h43BBC16D674EC800)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:5.0000000000e+000
+//inputB:2.5000000000e+000
+enable = 1'b1;
+opa = 64'b0100000000010100000000000000000000000000000000000000000000000000;
+opb = 64'b0100000000000100000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.000000000000000e+000
+if (out==64'h4000000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:1.0000000000e-312
+//inputB:1.0000000000e+000
+enable = 1'b1;
+opa = 64'b0000000000000000000000000010111100100000000111010100100111111011;
+opb = 64'b0011111111110000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:9.999999999984653e-313
+if (out==64'h0000002F201D49FB)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.8999000000e+004
+//inputB:2.3600000000e+001
+enable = 1'b1;
+opa = 64'b0100000011100111111011001110000000000000000000000000000000000000;
+opb = 64'b0100000000110111100110011001100110011001100110011001100110011010;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.897540000000000e+004
+if (out==64'h40E7E9ECCCCCCCCD)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.0000000000e-200
+//inputB:3.0000000000e+111
+enable = 1'b1;
+opa = 64'b0001011010001000011111101001001000010101010011101111011110101100;
+opb = 64'b0101011100010011111101011000110101000011010010100010101110101110;
+fpu_op = 3'b011;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:1.333333333333758e-311
+if (out==64'h0000027456DBDA6D)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:7.0000000000e-310
+//inputB:8.0000000000e-100
+enable = 1'b1;
+opa = 64'b0000000000000000100000001101101111010000000101100100101100101101;
+opb = 64'b0010101101011011111111110010111011100100100011100000010100110000;
+fpu_op = 3'b011;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:8.749999999999972e-211
+if (out==64'h14526914EEBBD470)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:1.4000000000e-311
+//inputB:2.5000000000e-310
+enable = 1'b1;
+opa = 64'b0000000000000000000000101001001111000001100110100000101110111110;
+opb = 64'b0000000000000000001011100000010101011100100110100011111101101011;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:5.599999999999383e-002
+if (out==64'h3FACAC083126E600)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.0600000000e+001
+//inputB:-3.5700000000e+001
+enable = 1'b1;
+opa = 64'b1100000001000100010011001100110011001100110011001100110011001101;
+opb = 64'b1100000001000001110110011001100110011001100110011001100110011010;
+fpu_op = 3'b000;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-7.630000000000001e+001
+if (out==64'hC053133333333334)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.4500000000e+002
+//inputB:-3.4400000000e+002
+enable = 1'b1;
+opa = 64'b0100000001110101100100000000000000000000000000000000000000000000;
+opb = 64'b1100000001110101100000000000000000000000000000000000000000000000;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:1.000000000000000e+000
+if (out==64'h3FF0000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.3770000000e+001
+//inputB:-4.5000000000e+001
+enable = 1'b1;
+opa = 64'b0100000000110111110001010001111010111000010100011110101110000101;
+opb = 64'b1100000001000110100000000000000000000000000000000000000000000000;
+fpu_op = 3'b001;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:6.877000000000000e+001
+if (out==64'h40513147AE147AE1)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.7700000000e+002
+//inputB:4.8960000000e+002
+enable = 1'b1;
+opa = 64'b1100000001111101110100000000000000000000000000000000000000000000;
+opb = 64'b0100000001111110100110011001100110011001100110011001100110011010;
+fpu_op = 3'b010;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-2.335392000000000e+005
+if (out==64'hC10C82199999999A)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.0000000000e-311
+//inputB:0.0000000000e+000
+enable = 1'b1;
+opa = 64'b0000000000000000000000111010111010000010010010011100011110100010;
+opb = 64'b0000000000000000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b000;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:1.999999999999895e-311
+if (out==64'h000003AE8249C7A2)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:0.0000000000e+000
+//inputB:9.0000000000e+050
+enable = 1'b1;
+opa = 64'b0000000000000000000000000000000000000000000000000000000000000000;
+opb = 64'b0100101010000011001111100111000010011110001011100011000100101101;
+fpu_op = 3'b010;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:0.000000000000000e+000
+if (out==64'h0000000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:5.4000000000e+001
+//inputB:0.0000000000e+000
+enable = 1'b1;
+opa = 64'b0100000001001011000000000000000000000000000000000000000000000000;
+opb = 64'b0000000000000000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b000;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:5.400000000000000e+001
+if (out==64'h404B000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-6.7000000000e+001
+//inputB:0.0000000000e+000
+enable = 1'b1;
+opa = 64'b1100000001010000110000000000000000000000000000000000000000000000;
+opb = 64'b0000000000000000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+// Although the reference output below is printed as -infinity, the pattern
+// checked is 64'hFFEFFFFFFFFFFFFF (exponent 2046, mantissa all ones, sign set)
+// rather than the infinity encoding; this case uses rmode = 2'b10.
+//Output:-1.#INF00000000000e+000
+if (out==64'hFFEFFFFFFFFFFFFF)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.5600000000e+001
+//inputB:-6.9000000000e+001
+enable = 1'b1;
+opa = 64'b1100000001000110110011001100110011001100110011001100110011001101;
+opb = 64'b1100000001010001010000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:6.608695652173914e-001
+if (out==64'h3FE525D7EE30F953)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-5.9900000000e+002
+//inputB:2.7000000000e-002
+enable = 1'b1;
+opa = 64'b1100000010000010101110000000000000000000000000000000000000000000;
+opb = 64'b0011111110011011101001011110001101010011111101111100111011011001;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-2.218518518518519e+004
+if (out==64'hC0D5AA4BDA12F685)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.1000000000e-308
+//inputB:2.0000000000e-308
+enable = 1'b1;
+opa = 64'b0000000000001111000110011100001001100010100111001100111101010011;
+opb = 64'b0000000000001110011000011010110011110000001100111101000110100100;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.100000000000000e-308
+if (out==64'h001D7B6F52D0A0F7)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:5.0000000000e-308
+//inputB:2.0000000000e-312
+enable = 1'b1;
+opa = 64'b0000000000100001111110100001100000101100010000001100011000001101;
+opb = 64'b0000000000000000000000000101111001000000001110101001001111110110;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:5.000199999999999e-308
+if (out==64'h0021FA474C5E1008)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.9800000000e+000
+//inputB:3.7700000000e+000
+enable = 1'b1;
+opa = 64'b0100000000001111110101110000101000111101011100001010001111010111;
+opb = 64'b0100000000001110001010001111010111000010100011110101110000101001;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:7.750000000000000e+000
+if (out==64'h401F000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.4000000000e+001
+//inputB:7.9000000000e-002
+enable = 1'b1;
+opa = 64'b0100000001000110000000000000000000000000000000000000000000000000;
+opb = 64'b0011111110110100001110010101100000010000011000100100110111010011;
+fpu_op = 3'b000;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.407900000000000e+001
+if (out==64'h40460A1CAC083127)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:5.0000000000e-311
+//inputB:9.0000000000e+009
+enable = 1'b1;
+opa = 64'b0000000000000000000010010011010001000101101110000111001100010101;
+opb = 64'b0100001000000000110000111000100011010000000000000000000000000000;
+fpu_op = 3'b010;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.499999999999764e-301
+if (out==64'h01934982FC467380)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.0000000000e-305
+//inputB:2.0000000000e-008
+enable = 1'b1;
+opa = 64'b1000000010111100000101101100010111000101001001010011010101110101;
+opb = 64'b0011111001010101011110011000111011100010001100001000110000111010;
+fpu_op = 3'b010;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-8.000000000007485e-313
+if (out==64'h80000025B34AA196)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.0000000000e-308
+//inputB:1.0000000000e-012
+enable = 1'b1;
+opa = 64'b0000000000010101100100101000001101101000010011011011101001110111;
+opb = 64'b0011110101110001100101111001100110000001001011011110101000010001;
+fpu_op = 3'b010;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.999966601548049e-320
+if (out==64'h00000000000017B8)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:5.6999990000e+006
+//inputB:5.6999989900e+006
+enable = 1'b1;
+opa = 64'b0100000101010101101111100110011111000000000000000000000000000000;
+opb = 64'b0100000101010101101111100110011110111111010111000010100011110110;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:9.999999776482582e-003
+if (out==64'h3F847AE140000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-4.0000000000e+000
+//inputB:9.0000000000e+000
+enable = 1'b1;
+opa = 64'b1100000000010000000000000000000000000000000000000000000000000000;
+opb = 64'b0100000000100010000000000000000000000000000000000000000000000000;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-1.300000000000000e+001
+if (out==64'hC02A000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.9700000000e+001
+//inputB:2.5700000000e-002
+enable = 1'b1;
+opa = 64'b0100000001000011110110011001100110011001100110011001100110011010;
+opb = 64'b0011111110011010010100010001100111001110000001110101111101110000;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:3.967430000000001e+001
+if (out==64'h4043D64F765FD8AF)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:2.3000000000e+000
+//inputB:7.0000000000e-002
+enable = 1'b1;
+opa = 64'b0100000000000010011001100110011001100110011001100110011001100110;
+opb = 64'b0011111110110001111010111000010100011110101110000101000111101100;
+fpu_op = 3'b001;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.230000000000000e+000
+if (out==64'h4001D70A3D70A3D7)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:1.9999999673e-316
+//inputB:1.9999999673e-317
+enable = 1'b1;
+opa = 64'b0000000000000000000000000000000000000010011010011010111011000010;
+opb = 64'b0000000000000000000000000000000000000000001111011100010010101101;
+fpu_op = 3'b001;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:1.799999970587486e-316
+if (out==64'h00000000022BEA15)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:1.9999999970e-315
+//inputB:-1.9999999673e-316
+enable = 1'b1;
+opa = 64'b0000000000000000000000000000000000011000001000001101001110011010;
+opb = 64'b1000000000000000000000000000000000000010011010011010111011000010;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:2.199999993695311e-315
+if (out==64'h000000001A8A825C)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:4.0000000000e+000
+//inputB:1.0000000000e-025
+enable = 1'b1;
+opa = 64'b0100000000010000000000000000000000000000000000000000000000000000;
+opb = 64'b0011101010111110111100101101000011110101110110100111110111011001;
+fpu_op = 3'b001;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.000000000000000e+000
+if (out==64'h4010000000000000)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.0000000000e-310
+//inputB:4.0000000000e-304
+enable = 1'b1;
+opa = 64'b0000000000000000001101110011100110100010010100101011001010000001;
+opb = 64'b0000000011110001100011100011101110011011001101110100000101101001;
+fpu_op = 3'b000;
+rmode = 2'b10;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.000003000000000e-304
+if (out==64'h00F18E3C781DCAB4)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:3.5000000000e-313
+//inputB:7.0000000000e+004
+enable = 1'b1;
+opa = 64'b0000000000000000000000000001000001111110011100001010011010110001;
+opb = 64'b0100000011110001000101110000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:4.999998683134458e-318
+if (out==64'h00000000000F712B)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-5.1000000000e-306
+//inputB:2.0480000000e+003
+enable = 1'b1;
+opa = 64'b1000000010001100101001101001011010000110100001110011101110100101;
+opb = 64'b0100000010100000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-2.490234375000003e-309
+if (out==64'h8001CA69686873BB)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-1.5000000000e-305
+//inputB:1.0240000000e+003
+enable = 1'b1;
+opa = 64'b1000000010100101000100010001010001010011110110111110100000011000;
+opb = 64'b0100000010010000000000000000000000000000000000000000000000000000;
+fpu_op = 3'b011;
+rmode = 2'b11;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:-1.464843750000000e-308
+if (out==64'h800A888A29EDF40C)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:-3.4000000000e+056
+//inputB:-4.0000000000e+199
+enable = 1'b1;
+opa = 64'b1100101110101011101110111000100000000000101110111001110000000101;
+opb = 64'b1110100101100000101110001110000010101100101011000100111010101111;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:8.500000000000000e-144
+if (out==64'h223A88ECC2AC8317)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+//inputA:1.3559000000e-001
+//inputB:2.3111240000e+003
+enable = 1'b1;
+opa = 64'b0011111111000001010110110000001101011011110101010001001011101100;
+opb = 64'b0100000010100010000011100011111101111100111011011001000101101000;
+fpu_op = 3'b011;
+rmode = 2'b00;
+#20000;
+enable = 1'b0;
+#800000;
+//Output:5.866842281071894e-005
+if (out==64'h3F0EC257A882625F)
+    $display($time,"ps Answer is correct %h", out);
+else
+    $display($time,"ps Error! out is incorrect %h", out);
+
+// end of paste
+$finish;
+end
+
+// Free-running clock: 5000 ps low, 5000 ps high (10000 ps period).
+always
+begin : CLOCK_clk
+
+    clk = 1'b0;
+    #5000;
+    clk = 1'b1;
+    #5000;
+end
+
+endmodule
Index: tags/arelease/fpu_round.v
===================================================================
--- tags/arelease/fpu_round.v (nonexistent)
+++ tags/arelease/fpu_round.v (revision 12)
@@ -0,0 +1,92 @@
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// FPU ////
+//// Floating Point Unit (Double precision) ////
+//// ////
+//// Author: David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// Copyright (C) 2009 David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+//// This source file may be used and distributed without ////
+//// restriction provided that this copyright statement is not ////
+//// removed from the file and that any derivative work contains ////
+//// the original copyright notice and the associated disclaimer.////
+//// ////
+//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY ////
+//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ////
+//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ////
+//// FOR A PARTICULAR PURPOSE. 
IN NO EVENT SHALL THE AUTHOR ////
+//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ////
+//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ////
+//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ////
+//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ////
+//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ////
+//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ////
+//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ////
+//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ////
+//// POSSIBILITY OF SUCH DAMAGE. ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+
+
`timescale 1ns / 100ps

// fpu_round
// ---------
// Rounds a 56-bit mantissa term and packs sign / exponent / fraction into a
// 64-bit word.  Bits [53:2] of the (possibly incremented) mantissa become the
// 52-bit fraction of round_out; bits [1:0] are the discarded bits that decide
// whether an increment is applied.
//
// round_mode: 2'b00 nearest, 2'b01 toward zero, 2'b10 toward +inf,
//             2'b11 toward -inf.
//
// The datapath is a chain of registers (sum_round -> sum_round_2 -> sum_final
// -> round_out).  Later stages also read mantissa_term / exponent_term /
// sign_term directly, so the caller presumably holds those inputs stable
// while a result propagates -- verify against the instantiating module.
module fpu_round( clk, rst, enable, round_mode, sign_term,
                  mantissa_term, exponent_term, round_out, exponent_final );

    input         clk;
    input         rst;              // synchronous reset, active high
    input         enable;           // not referenced by any logic in this module
    input  [1:0]  round_mode;
    input         sign_term;
    input  [55:0] mantissa_term;
    input  [11:0] exponent_term;
    output [63:0] round_out;
    output [11:0] exponent_final;

    // Registered outputs.
    reg    [63:0] round_out;
    reg    [11:0] exponent_final;

    // Internal pipeline registers.
    reg    [55:0] sum_round;        // mantissa_term + one unit at bit 2
    reg    [55:0] sum_round_2;      // sum_round, renormalised if it carried out
    reg    [11:0] exponent_round;   // exponent matching sum_round_2
    reg    [55:0] sum_final;        // rounded or untouched mantissa

    // One unit in the last kept fraction position (bit 2 of the 56-bit term).
    wire   [55:0] rounding_amount;
    assign rounding_amount = 56'd4;

    // Rounding-mode decode.
    wire round_nearest;
    wire round_to_zero;             // decoded but never triggers an increment
    wire round_to_pos_inf;
    wire round_to_neg_inf;
    assign round_nearest    = (round_mode == 2'b00);
    assign round_to_zero    = (round_mode == 2'b01);
    assign round_to_pos_inf = (round_mode == 2'b10);
    assign round_to_neg_inf = (round_mode == 2'b11);

    // Any non-zero bit below the kept fraction.
    wire discarded_nonzero;
    assign discarded_nonzero = |mantissa_term[1:0];

    // Per-mode "increment needed" conditions.
    wire round_nearest_trigger;
    wire round_to_pos_inf_trigger;
    wire round_to_neg_inf_trigger;
    assign round_nearest_trigger    = round_nearest & mantissa_term[1];
    assign round_to_pos_inf_trigger = !sign_term & discarded_nonzero;
    assign round_to_neg_inf_trigger =  sign_term & discarded_nonzero;

    wire round_trigger;
    assign round_trigger = (round_nearest    & round_nearest_trigger)
                         | (round_to_pos_inf & round_to_pos_inf_trigger)
                         | (round_to_neg_inf & round_to_neg_inf_trigger);

    // Bit 55 is 0 when the increment did not carry out, 1 when it did.
    // A carry out of the rounding add is extremely rare, but possible.
    wire sum_round_overflow;
    assign sum_round_overflow = sum_round[55];

    always @(posedge clk)
    begin
        if (rst)
        begin
            sum_round      <= 0;
            sum_round_2    <= 0;
            exponent_round <= 0;
            sum_final      <= 0;
            exponent_final <= 0;
            round_out      <= 0;
        end
        else
        begin
            // Stage 1: unconditional increment candidate.
            sum_round      <= mantissa_term + rounding_amount;

            // Stage 2: on carry-out shift right one place and bump the exponent.
            sum_round_2    <= sum_round_overflow ? { 1'b0, sum_round[55:1] } : sum_round;
            exponent_round <= sum_round_overflow ? (exponent_term + 1) : exponent_term;

            // Stage 3: pick rounded or original values.
            sum_final      <= round_trigger ? sum_round_2    : mantissa_term;
            exponent_final <= round_trigger ? exponent_round : exponent_term;

            // Stage 4: pack {sign, 11-bit exponent, 52-bit fraction}.
            round_out      <= { sign_term, exponent_final[10:0], sum_final[53:2] };
        end
    end

endmodule
\ No newline at end of file
Index: tags/arelease/fpu_add.v
===================================================================
--- tags/arelease/fpu_add.v (nonexistent)
+++ tags/arelease/fpu_add.v (revision 12)
@@ -0,0 +1,130 @@
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// FPU ////
+//// Floating Point Unit (Double precision) ////
+//// ////
+//// Author: David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// Copyright (C) 2009 David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+//// This source file may be used and distributed without ////
+//// restriction provided that this copyright statement is not ////
+//// removed from the file and that any derivative work contains ////
+//// the original copyright notice and the associated disclaimer.////
+//// ////
+//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY ////
+//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ////
+//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ////
+//// FOR A PARTICULAR PURPOSE.
IN NO EVENT SHALL THE AUTHOR ////
+//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ////
+//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ////
+//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ////
+//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ////
+//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ////
+//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ////
+//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ////
+//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ////
+//// POSSIBILITY OF SUCH DAMAGE. ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+
`timescale 1ns / 100ps

// fpu_add
// -------
// Pipelined magnitude adder for two 64-bit operands laid out as
// {sign[63], exponent[62:52], mantissa[51:0]}.  The operand with the larger
// exponent is kept in place, the other is shifted right by the exponent
// difference, and the two 56-bit terms are added.  Outputs are the 56-bit
// sum (sum_2), its 11-bit exponent (exponent_2) and the sign, which is taken
// directly from opa.
//
// Every register only advances while `enable` is high; each stage reads the
// registers written by the previous stage, so a result needs `enable` held
// for several clocks with opa/opb stable.
module fpu_add( clk, rst, enable, opa, opb, sign, sum_2, exponent_2 );

    input         clk;
    input         rst;              // synchronous reset, active high
    input         enable;
    input  [63:0] opa;
    input  [63:0] opb;
    output        sign;
    output [55:0] sum_2;
    output [10:0] exponent_2;

    // Registered outputs.
    reg           sign;
    reg    [55:0] sum_2;
    reg    [10:0] exponent_2;

    // Operand fields.
    reg    [10:0] exponent_a;
    reg    [10:0] exponent_b;
    reg    [51:0] mantissa_a;
    reg    [51:0] mantissa_b;

    // Operands ordered by exponent.
    reg           expa_gt_expb;
    reg    [10:0] exponent_small;
    reg    [10:0] exponent_large;
    reg    [51:0] mantissa_small;
    reg    [51:0] mantissa_large;

    // Denormal classification (exponent field of zero).
    reg           small_is_denorm;
    reg           large_is_denorm;
    reg           large_norm_small_denorm;

    // Alignment and addition.
    reg    [10:0] exponent_diff;
    reg    [55:0] large_add;
    reg    [55:0] small_add;
    reg    [55:0] small_shift;
    reg    [55:0] small_shift_3;
    reg    [55:0] sum;

    // Post-add normalisation.
    reg    [10:0] exponent;
    reg           denorm_to_norm;

    // The small operand is non-zero, yet the alignment shift pushed every one
    // of its bits out of the 56-bit window.
    wire          small_shift_nonzero;
    wire          small_is_nonzero;
    wire          small_fraction_enable;
    assign small_shift_nonzero   = |small_shift;
    assign small_is_nonzero      = (exponent_small > 0) | (|mantissa_small);
    assign small_fraction_enable = small_is_nonzero & !small_shift_nonzero;

    // Replacement term with only the LSB set, used when the shifted operand
    // vanished; presumably so a later rounding stage still sees a non-zero
    // remainder -- confirm against the rounding unit.
    wire   [55:0] small_shift_2;
    assign small_shift_2 = 56'd1;

    // sum[55] is 0 when adding the two terms produced no carry.
    wire          sum_overflow;
    assign sum_overflow = sum[55];

    // Bit 54 is where the leading one sits, unless the result is denormal.
    wire          sum_leading_one;
    assign sum_leading_one = sum_2[54];

    always @(posedge clk)
    begin
        if (rst)
        begin
            sign                    <= 0;
            exponent_a              <= 0;
            exponent_b              <= 0;
            mantissa_a              <= 0;
            mantissa_b              <= 0;
            expa_gt_expb            <= 0;
            exponent_small          <= 0;
            exponent_large          <= 0;
            mantissa_small          <= 0;
            mantissa_large          <= 0;
            small_is_denorm         <= 0;
            large_is_denorm         <= 0;
            large_norm_small_denorm <= 0;
            exponent_diff           <= 0;
            large_add               <= 0;
            small_add               <= 0;
            small_shift             <= 0;
            small_shift_3           <= 0;
            sum                     <= 0;
            sum_2                   <= 0;
            exponent                <= 0;
            denorm_to_norm          <= 0;
            exponent_2              <= 0;
        end
        else if (enable)
        begin
            // Unpack.
            sign       <= opa[63];
            exponent_a <= opa[62:52];
            exponent_b <= opb[62:52];
            mantissa_a <= opa[51:0];
            mantissa_b <= opb[51:0];

            // Order by exponent (ties select opb as "large").
            expa_gt_expb   <= exponent_a > exponent_b;
            exponent_small <= expa_gt_expb ? exponent_b : exponent_a;
            exponent_large <= expa_gt_expb ? exponent_a : exponent_b;
            mantissa_small <= expa_gt_expb ? mantissa_b : mantissa_a;
            mantissa_large <= expa_gt_expb ? mantissa_a : mantissa_b;

            // Classify.
            small_is_denorm         <= !(exponent_small > 0);
            large_is_denorm         <= !(exponent_large > 0);
            large_norm_small_denorm <= (small_is_denorm && !large_is_denorm);

            // Shift distance; one less when only the small operand is denormal.
            exponent_diff <= exponent_large - exponent_small - large_norm_small_denorm;

            // Build {carry slot, hidden bit, mantissa, two low-order zero bits}.
            large_add <= { 1'b0, !large_is_denorm, mantissa_large, 2'b0 };
            small_add <= { 1'b0, !small_is_denorm, mantissa_small, 2'b0 };

            // Align and add.
            small_shift   <= small_add >> exponent_diff;
            small_shift_3 <= small_fraction_enable ? small_shift_2 : small_shift;
            sum           <= large_add + small_shift_3;

            // Renormalise after a carry out of bit 54.
            sum_2    <= sum_overflow ? { 1'b0, sum[55:1] } : sum;
            exponent <= sum_overflow ? exponent_large + 1 : exponent_large;

            // A denormal "large" operand whose sum gained a leading one
            // moves up to the next exponent value.
            denorm_to_norm <= sum_leading_one & large_is_denorm;
            exponent_2     <= denorm_to_norm ? exponent + 1 : exponent;
        end
    end

endmodule
Index: tags/arelease/fpu_double.v
===================================================================
--- tags/arelease/fpu_double.v (nonexistent)
+++ tags/arelease/fpu_double.v (revision 12)
@@ -0,0 +1,302 @@
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// FPU ////
+//// Floating Point Unit (Double precision) ////
+//// ////
+//// Author: David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+/////////////////////////////////////////////////////////////////////
+//// ////
+//// Copyright (C) 2009 David Lundgren ////
+//// davidklun@gmail.com ////
+//// ////
+//// This source file may be used and distributed without ////
+//// restriction provided that this copyright statement is not ////
+//// removed from the file and that any derivative work contains ////
+//// the original copyright notice and the associated disclaimer.////
+//// ////
+//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY ////
+//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ////
+//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ////
+//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR ////
+//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ////
+//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ////
+//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ////
+//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ////
+//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ////
+//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ////
+//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ////
+//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ////
+//// POSSIBILITY OF SUCH DAMAGE.
////
+//// ////
+/////////////////////////////////////////////////////////////////////
+
+
`timescale 1ns / 100ps
/* FPU Operations (fpu_op):
========================
0 = add
1 = sub
2 = mul
3 = div

Rounding Modes (rmode):
=======================
0 = round_nearest_even
1 = round_to_zero
2 = round_up
3 = round_down */

// fpu -- top level of the double-precision floating point unit.
//
// A rising edge on `enable` latches opa, opb, fpu_op and rmode.  The request
// is routed to the add, subtract, multiply or divide unit, the selected
// result is passed through fpu_round and fpu_exceptions, and `out` plus the
// status flags are registered shortly before `ready` goes high.
//
// Latency is governed by count_cycles, which is loaded per operation
// (20 add, 21 sub, 24 mul, 71 div); `ready` rises three clocks after
// count_ready has counted past that value.
module fpu( clk, rst, enable, rmode, fpu_op, opa, opb, out, ready, underflow,
            overflow, inexact, exception, invalid);
input         clk;
input         rst;          // synchronous reset, active high
input         enable;       // a 0 -> 1 transition starts an operation
input  [1:0]  rmode;
input  [2:0]  fpu_op;
input  [63:0] opa, opb;
output [63:0] out;
output        ready;
output        underflow;
output        overflow;
output        inexact;
output        exception;
output        invalid;

// Latched request.
reg [63:0] opa_reg;
reg [63:0] opb_reg;
reg [2:0]  fpu_op_reg;
reg [1:0]  rmode_reg;

// Enable edge detection / stretching.
reg enable_reg;
reg enable_reg_1;  // high for one clock cycle
reg enable_reg_2;  // high for one clock cycle
reg enable_reg_3;  // high for two clock cycles
reg op_enable;     // set on the first request, cleared only by rst

reg [63:0] out;

// Latency counter.
reg [6:0] count_cycles;
reg [6:0] count_ready;
wire count_busy = (count_ready <= count_cycles);
reg ready;
reg ready_0;
reg ready_1;

// Registered status flags and their sources from fpu_exceptions.
reg underflow;
reg overflow;
reg inexact;
reg exception;
reg invalid;
wire underflow_0;
wire overflow_0;
wire inexact_0;
wire exception_0;
wire invalid_0;

// Explicit declaration of the net driven by u6.ex_enable.  Previously this
// existed only as an implicit net created by the port connection, which
// draws lint warnings and fails to elaborate under `default_nettype none.
wire except_enable;

// The adder handles add with equal signs and sub with differing signs;
// the subtractor handles the two opposite combinations.
wire add_enable_0 = (fpu_op_reg == 3'b000) & !(opa_reg[63] ^ opb_reg[63]);
wire add_enable_1 = (fpu_op_reg == 3'b001) & (opa_reg[63] ^ opb_reg[63]);
reg add_enable;
wire sub_enable_0 = (fpu_op_reg == 3'b000) & (opa_reg[63] ^ opb_reg[63]);
wire sub_enable_1 = (fpu_op_reg == 3'b001) & !(opa_reg[63] ^ opb_reg[63]);
reg sub_enable;
reg mul_enable;
reg div_enable;

// Mantissa paths.
wire [55:0] sum_out;
wire [55:0] diff_out;
reg  [55:0] addsub_out;
wire [55:0] mul_out;
wire [55:0] div_out;
reg  [55:0] mantissa_round;

// Exponent paths.
wire [10:0] exp_add_out;
wire [10:0] exp_sub_out;
wire [11:0] exp_mul_out;
wire [11:0] exp_div_out;
reg  [11:0] exponent_round;
reg  [11:0] exp_addsub;
wire [11:0] exponent_post_round;

// Sign paths.
wire add_sign;
wire sub_sign;
wire mul_sign;
wire div_sign;
reg addsub_sign;
reg sign_round;

wire [63:0] out_round;
wire [63:0] out_except;

fpu_add u1(
    .clk(clk),.rst(rst),.enable(add_enable),.opa(opa_reg),.opb(opb_reg),
    .sign(add_sign),.sum_2(sum_out),.exponent_2(exp_add_out));

fpu_sub u2(
    .clk(clk),.rst(rst),.enable(sub_enable),.opa(opa_reg),.opb(opb_reg),
    .fpu_op(fpu_op_reg),.sign(sub_sign),.diff_2(diff_out),
    .exponent_2(exp_sub_out));

fpu_mul u3(
    .clk(clk),.rst(rst),.enable(mul_enable),.opa(opa_reg),.opb(opb_reg),
    .sign(mul_sign),.product_7(mul_out),.exponent_5(exp_mul_out));

fpu_div u4(
    .clk(clk),.rst(rst),.enable(div_enable),.opa(opa_reg),.opb(opb_reg),
    .sign(div_sign),.mantissa_7(div_out),.exponent_out(exp_div_out));

fpu_round u5(.clk(clk),.rst(rst),.enable(op_enable), .round_mode(rmode_reg),
    .sign_term(sign_round),.mantissa_term(mantissa_round), .exponent_term(exponent_round),
    .round_out(out_round),.exponent_final(exponent_post_round));

fpu_exceptions u6(.clk(clk),.rst(rst),.enable(op_enable),.rmode(rmode_reg),
    .opa(opa_reg),.opb(opb_reg),
    .in_except(out_round), .exponent_in(exponent_post_round),
    .mantissa_in(mantissa_round[1:0]),.fpu_op(fpu_op_reg),.out(out_except),
    .ex_enable(except_enable),.underflow(underflow_0),.overflow(overflow_0),
    .inexact(inexact_0),.exception(exception_0),.invalid(invalid_0));


// Select the mantissa fed to the rounding unit.
always @(posedge clk)
begin
    case (fpu_op_reg)
    3'b000: mantissa_round <= addsub_out;
    3'b001: mantissa_round <= addsub_out;
    3'b010: mantissa_round <= mul_out;
    3'b011: mantissa_round <= div_out;
    default: mantissa_round <= 0;
    endcase
end

// Select the exponent fed to the rounding unit.
always @(posedge clk)
begin
    case (fpu_op_reg)
    3'b000: exponent_round <= exp_addsub;
    3'b001: exponent_round <= exp_addsub;
    3'b010: exponent_round <= exp_mul_out;
    3'b011: exponent_round <= exp_div_out;
    default: exponent_round <= 0;
    endcase
end

// Select the sign fed to the rounding unit.
always @(posedge clk)
begin
    case (fpu_op_reg)
    3'b000: sign_round <= addsub_sign;
    3'b001: sign_round <= addsub_sign;
    3'b010: sign_round <= mul_sign;
    3'b011: sign_round <= div_sign;
    default: sign_round <= 0;
    endcase
end

// Per-operation latency target compared against count_ready.
always @(posedge clk)
begin
    case (fpu_op_reg)
    3'b000: count_cycles <= 20;
    3'b001: count_cycles <= 21;
    3'b010: count_cycles <= 24;
    3'b011: count_cycles <= 71;
    default: count_cycles <= 0;
    endcase
end

// Unit enables and the add/sub result multiplexer.
always @(posedge clk)
begin
    if (rst) begin
        add_enable <= 0;
        sub_enable <= 0;
        mul_enable <= 0;
        div_enable <= 0;
        addsub_out <= 0;
        addsub_sign <= 0;
        exp_addsub <= 0;
    end
    else begin
        add_enable <= (add_enable_0 | add_enable_1) & op_enable;
        sub_enable <= (sub_enable_0 | sub_enable_1) & op_enable;
        mul_enable <= (fpu_op_reg == 3'b010) & op_enable;
        div_enable <= (fpu_op_reg == 3'b011) & op_enable & enable_reg_3;
            // div_enable needs to be high for two clock cycles
        addsub_out <= add_enable ? sum_out : diff_out;
        addsub_sign <= add_enable ? add_sign : sub_sign;
        // Zero-extend the 11-bit add/sub exponents to the 12-bit rounding path.
        exp_addsub <= add_enable ? { 1'b0, exp_add_out} : { 1'b0, exp_sub_out};
    end
end

// Restart the latency counter on each new request; it stops incrementing
// once it has passed count_cycles.
always @ (posedge clk)
begin
    if (rst)
        count_ready <= 0;
    else if (enable_reg_1)
        count_ready <= 0;
    else if (count_busy)
        count_ready <= count_ready + 1;
end

// Rising-edge detect on `enable` (enable_reg_1) and a two-cycle stretch
// of that pulse (enable_reg_3).
always @(posedge clk)
begin
    if (rst) begin
        enable_reg <= 0;
        enable_reg_1 <= 0;
        enable_reg_2 <= 0;
        enable_reg_3 <= 0;
    end
    else begin
        enable_reg <= enable;
        enable_reg_1 <= enable & !enable_reg;
        enable_reg_2 <= enable_reg_1;
        enable_reg_3 <= enable_reg_1 | enable_reg_2;
    end
end

// Capture the request one clock after the enable edge.
always @(posedge clk)
begin
    if (rst) begin
        opa_reg <= 0;
        opb_reg <= 0;
        fpu_op_reg <= 0;
        rmode_reg <= 0;
        op_enable <= 0;
    end
    else if (enable_reg_1) begin
        opa_reg <= opa;
        opb_reg <= opb;
        fpu_op_reg <= fpu_op;
        rmode_reg <= rmode;
        op_enable <= 1;
    end
end

// Three-stage ready pipeline, cleared whenever a new request arrives.
always @(posedge clk)
begin
    if (rst) begin
        ready_0 <= 0;
        ready_1 <= 0;
        ready <= 0;
    end
    else if (enable_reg_1) begin
        ready_0 <= 0;
        ready_1 <= 0;
        ready <= 0;
    end
    else begin
        ready_0 <= !count_busy;
        ready_1 <= ready_0;
        ready <= ready_1;
    end
end

// Register the result and flags while ready_1 is high, i.e. starting on the
// same clock edge that raises `ready`.
always @(posedge clk)
begin
    if (rst) begin
        underflow <= 0;
        overflow <= 0;
        inexact <= 0;
        exception <= 0;
        invalid <= 0;
        out <= 0;
    end
    else if (ready_1) begin
        underflow <= underflow_0;
        overflow <= overflow_0;
        inexact <= inexact_0;
        exception <= exception_0;
        invalid <= invalid_0;
        // The exception unit overrides the rounded value when it asserts ex_enable.
        out <= except_enable ? out_except : out_round;
    end
end
endmodule
Index: tags/arelease/Readme.txt
===================================================================
--- tags/arelease/Readme.txt (nonexistent)
+++ tags/arelease/Readme.txt (revision 12)
@@ -0,0 +1,130 @@
+The following describes the IEEE-Standard-754 compliant, double-precision floating point unit,
+written in Verilog. The module consists of the following files:
+
+1. fpu_double.v (top level)
+2. fpu_add.v
+3. fpu_sub.v
+4. fpu_mul.v
+5. fpu_div.v
+6. fpu_round.v
+7. fpu_exceptions.v
+
+And a testbench file is included, containing 50 test-case operations:
+1. fpu_tb.v
+
+This unit has been extensively simulated, covering all operations, rounding modes, exceptions
+like underflow and overflow, and even the obscure corner cases, like when overflowing from
+denormalized to normalized, and vice-versa.
+
+The floating point unit supports denormalized numbers,
+4 operations (add, subtract, multiply, divide), and 4 rounding
+modes (nearest, zero, + inf, - inf). The unit was synthesized with an
+estimated frequency of 230 MHz, for a Virtex5 target device. The synthesis results
+are below. fpu_double.v is the top-level module, and it contains the input
+and output signals from the unit. The unit was designed to be synchronous with
+one global clock, and all of the registers can be reset with an synchronous global reset.
+When the inputs signals (a and b operands, fpu operation code, rounding mode code) are
+available, set the enable input high, then set it low after 2 clock cycles.
When the +operation is complete and the output is available, the ready signal will go high. To start +the next operation, set the enable input high. + +Each operation takes the following number of clock cycles to complete: +1. addition : 20 clock cycles +2. subtraction: 21 clock cycles +3. multiplication: 24 clock cycles +4. division: 71 clock cycles + +This is longer than other floating point units, but supporting denormalized numbers +requires more signals and logic levels to accommodate gradual underflow. The supported +clock speed of 230 MHz makes up for the large number of clock cycles required for each +operation to complete. If you have a lower clock speed, the code can be changed to +reduce the number of registers and latency of each operation. I purposely increased the +number of pipeline stages (registers) to get the code to synthesize to a faster clock frequency, but of course, +this led to longer latency. I guess it depends on your application what is more important. + +The following output signals are also available: underflow, overflow, inexact, exception, +and invalid. They are compliant with the IEEE-754 definition of each signal. The unit +will handle QNaN and SNaN inputs per the standard. + +I'm planning on adding more operations, like square root, sin, cos, tan, etc., +so check back for updates. + +Multiply: +The multiply module is written specifically for a Virtex5 target device. The DSP48E slices +can perform a 25-bit by 18-bit two's-complement multiply (24 by 17 unsigned multiply). I broke up the multiply to +fit these DSP48E slices. The breakdown is similar to the design in Figure 4-15 of the +Xilinx User Guide Document, "Virtex-5 FPGA XtremeDSP Design Considerations", also known as UG193. +You can find this document at xilinx.com by searching for "UG193". +Depending on your device, the multiply can be changed to match the bit-widths of the available +multipliers.
A total of 9 DSP48E slices are used to do the 53-bit by 53-bit multiply of 2 +floating point numbers. + +If you have any questions, please email me at: davidklun@gmail.com + +Thanks, +David Lundgren + +----- + +Synthesis Results: + + + + +Performance Summary +******************* + + +Worst slack in design: -0.971 + + Requested Estimated Requested Estimated Clock Clock +Starting Clock Frequency Frequency Period Period Slack Type Group +----------------------------------------------------------------------------------------------------------- +fpu|clk 300.0 MHz 232.3 MHz 3.333 4.304 -0.971 inferred +========================================================================== + +--------------------------------------- +Resource Usage Report for fpu + +Mapping to part: xc5vsx95tff1136-2 +Cell usage: +DSP48E 9 uses +FD 5 uses +FDR 519 uses +FDRE 3920 uses +FDRSE 1 use +GND 6 uses +LD 6 uses +MUXCY 35 uses +MUXCY_L 704 uses +MUXF7 1 use +VCC 5 uses +XORCY 491 uses +XORCY_L 12 uses +LUT1 185 uses +LUT2 725 uses +LUT3 1523 uses +LUT4 738 uses +LUT5 604 uses +LUT6 2506 uses + +I/O ports: 206 +I/O primitives: 205 +IBUF 135 uses +OBUF 70 uses + +BUFGP 1 use + +I/O Register bits: 0 +Register bits not including I/Os: 4445 (7%) +Latch bits not including I/Os: 6 (0%) + +Global Clock Buffers: 1 of 32 (3%) + +Total load per clock: + fpu|clk: 4454 + +Mapping Summary: +Total LUTs: 6281 (10%) + +

Browse

Tools

Subversion Repositories double_fpu

Compare Revisions

Rev 11 → Rev 12