OpenCores
URL https://opencores.org/ocsvn/amber/amber/trunk

Subversion Repositories amber

[/] [amber/] [trunk/] [hw/] [vlog/] [amber23/] [a23_barrel_shift_fpga.v] - Diff between revs 74 and 82

Only display areas with differences | Details | Blame | View Log

Rev 74 Rev 82
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
//                                                              //
//                                                              //
//  Barrel Shifter for Amber 2 Core                             //
//  Barrel Shifter for Amber 2 Core                             //
//                                                              //
//                                                              //
//  The design is optimized for Altera family of FPGAs,         //
//  The design is optimized for Altera family of FPGAs,         //
//  and it can be used directly or adapted to other N-to-1 LUT  //
//  and it can be used directly or adapted to other N-to-1 LUT  //
//  FPGA platforms.                                             //
//  FPGA platforms.                                             //
//                                                              //
//                                                              //
//  This file is part of the Amber project                      //
//  This file is part of the Amber project                      //
//  http://www.opencores.org/project,amber                      //
//  http://www.opencores.org/project,amber                      //
//                                                              //
//                                                              //
//  Description                                                 //
//  Description                                                 //
//  Provides 32-bit shifts LSL, LSR, ASR and ROR                //
//  Provides 32-bit shifts LSL, LSR, ASR and ROR                //
//                                                              //
//                                                              //
//  Author(s):                                                  //
//  Author(s):                                                  //
//      - Dmitry Tarnyagin, dmitry.tarnyagin@lockless.no        //
//      - Dmitry Tarnyagin, dmitry.tarnyagin@lockless.no        //
//                                                              //
//                                                              //
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
//                                                              //
//                                                              //
// Copyright (C) 2010-2013 Authors and OPENCORES.ORG            //
// Copyright (C) 2010-2013 Authors and OPENCORES.ORG            //
//                                                              //
//                                                              //
// This source file may be used and distributed without         //
// This source file may be used and distributed without         //
// restriction provided that this copyright statement is not    //
// restriction provided that this copyright statement is not    //
// removed from the file and that any derivative work contains  //
// removed from the file and that any derivative work contains  //
// the original copyright notice and the associated disclaimer. //
// the original copyright notice and the associated disclaimer. //
//                                                              //
//                                                              //
// This source file is free software; you can redistribute it   //
// This source file is free software; you can redistribute it   //
// and/or modify it under the terms of the GNU Lesser General   //
// and/or modify it under the terms of the GNU Lesser General   //
// Public License as published by the Free Software Foundation; //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any   //
// either version 2.1 of the License, or (at your option) any   //
// later version.                                               //
// later version.                                               //
//                                                              //
//                                                              //
// This source is distributed in the hope that it will be       //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU Lesser General Public License for more //
// PURPOSE.  See the GNU Lesser General Public License for more //
// details.                                                     //
// details.                                                     //
//                                                              //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//                                                              //
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
 
 
 
 
module a23_barrel_shift_fpga (
module a23_barrel_shift_fpga (
 
 
input       [31:0]          i_in,
input       [31:0]          i_in,
input                       i_carry_in,
input                       i_carry_in,
input       [7:0]           i_shift_amount,     // uses 8 LSBs of Rs, or a 5 bit immediate constant
input       [7:0]           i_shift_amount,     // uses 8 LSBs of Rs, or a 5 bit immediate constant
input                       i_shift_imm_zero,   // high when immediate shift value of zero selected
input                       i_shift_imm_zero,   // high when immediate shift value of zero selected
input       [1:0]           i_function,
input       [1:0]           i_function,
 
 
output      [31:0]          o_out,
output      [31:0]          o_out,
output                      o_carry_out
output                      o_carry_out
 
 
);
);
 
 
`include "a23_localparams.v"
`include "a23_localparams.vh"
 
 
wire [31:0] rot_prod;                           // Input rotated by the shift amount
wire [31:0] rot_prod;                           // Input rotated by the shift amount
 
 
wire [1:0]  lsl_out;                            // LSL: {carry, bit_31}
wire [1:0]  lsl_out;                            // LSL: {carry, bit_31}
wire [1:0]  lsr_out;                            // LSR: {carry, bit_31}
wire [1:0]  lsr_out;                            // LSR: {carry, bit_31}
wire [1:0]  asr_out;                            // ASR: {carry, bit_31}
wire [1:0]  asr_out;                            // ASR: {carry, bit_31}
wire [1:0]  ror_out;                            // ROR: {carry, bit_31}
wire [1:0]  ror_out;                            // ROR: {carry, bit_31}
 
 
reg [32:0]  lsl_mask;                           // Left-hand mask
reg [32:0]  lsl_mask;                           // Left-hand mask
reg [32:0]  lsr_mask;                           // Right-hand mask
reg [32:0]  lsr_mask;                           // Right-hand mask
reg [15:0]  low_mask;                           // Mask calculation helper
reg [15:0]  low_mask;                           // Mask calculation helper
 
 
reg [4:0]   shift_amount;                       // Shift amount for the low-level shifter
reg [4:0]   shift_amount;                       // Shift amount for the low-level shifter
 
 
reg [2:0]   lsl_selector;                       // Left shift {shift_32, shift_over, shift_amount[4]}
reg [2:0]   lsl_selector;                       // Left shift {shift_32, shift_over, shift_amount[4]}
reg [2:0]   lsr_selector;                       // Right shift {shift_32, shift_over, shift_amount[4]}
reg [2:0]   lsr_selector;                       // Right shift {shift_32, shift_over, shift_amount[4]}
reg [3:0]   low_selector;                       // {shift_amount[3:0]}
reg [3:0]   low_selector;                       // {shift_amount[3:0]}
 
 
reg         shift_nzero;                        // Amount is not zero
reg         shift_nzero;                        // Amount is not zero
reg         shift_over;                         // Amount is 32 or higher
reg         shift_over;                         // Amount is 32 or higher
reg         shift_32;                           // Amount is exactly 32
reg         shift_32;                           // Amount is exactly 32
reg         asr_sign;                           // Sign for ASR shift
reg         asr_sign;                           // Sign for ASR shift
reg         direction;                          // Shift direction
reg         direction;                          // Shift direction
 
 
wire [31:0] p_r;                                // 1 bit rotated rot_prod
wire [31:0] p_r;                                // 1 bit rotated rot_prod
wire [31:0] p_l;                                // Alias for the rot_prod 
wire [31:0] p_l;                                // Alias for the rot_prod 
 
 
 
 
// Implementation details:
// Implementation details:
// The design masks the rotated input with left-hand and right-hand masks.
// The design masks the rotated input with left-hand and right-hand masks.
// Rotated product calculation requires 5 levels of combinational logic, and the masks
// Rotated product calculation requires 5 levels of combinational logic, and the masks
// must be ready before the product is ready. In fact the masks require just 3 to 4 levels
// must be ready before the product is ready. In fact the masks require just 3 to 4 levels
// of logic cells using 4-to-1/2x3-to-1 Altera LUTs.
// of logic cells using 4-to-1/2x3-to-1 Altera LUTs.
 
 
always @*
always @*
begin
begin
        shift_32 = i_shift_amount == 32;
        shift_32 = i_shift_amount == 32;
 
 
        shift_over = |i_shift_amount[7:5];
        shift_over = |i_shift_amount[7:5];
 
 
        shift_nzero = |i_shift_amount[7:0];
        shift_nzero = |i_shift_amount[7:0];
 
 
        shift_amount = i_shift_amount[4:0];
        shift_amount = i_shift_amount[4:0];
 
 
        if (i_shift_imm_zero) begin
        if (i_shift_imm_zero) begin
                if (i_function == LSR || i_function == ASR) begin
                if (i_function == LSR || i_function == ASR) begin
                        // The form of the shift field which might be
                        // The form of the shift field which might be
                        // expected to correspond to LSR #0 is used
                        // expected to correspond to LSR #0 is used
                        // to encode LSR #32, which has a zero result
                        // to encode LSR #32, which has a zero result
                        // with bit 31 of Rm as the carry output. 
                        // with bit 31 of Rm as the carry output. 
                        shift_nzero = 1'b1;
                        shift_nzero = 1'b1;
                        shift_over = 1'b1;
                        shift_over = 1'b1;
                        // Redundant and can be optimized out
                        // Redundant and can be optimized out
                        // shift_32 = 1'b1;
                        // shift_32 = 1'b1;
                end else if (i_function == ROR) begin
                end else if (i_function == ROR) begin
                        // RRX (rotate right extended), encoded as ROR w/ imm 0
                        // RRX (rotate right extended), encoded as ROR w/ imm 0
                        shift_amount[0] = 1'b1;
                        shift_amount[0] = 1'b1;
                        shift_nzero = 1'b1;
                        shift_nzero = 1'b1;
                end
                end
        end
        end
 
 
        // LSB sub-selector calculation. Usually it is taken
        // LSB sub-selector calculation. Usually it is taken
        // directly from the shift_amount, but ROR requires
        // directly from the shift_amount, but ROR requires
        // no masking at all.
        // no masking at all.
        case (i_function)
        case (i_function)
                LSL: low_selector = shift_amount[3:0];
                LSL: low_selector = shift_amount[3:0];
                LSR: low_selector = shift_amount[3:0];
                LSR: low_selector = shift_amount[3:0];
                ASR: low_selector = shift_amount[3:0];
                ASR: low_selector = shift_amount[3:0];
                ROR: low_selector = 4'b0000;
                ROR: low_selector = 4'b0000;
        endcase
        endcase
 
 
        // Left-hand MSB sub-selector calculation. Opaque for every function but LSL.
        // Left-hand MSB sub-selector calculation. Opaque for every function but LSL.
        case (i_function)
        case (i_function)
                LSL: lsl_selector = {shift_32, shift_over, shift_amount[4]};
                LSL: lsl_selector = {shift_32, shift_over, shift_amount[4]};
                LSR: lsl_selector = 3'b0_1_0; // Opaque mask selector
                LSR: lsl_selector = 3'b0_1_0; // Opaque mask selector
                ASR: lsl_selector = 3'b0_1_0; // Opaque mask selector
                ASR: lsl_selector = 3'b0_1_0; // Opaque mask selector
                ROR: lsl_selector = 3'b0_1_0; // Opaque mask selector
                ROR: lsl_selector = 3'b0_1_0; // Opaque mask selector
        endcase
        endcase
 
 
        // Right-hand MSB sub-selector calculation. Opaque for LSL, transparent for ROR.
        // Right-hand MSB sub-selector calculation. Opaque for LSL, transparent for ROR.
        case (i_function)
        case (i_function)
                LSL: lsr_selector = 3'b0_1_0; // Opaque mask selector
                LSL: lsr_selector = 3'b0_1_0; // Opaque mask selector
                LSR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
                LSR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
                ASR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
                ASR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
                ROR: lsr_selector = 3'b0_0_0; // Transparent mask selector
                ROR: lsr_selector = 3'b0_0_0; // Transparent mask selector
        endcase
        endcase
 
 
        // Direction
        // Direction
        case (i_function)
        case (i_function)
                LSL: direction = 1'b0; // Left shift
                LSL: direction = 1'b0; // Left shift
                LSR: direction = 1'b1; // Right shift
                LSR: direction = 1'b1; // Right shift
                ASR: direction = 1'b1; // Right shift
                ASR: direction = 1'b1; // Right shift
                ROR: direction = 1'b1; // Right shift
                ROR: direction = 1'b1; // Right shift
        endcase
        endcase
 
 
        // Sign for ASR shift
        // Sign for ASR shift
        asr_sign = 1'b0;
        asr_sign = 1'b0;
        if (i_function == ASR && i_in[31])
        if (i_function == ASR && i_in[31])
                asr_sign = 1'b1;
                asr_sign = 1'b1;
end
end
 
 
// Generic rotate. Theoretical cost: 32x5 4-to-1 LUTs.
// Generic rotate. Theoretical cost: 32x5 4-to-1 LUTs.
// Practically a bit higher due to high fanout of "direction".
// Practically a bit higher due to high fanout of "direction".
generate
generate
genvar i, j;
genvar i, j;
        for (i = 0; i < 5; i = i + 1)
        for (i = 0; i < 5; i = i + 1)
        begin : netgen
        begin : netgen
                wire [31:0] in;
                wire [31:0] in;
                reg [31:0] out;
                reg [31:0] out;
                for (j = 0; j < 32; j = j + 1)
                for (j = 0; j < 32; j = j + 1)
                begin : net
                begin : net
                        always @*
                        always @*
                                out[j] = in[j] & (~shift_amount[i] ^ direction) |
                                out[j] = in[j] & (~shift_amount[i] ^ direction) |
                                         in[wrap(j, i)] & (shift_amount[i] ^ direction);
                                         in[wrap(j, i)] & (shift_amount[i] ^ direction);
                end
                end
        end
        end
 
 
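        // Stage order is reversed (netgen[4] is fed first, netgen[0] is last)
        // Stage order is reversed (netgen[4] is fed first, netgen[0] is last)
        // with respect to the volatile shift_amount[0]
        // with respect to the volatile shift_amount[0]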
        assign netgen[4].in = i_in;
        assign netgen[4].in = i_in;
        for (i = 1; i < 5; i = i + 1)
        for (i = 1; i < 5; i = i + 1)
        begin : router
        begin : router
                assign netgen[i-1].in = netgen[i].out;
                assign netgen[i-1].in = netgen[i].out;
        end
        end
endgenerate
endgenerate
 
 
// Aliasing
// Aliasing
assign rot_prod = netgen[0].out;
assign rot_prod = netgen[0].out;
 
 
// Submask calculated from LSB sub-selector.
// Submask calculated from LSB sub-selector.
// Cost: 16 4-to-1 LUTs.
// Cost: 16 4-to-1 LUTs.
always @*
always @*
case (low_selector) // synthesis full_case parallel_case
case (low_selector) // synthesis full_case parallel_case
        4'b0000:        low_mask = 16'hffff;
        4'b0000:        low_mask = 16'hffff;
        4'b0001:        low_mask = 16'hfffe;
        4'b0001:        low_mask = 16'hfffe;
        4'b0010:        low_mask = 16'hfffc;
        4'b0010:        low_mask = 16'hfffc;
        4'b0011:        low_mask = 16'hfff8;
        4'b0011:        low_mask = 16'hfff8;
        4'b0100:        low_mask = 16'hfff0;
        4'b0100:        low_mask = 16'hfff0;
        4'b0101:        low_mask = 16'hffe0;
        4'b0101:        low_mask = 16'hffe0;
        4'b0110:        low_mask = 16'hffc0;
        4'b0110:        low_mask = 16'hffc0;
        4'b0111:        low_mask = 16'hff80;
        4'b0111:        low_mask = 16'hff80;
        4'b1000:        low_mask = 16'hff00;
        4'b1000:        low_mask = 16'hff00;
        4'b1001:        low_mask = 16'hfe00;
        4'b1001:        low_mask = 16'hfe00;
        4'b1010:        low_mask = 16'hfc00;
        4'b1010:        low_mask = 16'hfc00;
        4'b1011:        low_mask = 16'hf800;
        4'b1011:        low_mask = 16'hf800;
        4'b1100:        low_mask = 16'hf000;
        4'b1100:        low_mask = 16'hf000;
        4'b1101:        low_mask = 16'he000;
        4'b1101:        low_mask = 16'he000;
        4'b1110:        low_mask = 16'hc000;
        4'b1110:        low_mask = 16'hc000;
        4'b1111:        low_mask = 16'h8000;
        4'b1111:        low_mask = 16'h8000;
endcase
endcase
 
 
// Left-hand mask calculation.
// Left-hand mask calculation.
// Cost: 33 4-to-1 LUTs.
// Cost: 33 4-to-1 LUTs.
// lsl_selector is {shift_32, shift_over, shift_amount[4]} for LSL and the
// lsl_selector is {shift_32, shift_over, shift_amount[4]} for LSL and the
// opaque value 3'b0_1_0 for every other function. Bit 32 of the mask gates
// opaque value 3'b0_1_0 for every other function. Bit 32 of the mask gates
// the LSL carry (p_l[0]); bits 31:0 gate the rotated product p_l.
// the LSL carry (p_l[0]); bits 31:0 gate the rotated product p_l.
// Case items are sized to the 3-bit selector, and constants do not start
// Case items are sized to the 3-bit selector, and constants do not start
// with an underscore, so strict Verilog parsers and linters accept them.
// with an underscore, so strict Verilog parsers and linters accept them.
always @*
always @*
casez (lsl_selector) // synthesis full_case parallel_case
casez (lsl_selector) // synthesis full_case parallel_case
        3'b1??: lsl_mask =  33'h1_0000_0000;              // shift_32 set: only the carry gate is open
        3'b1??: lsl_mask =  33'h1_0000_0000;              // shift_32 set: only the carry gate is open
        3'b01?: lsl_mask =  33'h0_0000_0000;              // shift_over without shift_32: fully opaque
        3'b01?: lsl_mask =  33'h0_0000_0000;              // shift_over without shift_32: fully opaque
        3'b001: lsl_mask = { 1'h1, low_mask, 16'h0000};   // shift_amount[4] set: low half cleared
        3'b001: lsl_mask = { 1'h1, low_mask, 16'h0000};   // shift_amount[4] set: low half cleared
        3'b000: lsl_mask = {17'h1_ffff, low_mask};        // shift_amount[4] clear: high half open
        3'b000: lsl_mask = {17'h1_ffff, low_mask};        // shift_amount[4] clear: high half open
endcase
endcase
 
 
// Right-hand mask calculation.
// Right-hand mask calculation.
// Cost: 33 4-to-1 LUTs.
// Cost: 33 4-to-1 LUTs.
// lsr_selector is {shift_32, shift_over, shift_amount[4]} for LSR/ASR,
// lsr_selector is {shift_32, shift_over, shift_amount[4]} for LSR/ASR,
// 3'b0_1_0 (opaque) for LSL and 3'b0_0_0 (transparent) for ROR. Bit 32 of
// 3'b0_1_0 (opaque) for LSL and 3'b0_0_0 (transparent) for ROR. Bit 32 of
// the mask gates the LSR carry (p_r[31]); bits 31:0 gate the product p_r.
// the mask gates the LSR carry (p_r[31]); bits 31:0 gate the product p_r.
// Case items are sized to the 3-bit selector, and constants do not start
// Case items are sized to the 3-bit selector, and constants do not start
// with an underscore, so strict Verilog parsers and linters accept them.
// with an underscore, so strict Verilog parsers and linters accept them.
always @*
always @*
casez (lsr_selector) // synthesis full_case parallel_case
casez (lsr_selector) // synthesis full_case parallel_case
        3'b1??: lsr_mask =  33'h1_0000_0000;                        // shift_32 set: only the carry gate is open
        3'b1??: lsr_mask =  33'h1_0000_0000;                        // shift_32 set: only the carry gate is open
        3'b01?: lsr_mask =  33'h0_0000_0000;                        // shift_over without shift_32: fully opaque
        3'b01?: lsr_mask =  33'h0_0000_0000;                        // shift_over without shift_32: fully opaque
        3'b000: lsr_mask = { 1'h1, bit_swap(low_mask), 16'hffff};   // shift_amount[4] clear: low half open
        3'b000: lsr_mask = { 1'h1, bit_swap(low_mask), 16'hffff};   // shift_amount[4] clear: low half open
        3'b001: lsr_mask = {17'h1_0000, bit_swap(low_mask)};        // shift_amount[4] set: high half cleared
        3'b001: lsr_mask = {17'h1_0000, bit_swap(low_mask)};        // shift_amount[4] set: high half cleared
endcase
endcase
 
 
// Alias: right-rotated
// Alias: right-rotated
assign p_r = {rot_prod[30:0], rot_prod[31]};
assign p_r = {rot_prod[30:0], rot_prod[31]};
 
 
// Alias: left-rotated
// Alias: left-rotated
assign p_l = rot_prod[31:0];
assign p_l = rot_prod[31:0];
 
 
// ROR MSB, handling special cases
// ROR MSB, handling special cases
assign ror_out[0] = i_shift_imm_zero ?   i_carry_in :
assign ror_out[0] = i_shift_imm_zero ?   i_carry_in :
                                        p_r[31];
                                        p_r[31];
 
 
// ROR carry, handling special cases
// ROR carry, handling special cases
assign ror_out[1] = i_shift_imm_zero ?  i_in[0] :
assign ror_out[1] = i_shift_imm_zero ?  i_in[0] :
                        shift_nzero ?   p_r[31] :
                        shift_nzero ?   p_r[31] :
                                        i_carry_in;
                                        i_carry_in;
 
 
// LSL MSB
// LSL MSB
assign lsl_out[0] =      p_l[31] & lsl_mask[31];
assign lsl_out[0] =      p_l[31] & lsl_mask[31];
 
 
// LSL carry, handling special cases
// LSL carry, handling special cases
assign lsl_out[1] =     shift_nzero ?   p_l[0] & lsl_mask[32]:
assign lsl_out[1] =     shift_nzero ?   p_l[0] & lsl_mask[32]:
                                        i_carry_in;
                                        i_carry_in;
 
 
// LSR MSB
// LSR MSB
assign lsr_out[0] =      p_r[31] & lsr_mask[31];
assign lsr_out[0] =      p_r[31] & lsr_mask[31];
 
 
// LSR carry, handling special cases
// LSR carry, handling special cases
assign lsr_out[1] = i_shift_imm_zero ?  i_in[31] :
assign lsr_out[1] = i_shift_imm_zero ?  i_in[31] :
                        shift_nzero ?   p_r[31] & lsr_mask[32]:
                        shift_nzero ?   p_r[31] & lsr_mask[32]:
                                        i_carry_in;
                                        i_carry_in;
 
 
// ASR MSB
// ASR MSB
assign asr_out[0] =      i_in[31] ?      i_in[31] :
assign asr_out[0] =      i_in[31] ?      i_in[31] :
                                        p_r[31] & lsr_mask[31] ;
                                        p_r[31] & lsr_mask[31] ;
 
 
// ASR carry, handling special cases
// ASR carry, handling special cases
assign asr_out[1] =     shift_over ?    i_in[31] :
assign asr_out[1] =     shift_over ?    i_in[31] :
                        shift_nzero ?   p_r[31] :
                        shift_nzero ?   p_r[31] :
                                        i_carry_in;
                                        i_carry_in;
 
 
// Carry and MSB are calculated as above
// Carry and MSB are calculated as above
assign {o_carry_out, o_out[31]} = i_function == LSL ? lsl_out :
assign {o_carry_out, o_out[31]} = i_function == LSL ? lsl_out :
                              i_function == LSR ? lsr_out :
                              i_function == LSR ? lsr_out :
                              i_function == ASR ? asr_out :
                              i_function == ASR ? asr_out :
                                                  ror_out ;
                                                  ror_out ;
 
 
// And the rest of result is the masked rotated input.
// And the rest of result is the masked rotated input.
assign o_out[30:0] =     (p_l[30:0] & lsl_mask[30:0]) |
assign o_out[30:0] =     (p_l[30:0] & lsl_mask[30:0]) |
                        (p_r[30:0] & lsr_mask[30:0]) |
                        (p_r[30:0] & lsr_mask[30:0]) |
                        (~lsr_mask[30:0] & {31{asr_sign}});
                        (~lsr_mask[30:0] & {31{asr_sign}});
 
 
// Rotate: calculate bit pos for level "level" and offset "pos"
// Rotate: calculate bit pos for level "level" and offset "pos"
// Returns (pos - 2**level) reduced to 5 bits, i.e. the index in 0..31 of the
// Returns (pos - 2**level) reduced to 5 bits, i.e. the index in 0..31 of the
// bit that lies 2**level positions below "pos", wrapping around past bit 0.
// bit that lies 2**level positions below "pos", wrapping around past bit 0.
function [4:0] wrap;
function [4:0] wrap;
input integer pos;
input integer pos;
input integer level;
input integer level;
integer out;
integer out;
begin
begin
        // May go negative when pos < 2**level
        // May go negative when pos < 2**level
        out = pos - (1 << level);
        out = pos - (1 << level);
        // Keeping only the 5 LSBs wraps a negative value into 0..31
        // Keeping only the 5 LSBs wraps a negative value into 0..31
        wrap = out[4:0];
        wrap = out[4:0];
end
end
endfunction
endfunction
 
 
// Swap bits in the input 16-bit value
// Swap bits in the input 16-bit value
// Returns "value" with its bit order reversed: result bit i is value bit 15-i.
// Returns "value" with its bit order reversed: result bit i is value bit 15-i.
function [15:0] bit_swap;
function [15:0] bit_swap;
input [15:0] value;
input [15:0] value;
integer i;
integer i;
begin
begin
        // Constant-bound loop over all 16 result bits
        // Constant-bound loop over all 16 result bits
        for (i = 0; i < 16; i = i + 1)
        for (i = 0; i < 16; i = i + 1)
                bit_swap[i] = value[15 - i];
                bit_swap[i] = value[15 - i];
end
end
endfunction
endfunction
 
 
endmodule
endmodule
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.