---------------------------------------------------------------------
-- Optimized multiplier
--
-- Part of the LXP32 CPU
--
-- Copyright (c) 2016 by Alex I. Kuznetsov
--
-- This multiplier is designed for technologies that don't provide
-- fast 16x16 multipliers. One multiplication takes 6 cycles.
--
-- The multiplication algorithm is based on carry-save accumulation
-- of partial products.
---------------------------------------------------------------------

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Sequential 32x32 multiplier; the operands are handled as unsigned
-- vectors and only the low 32 bits of the product are produced.
--
-- Ports:
--   clk_i     clock, all registers update on its rising edge
--   rst_i     synchronous reset, active high
--   ce_i      start strobe: latches op1_i/op2_i and clears the accumulator
--   op1_i     first operand
--   op2_i     second operand
--   ce_o      registered strobe, set by the clock edge that accumulates
--             the last 8-bit slice of the second operand
--   result_o  acc_sum+acc_carry (combinational); holds the product
--             while ce_o is high

entity lxp32_mul_opt is
    port(
        clk_i: in std_logic;
        rst_i: in std_logic;
        ce_i: in std_logic;
        op1_i: in std_logic_vector(31 downto 0);
        op2_i: in std_logic_vector(31 downto 0);
        ce_o: out std_logic;
        result_o: out std_logic_vector(31 downto 0)
    );
end entity;

architecture rtl of lxp32_mul_opt is

-- Sum output of an n-bit carry-save adder: bitwise XOR of the three
-- inputs over indices n-1 downto 0.
-- The inputs are indexed directly with i, so every actual must have an
-- index range that contains n-1 downto 0 (all callers in this file pass
-- "... downto 0" vectors of at least n bits).

function csa_sum(a: unsigned; b: unsigned; c: unsigned; n: integer) return unsigned is
    variable r: unsigned(n-1 downto 0);
begin
    for i in r'range loop
        r(i):=a(i) xor b(i) xor c(i);
    end loop;
    return r;
end function;

-- Carry output of an n-bit carry-save adder: bitwise majority of the
-- three inputs, returned shifted left by one position (a zero bit is
-- appended), so the result is n+1 bits long.
-- Same index-range requirement on the inputs as csa_sum.

function csa_carry(a: unsigned; b: unsigned; c: unsigned; n: integer) return unsigned is
    variable r: unsigned(n-1 downto 0);
begin
    for i in r'range loop
        r(i):=(a(i) and b(i)) or (a(i) and c(i)) or (b(i) and c(i));
    end loop;
    return r&"0";
end function;

-- Operand registers: reg1 is shifted left and reg2 is shifted right by
-- 8 bits on every accumulation step

signal reg1: unsigned(op1_i'range);
signal reg2: unsigned(op2_i'range);

-- Partial products for the 8 low bits of reg2

type pp_type is array (7 downto 0) of unsigned(31 downto 0);
signal pp: pp_type;

-- Sum and carry outputs of the 8 carry-save adders; the carry vectors
-- are one bit wider because csa_carry shifts its result left by one

type pp_sum_type is array (7 downto 0) of unsigned(31 downto 0);
signal pp_sum: pp_sum_type;

type pp_carry_type is array (7 downto 0) of unsigned(32 downto 0);
signal pp_carry: pp_carry_type;

-- Accumulator kept in carry-save form (value = acc_sum+acc_carry)

signal acc_sum: unsigned(31 downto 0);
signal acc_carry: unsigned(31 downto 0);

-- Remaining accumulation steps; loaded with 4 when ce_i is sampled

signal cnt: integer range 0 to 4:=0;

signal result: std_logic_vector(result_o'range);
signal ceo: std_logic:='0';

begin

-- Calculate 8 partial products in parallel
-- (shift_left keeps the operand width, so bits shifted beyond bit 31
-- are dropped)

pp_gen: for i in pp'range generate
    pp(i)<=shift_left(reg1,i) when reg2(i)='1' else (others=>'0');
end generate;

-- Add partial products to the accumulator using carry-save adder tree
-- Ten 32-bit inputs (8 partial products, acc_sum, acc_carry) are reduced
-- to the pair pp_sum(7)/pp_carry(7). Where a 33-bit carry vector is fed
-- into a 32-bit adder, only its bits 31 downto 0 are read.

pp_sum(0)<=csa_sum(pp(0),pp(1),pp(2),32);
pp_carry(0)<=csa_carry(pp(0),pp(1),pp(2),32);

pp_sum(1)<=csa_sum(pp(3),pp(4),pp(5),32);
pp_carry(1)<=csa_carry(pp(3),pp(4),pp(5),32);

pp_sum(2)<=csa_sum(pp(6),pp(7),acc_sum,32);
pp_carry(2)<=csa_carry(pp(6),pp(7),acc_sum,32);

pp_sum(3)<=csa_sum(pp_sum(0),pp_carry(0),pp_sum(1),32);
pp_carry(3)<=csa_carry(pp_sum(0),pp_carry(0),pp_sum(1),32);

pp_sum(4)<=csa_sum(pp_carry(1),pp_sum(2),pp_carry(2),32);
pp_carry(4)<=csa_carry(pp_carry(1),pp_sum(2),pp_carry(2),32);

pp_sum(5)<=csa_sum(pp_sum(3),pp_carry(3),pp_sum(4),32);
pp_carry(5)<=csa_carry(pp_sum(3),pp_carry(3),pp_sum(4),32);

pp_sum(6)<=csa_sum(pp_sum(5),pp_carry(5),pp_carry(4),32);
pp_carry(6)<=csa_carry(pp_sum(5),pp_carry(5),pp_carry(4),32);

pp_sum(7)<=csa_sum(pp_sum(6),pp_carry(6),acc_carry,32);
pp_carry(7)<=csa_carry(pp_sum(6),pp_carry(6),acc_carry,32);

-- Multiplier state machine

process (clk_i) is
begin
    if rising_edge(clk_i) then
        if rst_i='1' then
            ceo<='0';
            cnt<=0;
            -- Data registers are assigned don't-care values on reset
            reg1<=(others=>'-');
            reg2<=(others=>'-');
            acc_sum<=(others=>'-');
            acc_carry<=(others=>'-');
        else
            -- cnt=1 marks the edge that accumulates the fourth (last)
            -- 8-bit slice, so ceo is high in the following cycle.
            -- NOTE(review): this test does not look at ce_i; if ce_i is
            -- asserted while cnt=1 the accumulator is cleared but ceo is
            -- still set. Presumably the caller never starts a new
            -- operation while one is in flight - confirm.
            if cnt=1 then
                ceo<='1';
            else
                ceo<='0';
            end if;

            if ce_i='1' then
                -- Start a new multiplication
                cnt<=4;
                reg1<=unsigned(op1_i);
                reg2<=unsigned(op2_i);
                acc_sum<=(others=>'0');
                acc_carry<=(others=>'0');
            else
                -- Accumulate the current slice and move on to the next
                -- one. This branch also runs when cnt=0; by then reg2
                -- has been shifted to zero, so acc_sum+acc_carry is
                -- preserved.
                acc_sum<=pp_sum(7);
                acc_carry<=pp_carry(7)(acc_carry'range);
                reg1<=reg1(reg1'high-8 downto 0)&X"00";
                -- The source text had 'X"00"®2(...)' here: "&reg" had
                -- been turned into the (R) sign by an HTML entity decoder
                reg2<=X"00"&reg2(reg2'high downto 8);
                if cnt>0 then
                    cnt<=cnt-1;
                end if;
            end if;
        end if;
    end if;
end process;

-- Resolve the carry-save accumulator with a single carry-propagate adder

result<=std_logic_vector(acc_sum+acc_carry);

result_o<=result;
ce_o<=ceo;

-- A simulation-time multiplication check
-- The full product is computed when ce_i is sampled and its low bits are
-- compared with the result on a later edge where ceo is high.

-- synthesis translate_off

process (clk_i) is
    variable p: unsigned(op1_i'length+op2_i'length-1 downto 0);
begin
    if rising_edge(clk_i) then
        if ce_i='1' then
            p:=unsigned(op1_i)*unsigned(op2_i);
        elsif ceo='1' then
            assert result=std_logic_vector(p(result'range))
                report "Incorrect multiplication result"
                severity failure;
        end if;
    end if;
end process;

-- synthesis translate_on

end architecture;
