Commit 29360044 authored by Marc's avatar Marc
Browse files

Added different versions of the design, current simd fits time requirement

parent 01af559f
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library marcmod;
use marcmod.simdmod.all;
library grlib;
use grlib.stdlib.all;
-- ALU operating on two VLEN-bit operands. The 5-bit operation code selects
-- which of the computed results is driven on res.
entity alu8b is
  generic (
    VLEN : integer range 0 to 32 := 8               -- operand and result width in bits
  );
  port (
    op  : in  std_logic_vector(4 downto 0);         -- operation code
    sr1 : in  std_logic_vector(VLEN-1 downto 0);    -- first source operand
    sr2 : in  std_logic_vector(VLEN-1 downto 0);    -- second source operand
    res : out std_logic_vector(VLEN-1 downto 0)     -- selected result
  );
end entity alu8b;
-- Combinational implementation: every operation is computed in parallel and
-- the "mux" process forwards the one selected by op.
architecture rtl of alu8b is

  subtype data is std_logic_vector(VLEN-1 downto 0);

  -- Control bits decoded from the operation code.
  signal sign, saturate : std_logic;

  -- Candidate results, grouped by operation family.
  signal nop, movb, add, sadd, sub, ssub : data;
  signal max, min, umax, smax, umin, smin : data;
  signal sand, sor, sxor, snand, snor, sxnor : data;
  signal mul : data;

begin

  -- op(4) cleared selects signed handling; op(3) set selects saturation.
  sign     <= not op(4);
  saturate <= op(3);

  -- Operand pass-through.
  nop  <= sr1;
  movb <= sr2;

  -- Add / subtract: wrap-around result unless saturation is requested.
  -- NOTE(review): "+" and "-" on std_logic_vector are presumably the
  -- grlib.stdlib overloads -- confirm.
  add <= sr1 + sr2 when saturate /= '1' else
         sadd;
  sub <= sr1 - sr2 when saturate /= '1' else
         ssub;

  -- Maximum / minimum, once on the raw vectors and once as signed numbers.
  umax <= sr1 when sr1 > sr2 else
          sr2;
  umin <= sr2 when sr1 > sr2 else
          sr1;
  smax <= sr1 when signed(sr1) > signed(sr2) else
          sr2;
  smin <= sr2 when signed(sr1) > signed(sr2) else
          sr1;

  max <= umax when sign /= '1' else
         smax;
  min <= umin when sign /= '1' else
         smin;

  -- Bitwise logic.
  sand  <= sr1 and  sr2;
  sor   <= sr1 or   sr2;
  sxor  <= sr1 xor  sr2;
  snand <= sr1 nand sr2;
  snor  <= sr1 nor  sr2;
  sxnor <= sr1 xnor sr2;

  -- Multiplier.
  -- NOTE(review): positional generic/port association; verify it matches the
  -- lpmul component declaration that is actually in scope.
  mult : lpmul
    generic map (VLEN)
    port map (sign, saturate, sr1, sr2, mul);

  -- Saturating adder. Operands are extended by one bit (sign-extended when
  -- sign = '1', zero-extended otherwise) so overflow can be observed.
  adder : process (sr1, sr2, sign)
    constant S_MAX : data := '0' & (VLEN-2 downto 0 => '1');
    constant S_MIN : data := '1' & (VLEN-2 downto 0 => '0');
    variable total : std_logic_vector(VLEN downto 0);
  begin
    total := ((sign and sr1(sr1'left)) & sr1) + ((sign and sr2(sr2'left)) & sr2);

    if sign /= '0' then
      -- Signed overflow: equal operand signs, different result sign.
      if sr1(sr1'left) = sr2(sr2'left) and sr1(sr1'left) /= total(total'left-1) then
        if total(total'left-1) = '1' then
          total := '0' & S_MAX;    -- positive operands overflowed
        else
          total := '1' & S_MIN;    -- negative operands overflowed
        end if;
      end if;
    elsif total(total'left) = '1' then
      -- Unsigned carry out: clamp to all ones.
      total := (others => '1');
    end if;

    sadd <= total(data'range);
  end process;

  -- Saturating subtractor (sr1 - sr2), same operand extension as the adder.
  subtractor : process (sr1, sr2, sign)
    constant S_MAX : data := '0' & (VLEN-2 downto 0 => '1');
    constant S_MIN : data := '1' & (VLEN-2 downto 0 => '0');
    variable diff : std_logic_vector(VLEN downto 0);
  begin
    diff := ((sign and sr1(sr1'left)) & sr1) - ((sign and sr2(sr2'left)) & sr2);

    if sign /= '0' then
      -- Signed overflow: operand signs differ and the result sign equals
      -- the sign of the subtrahend.
      if sr1(sr1'left) /= sr2(sr2'left) and sr2(sr2'left) = diff(diff'left-1) then
        if diff(diff'left-1) = '1' then
          diff := '0' & S_MAX;
        else
          diff := '0' & S_MIN;     -- extension bit is discarded below
        end if;
      end if;
    elsif diff(diff'left) = '1' then
      -- Unsigned borrow: clamp to zero.
      diff := (others => '0');
    end if;

    ssub <= diff(data'range);
  end process;

  -- Result selection by operation code; unlisted codes yield zero.
  mux : process (op, nop, movb, add, sub, max, min, sand, sor, sxor, snand, snor, sxnor, mul)
  begin
    case op is
      when S1_NOP                                  => res <= nop;
      when S1_MOVB                                 => res <= movb;
      when S1_ADD | S1_SADD | S1_USADD             => res <= add;
      when S1_SUB | S1_SSUB | S1_USSUB             => res <= sub;
      when S1_MUL | S1_UMUL | S1_SMUL | S1_USMUL   => res <= mul;
      when S1_MAX | S1_UMAX                        => res <= max;
      when S1_MIN | S1_UMIN                        => res <= min;
      when S1_AND                                  => res <= sand;
      when S1_OR                                   => res <= sor;
      when S1_XOR                                  => res <= sxor;
      when S1_NAND                                 => res <= snand;
      when S1_NOR                                  => res <= snor;
      when S1_XNOR                                 => res <= sxnor;
      when others                                  => res <= (others => '0');
    end case;
  end process;

end architecture rtl;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library marcmod;
use marcmod.simdmod.all;
library grlib;
use grlib.stdlib.all;
-- Entity with the same generic and port list as alu8b: a 5-bit operation
-- code, two VLEN-bit operands and a VLEN-bit result.
-- NOTE(review): no architecture is visible here; purpose to be confirmed.
entity l1_redalu is
  generic (
    VLEN : integer range 0 to 32 := 8               -- operand and result width in bits
  );
  port (
    op  : in  std_logic_vector(4 downto 0);         -- operation code
    sr1 : in  std_logic_vector(VLEN-1 downto 0);    -- first source operand
    sr2 : in  std_logic_vector(VLEN-1 downto 0);    -- second source operand
    res : out std_logic_vector(VLEN-1 downto 0)     -- result
  );
end entity l1_redalu;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library marcmod;
use marcmod.simdmod.all;
library grlib;
use grlib.stdlib.all;
-- Multiplier with record-typed ports: operands and control bits arrive in
-- muli, the product is returned in mulo.
entity lpmul is
  port (
    muli : in  lpmul_in_type;     -- operands plus sign / sat controls
    mulo : out lpmul_out_type     -- multiplication result
  );
end entity lpmul;
-- Combinational VLEN x VLEN multiplier with optional saturation.
--
-- For a signed saturating multiply the operands are first converted to their
-- magnitudes, multiplied as unsigned numbers, and the result is then either
-- passed through, negated, or replaced by a saturation constant.
-- Without saturation the low VLEN bits of the unsigned product are returned.
architecture rtl of lpmul is

  -- Saturation limits for a VLEN-bit value.
  constant SMAX : std_logic_vector(VLEN-1 downto 0) := "0" & (VLEN-2 downto 0 => '1');  -- largest signed
  constant SMIN : std_logic_vector(VLEN-1 downto 0) := "1" & (VLEN-2 downto 0 => '0');  -- smallest signed
  constant UMAX : std_logic_vector(VLEN-1 downto 0) := (others => '1');                 -- largest unsigned

  -- Two's-complement negation of a.
  function sign_invert(a : std_logic_vector) return std_logic_vector is
  begin
    return std_logic_vector(-signed(a));
  end sign_invert;

  -- Unsigned shift-and-add product of a and b.
  -- Returns a vector of 2*a'length bits.
  -- The shifted multiplicand is held at full product width, so bits shifted
  -- beyond a'length still contribute to the upper half of the result; the
  -- saturation logic depends on that upper half being exact.
  function product (a, b : std_logic_vector) return std_logic_vector is
    variable addend : unsigned(a'length*2-1 downto 0);
    variable acc    : unsigned(a'length*2-1 downto 0);
    -- Normalised view of b so that index 0 is always its least significant bit.
    alias b_norm : std_logic_vector(b'length-1 downto 0) is b;
  begin
    acc    := (others => '0');
    addend := resize(unsigned(a), addend'length);
    for i in 0 to b'length-1 loop
      if b_norm(i) = '1' then
        acc := acc + addend;
      end if;
      addend := shift_left(addend, 1);
    end loop;
    return std_logic_vector(acc);
  end product;

  -- Chooses the result source for sat_sel.
  --   asign, bsign : sign bits of the original operands
  --   rsign        : most significant bit of the low half of the product
  --   sign         : '1' for a signed operation
  --   sat          : '1' when saturation is requested
  --   z2           : upper half of the product
  -- Returns the 3-bit selector understood by sat_sel.
  function sat_mux (asign, bsign, rsign, sign, sat : std_logic;
                    z2 : std_logic_vector) return std_logic_vector is
    constant ZERO     : std_logic_vector(z2'range) := (others => '0');
    variable sel      : std_logic_vector(2 downto 0);
    variable high_set : boolean;   -- product does not fit in the low half
    variable mag_ovf  : boolean;   -- magnitude does not fit in VLEN-1 bits
  begin
    sel      := "000"; -- result as it is, no saturation
    high_set := z2 /= ZERO;
    mag_ovf  := high_set or rsign /= '0';
    if sat = '1' then
      if sign = '1' then
        if asign = bsign then -- positive result
          if mag_ovf then
            sel := "011"; -- signed max
          end if;
        else                  -- negative (or zero) result
          -- A magnitude with rsign set is at least 2**(VLEN-1); the correct
          -- clamped value is then SMIN, which is also the exact answer when
          -- the magnitude equals 2**(VLEN-1).
          if mag_ovf then
            sel := "100"; -- signed min
          else
            sel := "001"; -- negated magnitude
          end if;
        end if;
      else
        if high_set then
          sel := "111"; -- unsigned max
        end if;
      end if;
    end if;
    return sel;
  end sat_mux;

  -- Drives mulres from the source chosen by sel:
  --   "000" r, "001" nr, "011" SMAX, "100" SMIN, "111" UMAX, otherwise zero.
  procedure sat_sel (sel    : in  std_logic_vector(2 downto 0);
                     r, nr  : in  std_logic_vector(VLEN-1 downto 0);
                     mulres : out std_logic_vector(VLEN-1 downto 0)) is
  begin
    case sel is
      when "000"  => mulres := r;
      when "001"  => mulres := nr;
      when "011"  => mulres := SMAX;
      when "100"  => mulres := SMIN;
      when "111"  => mulres := UMAX;
      when others => mulres := (others => '0');
    end case;
  end sat_sel;

begin

  comb : process (muli)
    variable z            : std_logic_vector(VLEN*2-1 downto 0);  -- full product
    variable r            : std_logic_vector(VLEN-1 downto 0);    -- final result
    variable a, b         : std_logic_vector(VLEN-1 downto 0);    -- operands fed to product
    variable signA, signB : std_logic;
    variable mux          : std_logic_vector(2 downto 0);
  begin
    a := muli.opA;
    b := muli.opB;
    signA := muli.opA(muli.opA'left);
    signB := muli.opB(muli.opB'left);

    -- Signed saturating multiply works on magnitudes.
    if (muli.sign and signA and muli.sat) = '1' then
      a := sign_invert(muli.opA);
    end if;
    if (muli.sign and signB and muli.sat) = '1' then
      b := sign_invert(muli.opB);
    end if;

    z   := product(a, b);
    mux := sat_mux(signA, signB, z(r'left), muli.sign, muli.sat, z(z'left downto r'length));
    sat_sel(mux, z(r'range), sign_invert(z(r'range)), r);

    mulo.mul_res <= r;
  end process;

end architecture rtl;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library marcmod;
use marcmod.simdmod.all;
library grlib;
use grlib.stdlib.all;
-- Two-operand ALU whose result is one bit wider than its operands.
entity redalu is
  generic (
    SIZE : integer range 0 to 32 := 8               -- operand width in bits
  );
  port (
    op  : in  std_logic_vector(2 downto 0);         -- operation code
    sat : in  std_logic;                            -- '1' selects the saturating sum
    sr1 : in  std_logic_vector(SIZE-1 downto 0);    -- first source operand
    sr2 : in  std_logic_vector(SIZE-1 downto 0);    -- second source operand
    res : out std_logic_vector(SIZE downto 0)       -- result, SIZE+1 bits
  );
end entity redalu;
-- Combinational implementation: sum, max, min and xor are computed in
-- parallel on SIZE-bit operands and the "mux" process selects one of them as
-- the (SIZE+1)-bit result.
architecture rtl of redalu is

  subtype data   is std_logic_vector(SIZE-1 downto 0);
  subtype result is std_logic_vector(SIZE downto 0);

  signal sign : std_logic;                          -- '1' for signed handling
  signal ssum, sum, max, min, sxor : result;
  signal smax, smin, umax, umin : data;

begin

  -- op(2) cleared selects signed handling.
  sign <= not op(2);

  sxor <= '0' & (sr1 xor sr2);

  -- Sum of the operands extended by one bit (sign-extended when signed,
  -- zero-extended otherwise), or the saturated sum when sat = '1'.
  sum <= ssum when sat = '1' else
         ((sign and sr1(sr1'left)) & sr1) + ((sign and sr2(sr2'left)) & sr2);

  umax <= sr1 when sr1 > sr2 else
          sr2;
  smax <= sr1 when signed(sr1) > signed(sr2) else
          sr2;
  umin <= sr2 when sr1 > sr2 else
          sr1;
  smin <= sr2 when signed(sr1) > signed(sr2) else
          sr1;

  -- Extend the selected max/min to result width: sign bit for signed,
  -- '0' for unsigned.
  max <= (sign and smax(smax'left)) & smax when sign = '1' else
         '0' & umax;
  min <= (sign and smin(smin'left)) & smin when sign = '1' else
         '0' & umin;

  -- Saturating adder working on the one-bit-extended operands.
  -- NOTE(review): the unsigned clamp sets all SIZE+1 bits, including the
  -- extension bit -- confirm this is what the consumer of res expects.
  adder : process (sr1, sr2, sign)
    variable z : std_logic_vector(SIZE downto 0);
    constant S_MAX : data := '0' & (SIZE-2 downto 0 => '1');
    constant S_MIN : data := '1' & (SIZE-2 downto 0 => '0');
  begin
    z := ((sign and sr1(sr1'left)) & sr1) + ((sign and sr2(sr2'left)) & sr2);
    if sign = '0' then
      -- Unsigned: carry out of the operand width.
      if z(z'left) = '1' then
        z := (others => '1');
      end if;
    else
      -- Signed overflow: equal operand signs, different result sign.
      if sr1(sr1'left) = sr2(sr2'left) and sr1(sr1'left) /= z(z'left-1) then
        if z(z'left-1) = '1' then
          z := '0' & S_MAX;
        else
          z := '1' & S_MIN;
        end if;
      end if;
    end if;
    ssum <= z;
  end process;

  -- Result selection. op is part of the sensitivity list because the case
  -- statement reads it: the output must follow a change of operation code
  -- even when none of the candidate results change.
  mux : process (op, sum, max, min, sxor)
    variable alu_res : result;
  begin
    case op is
      when S2_SUM | S2_USUM => alu_res := sum;
      when S2_MAX | S2_UMAX => alu_res := max;
      when S2_MIN | S2_UMIN => alu_res := min;
      when S2_XOR           => alu_res := sxor;
      when others           => alu_res := (others => '0');
    end case;
    res <= alu_res;
  end process;

end architecture rtl;
This diff is collapsed.
......@@ -3,50 +3,102 @@ use ieee.std_logic_1164.all;
library grlib;
use grlib.stdlib.all;
package simdmod is
component simd is
generic(
XLEN : integer := 32;
VLEN : integer range 0 to 32 := 8;
RSIZE: integer := 5;
LOGSZ: integer := 2 --integer(ceil(ieee.math_real.log2(real(XLEN/VLEN))))
);
-- Package-wide dimensions used by the record types below.
-- NOTE(review): the trailing CFG_* names presumably refer to the intended
-- configuration constants -- confirm.
constant XLEN : integer := 32; --CFG_XLEN;
constant VLEN : integer := 8; --CFG_VLEN;
constant LOGSZ : integer := 2; -- CFG_LOGSZ;
-- Derived: XLEN/VLEN (4 with the values above).
constant VSIZE : integer := XLEN/VLEN;
-- Input record of the SIMD unit: operands, operation codes for both stages,
-- and the mask / byte-enable / swizzle control values.
type simd_in_type is record
ra : std_logic_vector (XLEN-1 downto 0); -- operand 1 data
rb : std_logic_vector (XLEN-1 downto 0); -- operand 2 data
op1 : std_logic_vector (4 downto 0); -- operation code stage1
op2 : std_logic_vector (2 downto 0); -- operation code stage2
rc_we : std_logic; -- write enable on destination (work)
ctrl_reg_we : std_logic; -- write enable on the mask register
mask_value : std_logic_vector (VSIZE-1 downto 0); -- new value for the mask
res_byte_en : std_logic_vector (VSIZE-1 downto 0); -- a set bit indicates the stage-2 result is written in that byte
swiz_veca : std_logic_vector (VSIZE*LOGSZ-1 downto 0); -- swizzling for operand a
swiz_vecb : std_logic_vector (VSIZE*LOGSZ-1 downto 0); -- swizzling for operand b
end record;
-- Output record of the SIMD unit: final result plus one bypass value per stage.
type simd_out_type is record
simd_res : std_logic_vector(XLEN-1 downto 0); -- output data
s1bp : std_logic_vector(XLEN-1 downto 0); -- stage-1 bypass output data
s2bp : std_logic_vector(XLEN-1 downto 0); -- stage-2 bypass output data
end record;
-- Input record of the lpmul multiplier.
type lpmul_in_type is record
opA : std_logic_vector(VLEN-1 downto 0); -- first operand
opB : std_logic_vector(VLEN-1 downto 0); -- second operand
sign : std_logic; -- '1' requests a signed multiplication
sat : std_logic; -- '1' requests a saturated result
end record;
-- Output record of the lpmul multiplier.
type lpmul_out_type is record
mul_res : std_logic_vector(VLEN-1 downto 0); -- VLEN-bit multiplication result
end record;
---------------------------------------------------------------
-- CONSTANTS FOR OPERATIONS --
--------------------------------------------------------------
-- Operation codes for stage 1 (simd_code bits 4-0).
-- In the alu8b architecture bit 4 is decoded as "unsigned" (sign <= not op(4))
-- and bit 3 as "saturate"; those two bits only affect the add, sub, mul,
-- max and min families there.
-- Codes "10010", "10111", "11000" and "11010" to "11100" are not assigned.
constant S1_NOP : std_logic_vector (4 downto 0) := "00000";
constant S1_ADD : std_logic_vector (4 downto 0) := "00001";
constant S1_SUB : std_logic_vector (4 downto 0) := "00010";
constant S1_MUL : std_logic_vector (4 downto 0) := "00011";
constant S1_DIV : std_logic_vector (4 downto 0) := "00100";
constant S1_MAX : std_logic_vector (4 downto 0) := "00101";
constant S1_MIN : std_logic_vector (4 downto 0) := "00110";
constant S1_AND : std_logic_vector (4 downto 0) := "00111";
constant S1_OR : std_logic_vector (4 downto 0) := "01000";
constant S1_XOR : std_logic_vector (4 downto 0) := "01001";
constant S1_NAND : std_logic_vector (4 downto 0) := "01010";
constant S1_NOR : std_logic_vector (4 downto 0) := "01011";
constant S1_XNOR : std_logic_vector (4 downto 0) := "01100";
constant S1_SADD : std_logic_vector (4 downto 0) := "01101";
constant S1_SSUB : std_logic_vector (4 downto 0) := "01110";
constant S1_SMUL : std_logic_vector (4 downto 0) := "01111";
constant S1_MOVB : std_logic_vector (4 downto 0) := "10000";
constant S1_SHFT : std_logic_vector (4 downto 0) := "10001";
constant S1_UMUL : std_logic_vector (4 downto 0) := "10011";
constant S1_UDIV : std_logic_vector (4 downto 0) := "10100";
constant S1_UMAX : std_logic_vector (4 downto 0) := "10101";
constant S1_UMIN : std_logic_vector (4 downto 0) := "10110";
constant S1_SSHFT : std_logic_vector (4 downto 0):= "11001";
constant S1_USADD : std_logic_vector (4 downto 0):= "11101";
constant S1_USSUB : std_logic_vector (4 downto 0):= "11110";
constant S1_USMUL : std_logic_vector (4 downto 0):= "11111";
-- Operation codes for stage 2 (simd_code bits 7-5).
-- All eight 3-bit codes are assigned. In the redalu architecture bit 2 is
-- decoded as "unsigned" (sign <= not op(2)); S2_XOR also has bit 2 set.
constant S2_NOP : std_logic_vector (2 downto 0) := "000";
constant S2_SUM : std_logic_vector (2 downto 0) := "001";
constant S2_MAX : std_logic_vector (2 downto 0) := "010";
constant S2_MIN : std_logic_vector (2 downto 0) := "011";
constant S2_XOR : std_logic_vector (2 downto 0) := "100";
constant S2_USUM: std_logic_vector (2 downto 0) := "101";
constant S2_UMAX: std_logic_vector (2 downto 0) := "110";
constant S2_UMIN: std_logic_vector (2 downto 0) := "111";
--SIMD COMPONENTS
component simd_module is
port(
-- general inputs
clk : in std_ulogic;
rstn : in std_ulogic;
holdn : in std_ulogic;
-- inst for debug
inst : in std_logic_vector(31 downto 0);
rc_we_i : in std_logic;
rc_addr_i : in std_logic_vector (RSIZE-1 downto 0);
-- vector operations inputs
ra_i : in std_logic_vector (XLEN-1 downto 0);
rb_i : in std_logic_vector (XLEN-1 downto 0);
op_i : in std_logic_vector (7 downto 0);
-- memory bypass input
ldbpa_i : in std_logic;
ldra_i : in std_logic_vector (XLEN-1 downto 0);
ldbpb_i : in std_logic;
ldrb_i : in std_logic_vector (XLEN-1 downto 0);
-- mask modification inputs
ctrl_reg_we_i : in std_logic;
mask_value_i : in std_logic_vector((XLEN/VLEN)-1 downto 0);
res_byte_en_i : in std_logic_vector((XLEN/VLEN)-1 downto 0);
swiz_veca_i : in std_logic_vector(XLEN/VLEN*LOGSZ-1 downto 0);
swiz_vecb_i : in std_logic_vector(XLEN/VLEN*LOGSZ-1 downto 0);
-- outputs
rc_data_o : out std_logic_vector (XLEN-1 downto 0);
s1bp_o : out std_logic_vector (XLEN-1 downto 0); -- data from stage 1 to bypass if needed
s2bp_o : out std_logic_vector (XLEN-1 downto 0) -- data from stage 2 to bypass if needed
--exceptions out
sdi : in simd_in_type;
sdo : out simd_out_type
);
end component;
-- Component declaration for the lpmul multiplier; the port list mirrors the
-- lpmul entity (record-typed input and output, no generics).
component lpmul is
port(
muli : in lpmul_in_type;
mulo : out lpmul_out_type
);
end component;
end package;
This diff is collapsed.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library grlib;
use grlib.stdlib.all;
library marcmod;
use marcmod.simdmod.all;
-- SIMD unit top-level entity with discrete (non-record) ports.
entity simd is
  generic (
    XLEN  : integer := 32;                  -- width of the operand and result words
    VLEN  : integer range 0 to 32 := 8;     -- width of one vector component
    RSIZE : integer := 5;                   -- width of the destination register address
    LOGSZ : integer := 2                    --integer(ceil(log2(real(XLEN/VLEN))))
  );
  port (
    -- general inputs
    clk   : in std_ulogic;
    rstn  : in std_ulogic;
    holdn : in std_ulogic;

    -- signals for debug
    inst      : in std_logic_vector(31 downto 0);
    rc_we_i   : in std_logic;
    rc_addr_i : in std_logic_vector(RSIZE-1 downto 0);

    -- vector operations inputs
    ra_i : in std_logic_vector(XLEN-1 downto 0);
    rb_i : in std_logic_vector(XLEN-1 downto 0);
    op_i : in std_logic_vector(7 downto 0);

    -- memory bypass input
    ldbpa_i  : in std_logic;
    ldbpb_i  : in std_logic;
    lddata_i : in std_logic_vector(XLEN-1 downto 0);

    -- mask modification inputs
    ctrl_reg_we_i : in std_logic;
    mask_value_i  : in std_logic_vector((XLEN/VLEN)-1 downto 0);
    res_byte_en_i : in std_logic_vector((XLEN/VLEN)-1 downto 0);
    swiz_veca_i   : in std_logic_vector(XLEN/VLEN*LOGSZ-1 downto 0);
    swiz_vecb_i   : in std_logic_vector(XLEN/VLEN*LOGSZ-1 downto 0);

    -- outputs
    rc_data_o : out std_logic_vector(XLEN-1 downto 0);
    s1bp_o    : out std_logic_vector(XLEN-1 downto 0);  -- data from stage 1 to bypass if needed
    s2bp_o    : out std_logic_vector(XLEN-1 downto 0)   -- data from stage 2 to bypass if needed
    --exceptions out
  );
end entity simd;
architecture rtl of simd is
constant VSIZE : integer := XLEN/VLEN;
---------------------------------------------------------------
-- REGISTER TYPES DEFINITION --
--------------------------------------------------------------
--vector register type
subtype vector_component is std_logic_vector(VLEN-1 downto 0);
type vector_reg_type is array (0 to VSIZE-1) of vector_component;
type s2l1 is array (0 to VSIZE/2-1) of std_logic_vector(VLEN downto 0);
subtype word is std_logic_vector(XLEN-1 downto 0);
-- mask registers (predicate)
subtype mask_reg_type is std_logic_vector(VSIZE-1 downto 0);
-- stage 2 byte enable (result in byte x)
subtype s2byteen_reg_type is std_logic_vector(VSIZE-1 downto 0);
-- swizling registers (reordering)
subtype log_length is integer range 0 to VSIZE-1;
type swizling_reg_type is array (0 to VSIZE-1) of log_length;
-- Control registers for extra functions
type ctrl_reg_type is record
mk : mask_reg_type;
sa : swizling_reg_type;
sb : swizling_reg_type;
be : s2byteen_reg_type;
ac : vector_reg_type;
end record;
-- First stage register
type s1_reg_type is record
ra : vector_reg_type;
rb : vector_reg_type;