Commit 5397299a authored by Marc
Browse files

module extension

parent f4745d10
......@@ -29,12 +29,27 @@ simd_test: simd_test.c bin_change
./bin_change tmp.simd_test.srec simd_test.list simd_test.srec 83288001
cp simd_test.srec test.srec
sparc-gaisler-elf-objdump -d simd_test.o > simd_test.dump
# simd_mask: build the SIMD mask test and install it as test.srec.
#
# Steps: compile and link simd_mask_test.c with the cross compiler, convert
# the executable to S-record format, run the result through ./bin_change,
# copy the final image to test.srec and write a disassembly of the object
# file to simd_mask_test.dump.
#
# NOTE(review): 83288001 appears to be the placeholder instruction word that
# bin_change substitutes using simd_mask_test.list -- confirm against the
# bin_change source.
#
# The target names no file, so it is declared phony; recipe lines must begin
# with a hard TAB or make aborts with "missing separator".
.PHONY: simd_mask
simd_mask: simd_mask_test.c bin_change
	$(XCC) $(XCFLAGS0) -c simd_mask_test.c
	$(XCC) $(XCFLAGS0) simd_mask_test.o $(XLDFLAGS) -o simd_mask_test.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_mask_test.exe tmp.simd_mask_test.srec
	./bin_change tmp.simd_mask_test.srec simd_mask_test.list simd_mask_test.srec 83288001
	cp simd_mask_test.srec test.srec
	sparc-gaisler-elf-objdump -d simd_mask_test.o > simd_mask_test.dump
# mat_mul: link the scalar matrix-multiply test and install it as test.srec.
#
# Steps: link mat_mul.o into mat_mul.exe, convert it to S-record format,
# copy the image to test.srec and write a disassembly of the object file to
# mat_mul.dump.
#
# A space is required between $(XLDFLAGS) and -o: without it the last linker
# flag is fused with "-o" (for example "-lm-o") and the link fails or writes
# to the wrong output file.
.PHONY: mat_mul
mat_mul: mat_mul.o
	$(XCC) $(XCFLAGS) mat_mul.o $(XLDFLAGS) -o mat_mul.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul.exe mat_mul.srec
	cp mat_mul.srec test.srec
	sparc-gaisler-elf-objdump -d mat_mul.o > mat_mul.dump
# mat_mul_simd: link the SIMD matrix-multiply test and install it as test.srec.
#
# Steps: link mat_mul_simd.o into mat_mul_simd.exe, convert it to S-record
# format, copy the image to test.srec and write a disassembly of the object
# file to mat_mul_simd.dump.
#
# A space is required between $(XLDFLAGS) and -o: without it the last linker
# flag is fused with "-o" and the link fails or writes to the wrong output.
.PHONY: mat_mul_simd
mat_mul_simd: mat_mul_simd.o
	$(XCC) $(XCFLAGS) mat_mul_simd.o $(XLDFLAGS) -o mat_mul_simd.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul_simd.exe mat_mul_simd.srec
	cp mat_mul_simd.srec test.srec
	sparc-gaisler-elf-objdump -d mat_mul_simd.o > mat_mul_simd.dump
# clean: remove build products (executables, objects, disassembly dumps, the
# bin_change helper, make.x) and the tmp.* intermediate files.
#
# Declared phony so that a stray file named "clean" cannot disable the
# target. A single rm covers everything; the pattern list is a superset of
# the earlier, shorter rm line it replaces.
.PHONY: clean
clean:
	rm -f *.exe *.o *.dump bin_change make.x tmp.*
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#define N 4
main()
int main()
{
char string[3*(3+(6*N*N+N))];
int pos = 0;
// puts("Matrix Multiplication");
int A[N][N], B[N][N], C[N][N];
int A[4][4] = {{1,2,3,4},{1,2,3,4},{1,2,3,4},{1,2,3,4}};
int D[N][N], B[N][N], C[N][N];
printf("a: %p\n",&A);
printf("b: %p\n",&B);
printf("c: %p\n",&C);
srand(N);
for(int i=0; i<N; i++)
for(int j=0; j<N; j++) {
A[i][j] = rand()%10;
B[i][j] = rand()%10;
A[i][j] = i;
B[i][j] = j;
}
int sum = 0;
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
for(int i=0; i<N; i++)
for(int j=0; j<N; j++){
for(int k=0; k<N; k++)
for(int k=0; k<N; k++){
__asm__("nop");
__asm__("nop");
sum=sum+A[i][k]*B[k][j];
__asm__("nop");
__asm__("nop");
}
C[i][j] = sum;
sum = 0;
}
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
pos += sprintf(&string[pos],"A:\n");
for(int i=0; i<N; i++){
......
......@@ -13,19 +13,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
//nop 82488001
printf("NOP (move) c=%#010x, expected 0x01020304\n", c);
......@@ -35,17 +24,8 @@ int main()
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
//add a b
printf("add: c=%#010x, expected result 0x01030507\n", c);
......@@ -56,19 +36,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("sadd: c=%#010x, expected result 0x80807e7f\n", c);
......@@ -78,18 +47,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("sub: c=%#010x, expected result 0x0a0500ff\n", c);
......@@ -99,18 +58,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("ssub: c=%#010x, expected result 0x807f7ff1\n", c);
......@@ -120,18 +69,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("signed max max: c=%#010x, expected result 0x00000040\n", c);
......@@ -140,17 +79,8 @@ int main()
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("unsigned max max: c=%#010x, expected result 0x000000a0\n", c);
......@@ -158,18 +88,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("unsigned min min: c=%#010x, expected result 0x00000002\n", c);
......@@ -178,17 +98,8 @@ int main()
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("signed min min: c=%#010x, expected result 0xffffff80\n", c);
......@@ -198,18 +109,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("dot product: c=%#010x, expected result 0x00000014\n", c);
......@@ -219,18 +120,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("dot product2: c=%#010x, expected result 0xfffffffc\n", c);
......@@ -241,18 +132,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("saturate mul: c=%#010x, expected result 0x7f81807f\n",c);
......@@ -262,18 +143,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("div: c=%#010x, expected result 0x4020ff01\n", c);
......@@ -283,18 +154,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("div2: c=%#010x, expected result 0xf6fb0aff\n", c);
......@@ -303,18 +164,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("nand: c=%#010x, expected result 0x21524150\n", c);
......@@ -323,18 +174,8 @@ int main()
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("sll %g2, %g1, %g1");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("nop");
__asm__("st %g1, [ %fp + -12 ]");
printf("xor reduce: c=%#010x, expected result 0x00000027\n", c);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -135,6 +135,7 @@ architecture rtl of iu3 is
conv_std_logic_vector(NWIN, NWINLOG2);
constant FPEN : boolean := (fpu /= 0);
constant CPEN : boolean := (cp = 1);
constant SIMDEN : boolean := true; --marcmod: SIMD is enabled (simdmod = 1);
constant MULEN : boolean := (v8 /= 0);
constant MULTYPE: integer := (v8 / 16);
constant DIVEN : boolean := (v8 /= 0);
......@@ -1590,11 +1591,13 @@ begin
when IAND | ANDCC | ANDN | ANDNCC | IOR | ORCC | ORN | ORNCC | IXOR |
XORCC | IXNOR | XNORCC | ISLL | ISRL | ISRA | MULSCC | IADD | ADDX |
ADDCC | ADDXCC | ISUB | SUBX | SUBCC | SUBXCC | FLUSH | JMPL | TICC |
SAVE | RESTORE | RDY | SIMD => null;
SAVE | RESTORE | RDY => null;
when TADDCC | TADDCCTV | TSUBCC | TSUBCCTV =>
if notag = 1 then illegal_inst := '1'; end if;
when UMAC | SMAC =>
if not MACEN then illegal_inst := '1'; end if;
when SIMD | WRMSK => -- marcmod: add SIMD and WRMSK to nonillegal instruction if SIMD enabled
if not SIMDEN then illegal_inst := '1'; end if;
when UMUL | SMUL | UMULCC | SMULCC =>
if not MULEN then illegal_inst := '1'; end if;
when UDIV | SDIV | UDIVCC | SDIVCC =>
......@@ -1902,8 +1905,8 @@ end;
if DIVEN then y_check := '1'; nobp := op3(4); end if; -- no BP on divcc
when FPOP1 | FPOP2 => ldcheck1:= '0'; ldcheck2 := '0'; fins := BPRED;
when JMPL => call_hold := '1'; nobp := BPRED;
--marcmod
when SIMD =>
when SIMD => --marcmod: handle SIMD data bypass
-- no need to bypass data for WRMSK
insimd := '1';
ldchkra := '0'; ldchkex := '0';
when others =>
......@@ -4105,7 +4108,7 @@ begin
end if;
elsif MACEN and MACPIPE and ((not r.x.ctrl.annul and r.x.mac) = '1') then
xc_result := mulo.result(31 downto 0);
--marcmod
--marcmod: get result from the SIMD module
elsif (r.x.ctrl.simd and (not r.x.ctrl.annul)) = '1' then
xc_result := sdo.rc_data;
else xc_result := r.x.result; end if;
......@@ -4526,7 +4529,7 @@ begin
v.e.aluop, v.e.alusel, v.e.aluadd, v.e.shcnt, v.e.sari, v.e.shleft,
v.e.ymsb, v.e.mul, ra_div, v.e.mulstep, v.e.mac, v.e.ldbp2, v.e.invop2
);
--marcmod
--marcmod: configure SIMD module input
sdi.inst <= r.a.ctrl.inst;
sdi.ra <= ra_op1;
sdi.rb <= ra_op2;
......@@ -4534,6 +4537,12 @@ begin
sdi.rc_we <= r.a.ctrl.simd;
sdi.rc_addr <= r.a.ctrl.inst(29 downto 25);
-- Drive the SIMD mask-register write port from the instruction currently in
-- the register-access stage: assert mask_we when bits 24..19 of the
-- instruction equal WRMSK, and always forward the low four instruction bits
-- as the candidate mask value.
-- NOTE(review): only bits 24..19 are compared here; bits 31..30 and the
-- annul state are not checked, and unlike rc_we this is not gated by
-- r.a.ctrl.simd -- confirm that no other instruction with the same bit
-- pattern in 24..19 (or an annulled WRMSK) can update the mask.
if r.a.ctrl.inst(24 downto 19) = WRMSK then
sdi.mask_we <= '1';
else sdi.mask_we <= '0';
end if;
-- New mask value is taken from instruction bits 3..0.
sdi.mask_value <= r.a.ctrl.inst(3 downto 0);
cin_gen(r, v.m.icc(0), v.e.alucin);
bp_miss_ra(r, ra_bpmiss, de_bpannul);
v.e.bp := r.a.bp and not ra_bpmiss;
......@@ -4625,8 +4634,8 @@ begin
v.a.bp := v.a.bp and not v.a.ctrl.annul;
v.a.nobp := v.a.nobp and not v.a.ctrl.annul;
--marcmod
v.a.ctrl.simd := v.a.ctrl.simd and not v.a.ctrl.annul;
v.a.ctrl.simd := v.a.ctrl.simd and not v.a.ctrl.annul; --marcmod
v.a.ctrl.inst := de_inst;
......
......@@ -52,6 +52,8 @@ package libiu is
op : std_logic_vector (7 downto 0); -- operation code
rc_we : std_logic; -- we on destination (work)
rc_addr : std_logic_vector (4 downto 0); -- addr of destination
mask_we : std_logic; -- we on the mask register
mask_value : std_logic_vector (3 downto 0); -- new value for the mask
end record;
type simd_out_type is record
......
......@@ -174,7 +174,7 @@ begin
simd0 : simd
generic map (32,8,5)
port map (clk, rstn, holdnx, sdi.inst, sdi.ra, sdi.rb, sdi.op, sdi.rc_we, sdi.rc_addr,
sdo.rc_data, sdo.rc_we, sdo.rc_addr);
sdi.mask_we, sdi.mask_value, sdo.rc_data, sdo.rc_we, sdo.rc_addr);
-- multiply and divide units
......
......@@ -61,7 +61,7 @@ constant ANDN : op3_type := "000101";
constant ORN : op3_type := "000110";
constant IXNOR : op3_type := "000111";
constant ADDX : op3_type := "001000";
constant SIMD : op3_type := "001001";
constant SIMD : op3_type := "001001"; -- marcmod
constant UMUL : op3_type := "001010";
constant SMUL : op3_type := "001011";
constant SUBX : op3_type := "001100";
......@@ -76,6 +76,7 @@ constant ANDNCC : op3_type := "010101";
constant ORNCC : op3_type := "010110";
constant XNORCC : op3_type := "010111";
constant ADDXCC : op3_type := "011000";
constant WRMSK : op3_type := "011001"; --marcmod
constant UMULCC : op3_type := "011010";
constant SMULCC : op3_type := "011011";
constant SUBXCC : op3_type := "011100";
......
......@@ -62,6 +62,7 @@ architecture rtl of simd is
constant S1_SADD : std_logic_vector (4 downto 0) :="01101";
constant S1_SSUB : std_logic_vector (4 downto 0) :="01110";
constant S1_SMUL : std_logic_vector (4 downto 0) :="01111";
constant S1_MOVB : std_logic_vector (4 downto 0) :="10000";
constant S1_UMUL : std_logic_vector (4 downto 0) :="10011";
constant S1_UDIV : std_logic_vector (4 downto 0) :="10100";
......@@ -200,6 +201,9 @@ architecture rtl of simd is
when S1_NOP =>
rc.data <= ra.data;
when S1_MOVB =>
rc.data <= rb.data;
--addition and saturated addition
when S1_ADD =>
for i in 0 to (XLEN/VLEN)-1 loop
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment