Commit d00a16af authored by Marc's avatar Marc
Browse files

minor change for add sign

parent f6a63cc7
......@@ -506,6 +506,9 @@ ghdl-wave: $(SIMTOP)
ghdl-vcd: $(SIMTOP)
./$(SIMTOP) $(GHDLRUNOPT) --vcd=$(SIMTOP).vcd
ghdl-fst: $(SIMTOP)
./$(SIMTOP) $(GHDLRUNOPT) --fst=$(SIMTOP).fst --read-wave-opt=signals.txt
ghdl-time: $(SIMTOP)
./$(SIMTOP) $(GHDLRUNOPT)
......
This diff is collapsed.
......@@ -483,7 +483,7 @@ begin
s1_mux(r.s1.op1, s1_alusel);
-- S1 TO S2 --
for i in vector_reg_type'range loop
add_res(i) := add(rs1(i), rs2(i), r.s1.op1(3) and (not r.s1.op1(4)), r.s1.op1(3));
add_res(i) := add(rs1(i), rs2(i), not r.s1.op2(2) or (r.s1.op1(3) and (not r.s1.op1(4))), r.s1.op1(3));
sub_res(i) := sub(rs1(i), rs2(i), not r.s1.op1(4), r.s1.op1(3));
mul_res(i) := extend(lpmulo(i).mul_res, not r.s1.op1(4), high_prec_component'length);
max_res(i) := max(rs1(i), rs2(i), not r.s1.op1(4));
......
simd_test.o: file format elf32-sparc
Disassembly of section .text:
00000000 <main>:
0: 9d e3 bf 90 save %sp, -112, %sp
4: 03 00 40 80 sethi %hi(0x1020000), %g1
8: 82 10 63 04 or %g1, 0x304, %g1 ! 1020304 <main+0x1020304>
c: c2 27 bf fc st %g1, [ %fp + -4 ]
10: 03 00 00 40 sethi %hi(0x10000), %g1
14: 82 10 62 03 or %g1, 0x203, %g1 ! 10203 <main+0x10203>
18: c2 27 bf f8 st %g1, [ %fp + -8 ]
1c: c4 07 bf fc ld [ %fp + -4 ], %g2
20: c2 07 bf f8 ld [ %fp + -8 ], %g1
24: 82 48 80 00 pass_ %g2, %g1
28: c2 27 bf f4 st %g1, [ %fp + -12 ]
2c: d2 07 bf f4 ld [ %fp + -12 ], %o1
30: 03 00 00 00 sethi %hi(0), %g1
34: 90 10 60 00 mov %g1, %o0 ! 0 <main>
38: 40 00 00 00 call 38 <main+0x38>
3c: 01 00 00 00 nop
40: c4 07 bf fc ld [ %fp + -4 ], %g2
44: c2 07 bf f8 ld [ %fp + -8 ], %g1
48: 82 48 80 21 add_ %g2, %g1, %g1
4c: c2 27 bf f4 st %g1, [ %fp + -12 ]
50: d2 07 bf f4 ld [ %fp + -12 ], %o1
54: 03 00 00 00 sethi %hi(0), %g1
58: 90 10 60 00 mov %g1, %o0 ! 0 <main>
5c: 40 00 00 00 call 5c <main+0x5c>
60: 01 00 00 00 nop
64: 03 20 60 3f sethi %hi(0x8180fc00), %g1
68: 82 10 63 01 or %g1, 0x301, %g1 ! 8180ff01 <main+0x8180ff01>
6c: c2 27 bf fc st %g1, [ %fp + -4 ]
70: 03 20 7f df sethi %hi(0x81ff7c00), %g1
74: 82 10 63 7f or %g1, 0x37f, %g1 ! 81ff7f7f <main+0x81ff7f7f>
78: c2 27 bf f8 st %g1, [ %fp + -8 ]
7c: c4 07 bf fc ld [ %fp + -4 ], %g2
80: c2 07 bf f8 ld [ %fp + -8 ], %g1
84: 82 48 81 a1 sadd_ %g2, %g1, %g1
88: c2 27 bf f4 st %g1, [ %fp + -12 ]
8c: d2 07 bf f4 ld [ %fp + -12 ], %o1
90: 03 00 00 00 sethi %hi(0), %g1
94: 90 10 60 00 mov %g1, %o0 ! 0 <main>
98: 40 00 00 00 call 98 <main+0x98>
9c: 01 00 00 00 nop
a0: 03 02 82 82 sethi %hi(0xa0a0800), %g1
a4: 82 10 62 0a or %g1, 0x20a, %g1 ! a0a0a0a <main+0xa0a0a0a>
a8: c2 27 bf fc st %g1, [ %fp + -4 ]
ac: 03 00 01 42 sethi %hi(0x50800), %g1
b0: 82 10 62 0b or %g1, 0x20b, %g1 ! 50a0b <main+0x50a0b>
b4: c2 27 bf f8 st %g1, [ %fp + -8 ]
b8: c4 07 bf fc ld [ %fp + -4 ], %g2
bc: c2 07 bf f8 ld [ %fp + -8 ], %g1
c0: 82 48 80 41 sub_ %g2, %g1, %g1
c4: c2 27 bf f4 st %g1, [ %fp + -12 ]
c8: d2 07 bf f4 ld [ %fp + -12 ], %o1
cc: 03 00 00 00 sethi %hi(0), %g1
d0: 90 10 60 00 mov %g1, %o0 ! 0 <main>
d4: 40 00 00 00 call d4 <main+0xd4>
d8: 01 00 00 00 nop
dc: 03 20 1f c2 sethi %hi(0x807f0800), %g1
e0: 82 10 62 fb or %g1, 0x2fb, %g1 ! 807f0afb <main+0x807f0afb>
e4: c2 27 bf fc st %g1, [ %fp + -4 ]
e8: 03 01 7f fe sethi %hi(0x5fff800), %g1
ec: 82 10 63 0a or %g1, 0x30a, %g1 ! 5fffb0a <main+0x5fffb0a>
f0: c2 27 bf f8 st %g1, [ %fp + -8 ]
f4: c4 07 bf fc ld [ %fp + -4 ], %g2
f8: c2 07 bf f8 ld [ %fp + -8 ], %g1
fc: 82 48 81 c1 ssub_ %g2, %g1, %g1
100: c2 27 bf f4 st %g1, [ %fp + -12 ]
104: d2 07 bf f4 ld [ %fp + -12 ], %o1
108: 03 00 00 00 sethi %hi(0), %g1
10c: 90 10 60 00 mov %g1, %o0 ! 0 <main>
110: 40 00 00 00 call 110 <main+0x110>
114: 01 00 00 00 nop
118: 03 00 81 02 sethi %hi(0x2040800), %g1
11c: 82 10 60 0a or %g1, 0xa, %g1 ! 204080a <main+0x204080a>
120: c2 27 bf fc st %g1, [ %fp + -4 ]
124: 03 08 10 20 sethi %hi(0x20408000), %g1
128: 82 10 60 a0 or %g1, 0xa0, %g1 ! 204080a0 <main+0x204080a0>
12c: c2 27 bf f8 st %g1, [ %fp + -8 ]
130: c4 07 bf fc ld [ %fp + -4 ], %g2
134: c2 07 bf f8 ld [ %fp + -8 ], %g1
138: 82 48 88 a1 max_max %g2, %g1, %g1
13c: c2 27 bf f4 st %g1, [ %fp + -12 ]
140: d2 07 bf f4 ld [ %fp + -12 ], %o1
144: 03 00 00 00 sethi %hi(0), %g1
148: 90 10 60 00 mov %g1, %o0 ! 0 <main>
14c: 40 00 00 00 call 14c <main+0x14c>
150: 01 00 00 00 nop
154: c4 07 bf fc ld [ %fp + -4 ], %g2
158: c2 07 bf f8 ld [ %fp + -8 ], %g1
15c: 82 48 9a a1 umax_umax %g2, %g1, %g1
160: c2 27 bf f4 st %g1, [ %fp + -12 ]
164: d2 07 bf f4 ld [ %fp + -12 ], %o1
168: 03 00 00 00 sethi %hi(0), %g1
16c: 90 10 60 00 mov %g1, %o0 ! 0 <main>
170: 40 00 00 00 call 170 <main+0x170>
174: 01 00 00 00 nop
178: c4 07 bf fc ld [ %fp + -4 ], %g2
17c: c2 07 bf f8 ld [ %fp + -8 ], %g1
180: 82 48 9e c1 umin_umin %g2, %g1, %g1
184: c2 27 bf f4 st %g1, [ %fp + -12 ]
188: d2 07 bf f4 ld [ %fp + -12 ], %o1
18c: 03 00 00 00 sethi %hi(0), %g1
190: 90 10 60 00 mov %g1, %o0 ! 0 <main>
194: 40 00 00 00 call 194 <main+0x194>
198: 01 00 00 00 nop
19c: c4 07 bf fc ld [ %fp + -4 ], %g2
1a0: c2 07 bf f8 ld [ %fp + -8 ], %g1
1a4: 82 48 8c c1 min_min %g2, %g1, %g1
1a8: c2 27 bf f4 st %g1, [ %fp + -12 ]
1ac: d2 07 bf f4 ld [ %fp + -12 ], %o1
1b0: 03 00 00 00 sethi %hi(0), %g1
1b4: 90 10 60 00 mov %g1, %o0 ! 0 <main>
1b8: 40 00 00 00 call 1b8 <main+0x1b8>
1bc: 01 00 00 00 nop
1c0: 03 00 40 80 sethi %hi(0x1020000), %g1
1c4: 82 10 63 04 or %g1, 0x304, %g1 ! 1020304 <main+0x1020304>
1c8: c2 27 bf fc st %g1, [ %fp + -4 ]
1cc: 03 00 00 40 sethi %hi(0x10000), %g1
1d0: 82 10 62 03 or %g1, 0x203, %g1 ! 10203 <main+0x10203>
1d4: c2 27 bf f8 st %g1, [ %fp + -8 ]
1d8: c4 07 bf fc ld [ %fp + -4 ], %g2
1dc: c2 07 bf f8 ld [ %fp + -8 ], %g1
1e0: 82 48 84 61 dot %g2, %g1, %g1
1e4: c2 27 bf f4 st %g1, [ %fp + -12 ]
1e8: d2 07 bf f4 ld [ %fp + -12 ], %o1
1ec: 03 00 00 00 sethi %hi(0), %g1
1f0: 90 10 60 00 mov %g1, %o0 ! 0 <main>
1f4: 40 00 00 00 call 1f4 <main+0x1f4>
1f8: 01 00 00 00 nop
1fc: 03 3f ff 80 sethi %hi(0xfffe0000), %g1
200: 82 10 63 fc or %g1, 0x3fc, %g1 ! fffe03fc <main+0xfffe03fc>
204: c2 27 bf fc st %g1, [ %fp + -4 ]
208: 03 00 3f c0 sethi %hi(0xff0000), %g1
20c: 82 10 62 03 or %g1, 0x203, %g1 ! ff0203 <main+0xff0203>
210: c2 27 bf f8 st %g1, [ %fp + -8 ]
214: c4 07 bf fc ld [ %fp + -4 ], %g2
218: c2 07 bf f8 ld [ %fp + -8 ], %g1
21c: 82 48 84 61 dot %g2, %g1, %g1
220: c2 27 bf f4 st %g1, [ %fp + -12 ]
224: d2 07 bf f4 ld [ %fp + -12 ], %o1
228: 03 00 00 00 sethi %hi(0), %g1
22c: 90 10 60 00 mov %g1, %o0 ! 0 <main>
230: 40 00 00 00 call 230 <main+0x230>
234: 01 00 00 00 nop
238: 03 1f df ff sethi %hi(0x7f7ffc00), %g1
23c: 82 10 61 ff or %g1, 0x1ff, %g1 ! 7f7ffdff <main+0x7f7ffdff>
240: c2 27 bf fc st %g1, [ %fp + -4 ]
244: 03 1f ff df sethi %hi(0x7fff7c00), %g1
248: 82 10 63 80 or %g1, 0x380, %g1 ! 7fff7f80 <main+0x7fff7f80>
24c: c2 27 bf f8 st %g1, [ %fp + -8 ]
250: c4 07 bf fc ld [ %fp + -4 ], %g2
254: c2 07 bf f8 ld [ %fp + -8 ], %g1
258: 82 48 81 e1 smul_ %g2, %g1, %g1
25c: c2 27 bf f4 st %g1, [ %fp + -12 ]
260: d2 07 bf f4 ld [ %fp + -12 ], %o1
264: 03 00 00 00 sethi %hi(0), %g1
268: 90 10 60 00 mov %g1, %o0 ! 0 <main>
26c: 40 00 00 00 call 26c <main+0x26c>
270: 01 00 00 00 nop
274: 03 37 ab 6f sethi %hi(0xdeadbc00), %g1
278: 82 10 62 af or %g1, 0x2af, %g1 ! deadbeaf <main+0xdeadbeaf>
27c: c2 27 bf fc st %g1, [ %fp + -4 ]
280: c4 07 bf fc ld [ %fp + -4 ], %g2
284: c2 07 bf f8 ld [ %fp + -8 ], %g1
288: 82 48 81 42 nand_ %g2, %g2, %g1
28c: c2 27 bf f4 st %g1, [ %fp + -12 ]
290: d2 07 bf f4 ld [ %fp + -12 ], %o1
294: 03 00 00 00 sethi %hi(0), %g1
298: 90 10 60 00 mov %g1, %o0 ! 0 <main>
29c: 40 00 00 00 call 29c <main+0x29c>
2a0: 01 00 00 00 nop
2a4: 03 3f bb 72 sethi %hi(0xfeedc800), %g1
2a8: 82 10 62 fe or %g1, 0x2fe, %g1 ! feedcafe <main+0xfeedcafe>
2ac: c2 27 bf fc st %g1, [ %fp + -4 ]
2b0: c4 07 bf fc ld [ %fp + -4 ], %g2
2b4: c2 07 bf f8 ld [ %fp + -8 ], %g1
2b8: 82 48 90 00 nop_xor %g2, %g1
2bc: c2 27 bf f4 st %g1, [ %fp + -12 ]
2c0: d2 07 bf f4 ld [ %fp + -12 ], %o1
2c4: 03 00 00 00 sethi %hi(0), %g1
2c8: 90 10 60 00 mov %g1, %o0 ! 0 <main>
2cc: 40 00 00 00 call 2cc <main+0x2cc>
2d0: 01 00 00 00 nop
2d4: 03 00 00 00 sethi %hi(0), %g1
2d8: 90 10 60 00 mov %g1, %o0 ! 0 <main>
2dc: 40 00 00 00 call 2dc <main+0x2dc>
2e0: 01 00 00 00 nop
2e4: 82 10 20 00 clr %g1 ! 0 <main>
2e8: b0 10 00 01 mov %g1, %i0
2ec: 81 e8 00 00 restore
2f0: 81 c3 e0 08 retl
2f4: 01 00 00 00 nop
This source diff could not be displayed because it is too large. You can view the blob instead.
#cycles TestName MatSize (simd|orig) speedup
6.441.041 Grayscale 256x256 (simd) 2.1X
13.548.090 Grayscale 256x256 (orig) 1X
138.909.319 Matrix Multiplication 120x120 (simd) 5.1X
709.738.072 Matrix Multiplication 120x120 (orig) 1X
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment