Commit f43c9199 authored by Marc's avatar Marc
Browse files

changes in the test software to include assembler instructions for the custom ISA extension

parent 7a9d3dcc
This diff is collapsed.
......@@ -26,10 +26,10 @@
//return r;
//}
int shift_and_add(int a);
asm("shift_and_add:");
asm("retl");
asm("srl %o0, %g1, %g1");
//int shift_and_add(int a);
//asm("shift_and_add:");
//asm("retl");
//asm("srl %o0, %g1, %g1");
__attribute__((optimize("unroll-loops")))
......
grayscale_simd.o: file format elf32-sparc
Disassembly of section .text:
00000000 <grayscale>:
0: 01 00 00 00 nop
4: 85 36 00 09 srl %i0, %o1, %g2
8: 01 00 00 00 nop
c: 84 02 24 00 add %o0, 0x400, %g2
10: c2 02 00 00 ld [ %o0 ], %g1
14: 83 38 60 02 sra %g1, 2, %g1
18: c2 2a 40 00 stb %g1, [ %o1 ]
1c: c2 02 20 04 ld [ %o0 + 4 ], %g1
20: 83 38 60 02 sra %g1, 2, %g1
24: c2 2a 60 01 stb %g1, [ %o1 + 1 ]
28: c2 02 20 08 ld [ %o0 + 8 ], %g1
2c: 83 38 60 02 sra %g1, 2, %g1
30: c2 2a 60 02 stb %g1, [ %o1 + 2 ]
34: c2 02 20 0c ld [ %o0 + 0xc ], %g1
38: 83 38 60 02 sra %g1, 2, %g1
3c: c2 2a 60 03 stb %g1, [ %o1 + 3 ]
40: c2 02 20 10 ld [ %o0 + 0x10 ], %g1
44: 83 38 60 02 sra %g1, 2, %g1
48: c2 2a 60 04 stb %g1, [ %o1 + 4 ]
4c: c2 02 20 14 ld [ %o0 + 0x14 ], %g1
50: 83 38 60 02 sra %g1, 2, %g1
54: c2 2a 60 05 stb %g1, [ %o1 + 5 ]
58: c2 02 20 18 ld [ %o0 + 0x18 ], %g1
5c: 83 38 60 02 sra %g1, 2, %g1
60: c2 2a 60 06 stb %g1, [ %o1 + 6 ]
64: c2 02 20 1c ld [ %o0 + 0x1c ], %g1
68: 83 38 60 02 sra %g1, 2, %g1
6c: c2 2a 60 07 stb %g1, [ %o1 + 7 ]
70: c2 02 20 20 ld [ %o0 + 0x20 ], %g1
74: 83 38 60 02 sra %g1, 2, %g1
78: c2 2a 60 08 stb %g1, [ %o1 + 8 ]
7c: c2 02 20 24 ld [ %o0 + 0x24 ], %g1
80: 83 38 60 02 sra %g1, 2, %g1
84: c2 2a 60 09 stb %g1, [ %o1 + 9 ]
88: c2 02 20 28 ld [ %o0 + 0x28 ], %g1
8c: 83 38 60 02 sra %g1, 2, %g1
90: c2 2a 60 0a stb %g1, [ %o1 + 0xa ]
94: c2 02 20 2c ld [ %o0 + 0x2c ], %g1
98: 83 38 60 02 sra %g1, 2, %g1
9c: c2 2a 60 0b stb %g1, [ %o1 + 0xb ]
a0: c2 02 20 30 ld [ %o0 + 0x30 ], %g1
a4: 83 38 60 02 sra %g1, 2, %g1
a8: c2 2a 60 0c stb %g1, [ %o1 + 0xc ]
ac: c2 02 20 34 ld [ %o0 + 0x34 ], %g1
b0: 83 38 60 02 sra %g1, 2, %g1
b4: c2 2a 60 0d stb %g1, [ %o1 + 0xd ]
b8: c2 02 20 38 ld [ %o0 + 0x38 ], %g1
bc: 83 38 60 02 sra %g1, 2, %g1
c0: c2 2a 60 0e stb %g1, [ %o1 + 0xe ]
c4: c2 02 20 3c ld [ %o0 + 0x3c ], %g1
c8: 83 38 60 02 sra %g1, 2, %g1
cc: c2 2a 60 0f stb %g1, [ %o1 + 0xf ]
d0: 90 02 20 40 add %o0, 0x40, %o0
d4: 80 a0 80 08 cmp %g2, %o0
d8: 12 bf ff ce bne 10 <grayscale+0x10>
dc: 92 02 60 10 add %o1, 0x10, %o1
e0: 01 00 00 00 nop
e4: 85 36 00 09 srl %i0, %o1, %g2
e8: 01 00 00 00 nop
ec: 81 c3 e0 08 retl
f0: 01 00 00 00 nop
000000f4 <print>:
f4: 9d e3 bf a0 save %sp, -96, %sp
f8: 94 10 20 10 mov 0x10, %o2
fc: 92 10 20 10 mov 0x10, %o1
100: 11 00 00 00 sethi %hi(0), %o0
104: 39 00 00 00 sethi %hi(0), %i4
108: 90 12 20 00 mov %o0, %o0
10c: 40 00 00 00 call 10c <print+0x18>
110: b6 06 21 00 add %i0, 0x100, %i3
114: b8 17 20 00 mov %i4, %i4
118: ba 10 20 00 clr %i5
11c: d6 0e 00 1d ldub [ %i0 + %i5 ], %o3
120: 90 10 00 1c mov %i4, %o0
124: 94 10 00 0b mov %o3, %o2
128: 40 00 00 00 call 128 <print+0x34>
12c: 92 10 00 0b mov %o3, %o1
130: ba 07 60 01 inc %i5
134: 80 a7 60 10 cmp %i5, 0x10
138: 32 bf ff fa bne,a 120 <print+0x2c>
13c: d6 0e 00 1d ldub [ %i0 + %i5 ], %o3
140: 40 00 00 00 call 140 <print+0x4c>
144: 90 10 20 0a mov 0xa, %o0
148: b0 06 20 10 add %i0, 0x10, %i0
14c: 80 a6 00 1b cmp %i0, %i3
150: 12 bf ff f3 bne 11c <print+0x28>
154: ba 10 20 00 clr %i5
158: 81 c7 e0 08 ret
15c: 81 e8 00 00 restore
Disassembly of section .text.startup:
00000000 <main>:
0: 9d e3 ba a0 save %sp, -1376, %sp
4: 94 10 24 00 mov 0x400, %o2
8: 90 07 bc 00 add %fp, -1024, %o0
c: 13 00 00 00 sethi %hi(0), %o1
10: 40 00 00 00 call 10 <main+0x10>
14: 92 12 60 00 mov %o1, %o1 ! 0 <main>
18: 90 07 bc 00 add %fp, -1024, %o0
1c: 40 00 00 00 call 1c <main+0x1c>
20: 92 07 bb 00 add %fp, -1280, %o1
24: b0 10 20 00 clr %i0
28: 40 00 00 00 call 28 <main+0x28>
2c: 90 07 bb 00 add %fp, -1280, %o0
30: 81 c7 e0 08 ret
34: 81 e8 00 00 restore
.text
.global grayscale
grayscale:
nop
This diff is collapsed.
This diff is collapsed.
#define IMAGE32 113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,251,242,54,0,237,205,49,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237,205,49,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237
,205,49,0,113,81,213,0,113,81,213,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,255,163,226,0,255,163,226,0,255,163,226,0,255,163,226,0,255,163,226,0,113,81,213,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,163,226,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,251,242,54,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,163,226,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,251,242,54,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,255,255,0,255,202,239,0,255,202,239,0,255,25
5,255,0,255,163,226,0,255,163,226,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,0,0,0,0,255,202,239,0,255,202,239,0,0,0,0,0,255,163,226,0,255,119,189,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,0,0,0,0,255,202,239,0,255,202,239,0,0,0,0,0,255,185,233,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,163,226,0,255,202,239,0,255,202,239,0,217,87,99,0,255,202,239,0,255,185,233,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,229,138,200,0,255,163,226,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,247,98,111,0,255,202,239,0,255,185,233,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,
213,0,217,87,99,0,217,87,99,0,217,87,99,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,202,239,0,255,149,222,0,255,149,222,0,255,149,222,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,217,87,99,0,237,94,107,0,237,94,107,0,237,94,107,0,217,87,99,0,255,202,239,0,255,202,239,0,255,202,239,0,255,149,222,0,255,180,232,0,255,180,232,0,255,180,232,0,255,154,223,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,217,87,99,0,237,94,107,0,237,94,107,0,237,94,107,0,237,94,107,0,255,128,139,0,229,138,200,0,229,138,200,0,229,138,200,0,255,149,222,0,255,180,232,0,255,180,232,0,255,180,232,0,229,138,200,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,113,81,213,0,113,81,213,0,113,81,213,0,113,81,213,0,251,242,54,0,217,87,99,0,237,94,107,0,237,94,107,0,255,128,139,0,255,128,139,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,229,138,200,0,229,138,200,0,229,138,200,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,113,81,213,0,113,81,213,0,113,81,213,0,251,242,54,0,255,250,141,0,237,94,107,0,237,94,107,0,255,128,139,0,255,128,139,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,
0,74,54,136,0,113,81,213,0,113,81,213,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,113,81,213,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,149,236,255,0,149,236,255,0,149,236,255,0,237,205,49,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,251,242,54,0,255,250,141,0,255,250,141,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,149,236,255,0,119,224,247,0,119,224,247,0,119,224,247,0,119,224,247,0,237,205,49,0,237,205,49,0,237,205,49,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,251,242,54,0,251,242,54,0,255,250,141,0,255,250,141,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,74,5
4,136,0,149,236,255,0,149,236,255,0,119,224,247,0,119,224,247,0,202,255,101,0,202,255,101,0,202,255,101,0,119,168,255,0,113,81,213,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,119,224,247,0,202,255,101,0,202,255,101,0,202,255,101,0,255,255,255,0,176,255,101,0,176,255,101,0,95,68,179,0,95,68,179,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,251,242,54,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,119,224,247,0,99,155,255,0,202,255,101,0,176,255,101,0,255,255,255,0,238,238,238,0,202,255,101,0,176,255,101,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,119,224,247,0,99,155,255,0,255,255,255,0,119,168,255,0,176,255,101,0,183,255,114,0,202,255,101,0,202,255,101,0,176,255,101,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,237,205,49,0,237,205,49,0,237,205,49,0,237,205,49,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,99,155,255,0,119,168,255,0,255,255,255,0,238,238,238,0,99,155,255,0,99,155,255,0,176,255,101,0,176,255,101,0,255,255,255,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,
0,74,54,136,0,149,236,255,0,119,224,247,0,119,224,247,0,99,155,255,0,119,168,255,0,119,168,255,0,119,168,255,0,119,168,255,0,99,155,255,0,176,255,101,0,176,255,101,0,238,238,238,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,99,155,255,0,119,168,255,0,202,255,101,0,202,255,101,0,99,155,255,0,119,168,255,0,255,255,255,0,99,155,255,0,176,255,101,0,202,255,101,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,255,255,255,0,255,255,255,0,176,255,101,0,176,255,101,0,176,255,101,0,255,255,255,0,238,238,238,0,119,168,255,0,99,155,255,0,176,255,101,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,95,68,179,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,74,54,136,0,149,236,255,0,119,224,247,0,119,168,255,0,238,238,238,0,176,255,101,0,202,255,101,0,99,155,255,0,119,168,255,0,238,238,238,0,119,168,255,0,119,168,255,0,99,155,255,0
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
/*
 * min(a, b): evaluates to the smaller of its two arguments.
 * Each argument is first copied into a local of its own type, so it is
 * evaluated exactly once even if it has side effects. When neither argument
 * compares smaller, the second one is the result.
 * Relies on the GNU statement-expression and __typeof__ extensions.
 */
#define min(a,b) \
    ({ __typeof__ (a) lhs_ = (a); \
       __typeof__ (b) rhs_ = (b); \
       (rhs_ > lhs_) ? lhs_ : rhs_; })
#ifndef N
#define N 4
#endif
/*
 * computeCell - multiply two matrix elements and saturate the product.
 *
 * a, b: the two factors.
 * Returns a * b (the low 32 bits of the signed product), capped at 255.
 * Results below 255, including negative ones, are returned unchanged.
 *
 * On SPARC the product is produced by an explicit "smul" instruction so the
 * instruction is guaranteed to appear in the generated code. On any other
 * target a plain C multiplication with the same wrap-around result is used,
 * which lets the test be built on a host machine for reference output.
 */
int computeCell(int a, int b){
    int r;
#if defined(__sparc__)
    /* "0"(b) places b in the output register, so this is r = a * b with the
       result overwriting the register that held b. __asm__ (rather than asm)
       keeps the code compiling in strict ISO modes such as -std=c11. */
    __asm__("smul %1, %0, %0"
        : "=r"(r)
        : "r"(a), "0"(b));
#else
    /* Unsigned arithmetic gives the same low 32 bits without signed-overflow UB. */
    r = (int)((unsigned)a * (unsigned)b);
#endif
    return r < 255 ? r : 255;
}
/*
 * computeSum - add two values and saturate the sum.
 *
 * a, b: the two addends.
 * Returns a + b (32-bit wrap-around), capped at 255. Results below 255,
 * including negative ones, are returned unchanged.
 *
 * On SPARC the sum is produced by an explicit "add" instruction; on any other
 * target an equivalent C addition is used so the test can be built on a host.
 */
int computeSum(int a, int b) {
    int r;
#if defined(__sparc__)
    /* "0"(b) places b in the output register: r = a + b, overwriting b's
       register. __asm__ keeps the code compiling in strict ISO modes. */
    __asm__("add %1, %0, %0"
        : "=r"(r)
        : "r"(a), "0"(b));
#else
    /* Unsigned arithmetic wraps like the instruction and avoids signed-overflow UB. */
    r = (int)((unsigned)a + (unsigned)b);
#endif
    return r < 255 ? r : 255;
}
int main()
{
char string[3*(3+(6*N*N+N))];
int pos = 0;
unsigned char A[N][N], B[N][N], C[N][N];
srand(N);
for(int i=0; i<N; i++)
for(int j=0; j<N; j++) {
A[i][j] = rand()%10;
B[j][i] = rand()%10;
}
int sum = 0;
int aux;
puts("TEST BEGIN");
asm("nop");
asm("srl %i0, %o1, %g2");
asm("nop");
for(int i=0; i<N; i++)
for(int j=0; j<N; j++){
for(int k=0; k<N; k++){
aux = computeCell(A[i][k],B[j][k]);
sum = computeSum(sum, aux);
}
C[i][j] = sum;
sum = 0;
}
asm("nop");
asm("srl %i0, %o1, %g2");
asm("nop");
puts("TEST END");
pos += sprintf(&string[pos],"A:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", A[i][j]);
pos+=sprintf(&string[pos],"\n");
}
pos += sprintf(&string[pos],"B:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", B[i][j]);
pos+=sprintf(&string[pos],"\n");
}
pos += sprintf(&string[pos],"C:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", C[i][j]);
pos+=sprintf(&string[pos],"\n");
}
puts(string);
puts("END OF SIMULATION");
}
mat_mul.o: file format elf32-sparc
Disassembly of section .text:
00000000 <computeCell>:
0: 92 5a 00 09 smul %o0, %o1, %o1
4: 80 a2 60 ff cmp %o1, 0xff
8: 34 80 00 02 bg,a 10 <computeCell+0x10>
c: 92 10 20 ff mov 0xff, %o1
10: 81 c3 e0 08 retl
14: 90 10 00 09 mov %o1, %o0
00000018 <computeSum>:
18: 92 02 00 09 add %o0, %o1, %o1
1c: 80 a2 60 ff cmp %o1, 0xff
20: 34 80 00 02 bg,a 28 <computeSum+0x10>
24: 92 10 20 ff mov 0xff, %o1
28: 81 c3 e0 08 retl
2c: 90 10 00 09 mov %o1, %o0
Disassembly of section .text.startup:
00000000 <main>:
0: 9d e3 be 38 save %sp, -456, %sp
4: 90 10 20 04 mov 4, %o0
8: b4 07 be a8 add %fp, -344, %i2
c: 40 00 00 00 call c <main+0xc>
10: b2 07 be 98 add %fp, -360, %i1
14: ba 10 00 19 mov %i1, %i5
18: b8 10 00 1a mov %i2, %i4
1c: b0 10 00 1c mov %i4, %i0
20: b6 10 20 00 clr %i3
24: 40 00 00 00 call 24 <main+0x24>
28: 01 00 00 00 nop
2c: 83 3a 20 1f sra %o0, 0x1f, %g1
30: 81 80 60 00 wr %g1, %y
34: 01 00 00 00 nop
38: 01 00 00 00 nop
3c: 01 00 00 00 nop
40: 84 7a 20 0a sdiv %o0, 0xa, %g2
44: 83 28 a0 02 sll %g2, 2, %g1
48: 82 00 40 02 add %g1, %g2, %g1
4c: 82 00 40 01 add %g1, %g1, %g1
50: 90 22 00 01 sub %o0, %g1, %o0
54: 40 00 00 00 call 54 <main+0x54>
58: d0 2f 40 1b stb %o0, [ %i5 + %i3 ]
5c: 83 3a 20 1f sra %o0, 0x1f, %g1
60: 81 80 60 00 wr %g1, %y
64: 01 00 00 00 nop
68: 01 00 00 00 nop
6c: 01 00 00 00 nop
70: 84 7a 20 0a sdiv %o0, 0xa, %g2
74: 83 28 a0 02 sll %g2, 2, %g1
78: 82 00 40 02 add %g1, %g2, %g1
7c: 82 00 40 01 add %g1, %g1, %g1
80: 90 22 00 01 sub %o0, %g1, %o0
84: d0 2e 00 00 stb %o0, [ %i0 ]
88: b6 06 e0 01 inc %i3
8c: 80 a6 e0 04 cmp %i3, 4
90: 12 bf ff e5 bne 24 <main+0x24>
94: b0 06 20 04 add %i0, 4, %i0
98: ba 07 60 04 add %i5, 4, %i5
9c: 80 a7 40 1a cmp %i5, %i2
a0: 12 bf ff df bne 1c <main+0x1c>
a4: b8 07 20 01 inc %i4
a8: 11 00 00 00 sethi %hi(0), %o0
ac: 40 00 00 00 call ac <main+0xac>
b0: 90 12 20 00 mov %o0, %o0 ! 0 <main>
b4: 01 00 00 00 nop
b8: 85 36 00 09 srl %i0, %o1, %g2
bc: 01 00 00 00 nop
c0: b0 07 be b8 add %fp, -328, %i0
c4: a0 10 00 19 mov %i1, %l0
c8: a6 10 00 18 mov %i0, %l3
cc: a2 06 a0 10 add %i2, 0x10, %l1
d0: b6 10 00 1a mov %i2, %i3
d4: a4 10 00 13 mov %l3, %l2
d8: ba 10 20 00 clr %i5
dc: b8 10 20 00 clr %i4
e0: d2 0e c0 1d ldub [ %i3 + %i5 ], %o1
e4: 40 00 00 00 call e4 <main+0xe4>
e8: d0 0c 00 1d ldub [ %l0 + %i5 ], %o0
ec: 92 10 00 08 mov %o0, %o1
f0: 40 00 00 00 call f0 <main+0xf0>
f4: 90 10 00 1c mov %i4, %o0
f8: ba 07 60 01 inc %i5
fc: 80 a7 60 04 cmp %i5, 4
100: 12 bf ff f8 bne e0 <main+0xe0>
104: b8 10 00 08 mov %o0, %i4
108: d0 2c 80 00 stb %o0, [ %l2 ]
10c: b6 06 e0 04 add %i3, 4, %i3
110: 80 a6 c0 11 cmp %i3, %l1
114: 12 bf ff f1 bne d8 <main+0xd8>
118: a4 04 a0 01 inc %l2
11c: a0 04 20 04 add %l0, 4, %l0
120: 80 a4 00 1a cmp %l0, %i2
124: 12 bf ff eb bne d0 <main+0xd0>
128: a6 04 e0 04 add %l3, 4, %l3
12c: 01 00 00 00 nop
130: 85 36 00 09 srl %i0, %o1, %g2
134: 01 00 00 00 nop
138: 11 00 00 00 sethi %hi(0), %o0
13c: 40 00 00 00 call 13c <main+0x13c>
140: 90 12 20 00 mov %o0, %o0 ! 0 <main>
144: 21 00 00 00 sethi %hi(0), %l0
148: 05 10 4e 82 sethi %hi(0x413a0800), %g2
14c: b8 07 be c8 add %fp, -312, %i4
150: 84 10 a2 00 or %g2, 0x200, %g2
154: a4 14 20 00 mov %l0, %l2
158: 82 10 20 03 mov 3, %g1
15c: a6 10 20 0a mov 0xa, %l3
160: c4 27 be c8 st %g2, [ %fp + -312 ]
164: ba 10 00 01 mov %g1, %i5
168: b6 10 20 00 clr %i3
16c: d4 0e 40 1b ldub [ %i1 + %i3 ], %o2
170: 90 07 00 1d add %i4, %i5, %o0
174: 40 00 00 00 call 174 <main+0x174>
178: 92 10 00 12 mov %l2, %o1
17c: b6 06 e0 01 inc %i3
180: 80 a6 e0 04 cmp %i3, 4
184: 12 bf ff fa bne 16c <main+0x16c>
188: ba 07 40 08 add %i5, %o0, %i5
18c: e6 2f 00 1d stb %l3, [ %i4 + %i5 ]
190: 82 07 00 1d add %i4, %i5, %g1
194: c0 28 60 01 clrb [ %g1 + 1 ]
198: b2 06 60 04 add %i1, 4, %i1
19c: 80 a6 40 1a cmp %i1, %i2
1a0: 12 bf ff f1 bne 164 <main+0x164>
1a4: 82 07 60 01 add %i5, 1, %g1
1a8: 84 10 20 42 mov 0x42, %g2
1ac: c4 2f 00 01 stb %g2, [ %i4 + %g1 ]
1b0: 82 07 00 01 add %i4, %g1, %g1
1b4: 84 10 20 3a mov 0x3a, %g2
1b8: e6 28 60 02 stb %l3, [ %g1 + 2 ]
1bc: c4 28 60 01 stb %g2, [ %g1 + 1 ]
1c0: c0 28 60 03 clrb [ %g1 + 3 ]
1c4: b2 14 20 00 mov %l0, %i1
1c8: 82 07 60 04 add %i5, 4, %g1
1cc: a4 10 20 0a mov 0xa, %l2
1d0: ba 10 00 01 mov %g1, %i5
1d4: b6 10 20 00 clr %i3
1d8: d4 0e 80 1b ldub [ %i2 + %i3 ], %o2
1dc: 90 07 00 1d add %i4, %i5, %o0
1e0: 40 00 00 00 call 1e0 <main+0x1e0>
1e4: 92 10 00 19 mov %i1, %o1
1e8: b6 06 e0 01 inc %i3
1ec: 80 a6 e0 04 cmp %i3, 4
1f0: 12 bf ff fa bne 1d8 <main+0x1d8>
1f4: ba 07 40 08 add %i5, %o0, %i5
1f8: e4 2f 00 1d stb %l2, [ %i4 + %i5 ]
1fc: 82 07 00 1d add %i4, %i5, %g1
200: c0 28 60 01 clrb [ %g1 + 1 ]
204: b4 06 a0 04 add %i2, 4, %i2
208: 80 a6 80 11 cmp %i2, %l1
20c: 12 bf ff f1 bne 1d0 <main+0x1d0>
210: 82 07 60 01 add %i5, 1, %g1
214: 84 10 20 43 mov 0x43, %g2
218: c4 2f 00 01 stb %g2, [ %i4 + %g1 ]
21c: ba 07 60 04 add %i5, 4, %i5
220: 82 07 00 01 add %i4, %g1, %g1
224: 84 10 20 3a mov 0x3a, %g2
228: e4 28 60 02 stb %l2, [ %g1 + 2 ]
22c: c4 28 60 01 stb %g2, [ %g1 + 1 ]
230: c0 28 60 03 clrb [ %g1 + 3 ]
234: b2 06 20 10 add %i0, 0x10, %i1
238: a0 14 20 00 mov %l0, %l0
23c: b4 10 20 0a mov 0xa, %i2
240: b6 10 20 00 clr %i3
244: d4 0e 00 1b ldub [ %i0 + %i3 ], %o2
248: 90 07 00 1d add %i4, %i5, %o0
24c: 40 00 00 00 call 24c <main+0x24c>
250: 92 10 00 10 mov %l0, %o1
254: b6 06 e0 01 inc %i3
258: 80 a6 e0 04 cmp %i3, 4
25c: 12 bf ff fa bne 244 <main+0x244>
260: ba 07 40 08 add %i5, %o0, %i5
264: f4 2f 00 1d stb %i2, [ %i4 + %i5 ]
268: 82 07 00 1d add %i4, %i5, %g1
26c: c0 28 60 01 clrb [ %g1 + 1 ]
270: b0 06 20 04 add %i0, 4, %i0
274: 80 a6 40 18 cmp %i1, %i0
278: 12 bf ff f2 bne 240 <main+0x240>
27c: ba 07 60 01 inc %i5
280: 40 00 00 00 call 280 <main+0x280>
284: 90 10 00 1c mov %i4, %o0
288: b0 10 20 00 clr %i0
28c: 11 00 00 00 sethi %hi(0), %o0
290: 40 00 00 00 call 290 <main+0x290>
294: 90 12 20 00 mov %o0, %o0 ! 0 <main>
298: 81 c7 e0 08 ret
29c: 81 e8 00 00 restore
This diff is collapsed.
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#ifndef N
#define N 4
#endif
/*
 * computeCell - multiply two matrix elements.
 *
 * a, b: the two factors.
 * Returns the low 32 bits of the signed product a * b (no saturation).
 *
 * On SPARC the product is produced by an explicit "smul" instruction so the
 * instruction is guaranteed to appear in the generated code. On any other
 * target a plain C multiplication with the same wrap-around result is used,
 * which lets the test be built on a host machine for reference output.
 */
int computeCell(int a, int b){
    int r;
#if defined(__sparc__)
    /* "0"(b) places b in the output register, so this is r = a * b with the
       result overwriting the register that held b. __asm__ (rather than asm)
       keeps the code compiling in strict ISO modes such as -std=c11. */
    __asm__("smul %1, %0, %0"
        : "=r"(r)
        : "r"(a), "0"(b));
#else
    /* Unsigned arithmetic gives the same low 32 bits without signed-overflow UB. */
    r = (int)((unsigned)a * (unsigned)b);
#endif
    return r;
}
/*
 * computeSum - add two values.
 *
 * a, b: the two addends.
 * Returns a + b with 32-bit wrap-around (no saturation).
 *
 * On SPARC the sum is produced by an explicit "add" instruction; on any other
 * target an equivalent C addition is used so the test can be built on a host.
 */
int computeSum(int a, int b) {
    int r;
#if defined(__sparc__)
    /* "0"(b) places b in the output register: r = a + b, overwriting b's
       register. __asm__ keeps the code compiling in strict ISO modes. */
    __asm__("add %1, %0, %0"
        : "=r"(r)
        : "r"(a), "0"(b));
#else
    /* Unsigned arithmetic wraps like the instruction and avoids signed-overflow UB. */
    r = (int)((unsigned)a + (unsigned)b);
#endif
    return r;
}
int main()
{
char string[3*(3+(6*N*N+N))];
int pos = 0;
unsigned int A[N][N], B[N][N], C[N][N];
srand(N);
for(int i=0; i<N; i++)
for(int j=0; j<N; j++) {
A[i][j] = rand()%10;
B[j][i] = rand()%10;
}
int sum = 0;
int aux;
puts("TEST BEGIN");
asm("nop");
asm("srl %i0, %o1, %g2");
asm("nop");
for(int i=0; i<N; i++)
for(int j=0; j<N; j++){
for(int k=0; k<N; k++){
aux = computeCell(A[i][k],B[j][k]);
sum = computeSum(sum, aux);
}
C[i][j] = sum;
sum = 0;
}
asm("nop");
asm("srl %i0, %o1, %g2");
asm("nop");
puts("TEST END");
pos += sprintf(&string[pos],"A:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", A[i][j]);
pos+=sprintf(&string[pos],"\n");
}
pos += sprintf(&string[pos],"B:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", B[i][j]);
pos+=sprintf(&string[pos],"\n");
}
pos += sprintf(&string[pos],"C:\n");
for(int i=0; i<N; i++){
for(int j=0; j<N; j++)
pos+=sprintf(&string[pos],"%d ", C[i][j]);
pos+=sprintf(&string[pos],"\n");
}
puts(string);
puts("END OF SIMULATION");
}
mat_mul_int.o: file format elf32-sparc
Disassembly of section .text:
00000000 <computeCell>:
0: 92 5a 00 09 smul %o0, %o1, %o1
4: 81 c3 e0 08 retl
8: 90 10 00 09 mov %o1, %o0
0000000c <computeSum>:
c: 92 02 00 09 add %o0, %o1, %o1
10: 81 c3 e0 08 retl
14: 90 10 00 09 mov %o1, %o0
Disassembly of section .text.startup:
00000000 <main>:
0: 03 3f ff e1 sethi %hi(0xffff8400), %g1
4: 82 10 63 30 or %g1, 0x330, %g1 ! ffff8730 <computeSum+0xffff8724>
8: 9d e3 80 01 save %sp, %g1, %sp
c: 90 10 20 20 mov 0x20, %o0
10: 31 3f ff e1 sethi %hi(0xffff8400), %i0
14: 33 3f ff e5 sethi %hi(0xffff9400), %i1
18: b0 16 23 90 or %i0, 0x390, %i0
1c: b2 16 63 90 or %i1, 0x390, %i1
20: b0 07 80 18 add %fp, %i0, %i0
24: 40 00 00 00 call 24 <main+0x24>
28: b2 07 80 19 add %fp, %i1, %i1
2c: b6 26 30 00 sub %i0, -4096, %i3
30: b8 10 00 19 mov %i1, %i4
34: a0 10 00 18 mov %i0, %l0
38: ba 04 20 80 add %l0, 0x80, %i5
3c: b4 10 00 1c mov %i4, %i2
40: 40 00 00 00 call 40 <main+0x40>
44: 01 00 00 00 nop
48: 83 3a 20 1f sra %o0, 0x1f, %g1
4c: 81 80 60 00 wr %g1, %y
50: 01 00 00 00 nop
54: 01 00 00 00 nop
58: 01 00 00 00 nop
5c: 84 7a 20 0a sdiv %o0, 0xa, %g2
60: 83 28 a0 02 sll %g2, 2, %g1
64: 82 00 40 02 add %g1, %g2, %g1
68: 82 00 40 01 add %g1, %g1, %g1
6c: 82 22 00 01 sub %o0, %g1, %g1
70: 40 00 00 00 call 70 <main+0x70>
74: c2 24 00 00 st %g1, [ %l0 ]
78: 83 3a 20 1f sra %o0, 0x1f, %g1
7c: 81 80 60 00 wr %g1, %y
80: 01 00 00 00 nop
84: 01 00 00 00 nop
88: 01 00 00 00 nop
8c: 84 7a 20 0a sdiv %o0, 0xa, %g2
90: 83 28 a0 02 sll %g2, 2, %g1
94: 82 00 40 02 add %g1, %g2, %g1
98: 82 00 40 01 add %g1, %g1, %g1
9c: 82 22 00 01 sub %o0, %g1, %g1
a0: c2 26 80 00 st %g1, [ %i2 ]
a4: a0 04 20 04 add %l0, 4, %l0
a8: 80 a4 00 1d cmp %l0, %i5
ac: 12 bf ff e5 bne 40 <main+0x40>
b0: b4 06 a0 80 add %i2, 0x80, %i2