Commit f43c9199 authored by Marc's avatar Marc
Browse files

changes in the test software to include assembler instructions for the custom ISA extension

parent 7a9d3dcc
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#ifndef N
#define N 8
#endif
/*
 * Hand-written leaf routines that expose two instructions of the custom ISA
 * extension to C code.
 *
 * Both take their arguments in %o0/%o1 and leave the result in %o0.  Each
 * body is "retl" followed by the custom instruction: on SPARC the
 * instruction after retl sits in the delay slot, so it executes before
 * control reaches the caller.
 *
 *   computeCell(a, b): %o0 = usdot(a, b)
 *   computeSum(a, b):  %o0 = usadd_usum(a, b)
 *
 * NOTE(review): the exact semantics of usdot / usadd_usum are defined by the
 * ISA extension and are not visible in this file -- confirm against its spec.
 *
 * Both routines live in ONE asm statement: the compiler does not promise to
 * keep separate asm statements adjacent, and a label separated from its
 * retl/delay-slot pair would be a broken function.  .pushsection/.popsection
 * place the code in .text at a 4-byte boundary without disturbing the
 * section the compiler is currently emitting into.
 */
int computeCell(int a, int b);
int computeSum(int a, int b);
asm(".pushsection \".text\"\n\t"
    ".align 4\n"
    "computeCell:\n\t"
    "retl\n\t"
    "usdot %o0, %o1, %o0\n"
    "computeSum:\n\t"
    "retl\n\t"
    "usadd_usum %o0, %o1, %o0\n\t"
    ".popsection");
/*
 * Matrix-product benchmark for the custom SIMD instructions.
 *
 * Fills two N x N byte matrices with pseudo-random values in 0..9 (seeded
 * with N, so runs are reproducible), computes C from A and B with
 * computeCell()/computeSum() four bytes at a time, and prints A, B and C.
 *
 * B is stored transposed (B[j][i]) so that a column of the logical matrix is
 * contiguous and can be fetched as packed 32-bit words.
 *
 * Requirements: N must be a multiple of 4.  Rows are consumed in groups of
 * four bytes (N/4 iterations), and only then does every row start on a
 * 4-byte boundary.
 */
int main()
{
    /*
     * 32-bit view of four packed bytes.  may_alias makes it legal to read
     * the unsigned char matrices through this type; a plain int * would
     * violate the strict-aliasing rules the optimiser relies on.
     */
    typedef int __attribute__((may_alias)) packed4;

    char string[3*(3+(6*N*N+N))];
    int pos = 0;
    /* A and B are read one word at a time, so force word alignment instead
       of relying on where the compiler happens to place byte arrays. */
    unsigned char A[N][N] __attribute__((aligned(4)));
    unsigned char B[N][N] __attribute__((aligned(4)));
    unsigned char C[N][N];

    srand(N);
    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++) {
            A[i][j] = rand()%10;
            B[j][i] = rand()%10;
        }

    int sum = 0;
    int aux;

    puts("TEST BEGIN");
    /* nop / srl / nop marker sequence.
       NOTE(review): presumably recognised by the testbench as the start of
       the measured region -- confirm. */
    asm("nop");
    asm("srl %i0, %o1, %g2");
    asm("nop");

    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++){
            for(int k=0; k<N/4; k++){
                /* four bytes of row i of A against four bytes of row j of B */
                aux = computeCell(*((const packed4 *) &A[i][k*4]),
                                  *((const packed4 *) &B[j][k*4]));
                sum = computeSum(aux, sum);
            }
            /* only the low 8 bits of the accumulated value are kept */
            C[i][j] = sum;
            sum = 0;
        }

    /* same marker sequence, closing the measured region */
    asm("nop");
    asm("srl %i0, %o1, %g2");
    asm("nop");
    puts("TEST END");

    /* Format everything into one buffer and emit it with a single puts(). */
    pos += sprintf(&string[pos],"A:\n");
    for(int i=0; i<N; i++){
        for(int j=0; j<N; j++)
            pos+=sprintf(&string[pos],"%d ", A[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    pos += sprintf(&string[pos],"B:\n");
    for(int j=0; j<N; j++){
        for(int i=0; i<N; i++)
            pos+=sprintf(&string[pos],"%d ", B[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    pos += sprintf(&string[pos],"C:\n");
    for(int i=0; i<N; i++){
        for(int j=0; j<N; j++)
            pos+=sprintf(&string[pos],"%d ", C[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    puts(string);
    puts("END OF SIMULATION");
}
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#define N 4
/*
 * Hand-written leaf routine exposing the custom "usdot" instruction to C.
 *
 * Arguments arrive in %o0/%o1 and the result is left in %o0.  The body is
 * "retl" followed by usdot: on SPARC the instruction after retl sits in the
 * delay slot, so it executes before control reaches the caller.
 *
 * NOTE(review): the exact semantics of usdot are defined by the ISA
 * extension and are not visible in this file -- confirm against its spec.
 *
 * Label, retl and delay-slot instruction are kept in ONE asm statement: the
 * compiler does not promise to keep separate asm statements adjacent.
 * .pushsection/.popsection place the code in .text at a 4-byte boundary
 * without disturbing the section the compiler is currently emitting into.
 */
int computeCell(int a, int b);
asm(".pushsection \".text\"\n\t"
    ".align 4\n"
    "computeCell:\n\t"
    "retl\n\t"
    "usdot %o0, %o1, %o0\n\t"
    ".popsection");
/*
 * 4 x 4 variant of the matrix-product benchmark: with N fixed to 4 every row
 * is exactly one packed 32-bit word, so each element of C is produced by a
 * single computeCell() call.
 *
 * Fills A and B with pseudo-random values in 0..9 (seeded with N, so runs
 * are reproducible), computes C, and prints A, B and C.  B is stored
 * transposed (B[j][i]) so that a column of the logical matrix is contiguous.
 */
int main()
{
    /*
     * 32-bit view of four packed bytes.  may_alias makes it legal to read
     * the unsigned char matrices through this type; a plain int * would
     * violate the strict-aliasing rules the optimiser relies on.
     */
    typedef int __attribute__((may_alias)) packed4;

    char string[3*(3+(6*N*N+N))];
    int pos = 0;
    /* A and B are read one word at a time, so force word alignment instead
       of relying on where the compiler happens to place byte arrays. */
    unsigned char A[N][N] __attribute__((aligned(4)));
    unsigned char B[N][N] __attribute__((aligned(4)));
    unsigned char C[N][N];

    srand(N);
    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++) {
            A[i][j] = rand()%10;
            B[j][i] = rand()%10;
        }

    puts("TEST BEGIN");
    /* nop / srl / nop marker sequence.
       NOTE(review): presumably recognised by the testbench as the start of
       the measured region -- confirm. */
    asm("nop");
    asm("srl %i0, %o1, %g2");
    asm("nop");

    for(int i=0; i<N; i++)
        for(int j=0; j<N; j++){
            /* whole row i of A against whole row j of B; only the low 8 bits
               of the result are kept */
            C[i][j] = computeCell(*((const packed4 *) &A[i][0]),
                                  *((const packed4 *) &B[j][0]));
        }

    /* same marker sequence, closing the measured region */
    asm("nop");
    asm("srl %i0, %o1, %g2");
    asm("nop");
    puts("TEST END");

    /* Format everything into one buffer and emit it with a single puts(). */
    pos += sprintf(&string[pos],"A:\n");
    for(int i=0; i<N; i++){
        for(int j=0; j<N; j++)
            pos+=sprintf(&string[pos],"%d ", A[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    pos += sprintf(&string[pos],"B:\n");
    for(int j=0; j<N; j++){
        for(int i=0; i<N; i++)
            pos+=sprintf(&string[pos],"%d ", B[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    pos += sprintf(&string[pos],"C:\n");
    for(int i=0; i<N; i++){
        for(int j=0; j<N; j++)
            pos+=sprintf(&string[pos],"%d ", C[i][j]);
        pos+=sprintf(&string[pos],"\n");
    }

    puts(string);
    puts("END OF SIMULATION");
}
# Builds the directed SIMD tests with the sparc-gaisler-elf cross toolchain.
#
# Every test target compiles one C file at -O0, links it, converts the
# executable to S-record format, installs it as $(CURRENT_DIR)/test.srec and
# writes a disassembly of the object file next to the sources.
#
# CURRENT_DIR is not defined in this file.
# NOTE(review): presumably supplied by the invoking make or the environment
# -- confirm; if empty, the cp commands target /test.srec.
XCC=sparc-gaisler-elf-gcc #$(XINC)
XCFLAGS0=-O0 -g -msoft-float -mcpu=v8
XCFLAGS=-O2 -g -msoft-float -mcpu=v8 -fno-inline

# None of these targets creates a file with its own name.  Declaring them
# phony keeps a stray file called e.g. "clean" or "simd_test" from silently
# turning the target into a no-op.
.PHONY: simd_test simd_swizling simd_mask simd_imm clean

simd_test: simd_test.c
	$(XCC) $(XCFLAGS0) -c simd_test.c
	$(XCC) $(XCFLAGS0) simd_test.o -o simd_test.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_test.exe simd_test.srec
	cp simd_test.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_test.o > simd_test.dump

simd_swizling: simd_swizling.c
	$(XCC) $(XCFLAGS0) -c simd_swizling.c
	$(XCC) $(XCFLAGS0) simd_swizling.o -o simd_swizling.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_swizling.exe simd_swizling.srec
	cp simd_swizling.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_swizling.o > simd_swizling.dump

# Note: the target is simd_mask but the source file is simd_mask_test.c.
simd_mask: simd_mask_test.c
	$(XCC) $(XCFLAGS0) -c simd_mask_test.c
	$(XCC) $(XCFLAGS0) simd_mask_test.o -o simd_mask_test.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_mask_test.exe simd_mask_test.srec
	cp simd_mask_test.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_mask_test.o > simd_mask_test.dump

simd_imm: simd_imm.c
	$(XCC) $(XCFLAGS0) -c simd_imm.c
	$(XCC) $(XCFLAGS0) simd_imm.o -o simd_imm.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_imm.exe simd_imm.srec
	cp simd_imm.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_imm.o > simd_imm.dump

# Removes every generated artefact in this directory, including all *.srec.
clean:
	rm -f *.exe *.o *.dump *.srec
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Simple directed test of the immediate-operand forms of the custom SIMD
// instructions (add_, sadd_, sub_, ssub_ with the immediate 1).
// Original author's note: compiler support is not provided, so the srec
// file has to be modified by hand.
// NOTE(review): this file now uses the custom mnemonics directly, so the
// hand-patching step may no longer apply -- confirm.
//
// Each case stores an input in `a`, runs one custom instruction through
// inline assembly and prints `c` next to the expected value.
//
// The assembly does not use C operands: it loads from [%fp - 4] and stores
// to [%fp - 8].
// NOTE(review): this presumably relies on the compiler placing `a` at
// %fp-4 and `c` at %fp-8 (the Makefile builds this file at -O0); `c` is
// never assigned from C, so any change in frame layout breaks the test --
// confirm against the generated dump.
int main()
{
// `b` is declared but never referenced in this function.
int a,b,c;
// input for the add test
a = 0x01020304;
//test add: the expected value is every byte of a incremented by 1
__asm__("ld [%fp + -4], %g1");
__asm__("add_ %g1, 1, %g1");
__asm__("st %g1, [ %fp + -8 ]");
printf("add +1: c=%#010x, expected result 0x02030405\n", c);
//test sadd: per the expected value, 0x7f stays 0x7f instead of wrapping
a=0x7f7e8000;
__asm__("ld [%fp + -4], %g1");
__asm__("sadd_ %g1, 1, %g1");
__asm__("st %g1, [ %fp + -8 ]");
printf("sadd+1: c=%#010x, expected result 0x7f7f8101\n", c);
//test sub: the expected value is every byte of a decremented by 1
a=0x01020304;
__asm__("ld [%fp + -4], %g1");
__asm__("sub_ %g1, 1, %g1");
__asm__("st %g1, [ %fp + -8 ]");
printf("sub-1: c=%#010x, expected result 0x00010203\n", c);
//test ssub: per the expected value, 0x80 stays 0x80 instead of wrapping
a=0x80817f00;
__asm__("ld [%fp + -4], %g1");
__asm__("ssub_ %g1, 1, %g1");
__asm__("st %g1, [ %fp + -8 ]");
printf("ssub-1: c=%#010x, expected result 0x80807eff\n", c);
puts("END OF TEST");
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Simple directed test of the register-register forms of the custom SIMD
// instructions.
// Original author's note: compiler support is not provided, so the srec
// file has to be modified by hand.
// NOTE(review): this file now uses the custom mnemonics directly, so the
// hand-patching step may no longer apply -- confirm.
//
// Each case stores inputs in `a` and `b`, runs one custom instruction
// through inline assembly and prints `c` next to the expected value.
//
// The assembly does not use C operands: it loads %g2 from [%fp - 4], %g1
// from [%fp - 8] and stores %g1 to [%fp - 12].
// NOTE(review): this presumably relies on the compiler placing `a`, `b`
// and `c` at %fp-4, %fp-8 and %fp-12 (the Makefile builds this file at
// -O0); `c` is never assigned from C, so any change in frame layout breaks
// the test -- confirm against the generated dump.
int main()
{
int a,b,c;
//test nop (pass-through): expected result equals a
a = 0x01020304;
b = 0x00010203;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("pass_ %g2, %g1");
__asm__("st %g1, [ %fp + -12 ]");
// original author's note: nop 82488001
// NOTE(review): presumably the instruction encoding -- confirm
printf("NOP (move) c=%#010x, expected 0x01020304\n", c);
//test add: same a and b as above; expected value is the byte-wise sum
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("add_ %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("add: c=%#010x, expected result 0x01030507\n", c);
//test sadd: expected bytes clamp at 0x80 / 0x7f instead of wrapping
a=0x8180ff01;
b=0x81ff7f7f;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("sadd_ %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("sadd: c=%#010x, expected result 0x80807e7f\n", c);
//test sub: byte-wise a - b; the last byte wraps (0x0a - 0x0b = 0xff)
a=0x0a0a0a0a;
b=0x00050a0b;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("sub_ %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("sub: c=%#010x, expected result 0x0a0500ff\n", c);
//test ssub: expected bytes clamp at 0x80 / 0x7f instead of wrapping
a=0x807f0afb;
b=0x05fffb0a;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("ssub_ %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("ssub: c=%#010x, expected result 0x807f0ff1\n", c);
//test Max MAX signed: expected value is the largest byte of a and b when
//bytes are read as signed (0x40)
// these a and b values are reused by the three tests that follow
a=0x0204080a;
b=0x204080a0;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("max_max %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("signed max max: c=%#010x, expected result 0x00000040\n", c);
//test Max MAX unsigned: largest byte when read as unsigned (0xa0)
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("umax_umax %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("unsigned max max: c=%#010x, expected result 0x000000a0\n", c);
//test Min MIN unsigned: smallest byte when read as unsigned (0x02)
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("umin_umin %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("unsigned min min: c=%#010x, expected result 0x00000002\n", c);
//test Min MIN signed: smallest byte when read as signed (0x80),
//sign-extended to 32 bits in the expected value
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("min_min %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("signed min min: c=%#010x, expected result 0xffffff80\n", c);
//test dot product: 1*0 + 2*1 + 3*2 + 4*3 = 20 = 0x14
a=0x01020304;
b=0x00010203;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("dot %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("dot product: c=%#010x, expected result 0x00000014\n", c);
//test dot product2: signed bytes, (-1*0) + (-2*-1) + (3*2) + (-4*3) = -4
a=0xfffe03fc;
b=0x00ff0203;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("dot %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("dot product2: c=%#010x, expected result 0xfffffffc\n", c);
//test saturated mul: byte-wise signed products clamped to 0x80..0x7f
a=0x7f7ffdff;
b=0x7fff7f80;
//c=0x7f81807f
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("smul_ %g2, %g1, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("saturate mul: c=%#010x, expected result 0x7f81807f\n",c);
//test nand as not (a nand a = !a)
// %g1 is still loaded from b, but both source operands of nand_ are %g2
a=0xdeadbeaf;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nand_ %g2, %g2, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("nand: c=%#010x, expected result 0x21524150\n", c);
//test xor reduction: 0xfe ^ 0xed ^ 0xca ^ 0xfe = 0x27
a=0xfeedcafe;
__asm__("ld [%fp + -4], %g2");
__asm__("ld [%fp + -8], %g1");
__asm__("nop_xor %g2, %g1");
__asm__("st %g1, [ %fp + -12 ]");
printf("xor reduce: c=%#010x, expected result 0x00000027\n", c);
puts("END OF TEST");
}
# Builds the LEON3 test programs with the sparc-gaisler-elf cross toolchain.
#
# Most targets compile and link one program, convert it to S-record format,
# install it as $(CURRENT_DIR)/test.srec and write a disassembly.  Targets
# that use instructions the compiler does not know compile a dummy
# instruction instead and then run bin_change / bin_change2 on the srec file,
# passing a <name>.list file and the hex word of the dummy instruction.
#
# Variables expected from the caller (not defined in this file):
#   CURRENT_DIR  directory that receives test.srec
#   par1         value passed as -D N=... to the parameterised benchmarks
# NOTE(review): if par1 is empty the compiler receives "-D N=", which
# defines N as an empty token -- confirm callers always set it.
#XINC=-I../../../software/leon3 -I../../../software/l2c
XCC=sparc-gaisler-elf-gcc #$(XINC)
XCFLAGS=-O2 -g -msoft-float -mcpu=v8 -fno-inline
XCFLAGS0=-O0 -g -msoft-float -mcpu=v8

# Targets that never create a file with their own name.  Declaring them
# phony keeps a stray file called e.g. "clean" or "hello" from silently
# turning the target into a no-op.
.PHONY: systest hello simd_test simd_swizling simd_imm simd_mask \
        mat_mul grayscale grayscale_simd mat_mul_int \
        mat_mul_simd mat_mul_simd4 mat_mul_simd32 clean

%.o: %.c
	$(XCC) $(XCFLAGS) -c $<

%.o: %.S
	$(XCC) $(XCFLAGS) -c $<

# Host-side patching tools.  "make bin_change" still builds both tools, but
# bin_change2 is now a target of its own so that rules which run it can
# depend on it directly.
bin_change: bin_change.cc bin_change2
	g++ -o bin_change bin_change.cc

bin_change2: bin_change2.cc
	g++ -o bin_change2 bin_change2.cc

make.x:
	g++ -o make.x make_simd_op.cc

systest:
	cp systest.srec $(CURRENT_DIR)/test.srec

hello: hello.o
	$(XCC) $(XCFLAGS) hello.o -o hello.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 hello.exe hello.srec
	cp hello.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d hello.o > hello.dump

simd_test: simd_test.c bin_change
	$(XCC) $(XCFLAGS0) -c simd_test.c
	$(XCC) $(XCFLAGS0) simd_test.o -o simd_test.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_test.exe tmp.simd_test.srec
	./bin_change tmp.simd_test.srec simd_test.list simd_test.srec 83288001
	cp simd_test.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_test.o > simd_test.dump

simd_swizling: simd_swizling.c bin_change
	$(XCC) $(XCFLAGS0) -c simd_swizling.c
	$(XCC) $(XCFLAGS0) simd_swizling.o -o simd_swizling.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_swizling.exe tmp.simd_swizling.srec
	./bin_change tmp.simd_swizling.srec simd_swizling.list simd_swizling.srec 83288001
	cp simd_swizling.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_swizling.o > simd_swizling.dump

simd_imm: simd_imm.c bin_change
	$(XCC) $(XCFLAGS0) -c simd_imm.c
	$(XCC) $(XCFLAGS0) simd_imm.o -o simd_imm.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_imm.exe tmp.simd_imm.srec
	./bin_change tmp.simd_imm.srec simd_imm.list simd_imm.srec 83288001
	cp simd_imm.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_imm.o > simd_imm.dump

# Note: the target is simd_mask but the source file is simd_mask_test.c.
simd_mask: simd_mask_test.c bin_change
	$(XCC) $(XCFLAGS0) -c simd_mask_test.c
	$(XCC) $(XCFLAGS0) simd_mask_test.o -o simd_mask_test.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 simd_mask_test.exe tmp.simd_mask_test.srec
	./bin_change tmp.simd_mask_test.srec simd_mask_test.list simd_mask_test.srec 83288001
	cp simd_mask_test.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d simd_mask_test.o > simd_mask_test.dump

mat_mul: mat_mul.c
	$(XCC) $(XCFLAGS) -D N=$(par1) -c $<
	$(XCC) $(XCFLAGS) mat_mul.o -o mat_mul.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul.exe mat_mul.srec
	cp mat_mul.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d mat_mul.o > mat_mul.dump

grayscale: grayscale.c arrays.h image256.h image32.h
	$(XCC) $(XCFLAGS) -D N=$(par1) -c $<
	$(XCC) $(XCFLAGS) grayscale.o -o grayscale.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 grayscale.exe grayscale.srec
	cp grayscale.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d grayscale.o > grayscale.dump

# The recipe runs ./bin_change2, so the tool must be a prerequisite;
# otherwise a fresh checkout fails at the patching step.
grayscale_simd: grayscale_simd.c arrays.h image256.h image32.h bin_change2
	$(XCC) $(XCFLAGS) -D N=$(par1) -c $<
	$(XCC) $(XCFLAGS) grayscale_simd.o -o grayscale_simd.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 grayscale_simd.exe tmp.grayscale_simd.srec
	./bin_change2 tmp.grayscale_simd.srec grayscale_simd.list grayscale_simd.srec 83386002
	cp grayscale_simd.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d grayscale_simd.o > grayscale_simd.dump

mat_mul_int: mat_mul_int.c
	$(XCC) $(XCFLAGS) -D N=$(par1) -c $<
	$(XCC) $(XCFLAGS) mat_mul_int.o -o mat_mul_int.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul_int.exe mat_mul_int.srec
	cp mat_mul_int.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d mat_mul_int.o > mat_mul_int.dump

# NOTE(review): the second patching pass uses mat_mul_simd32.list3 rather
# than a mat_mul_simd list -- confirm this is intentional.
mat_mul_simd: mat_mul_simd.c bin_change
	$(XCC) $(XCFLAGS) -D N=$(par1) -c mat_mul_simd.c
	$(XCC) $(XCFLAGS) mat_mul_simd.o -o mat_mul_simd.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul_simd.exe tmp.mat_mul_simd.srec
	./bin_change tmp.mat_mul_simd.srec mat_mul_simd.list tmp2.mat_mul_simd.srec 925A0009
	./bin_change tmp2.mat_mul_simd.srec mat_mul_simd32.list3 mat_mul_simd.srec 92020009
	cp mat_mul_simd.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d mat_mul_simd.o > mat_mul_simd.dump

mat_mul_simd4: mat_mul_simd4.o bin_change
	$(XCC) $(XCFLAGS) mat_mul_simd4.o -o mat_mul_simd4.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul_simd4.exe tmp.mat_mul_simd4.srec
	./bin_change tmp.mat_mul_simd4.srec mat_mul_simd4.list mat_mul_simd4.srec 925A0009
	cp mat_mul_simd4.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d mat_mul_simd4.o > mat_mul_simd4.dump

mat_mul_simd32: mat_mul_simd32.o bin_change
	$(XCC) $(XCFLAGS) mat_mul_simd32.o -o mat_mul_simd32.exe
	sparc-gaisler-elf-objcopy -O srec --gap-fill 0 mat_mul_simd32.exe tmp.mat_mul_simd32.srec
	./bin_change tmp.mat_mul_simd32.srec mat_mul_simd32.list tmp2.mat_mul_simd32.srec 925A0009
	./bin_change tmp2.mat_mul_simd32.srec mat_mul_simd32.list2 tmp3.mat_mul_simd32.srec 84784001
	./bin_change tmp3.mat_mul_simd32.srec mat_mul_simd32.list3 mat_mul_simd32.srec 92020009
	cp mat_mul_simd32.srec $(CURRENT_DIR)/test.srec
	sparc-gaisler-elf-objdump -d mat_mul_simd32.o > mat_mul_simd32.dump

# Removes build products and the host tools; final *.srec files are kept.
clean:
	rm -f *.exe *.o *.dump bin_change2 bin_change make.x tmp*.*
Directory containing the testing utilities for the leon3mp design.
The program to be executed must be in the file test.srec, in .srec format.
A makefile is provided with the commands to generate this srec file:
run "make _test_name" to generate all the necessary files.
To use instructions not supported by the compiler, the bin_change and make_simd_op
programs are included. Use make_simd_op (whose executable is make.x) to
generate the hex encoding of the SIMD instructions.
--TODO-- allow input to be operation name
Include a list of all the new instructions (only the ones not supported by the
compiler), in the corresponding order. This list must be in _test_name.list.
When calling bin_change, pass this list together with the output file (usually
test.srec) and the dummy instruction used in the C code to compile.
For the existing tests, all these mechanisms are applied when executing "make _test_name".
/*
 * Selects the initialiser list IMAGE_ARRAY according to the compile-time
 * image size N.  The small cases below are written out inline; the N == 32
 * case and the fallback expand to IMAGE32 / IMAGE256.
 * NOTE(review): IMAGE32 and IMAGE256 are presumably defined by the two
 * headers included here -- confirm.
 */
#include "image256.h"
#include "image32.h"
/* Default image size when the build does not pass -D N=... */
#ifndef N
#define N 2
#endif
#if N == 2
#define IMAGE_ARRAY 255,0,0,0, 255,255,0,0, 255,255,0,0, 255,0,0,0
#elif N == 4
#define IMAGE_ARRAY 255,255,255,0,255,0,0,0,255,0,0,0,255,255,255,0,255,0,0,0,255,178,127,0,0,19,127,0,255,255,255,0,255,255,255,0,0,19,127,0,255,216,0,0,0,19,127,0,255,255,255,0,127,51,0,0,0,19,127,0,127,51,0,0
#elif N == 8
#define IMAGE_ARRAY 66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,66,165,245,0,248,187,208,0,248,187,208,0,248,187,208,0,248,187,208,0,66,165,245,0,66,165,245,0,66,165,245,0,248,187,208,0,248,187,208,0,248,187,208,0,248,187,208,0,248,187,208,0,248,187,208,0,66,165,245,0,66,165,245,0,244,143,177,0,248,187,208,0,0,0,0,0,248,187,208,0,0,0,0,0,248,187,208,0,66,165,245,0,244,143,177,0,248,187,208,0,248,187,208,0,26,35,126,0,248,187,208,0,26,35,126,0,248,187,208,0,244,143,177,0,244,143,177,0,244,143,177,0,240,98,146,0,248,187,208,0,248,187,208,0,248,187,208,0,240,98,146,0,244,143,177,0,66,165,245,0,173,20,87,0,244,143,177,0,244,143,177,0,248,187,208,0,248,187,208,0,173,20,87,0,66,165,245,0,76,175,80,0,173,20,87,0,194,24,91,0,194,24,91,0,76,175,80,0,173,20,87,0,194,24,91,0,76,175,80,0
#elif N == 16
#define IMAGE_ARRAY 96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,229,57,53,0,229,57,53,0,255,255,255,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,229,57,53,0,255,255,255,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,0,0,0,0,229,57,53,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,0,0,0,0,0,0,0,0,229,57,53,0,229,57,53,0,229,57,53,0,0,0,0,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,229,57,53,0,229,57,53,0,0,0,0,0,255,255,255,0,255,255,255,0,255,255,255,0,0,0,0,0,229,57,53,0,229,57,53,0,0,0,0,0,96,125,139,0,96,125,139,0,0,0,0,0,158,158,158,0,255,255,255,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,255,255,255,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,255,255,255,0,255,255,255,0,255,255,255,0,0,0,0,0,0,0,0,0,255,255,255,0,255,255,255,0,255,255,255,0,0,
0,0,0,255,255,255,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,158,158,158,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,0,255,255,255,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,158,158,158,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,158,158,158,0,158,158,158,0,255,255,255,0,255,255,255,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0,96,125,139,0
#elif N == 32
#define IMAGE_ARRAY IMAGE32
#else
#define IMAGE_ARRAY IMAGE256
#endif
#include <stdio.h>
#include <stdlib.h>
#include "arrays.h"
#include "grayscale_simd.s"
#ifndef N
#define N 2
#endif
// Disabled experiment kept by the original author: a leaf routine wrapping
// a single srl instruction.
//int shift_and_add(int a);
//asm("shift_and_add:");
//asm("retl");
//asm("srl %o0, %g1, %g1");
// Conversion routine under test; it reads `src` and writes `dst`.
// It is not defined in the C code of this file.
// NOTE(review): presumably provided by grayscale_simd.s, which is included
// above -- confirm.
extern void grayscale(unsigned char src[4][N][N], unsigned char dst[N][N]);
/*
 * Writes the N x N single-channel image `src` to stdout as a plain-text
 * "P3" PPM: a header with the dimensions and a maximum value of 255, then
 * one text line per image row in which every sample is repeated three times
 * (once per colour component).
 */
void print(unsigned char src[N][N]) {
    printf("P3\n%d %d\n255\n",N,N);
    for (int row = 0; row < N; row++) {
        const unsigned char *line = src[row];
        for (int col = 0; col < N; col++) {
            const int gray = line[col];
            printf("%d %d %d ", gray, gray, gray);
        }
        printf("\n");
    }
}
/*
 * Converts the built-in test image with grayscale() and prints the result
 * as a PPM via print().
 *
 * Both buffers have static storage duration: together they take 5*N*N
 * bytes (320 KiB for the 256 x 256 image), which is too much to place
 * safely on the stack of a small embedded target.
 */
int main(){
    static unsigned char source[4][N][N] = {IMAGE_ARRAY};
    static unsigned char dest[N][N];

    grayscale(source, dest);
    print(dest);
}
grayscale.o: file format elf32-sparc
Disassembly of section .text:
00000000 <grayscale>:
0: 01 00 00 00 nop
4: 85 36 00 09 srl %i0, %o1, %g2
8: 01 00 00 00 nop
c: 84 02 24 00 add %o0, 0x400, %g2
10: c2 0a 20 01 ldub [ %o0 + 1 ], %g1
14: c8 0a 00 00 ldub [ %o0 ], %g4
18: c6 0a 20 02 ldub [ %o0 + 2 ], %g3
1c: 89 31 20 02 srl %g4, 2, %g4
20: 87 30 e0 02 srl %g3, 2, %g3
24: 83 30 60 02 srl %g1, 2, %g1
28: 82 00 40 04 add %g1, %g4, %g1
2c: 82 00 40 03 add %g1, %g3, %g1
30: c2 2a 40 00 stb %g1, [ %o1 ]
34: c2 0a 20 05 ldub [ %o0 + 5 ], %g1
38: c8 0a 20 04 ldub [ %o0 + 4 ], %g4
3c: c6 0a 20 06 ldub [ %o0 + 6 ], %g3
40: 89 31 20 02 srl %g4, 2, %g4
44: 87 30 e0 02 srl %g3, 2, %g3
48: 83 30 60 02 srl %g1, 2, %g1
4c: 82 00 40 04 add %g1, %g4, %g1
50: 82 00 40 03 add %g1, %g3, %g1
54: c2 2a 60 01 stb %g1, [ %o1 + 1 ]