Commit ecf5246b authored by Lorien's avatar Lorien
Browse files

Matrix multiplication integer version

parent 32cd23dc
......@@ -21,6 +21,7 @@ LDFLAGS=-lm
# MODULES
################################################################################
MODULES=hpc.matmul \
hpc.matmul_int \
pqs.mceliece \
pqs.mceliece_reduced \
wfa.compute \
......
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
static void matmul_builtins(int n, int bi, int bj, int bk, int64_t (* restrict c)[n],
int64_t (* restrict a)[n], int64_t (* restrict b)[n]) {
unsigned vl = bi;
for (int ii = 0; ii < n; ii += bi) {
for (int kk = 0; kk < n; kk += bk) {
for (int jj = 0; jj < n; jj += bj) {
// c[ii:ii+bi-1][jj:jj+bj-1] += a[ii:ii+bi-1][kk:kk+bk-1] *
// b[kk:kk+bk-1][jj:jj+bj-1];
for (int i = ii; i < ii + bi; i++) {
__epi_1xi64 v1 = __builtin_epi_vload_1xi64(&c[i][jj], vl);
__epi_1xi64 v2 = __builtin_epi_vload_1xi64(&a[i][kk], vl);
for (int k = kk; k < kk + bk; k++) {
__epi_1xi64 v3 = __builtin_epi_vload_1xi64(&b[k][jj], vl);
// v1 = v2 * v3 + v1
v3 = __builtin_epi_vmul_1xi64(v2, v3, vl);
v1 = __builtin_epi_vadd_1xi64(v1, v3, vl);
}
__builtin_epi_vstore_1xi64(&c[i][jj], v1, vl);
}
}
}
}
}
static void matmul_reference(int n, int64_t (*c)[n], int64_t (*a)[n], int64_t (*b)[n]) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
for (int k = 0; k < n; k++) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
}
typedef enum {
INIT_ALL_ONES,
INIT_IOTA,
} init_mode_t;
static void init_matrix(int n, int64_t (*a)[n], init_mode_t init_mode) {
switch (init_mode) {
case INIT_ALL_ONES:
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
a[i][j] = 1;
}
}
break;
case INIT_IOTA: {
int64_t v = 1;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
a[i][j] = v;
v += 1;
}
}
break;
}
}
}
static void zero_matrix(int n, int64_t (*a)[n]) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
a[i][j] = 0;
}
}
}
static void compare_results(int n, int64_t (*ref)[n], int64_t (*c)[n]) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (labs(ref[i][j] - c[i][j]) > 1e-9) {
printf("ERROR: reference [%d][%d] == %lu vs computed [%d][%d] == %lu\n", i,
j, ref[i][j], i, j, c[i][j]);
exit(EXIT_FAILURE);
}
}
}
printf("RESULT OK\n");
}
int main(int argc, char *argv[])
{
// FIXME: Make this a parameter read from the command line
int n = 64;
int64_t (*ref)[n] = malloc(sizeof(*ref) * n);
int64_t (*c)[n] = malloc(sizeof(*c) * n);
int64_t (*a)[n] = malloc(sizeof(*a) * n);
int64_t (*b)[n] = malloc(sizeof(*b) * n);
zero_matrix(n, c);
zero_matrix(n, ref);
// FIXME: Allow other ways of initializing
init_matrix(n, a, INIT_ALL_ONES);
init_matrix(n, b, INIT_ALL_ONES);
matmul_reference(n, ref, a, b);
unsigned long vl = __builtin_epi_vsetvl(n, __epi_e64, __epi_m1);
int bi = vl;//64;
int bj = vl;// 64;
int bk = vl;// 64;
matmul_builtins(n, bi, bj, bk, c, a, b);
compare_results(n, ref, c);
free(b);
free(a);
free(c);
free(ref);
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment