Commit 3b03f7ea authored by Santiago Marco-Sola's avatar Santiago Marco-Sola
Browse files

Added Smith-Waterman benchmark

parent 3be38913
###############################################################################
# Compiler & Flags
###############################################################################
EPI_TOOLCHAIN=/shared_folder/llvm-EPI-0.7-development-toolchain-native/
CC=$(EPI_TOOLCHAIN)/bin/clang
MARCH=-march=rv64imafd -mcmodel=medany
CC_FLAGS=-g -std=gnu99 -O2 -mepi -fno-vectorize -Wall -Rpass=loop-vectorize $(MARCH)
CC_FLAGS+=-Wno-implicit-function-declaration -fno-builtin-printf -fno-builtin -ffreestanding
LD_FLAGS=-fno-builtin-printf -fno-builtin -ffreestanding
LD_FLAGS+=-nostdlib -static -nostartfiles -lc -lgcc -mabi=lp64 $(MARCH)
LD_FLAGS+=-Wl,-T ../common/test.ld
###############################################################################
# Compile rules
###############################################################################
COMMON=../common/syscalls.s ../common/pmu.s ../common/crt.S
SRCS=align_benchmark.c \
cigar.c \
mm_allocator.c \
score_matrix.c \
sw.c
all: local
local: $(SRCS)
gcc -O3 -g $(SRCS) -o align_benchmark.local
asm: $(SRCS)
# Generate ASM
$(CC) $(CC_FLAGS) -DPMU -S $(SRCS)
epi:
# EPI
$(CC) $(CC_FLAGS) $(SRCS) -o align_benchmark.epi
sargantana:
# Sargantana
$(CC) $(CC_FLAGS) $(LD_FLAGS) $(COMMON) -DPMU $(SRCS) -o align_benchmark.sargantana
clean:
rm -rf *.s *.local *.epi *.spike *.sargantana &> /dev/null
/*
* The MIT License
*
* Wavefront Alignments Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola <santiagomsola@gmail.com>
*
* This file is part of Wavefront Alignments Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignments Algorithms
* AUTHOR(S): Santiago Marco-Sola <santiagomsola@gmail.com>
* DESCRIPTION: Wavefront Alignment benchmarking tool
*/
// DEBUG
#ifdef DISABLE_PRINT
#define printf(fmt, ...)
#define fprintf(fmt, ...)
#endif
// PMU
#ifdef PMU
#include "../common/pmu.h"
#define PROFILE_RESET() reset_pmu()
#define PROFILE_START() enable_PMU_32b()
#define PROFILE_STOP() disable_PMU_32b()
#define PROFILE_PRINT() print_PMU_events()
#else
#define PROFILE_RESET()
#define PROFILE_START()
#define PROFILE_STOP()
#define PROFILE_PRINT()
#endif
// Includes
#include "commons.h"
#include "cigar.h"
#include "sw.h"
// Input
typedef struct {
char *pattern;
char *text;
int64_t text_length;
int64_t pattern_length;
int64_t score;
} input_sequence_t;
#define MAX_SEQUENCE_LENGTH 1100
// SELECT INPUT AND REPETITIONS HERE
#define REPS 1
#include "data/sequences.n1K.l1K.h"
//#include "data/sequences.n10K.l100.h"
// Memory
#define MM_ALLOCATOR_MEM_SIZE BUFFER_SIZE_64M
uint8_t mm_allocator_mem[MM_ALLOCATOR_MEM_SIZE];
/*
* Generic parameters
*/
typedef struct {
// Misc
int progress;
bool verbose;
} benchmark_args;
benchmark_args parameters = {
// Misc
.progress = 10000,
.verbose = false
};
/*
* Generic Menu
*/
int main(int argc,char* argv[]) {
// Initialize
mm_allocator_t mm_allocator;
mm_allocator_init(&mm_allocator,mm_allocator_mem,MM_ALLOCATOR_MEM_SIZE);
score_matrix_t score_matrix;
score_matrix_allocate(&score_matrix,MAX_SEQUENCE_LENGTH,MAX_SEQUENCE_LENGTH,&mm_allocator);
lineal_penalties_t penalties = {
.match = -1,
.insertion = 2,
.deletion = 2,
.mismatch['A']['A'] =-1, .mismatch['A']['C'] = 3, .mismatch['A']['G'] = 3, .mismatch['A']['T'] = 3,
.mismatch['C']['A'] = 3, .mismatch['C']['C'] =-1, .mismatch['C']['G'] = 3, .mismatch['C']['T'] = 3,
.mismatch['G']['A'] = 3, .mismatch['G']['C'] = 3, .mismatch['G']['G'] =-1, .mismatch['G']['T'] = 3,
.mismatch['T']['A'] = 3, .mismatch['T']['C'] = 3, .mismatch['T']['G'] = 3, .mismatch['T']['T'] =-1,
};
cigar_t cigar;
cigar_allocate(&cigar,2*MAX_SEQUENCE_LENGTH+1,&mm_allocator);
// PMU
PROFILE_RESET();
// Read-align loop
int rep, correct = 1;
for (rep=0;rep<REPS;++rep) {
int i, progress = 0;
printf("Doing repetition %d \n",rep);
for (i=0;i<num_input_sequences;++i) {
// Align
const int64_t score = sw_compute(
&score_matrix,&penalties,
input_sequences[i].pattern,
input_sequences[i].pattern_length,
input_sequences[i].text,
input_sequences[i].text_length,&cigar);
// Check
if (score != input_sequences[i].score) {
printf("[ERROR] Incorrect alignment of sequence %d"
" (score = %ld, correct=%ld)\n",i,score,input_sequences[i].score);
printf(">%s\n",input_sequences[i].pattern);
printf("<%s\n",input_sequences[i].text);
correct = 0;
}
// Update progress
if (++progress == parameters.progress) {
progress = 0;
printf("...processed %d reads \n",i+1);
}
}
}
// PMU
PROFILE_PRINT();
// DEBUG
if (correct) {
printf("All sequences aligned correctly! (%d sequences)\n",num_input_sequences);
} else {
printf("Errors aligning sequences\n");
}
// Free
score_matrix_free(&score_matrix);
// Exit
return 0;
}
/*
* The MIT License
*
* Wavefront Alignments Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola <santiagomsola@gmail.com>
*
* This file is part of Wavefront Alignments Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignments Algorithms
* AUTHOR(S): Santiago Marco-Sola <santiagomsola@gmail.com>
* DESCRIPTION: Edit cigar data-structure (match/mismatch/insertion/deletion)
*/
#include "cigar.h"
/*
* Setup
*/
void cigar_allocate(
cigar_t* const cigar,
const int max_operations,
mm_allocator_t* const mm_allocator) {
// Allocate buffer
cigar->max_operations = max_operations;
cigar->operations = mm_allocator_malloc(mm_allocator,cigar->max_operations);
cigar->begin_offset = 0;
cigar->end_offset = 0;
cigar->score = INT32_MIN;
// MM
cigar->mm_allocator = mm_allocator;
}
void cigar_clear(
cigar_t* const cigar) {
cigar->begin_offset = 0;
cigar->end_offset = 0;
cigar->score = INT32_MIN;
}
void cigar_resize(
cigar_t* const cigar,
const int max_operations) {
// Check maximum operations
if (max_operations > cigar->max_operations) {
cigar->max_operations = max_operations;
mm_allocator_free(cigar->mm_allocator,cigar->operations); // Free
cigar->operations = mm_allocator_malloc(
cigar->mm_allocator,max_operations); // Allocate
}
cigar->begin_offset = 0;
cigar->end_offset = 0;
cigar->score = INT32_MIN;
}
void cigar_free(
cigar_t* const cigar) {
mm_allocator_free(cigar->mm_allocator,cigar->operations);
}
/*
* Accessors
*/
int cigar_get_matches(
cigar_t* const cigar) {
int i, num_matches=0;
for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
num_matches += (cigar->operations[i]=='M');
}
return num_matches;
}
void cigar_add_mismatches(
char* const pattern,
const int pattern_length,
char* const text,
const int text_length,
cigar_t* const cigar) {
// Refine adding mismatches
int i, p=0, t=0;
for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
// Check limits
if (p >= pattern_length || t >= text_length) break;
switch (cigar->operations[i]) {
case 'M':
cigar->operations[i] = (pattern[p]==text[t]) ? 'M' : 'X';
++p; ++t;
break;
case 'I':
++t;
break;
case 'D':
++p;
break;
default:
fprintf(stderr,"Aband-gaba. Wrong edit operation\n");
exit(1);
break;
}
}
while (p < pattern_length) { cigar->operations[i++] = 'D'; ++p; };
while (t < text_length) { cigar->operations[i++] = 'I'; ++t; };
cigar->end_offset = i;
cigar->operations[cigar->end_offset] = '\0';
}
/*
* Score
*/
int cigar_score_edit(
cigar_t* const cigar) {
int score = 0, i;
for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
switch (cigar->operations[i]) {
case 'M': break;
case 'X':
case 'D':
case 'I': ++score; break;
default: return INT_MIN;
}
}
return score;
}
/*
* Utils
*/
int cigar_cmp(
cigar_t* const cigar_a,
cigar_t* const cigar_b) {
// Compare lengths
const int length_cigar_a = cigar_a->end_offset - cigar_a->begin_offset;
const int length_cigar_b = cigar_b->end_offset - cigar_b->begin_offset;
if (length_cigar_a != length_cigar_b) return length_cigar_a - length_cigar_b;
// Compare operations
char* const operations_a = cigar_a->operations + cigar_a->begin_offset;
char* const operations_b = cigar_b->operations + cigar_b->begin_offset;
int i;
for (i=0;i<length_cigar_a;++i) {
if (operations_a[i] != operations_b[i]) {
return operations_a[i] - operations_b[i];
}
}
// Equal
return 0;
}
void cigar_copy(
cigar_t* const cigar_dst,
cigar_t* const cigar_src) {
cigar_dst->max_operations = cigar_src->max_operations;
cigar_dst->begin_offset = cigar_src->begin_offset;
cigar_dst->end_offset = cigar_src->end_offset;
cigar_dst->score = cigar_src->score;
memcpy(cigar_dst->operations+cigar_src->begin_offset,
cigar_src->operations+cigar_src->begin_offset,
cigar_src->end_offset-cigar_src->begin_offset);
}
bool cigar_check_alignment(
FILE* const stream,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
cigar_t* const cigar,
const bool verbose) {
// Parameters
char* const operations = cigar->operations;
// Traverse CIGAR
int pattern_pos=0, text_pos=0, i;
for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
switch (operations[i]) {
case 'M':
// Check match
if (pattern[pattern_pos] != text[text_pos]) {
if (verbose) {
fprintf(stream,
"Align Check. Alignment not matching (pattern[%d]=%c != text[%d]=%c)\n",
pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]);
}
return false;
}
++pattern_pos;
++text_pos;
break;
case 'X':
// Check mismatch
if (pattern[pattern_pos] == text[text_pos]) {
if (verbose) {
fprintf(stream,
"Align Check. Alignment not mismatching (pattern[%d]=%c == text[%d]=%c)\n",
pattern_pos,pattern[pattern_pos],text_pos,text[text_pos]);
}
return false;
}
++pattern_pos;
++text_pos;
break;
case 'I':
++text_pos;
break;
case 'D':
++pattern_pos;
break;
default:
fprintf(stderr,"CIGAR check. Unknown edit operation '%c'\n",operations[i]);
exit(1);
break;
}
}
// Check alignment length
if (pattern_pos != pattern_length) {
if (verbose) {
fprintf(stream,
"Align Check. Alignment incorrect length (pattern-aligned=%d,pattern-length=%d)\n",
pattern_pos,pattern_length);
}
return false;
}
if (text_pos != text_length) {
if (verbose) {
fprintf(stream,
"Align Check. Alignment incorrect length (text-aligned=%d,text-length=%d)\n",
text_pos,text_length);
}
return false;
}
// OK
return true;
}
/*
* Display
*/
void cigar_print(
FILE* const stream,
cigar_t* const cigar,
const bool print_matches) {
// Check null CIGAR
if (cigar->begin_offset >= cigar->end_offset) return;
// Print operations
char last_op = cigar->operations[cigar->begin_offset];
int last_op_length = 1;
int i;
for (i=cigar->begin_offset+1;i<cigar->end_offset;++i) {
if (cigar->operations[i]==last_op) {
++last_op_length;
} else {
if (print_matches || last_op != 'M') {
fprintf(stream,"%d%c",last_op_length,last_op);
}
last_op = cigar->operations[i];
last_op_length = 1;
}
}
if (print_matches || last_op != 'M') {
fprintf(stream,"%d%c",last_op_length,last_op);
}
}
void cigar_print_pretty(
FILE* const stream,
const char* const pattern,
const int pattern_length,
const char* const text,
const int text_length,
cigar_t* const cigar,
mm_allocator_t* const mm_allocator) {
// Parameters
char* const operations = cigar->operations;
// Allocate alignment buffers
const int max_buffer_length = text_length+pattern_length+1;
char* const pattern_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
char* const ops_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
char* const text_alg = mm_allocator_calloc(mm_allocator,max_buffer_length,char,true);
// Compute alignment buffers
int i, alg_pos = 0, pattern_pos = 0, text_pos = 0;
for (i=cigar->begin_offset;i<cigar->end_offset;++i) {
switch (operations[i]) {
case 'M':
if (pattern[pattern_pos] != text[text_pos]) {
pattern_alg[alg_pos] = pattern[pattern_pos];
ops_alg[alg_pos] = 'X';
text_alg[alg_pos++] = text[text_pos];
} else {
pattern_alg[alg_pos] = pattern[pattern_pos];
ops_alg[alg_pos] = '|';
text_alg[alg_pos++] = text[text_pos];
}
pattern_pos++; text_pos++;
break;
case 'X':
if (pattern[pattern_pos] != text[text_pos]) {
pattern_alg[alg_pos] = pattern[pattern_pos++];
ops_alg[alg_pos] = ' ';
text_alg[alg_pos++] = text[text_pos++];
} else {
pattern_alg[alg_pos] = pattern[pattern_pos++];
ops_alg[alg_pos] = 'X';
text_alg[alg_pos++] = text[text_pos++];
}
break;
case 'I':
pattern_alg[alg_pos] = '-';
ops_alg[alg_pos] = ' ';
text_alg[alg_pos++] = text[text_pos++];
break;
case 'D':
pattern_alg[alg_pos] = pattern[pattern_pos++];
ops_alg[alg_pos] = ' ';
text_alg[alg_pos++] = '-';
break;
default:
break;
}
}
i=0;
while (pattern_pos < pattern_length) {
pattern_alg[alg_pos+i] = pattern[pattern_pos++];
ops_alg[alg_pos+i] = '?';
++i;
}
i=0;
while (text_pos < text_length) {
text_alg[alg_pos+i] = text[text_pos++];
ops_alg[alg_pos+i] = '?';
++i;
}
// Print alignment pretty
fprintf(stream," ALIGNMENT\t");
cigar_print(stderr,cigar,true);
fprintf(stream,"\n");
fprintf(stream," ALIGNMENT.COMPACT\t");
cigar_print(stderr,cigar,false);
fprintf(stream,"\n");
fprintf(stream," PATTERN %s\n",pattern_alg);
fprintf(stream," %s\n",ops_alg);
fprintf(stream," TEXT %s\n",text_alg);
// Free
mm_allocator_free(mm_allocator,pattern_alg);
mm_allocator_free(mm_allocator,ops_alg);
mm_allocator_free(mm_allocator,text_alg);
}
/*
* The MIT License
*
* Wavefront Alignments Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola <santiagomsola@gmail.com>
*
* This file is part of Wavefront Alignments Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Wavefront Alignments Algorithms
* AUTHOR(S): Santiago Marco-Sola <santiagomsola@gmail.com>
* DESCRIPTION: Cigar data-structure (match/mismatch/insertion/deletion)
*/
#ifndef CIGAR_H_
#define CIGAR_H_
#include "commons.h"
#include "mm_allocator.h"
#include "lineal_penalties.h"
/*
* CIGAR
*/
typedef struct {
// Operations buffer
char* operations;
int max_operations;
int begin_offset;
int end_offset;
// Score
int score;
// MM
mm_allocator_t* mm_allocator;
} cigar_t;
/*
* Setup
*/
void cigar_allocate(
cigar_t* const cigar,
const int max_operations,
mm_allocator_t* const mm_allocator);
void cigar_clear(
cigar_t* const cigar);