Commit 3be38913 authored by Santiago Marco-Sola
Browse files

Added dna-pack and base-count examples

parent 3d788cc4
###############################################################################
# Compiler & Flags
###############################################################################
# CC is the clang shipped with the EPI toolchain; it is used by the asm, epi
# and sargantana targets. The debug and native targets call the host gcc.
EPI_TOOLCHAIN=/shared_folder/llvm-EPI-0.7-development-toolchain-native/
CC=$(EPI_TOOLCHAIN)/bin/clang
# Flags: architectural
MARCH=-march=rv64imafd -mcmodel=medany
# Flags: compilation
CC_FLAGS=-g -std=gnu99 -O2 -mepi -fno-vectorize -Wall -Rpass=loop-vectorize $(MARCH)
CC_FLAGS+=-Wno-implicit-function-declaration -fno-builtin-printf -fno-builtin -ffreestanding
# Flags: linking + extra
LD_FLAGS=-fno-builtin-printf -fno-builtin -ffreestanding
LD_FLAGS+=-nostdlib -static -nostartfiles -lc -lgcc -mabi=lp64 $(MARCH)
LD_FLAGS+=-Wl,-T ./system/test.ld
###############################################################################
# Compile rules
###############################################################################
# Startup code and syscall stubs linked into the sargantana build
SYSTEM=./system/syscalls.s ./system/crt.S
TOOL=base_count
SRCS=base_count.c
# None of these targets names the file it produces, so declare them phony
.PHONY: all debug native asm epi sargantana clean
all: native # asm epi sargantana
debug: $(SRCS)
	gcc -O0 -g $(SRCS) -o $(TOOL)
native: $(SRCS)
	gcc -O3 -g $(SRCS) -o $(TOOL)
asm: $(SRCS)
	$(CC) $(CC_FLAGS) -S $(SRCS)
epi: $(SRCS)
	$(CC) $(CC_FLAGS) $(SRCS) -o $(TOOL).epi
sargantana: $(SRCS)
	$(CC) $(CC_FLAGS) $(LD_FLAGS) $(SYSTEM) $(SRCS) -o $(TOOL).sargantana
clean:
	rm -rf $(TOOL) *.s *.epi *.sargantana > /dev/null 2>&1
/*
* The MIT License
*
* Genomic Tools & Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola <santiagomsola@gmail.com>
*
* This file is part of Genomic Tools & Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Genomic Tools & Algorithms
* AUTHOR(S): Santiago Marco-Sola <santiagomsola@gmail.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
/*
 * Config
 */
// Number of iterations of the benchmark loop in main()
#define REPS 100000000
/*
 * Utils
 *
 * NOTE: these are function-like macros. MIN, MAX, ABS and DIV_CEIL expand an
 * argument more than once, so do not pass expressions with side effects.
 */
// Smaller of a and b (a when they compare equal)
#define MIN(a,b) (((a)<=(b))?(a):(b))
// Larger of a and b (a when they compare equal)
#define MAX(a,b) (((a)>=(b))?(a):(b))
// a if a >= 0, otherwise -a
#define ABS(a) (((a)>=0)?(a):-(a))
// Plain C division NUMERATOR/DENOMINATOR
#define DIV_FLOOR(NUMERATOR,DENOMINATOR) ((NUMERATOR)/(DENOMINATOR))
// Division with (DENOMINATOR-1) added to the numerator first, i.e. rounded up
// for non-negative integer operands
#define DIV_CEIL(NUMERATOR,DENOMINATOR) (((NUMERATOR)+((DENOMINATOR)-1))/(DENOMINATOR))
/*
 * Input Sequence
 *
 * One 64-bit word passed to bcnt() as its source operand; bcnt() reads it
 * two bits at a time, starting from the least-significant pair.
 */
uint64_t packed_seq = UINT64_C(0xEA550000EA550000);
/*
 * Block Count (Instruction)
 *
 * Counts how often each 2-bit code (0..3) occurs among the first `offset`
 * 2-bit fields of rs1, where offset = rs2 & 31 and fields are taken starting
 * from the least-significant bits.
 *
 *   rd   Destination; receives four consecutive 16-bit counters, counter k
 *        (at byte offset 2*k) holding the number of fields equal to k.
 *   rs1  Packed source word (2 bits per field).
 *   rs2  Only the low 5 bits are used: number of fields to count (0..31).
 */
void bcnt(
    uint64_t* const rd,
    const uint64_t rs1,
    const uint64_t rs2) {
  // Count into a local array and copy it out at the end: writing to *rd
  // through a uint16_t* alias would violate the effective-type rules
  uint16_t lanes[4] = {0, 0, 0, 0};
  // Compute offset
  const int offset = (int)(rs2 & 31);
  // Count up to offset (exclusive)
  int i;
  for (i=0;i<offset;++i) {
    const int idx = (int)((rs1 >> (2*i)) & 3);
    lanes[idx]++;
  }
  // Store the four counters; same memory layout as four uint16_t stores
  memcpy(rd,lanes,sizeof(lanes));
}
/*
 * Main
 *
 * Runs bcnt() REPS times on the global packed_seq and prints the four
 * resulting counters to stderr.
 */
int main(int argc,char* argv[]) {
  // bcnt() takes a uint64_t* destination, so the counters live in a union
  // that carries 64-bit alignment while still exposing the 16-bit lanes
  union {
    uint64_t word;
    uint16_t lanes[4];
  } counts = { 0 };
  // Benchmark
  int rep;
  for (rep=0;rep<REPS;++rep) {
    // Count bases (bcnt only uses the low 5 bits of its last argument)
    bcnt(&counts.word,packed_seq,1040);
  }
  // Codes 2 and 3 are reported as T and G respectively
  fprintf(stderr,">>> COUNTS\n");
  fprintf(stderr," A = %d\n",counts.lanes[0]);
  fprintf(stderr," C = %d\n",counts.lanes[1]);
  fprintf(stderr," G = %d\n",counts.lanes[3]);
  fprintf(stderr," T = %d\n",counts.lanes[2]);
  // Exit
  return 0;
}
###############################################################################
# Compiler & Flags
###############################################################################
# CC is the clang shipped with the EPI toolchain; it is used by the asm, epi
# and sargantana targets. The debug and native targets call the host gcc.
EPI_TOOLCHAIN=/shared_folder/llvm-EPI-0.7-development-toolchain-native/
CC=$(EPI_TOOLCHAIN)/bin/clang
# Flags: architectural
MARCH=-march=rv64imafd -mcmodel=medany
# Flags: compilation
CC_FLAGS=-g -std=gnu99 -O2 -mepi -fno-vectorize -Wall -Rpass=loop-vectorize $(MARCH)
CC_FLAGS+=-Wno-implicit-function-declaration -fno-builtin-printf -fno-builtin -ffreestanding
# Flags: linking + extra
LD_FLAGS=-fno-builtin-printf -fno-builtin -ffreestanding
LD_FLAGS+=-nostdlib -static -nostartfiles -lc -lgcc -mabi=lp64 $(MARCH)
LD_FLAGS+=-Wl,-T ./system/test.ld
###############################################################################
# Compile rules
###############################################################################
# Startup code and syscall stubs linked into the sargantana build
SYSTEM=./system/syscalls.s ./system/crt.S
TOOL=dna_pack
SRCS=dna_pack.c
# None of these targets names the file it produces, so declare them phony
.PHONY: all debug native asm epi sargantana clean
all: native # asm epi sargantana
debug: $(SRCS)
	gcc -O0 -g $(SRCS) -o $(TOOL)
native: $(SRCS)
	gcc -O3 -g $(SRCS) -o $(TOOL)
asm: $(SRCS)
	$(CC) $(CC_FLAGS) -S $(SRCS)
epi: $(SRCS)
	$(CC) $(CC_FLAGS) $(SRCS) -o $(TOOL).epi
sargantana: $(SRCS)
	$(CC) $(CC_FLAGS) $(LD_FLAGS) $(SYSTEM) $(SRCS) -o $(TOOL).sargantana
clean:
	rm -rf $(TOOL) *.s *.epi *.sargantana > /dev/null 2>&1
/*
* The MIT License
*
* Genomic Tools & Algorithms
* Copyright (c) 2017 by Santiago Marco-Sola <santiagomsola@gmail.com>
*
* This file is part of Genomic Tools & Algorithms.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* PROJECT: Genomic Tools & Algorithms
* AUTHOR(S): Santiago Marco-Sola <santiagomsola@gmail.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
/*
 * Config
 */
// Number of iterations of the benchmark loop in main()
#define REPS 50000000
/*
 * Utils
 *
 * NOTE: these are function-like macros. MIN, MAX, ABS and DIV_CEIL expand an
 * argument more than once, so do not pass expressions with side effects.
 */
// Smaller of a and b (a when they compare equal)
#define MIN(a,b) (((a)<=(b))?(a):(b))
// Larger of a and b (a when they compare equal)
#define MAX(a,b) (((a)>=(b))?(a):(b))
// a if a >= 0, otherwise -a
#define ABS(a) (((a)>=0)?(a):-(a))
// Plain C division NUMERATOR/DENOMINATOR
#define DIV_FLOOR(NUMERATOR,DENOMINATOR) ((NUMERATOR)/(DENOMINATOR))
// Division with (DENOMINATOR-1) added to the numerator first, i.e. rounded up
// for non-negative integer operands
#define DIV_CEIL(NUMERATOR,DENOMINATOR) (((NUMERATOR)+((DENOMINATOR)-1))/(DENOMINATOR))
/*
 * Decoding table indexed by a 4-bit packed value (two 2-bit base codes).
 *   [v][0]  the two bases that pack to v, first base in the low 2 bits
 *   [v][1]  v written as a single hexadecimal digit
 */
char* packing_lut[16][2] = {
  [0x0] = {"AA", "0"},
  [0x1] = {"CA", "1"},
  [0x2] = {"TA", "2"},
  [0x3] = {"GA", "3"},
  [0x4] = {"AC", "4"},
  [0x5] = {"CC", "5"},
  [0x6] = {"TC", "6"},
  [0x7] = {"GC", "7"},
  [0x8] = {"AT", "8"},
  [0x9] = {"CT", "9"},
  [0xA] = {"TT", "A"},
  [0xB] = {"GT", "B"},
  [0xC] = {"AG", "C"},
  [0xD] = {"CG", "D"},
  [0xE] = {"TG", "E"},
  [0xF] = {"GG", "F"},
};
/*
 * Block Pack (Instruction)
 *
 * Treats rs as eight bytes (byte 0 in the least-significant bits), extracts
 * bits 1..2 of every byte and concatenates the eight 2-bit values into *rd,
 * with the value from byte k stored at bits 2k..2k+1.
 */
void pck(
    uint16_t* const rd,
    const uint64_t rs) {
  uint64_t packed = 0;
  unsigned lane;
  for (lane=0;lane<8;++lane) {
    // Bits 1..2 of byte `lane`
    const uint64_t code = (rs >> (8*lane + 1)) & 3ul;
    packed |= code << (2*lane);
  }
  *rd = (uint16_t)packed;
}
/*
 * Pack Sequence Kernel (using pck)
 *
 * Packs `sequence` eight characters at a time: every group of up to eight
 * characters is handed to pck() and the resulting 16-bit value is stored at
 * consecutive 2-byte positions of `packed_seq`.
 *
 *   sequence         Input characters.
 *   sequence_length  Number of characters to pack; nothing past this length
 *                    is read, a short final group is padded with zero bytes.
 *   packed_seq       Output buffer; receives 2 bytes per started group of
 *                    eight characters. Bytes beyond that are left untouched.
 */
void pack_sequence__pck(
    char* const sequence,
    const int sequence_length,
    uint64_t* packed_seq) {
  unsigned char* out = (unsigned char*)packed_seq;
  int i;
  for (i=0;i<sequence_length;i+=8) {
    // Load through memcpy: sequence+i need not be 8-byte aligned, and the
    // final group may hold fewer than eight characters
    const int remaining = sequence_length - i;
    const size_t chunk = (remaining < 8) ? (size_t)remaining : 8;
    uint64_t block = 0;
    memcpy(&block,sequence+i,chunk);
    // Pack eight characters into 16 bits
    uint16_t packed;
    pck(&packed,block);
    // Store through memcpy rather than a uint16_t* alias of the uint64_t buffer
    memcpy(out,&packed,sizeof(packed));
    out += sizeof(packed);
  }
}
/*
 * Pack Sequence Kernel (Base)
 *
 * Packs `sequence_length` characters of `sequence` into 64-bit words, two
 * bits per character (bits 1..2 of the character), 32 characters per word,
 * first character in the least-significant bits. A final, partially filled
 * word is written as well; nothing is written for an empty sequence.
 *
 *   Char  ASCII  Binary       Code
 *   A     65     1000 00 1 => 00
 *   C     67     1000 01 1 => 01
 *   G     71     1000 11 1 => 11
 *   T     84     1010 10 0 => 10
 */
void pack_sequence(
    char* const sequence,
    const int sequence_length,
    uint64_t* packed_seq) {
  uint64_t acc = 0;  // Word being assembled
  int filled = 0;    // Number of codes already placed in acc
  int pos;
  for (pos=0;pos<sequence_length;++pos) {
    const uint64_t code = ((uint64_t)sequence[pos] >> 1) & 3ul;
    acc |= code << (2*filled);
    ++filled;
    if (filled == 32) {
      // Word complete: emit it and start a fresh one
      *packed_seq++ = acc;
      acc = 0;
      filled = 0;
    }
  }
  // Flush the trailing partial word, if any
  if (filled > 0) {
    *packed_seq = acc;
  }
}
/*
 * Input Sequence
 */
// Destination of the packing kernels; main() prints it as 64-bit words
uint64_t packed_seq[1000]; // Packed buffer
// Bases as ASCII text; array elements past the string literal are zero-filled
char sequence[1024] = "ACGATCAGCCTACGATCATTATCGGATCTCAGATATTATAAAATCGATCAGCATTTGCATGATAGTCGTACGTACGTACCGACTTCAGCTAGCTGCTACAA";
// Alternative short input (disabled)
//char sequence[1024] = "AAAAAAAACCCCTTTGAAAAAAAACCCCTTTG";
/*
 * Main
 *
 * Packs the global `sequence` into `packed_seq` REPS times, then prints every
 * packed 64-bit word in hexadecimal followed by its decoded base pairs next
 * to the corresponding source characters, and reports whether they all match.
 */
int main(int argc,char* argv[]) {
  const int sequence_length = strlen(sequence);
  // Benchmark
  int rep, j;
  for (rep=0;rep<REPS;++rep) {
    // Pack sequence (the commented-out call is the base kernel)
    //pack_sequence(sequence,sequence_length,packed_seq);
    pack_sequence__pck(sequence,sequence_length,packed_seq);
  }
  // Output result
  const int num_word64 = DIV_CEIL(sequence_length,32);
  int correct = 1;
  int word_idx, string_pos = 0;
  fprintf(stderr,">> HEX DST_PACKED[SRC_SEQUENCE]\n");
  for (word_idx=0;word_idx<num_word64;++word_idx) {
    // Print Hexa (PRIX64 is the matching conversion for uint64_t)
    fprintf(stderr,"%016" PRIX64 " ",packed_seq[word_idx]);
    // Print groups of 16 chars (64bits = 2bits * 32chars = 4hexa * 16 groups)
    for (j=0;j<16;++j) {
      // The last pair may hold a single base
      const int length = MIN(2,sequence_length-string_pos);
      // Check pair: decode 4 packed bits and compare with the source text
      const int packed_hex = (packed_seq[word_idx] >> (4*j)) & 15ul;
      correct = correct && (strncmp(packing_lut[packed_hex][0],sequence+string_pos,length)==0);
      fprintf(stderr,"%.*s",length,packing_lut[packed_hex][0]);
      fprintf(stderr,"[%.*s] ",length,sequence+string_pos);
      // Next
      string_pos += length;
      if (string_pos >= sequence_length) break;
    }
    fprintf(stderr,"\n");
  }
  fprintf(stderr,">> CHECK [%s]\n",correct?"Correct":"Incorrect");
  // Exit
  return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment