Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Santiago Marco-Sola
EPROC-WP3
Commits
b34bb43b
Commit
b34bb43b
authored
Jan 30, 2022
by
Santiago Marco-Sola
Browse files
Fixed compilation flags and removed mm includes
parent
efed43e3
Changes
8
Hide whitespace changes
Inline
Side-by-side
ut80.fmindex/Makefile
View file @
b34bb43b
...
...
@@ -27,19 +27,18 @@
##Contacts: Vasimuddin Md <vasimuddin.md@intel.com>; Sanchit Misra <sanchit.misra@intel.com>;
## Heng Li <hli@jimmy.harvard.edu>
##*****************************************************************************************/
EPI_TOOLCHAIN
=
/scratch/workspace-riscv/llvm-EPI-0.7-release-toolchain-cross-1602/
CC
=
$(EPI_TOOLCHAIN)
/bin/clang
CXX
=
$(EPI_TOOLCHAIN)
/bin/clang++
FMI
=
fmi
CC
=
gcc
CXX
=
g++
BWAMEM2_PATH
=
./bwa-mem2
ARCH_FLAGS
=
-march
=
native
CXXFLAGS
=
-std
=
c++11
-fopenmp
$(ARCH_FLAGS)
#-mtune=native -march=native
#CPPFLAGS=-DPRINT_OUTPUT -DENABLE_PREFETCH -DBWA_OTHER_ELE=0
CXXFLAGS
=
-std
=
c++11
-fopenmp
CPPFLAGS
=
-DENABLE_PREFETCH
-DBWA_OTHER_ELE
=
0
INCLUDES
=
-I
$(BWAMEM2_PATH)
/src
-I
$(BWAMEM2_PATH)
/ext/safestringlib/include
# -DENABLE_PARSEC_HOOKS -I/romol/hooks/include
LIBS
=
-L
$(BWAMEM2_PATH)
-L
$(BWAMEM2_PATH)
/ext/safestringlib
-lsafestring
-fopenmp
-lz
-lbwa
-ldl
# -L/romol/hooks/lib -lhooks
INCLUDES
=
-I
$(BWAMEM2_PATH)
/src
-I
$(BWAMEM2_PATH)
/ext/safestringlib/include
LIBS
=
-L
$(BWAMEM2_PATH)
-L
$(BWAMEM2_PATH)
/ext/safestringlib
-lsafestring
-fopenmp
-lz
-lbwa
-ldl
.PHONY
:
all clean depend
.SUFFIXES
:
.cpp .o
...
...
ut80.fmindex/bwa-mem2/Makefile
View file @
b34bb43b
...
...
@@ -27,10 +27,11 @@
##Contacts: Vasimuddin Md <vasimuddin.md@intel.com>; Sanchit Misra <sanchit.misra@intel.com>;
## Heng Li <hli@jimmy.harvard.edu>
##*****************************************************************************************/
EPI_TOOLCHAIN
=
/scratch/workspace-riscv/llvm-EPI-0.7-release-toolchain-cross-1602/
CC
=
$(EPI_TOOLCHAIN)
/bin/clang
CXX
=
$(EPI_TOOLCHAIN)
/bin/clang++
EXE
=
bwa-mem2
CC
=
gcc
CXX
=
g++
MEM_FLAGS
=
-DSAIS
=
1
CPPFLAGS
+=
-DENABLE_PREFETCH
-DV17
=
1
$(MEM_FLAGS)
...
...
ut80.fmindex/bwa-mem2/ext/safestringlib/makefile
View file @
b34bb43b
EPI_TOOLCHAIN
=
/scratch/workspace-riscv/llvm-EPI-0.7-release-toolchain-cross-1602/
CC
=
$(EPI_TOOLCHAIN)
/bin/clang
CXX
=
$(EPI_TOOLCHAIN)
/bin/clang++
IDIR
=
include
MKDIR_P
=
mkdir
-p
CC
=
gcc
CFLAGS
=
-I
$(IDIR)
-fPIE
-fPIC
-O2
-D_FORTIFY_SOURCE
=
2
-Wformat
-Wformat-security
LDFLAGS
=
-z
noexecstack
-z
relo
-z
now
...
...
ut80.fmindex/bwa-mem2/src/FMI_search.h
View file @
b34bb43b
...
...
@@ -34,7 +34,6 @@ Authors: Sanchit Misra <sanchit.misra@intel.com>; Vasimuddin Md <vasimuddin.md@i
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <immintrin.h>
#include <limits.h>
#include <fstream>
...
...
ut80.fmindex/bwa-mem2/src/bandedSWA.h
View file @
b34bb43b
...
...
@@ -37,13 +37,8 @@ Authors: Vasimuddin Md <vasimuddin.md@intel.com>; Sanchit Misra <sanchit.misra@i
#include "macro.h"
#include "utils.h"
#if (__AVX512BW__ || __AVX2__)
#include <immintrin.h>
#else
#include <smmintrin.h> // for SSE4.1
#define __mmask8 uint8_t
#define __mmask16 uint16_t
#endif
#define MAX_SEQ_LEN_REF 256
#define MAX_SEQ_LEN_QER 128
...
...
@@ -135,183 +130,6 @@ public:
int
nthreads
,
int32_t
w
);
#if ((!__AVX512BW__) & (!__AVX2__) & (__SSE2__))
// AVX256 is not updated for banding and separate ins/del in the inner loop.
// 8 bit vector code section
void
getScores8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman128_8
(
uint8_t
seq1SoA
[],
uint8_t
seq2SoA
[],
uint8_t
nrow
,
uint8_t
ncol
,
SeqPair
*
p
,
uint8_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint8_t
qlen
[],
uint8_t
myband
[]);
// 16 bit vector code section
void
getScores16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman128_16
(
uint16_t
seq1SoA
[],
uint16_t
seq2SoA
[],
uint16_t
nrow
,
uint16_t
ncol
,
SeqPair
*
p
,
uint16_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint16_t
qlen
[],
uint16_t
myband
[]);
#endif //SSE2
#if ((!__AVX512BW__) & (__AVX2__))
// AVX256 is not updated for banding and separate ins/del in the inner loop.
// 8 bit vector code section
void
getScores8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman256_8
(
uint8_t
seq1SoA
[],
uint8_t
seq2SoA
[],
uint8_t
nrow
,
uint8_t
ncol
,
SeqPair
*
p
,
uint8_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint8_t
qlen
[],
uint8_t
myband
[]);
// 16 bit vector code section
void
getScores16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman256_16
(
uint16_t
seq1SoA
[],
uint16_t
seq2SoA
[],
uint16_t
nrow
,
uint16_t
ncol
,
SeqPair
*
p
,
uint16_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint16_t
qlen
[],
uint16_t
myband
[],
int64_t
*
nCellsComputed
);
#endif //axv2
#if __AVX512BW__
// 8 bit vector code section
void
getScores8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper8
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman512_8
(
uint8_t
seq1SoA
[],
uint8_t
seq2SoA
[],
uint8_t
nrow
,
uint8_t
ncol
,
SeqPair
*
p
,
uint8_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint8_t
qlen
[],
uint8_t
myband
[]);
// 16 bit vector code section
void
getScores16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWatermanBatchWrapper16
(
SeqPair
*
pairArray
,
uint8_t
*
seqBufRef
,
uint8_t
*
seqBufQer
,
int32_t
numPairs
,
uint16_t
numThreads
,
int32_t
w
);
void
smithWaterman512_16
(
uint16_t
seq1SoA
[],
uint16_t
seq2SoA
[],
uint16_t
nrow
,
uint16_t
ncol
,
SeqPair
*
p
,
uint16_t
h0
[],
uint16_t
tid
,
int32_t
numPairs
,
int
zdrop
,
int32_t
w
,
uint16_t
qlen
[],
uint16_t
myband
[]);
#endif
int64_t
getTicks
();
private:
...
...
ut80.fmindex/bwa-mem2/src/ksw.cpp
View file @
b34bb43b
...
...
@@ -30,7 +30,6 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <emmintrin.h>
#include "ksw.h"
#include "macro.h"
...
...
@@ -108,235 +107,6 @@ kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t
return
q
;
}
kswr_t
ksw_u8
(
kswq_t
*
q
,
int
tlen
,
const
uint8_t
*
target
,
int
_o_del
,
int
_e_del
,
int
_o_ins
,
int
_e_ins
,
int
xtra
)
// the first gap costs -(_o+_e)
{
int
slen
,
i
,
m_b
,
n_b
,
te
=
-
1
,
gmax
=
0
,
minsc
,
endsc
;
uint64_t
*
b
;
__m128i
zero
,
oe_del
,
e_del
,
oe_ins
,
e_ins
,
shift
,
*
H0
,
*
H1
,
*
E
,
*
Hmax
;
kswr_t
r
;
#define __max_16(ret, xx) do { \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
(ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
} while (0)
// initialization
r
=
g_defr
;
minsc
=
(
xtra
&
KSW_XSUBO
)
?
xtra
&
0xffff
:
0x10000
;
endsc
=
(
xtra
&
KSW_XSTOP
)
?
xtra
&
0xffff
:
0x10000
;
m_b
=
n_b
=
0
;
b
=
0
;
zero
=
_mm_set1_epi32
(
0
);
oe_del
=
_mm_set1_epi8
(
_o_del
+
_e_del
);
e_del
=
_mm_set1_epi8
(
_e_del
);
oe_ins
=
_mm_set1_epi8
(
_o_ins
+
_e_ins
);
e_ins
=
_mm_set1_epi8
(
_e_ins
);
shift
=
_mm_set1_epi8
(
q
->
shift
);
H0
=
q
->
H0
;
H1
=
q
->
H1
;
E
=
q
->
E
;
Hmax
=
q
->
Hmax
;
slen
=
q
->
slen
;
for
(
i
=
0
;
i
<
slen
;
++
i
)
{
_mm_store_si128
(
E
+
i
,
zero
);
_mm_store_si128
(
H0
+
i
,
zero
);
_mm_store_si128
(
Hmax
+
i
,
zero
);
}
// the core loop
for
(
i
=
0
;
i
<
tlen
;
++
i
)
{
int
j
,
k
,
cmp
,
imax
;
__m128i
e
,
h
,
t
,
f
=
zero
,
max
=
zero
,
*
S
=
q
->
qp
+
target
[
i
]
*
slen
;
// s is the 1st score vector
h
=
_mm_load_si128
(
H0
+
slen
-
1
);
// h={2,5,8,11,14,17,-1,-1} in the above example
h
=
_mm_slli_si128
(
h
,
1
);
// h=H(i-1,-1); << instead of >> because x64 is little-endian
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
{
/* SW cells are computed in the following order:
* H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
* E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
* F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
*/
// compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
h
=
_mm_adds_epu8
(
h
,
_mm_load_si128
(
S
+
j
));
h
=
_mm_subs_epu8
(
h
,
shift
);
// h=H'(i-1,j-1)+S(i,j)
e
=
_mm_load_si128
(
E
+
j
);
// e=E'(i,j)
h
=
_mm_max_epu8
(
h
,
e
);
h
=
_mm_max_epu8
(
h
,
f
);
// h=H'(i,j)
max
=
_mm_max_epu8
(
max
,
h
);
// set max
_mm_store_si128
(
H1
+
j
,
h
);
// save to H'(i,j)
// now compute E'(i+1,j)
e
=
_mm_subs_epu8
(
e
,
e_del
);
// e=E'(i,j) - e_del
t
=
_mm_subs_epu8
(
h
,
oe_del
);
// h=H'(i,j) - o_del - e_del
e
=
_mm_max_epu8
(
e
,
t
);
// e=E'(i+1,j)
_mm_store_si128
(
E
+
j
,
e
);
// save to E'(i+1,j)
// now compute F'(i,j+1)
f
=
_mm_subs_epu8
(
f
,
e_ins
);
t
=
_mm_subs_epu8
(
h
,
oe_ins
);
// h=H'(i,j) - o_ins - e_ins
f
=
_mm_max_epu8
(
f
,
t
);
// get H'(i-1,j) and prepare for the next j
h
=
_mm_load_si128
(
H0
+
j
);
// h=H'(i-1,j)
}
// NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion
for
(
k
=
0
;
LIKELY
(
k
<
16
);
++
k
)
{
// this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
f
=
_mm_slli_si128
(
f
,
1
);
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
{
h
=
_mm_load_si128
(
H1
+
j
);
h
=
_mm_max_epu8
(
h
,
f
);
// h=H'(i,j)
_mm_store_si128
(
H1
+
j
,
h
);
h
=
_mm_subs_epu8
(
h
,
oe_ins
);
f
=
_mm_subs_epu8
(
f
,
e_ins
);
cmp
=
_mm_movemask_epi8
(
_mm_cmpeq_epi8
(
_mm_subs_epu8
(
f
,
h
),
zero
));
if
(
UNLIKELY
(
cmp
==
0xffff
))
goto
end_loop16
;
}
}
end_loop16:
__max_16
(
imax
,
max
);
// imax is the maximum number in max
if
(
imax
>=
minsc
)
{
// write the b array; this condition adds branching unfornately
if
(
n_b
==
0
||
(
int32_t
)
b
[
n_b
-
1
]
+
1
!=
i
)
{
// then append
if
(
n_b
==
m_b
)
{
m_b
=
m_b
?
m_b
<<
1
:
8
;
b
=
(
uint64_t
*
)
realloc
(
b
,
8
*
m_b
);
}
b
[
n_b
++
]
=
(
uint64_t
)
imax
<<
32
|
i
;
}
else
if
((
int
)(
b
[
n_b
-
1
]
>>
32
)
<
imax
)
b
[
n_b
-
1
]
=
(
uint64_t
)
imax
<<
32
|
i
;
// modify the last
}
if
(
imax
>
gmax
)
{
gmax
=
imax
;
te
=
i
;
// te is the end position on the target
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
// keep the H1 vector
_mm_store_si128
(
Hmax
+
j
,
_mm_load_si128
(
H1
+
j
));
if
(
gmax
+
q
->
shift
>=
255
||
gmax
>=
endsc
)
break
;
}
S
=
H1
;
H1
=
H0
;
H0
=
S
;
// swap H0 and H1
}
r
.
score
=
gmax
+
q
->
shift
<
255
?
gmax
:
255
;
r
.
te
=
te
;
if
(
r
.
score
!=
255
)
{
// get a->qe, the end of query match; find the 2nd best score
int
max
=
-
1
,
tmp
,
low
,
high
,
qlen
=
slen
*
16
;
uint8_t
*
t
=
(
uint8_t
*
)
Hmax
;
for
(
i
=
0
;
i
<
qlen
;
++
i
,
++
t
)
if
((
int
)
*
t
>
max
)
max
=
*
t
,
r
.
qe
=
i
/
16
+
i
%
16
*
slen
;
else
if
((
int
)
*
t
==
max
&&
(
tmp
=
i
/
16
+
i
%
16
*
slen
)
<
r
.
qe
)
r
.
qe
=
tmp
;
//printf("%d,%d\n", max, gmax);
if
(
b
)
{
i
=
(
r
.
score
+
q
->
max
-
1
)
/
q
->
max
;
low
=
te
-
i
;
high
=
te
+
i
;
for
(
i
=
0
;
i
<
n_b
;
++
i
)
{
int
e
=
(
int32_t
)
b
[
i
];
if
((
e
<
low
||
e
>
high
)
&&
(
int
)(
b
[
i
]
>>
32
)
>
r
.
score2
)
r
.
score2
=
b
[
i
]
>>
32
,
r
.
te2
=
e
;
}
}
}
free
(
b
);
return
r
;
}
kswr_t
ksw_i16
(
kswq_t
*
q
,
int
tlen
,
const
uint8_t
*
target
,
int
_o_del
,
int
_e_del
,
int
_o_ins
,
int
_e_ins
,
int
xtra
)
// the first gap costs -(_o+_e)
{
int
slen
,
i
,
m_b
,
n_b
,
te
=
-
1
,
gmax
=
0
,
minsc
,
endsc
;
uint64_t
*
b
;
__m128i
zero
,
oe_del
,
e_del
,
oe_ins
,
e_ins
,
*
H0
,
*
H1
,
*
E
,
*
Hmax
;
kswr_t
r
;
#define __max_8(ret, xx) do { \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
(ret) = _mm_extract_epi16((xx), 0); \
} while (0)
// initialization
r
=
g_defr
;
minsc
=
(
xtra
&
KSW_XSUBO
)
?
xtra
&
0xffff
:
0x10000
;
endsc
=
(
xtra
&
KSW_XSTOP
)
?
xtra
&
0xffff
:
0x10000
;
m_b
=
n_b
=
0
;
b
=
0
;
zero
=
_mm_set1_epi32
(
0
);
oe_del
=
_mm_set1_epi16
(
_o_del
+
_e_del
);
e_del
=
_mm_set1_epi16
(
_e_del
);
oe_ins
=
_mm_set1_epi16
(
_o_ins
+
_e_ins
);
e_ins
=
_mm_set1_epi16
(
_e_ins
);
H0
=
q
->
H0
;
H1
=
q
->
H1
;
E
=
q
->
E
;
Hmax
=
q
->
Hmax
;
slen
=
q
->
slen
;
for
(
i
=
0
;
i
<
slen
;
++
i
)
{
_mm_store_si128
(
E
+
i
,
zero
);
_mm_store_si128
(
H0
+
i
,
zero
);
_mm_store_si128
(
Hmax
+
i
,
zero
);
}
// the core loop
for
(
i
=
0
;
i
<
tlen
;
++
i
)
{
int
j
,
k
,
imax
;
__m128i
e
,
t
,
h
,
f
=
zero
,
max
=
zero
,
*
S
=
q
->
qp
+
target
[
i
]
*
slen
;
// s is the 1st score vector
h
=
_mm_load_si128
(
H0
+
slen
-
1
);
// h={2,5,8,11,14,17,-1,-1} in the above example
h
=
_mm_slli_si128
(
h
,
2
);
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
{
h
=
_mm_adds_epi16
(
h
,
*
S
++
);
e
=
_mm_load_si128
(
E
+
j
);
h
=
_mm_max_epi16
(
h
,
e
);
h
=
_mm_max_epi16
(
h
,
f
);
max
=
_mm_max_epi16
(
max
,
h
);
_mm_store_si128
(
H1
+
j
,
h
);
e
=
_mm_subs_epu16
(
e
,
e_del
);
t
=
_mm_subs_epu16
(
h
,
oe_del
);
e
=
_mm_max_epi16
(
e
,
t
);
_mm_store_si128
(
E
+
j
,
e
);
f
=
_mm_subs_epu16
(
f
,
e_ins
);
t
=
_mm_subs_epu16
(
h
,
oe_ins
);
f
=
_mm_max_epi16
(
f
,
t
);
h
=
_mm_load_si128
(
H0
+
j
);
}
for
(
k
=
0
;
LIKELY
(
k
<
16
);
++
k
)
{
f
=
_mm_slli_si128
(
f
,
2
);
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
{
h
=
_mm_load_si128
(
H1
+
j
);
h
=
_mm_max_epi16
(
h
,
f
);
_mm_store_si128
(
H1
+
j
,
h
);
h
=
_mm_subs_epu16
(
h
,
oe_ins
);
f
=
_mm_subs_epu16
(
f
,
e_ins
);
if
(
UNLIKELY
(
!
_mm_movemask_epi8
(
_mm_cmpgt_epi16
(
f
,
h
))))
goto
end_loop8
;
}
}
end_loop8:
__max_8
(
imax
,
max
);
if
(
imax
>=
minsc
)
{
if
(
n_b
==
0
||
(
int32_t
)
b
[
n_b
-
1
]
+
1
!=
i
)
{
if
(
n_b
==
m_b
)
{
m_b
=
m_b
?
m_b
<<
1
:
8
;
b
=
(
uint64_t
*
)
realloc
(
b
,
8
*
m_b
);
}
b
[
n_b
++
]
=
(
uint64_t
)
imax
<<
32
|
i
;
}
else
if
((
int
)(
b
[
n_b
-
1
]
>>
32
)
<
imax
)
b
[
n_b
-
1
]
=
(
uint64_t
)
imax
<<
32
|
i
;
// modify the last
}
if
(
imax
>
gmax
)
{
gmax
=
imax
;
te
=
i
;
for
(
j
=
0
;
LIKELY
(
j
<
slen
);
++
j
)
_mm_store_si128
(
Hmax
+
j
,
_mm_load_si128
(
H1
+
j
));
if
(
gmax
>=
endsc
)
break
;
}
S
=
H1
;
H1
=
H0
;
H0
=
S
;
}
r
.
score
=
gmax
;
r
.
te
=
te
;
{
int
max
=
-
1
,
tmp
,
low
,
high
,
qlen
=
slen
*
8
;
uint16_t
*
t
=
(
uint16_t
*
)
Hmax
;
for
(
i
=
0
,
r
.
qe
=
-
1
;
i
<
qlen
;
++
i
,
++
t
)
if
((
int
)
*
t
>
max
)
max
=
*
t
,
r
.
qe
=
i
/
8
+
i
%
8
*
slen
;
else
if
((
int
)
*
t
==
max
&&
(
tmp
=
i
/
8
+
i
%
8
*
slen
)
<
r
.
qe
)
r
.
qe
=
tmp
;
if
(
b
)
{
i
=
(
r
.
score
+
q
->
max
-
1
)
/
q
->
max
;
low
=
te
-
i
;
high
=
te
+
i
;
for
(
i
=
0
;
i
<
n_b
;
++
i
)
{
int
e
=
(
int32_t
)
b
[
i
];
if
((
e
<
low
||
e
>
high
)
&&
(
int
)(
b
[
i
]
>>
32
)
>
r
.
score2
)
r
.
score2
=
b
[
i
]
>>
32
,
r
.
te2
=
e
;
}
}
}
free
(
b
);
return
r
;
}
static
inline
void
revseq
(
int
l
,
uint8_t
*
s
)
{
int
i
,
t
;
...
...
ut80.fmindex/bwa-mem2/src/ksw.h
View file @
b34bb43b
...
...
@@ -26,7 +26,6 @@
#define __AC_KSW_H
#include <stdint.h>
#include <emmintrin.h>
#define KSW_XBYTE 0x10000
#define KSW_XSTOP 0x20000
...
...
ut80.fmindex/bwa-mem2/src/kswv.h
View file @
b34bb43b
...
...
@@ -38,8 +38,6 @@ Authors: Vasimuddin Md <vasimuddin.md@intel.com>; Sanchit Misra <sanchit.misra@i
#if !MAINY
#include "ksw.h"
#include "bandedSWA.h"
#else
#include <immintrin.h>
#endif
#ifdef __GNUC__
...
...
@@ -108,14 +106,6 @@ typedef struct {
const
kswr_t
g_defr
=
{
0
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
#define __max_16(ret, xx) do { \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
(ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
} while (0)
#define DP 6
#define DP1 7
#define DP2 8
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment