forked from chienchi/FaQCs
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathseq_overlap.h
629 lines (492 loc) · 14.7 KB
/
seq_overlap.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
#ifndef __SEQ_OVERLAP
#define __SEQ_OVERLAP
#include <stdlib.h>
#include <string.h>
#include <string>
#include <iostream>
#include <vector>
#include <deque>
/////////////////////////////////////////////////////////////////////////////////////////////
//
// SSE instruction defines, macros and datatypes. Note that using SSE4.1 requires
// g++ version >= 4.4 and the compiler flag -msse4.1. Currenlty (4/1/17) the Intel AVX2 instruction
// set (which offers 256 bit registers for the operations we need) will be a good future path
// forward for modern hardware. From the AVX Wikipedia web page
// (https://en.wikipedia.org/wiki/Advanced_Vector_Extension) chips supporting AVX2:
//
// Intel
// Haswell processor, Q2 2013
// Haswell E processor, Q3 2014
// Broadwell processor, Q4 2014
// Broadwell E processor, Q3 2016
// Skylake processor, Q3 2015
// Kaby Lake processor, Q3 2016(ULV mobile)/Q1 2017(desktop/mobile)
// Cannonlake processor, expected in 2017
// AMD
// Carrizo processor, Q2 2015
// Ryzen processor, Q1 2017
//
//
// Note that ATA32 requires the sse4.1 instruction set.
// Note that ATA16 requires the sse2 instruction set.
//
// 32-bit OSes require allocated memory be aligned to 16 byte boundaries. This requires use of
// the _mm_malloc/_mm_free functions (instead of new and delete). This dependance does not
// appear to be necessary for 64-bit OSes.
/////////////////////////////////////////////////////////////////////////////////////////////
#ifdef ATA32
#include <immintrin.h>
#elif ATA16
#include <xmmintrin.h>
#endif
namespace SO {
#define SSE_ALIGNMENT_SIZE 16
#ifdef ATA32
// Use a 32 bit integer data type for dynamic programming. Since SSE provides
// a 128 bit integer data type, we can process 4 elements in parallel.
#define SO_LEN 4u
#define _MM_SET1 _mm_set1_epi32
#define _MM_ADD _mm_add_epi32
#define _MM_SUB _mm_sub_epi32
#define _MM_CMPEQ _mm_cmpeq_epi32
#define _MM_CMPLT _mm_cmplt_epi32
#define _MM_CMPGT _mm_cmpgt_epi32
#define _MM_MAX _mm_max_epi32
#define _MM_MULLO _mm_mullo_epi32
#define _MM_MIN _mm_min_epi32
typedef int SO_Score;
typedef __m128i SIMDVector;
#elif ATA16
// Use a 16 bit integer data type for dynamic programming. Since SSE provides
// a 128 bit integer data type, we can process 8 elements in parallel.
#define SO_LEN 8u
#define _MM_SET1 _mm_set1_epi16
#define _MM_ADD _mm_add_epi16
#define _MM_SUB _mm_sub_epi16
#define _MM_CMPEQ _mm_cmpeq_epi16
#define _MM_CMPLT _mm_cmplt_epi16
#define _MM_CMPGT _mm_cmpgt_epi16
#define _MM_MAX _mm_max_epi16
#define _MM_MULLO _mm_mullo_epi16
#define _MM_MIN _mm_min_epi16
typedef short int SO_Score;
typedef __m128i SIMDVector;
#else
#error "Please specificy a data size!"
#endif
union sse_elem {
sse_elem()
{
// Do nothing
};
sse_elem(const SO_Score &m_scalar)
{
sse = _MM_SET1(m_scalar);
};
sse_elem(const SIMDVector &m_vector)
{
sse = m_vector;
};
~sse_elem()
{
// Do nothing
};
SIMDVector sse;
SO_Score v[SO_LEN];
};
#define MINIMUM_ALLOWED_SCORE -9999
namespace NA {
// Allowed nucleic acid bases
// A = adenine
// C = cytosine
// G = guanine
// T = thymine
// M = A or C
// R = G or A
// S = G or C
// V = G or C or A
// W = A or T
// Y = T or C
// H = A or C or T
// K = G or T
// D = G or A or T
// B = G or T or C
// N = A or T or C or G
typedef enum {
A = (1 << 0),
C = (1 << 1),
G = (1 << 2),
T = (1 << 3),
GAP = (1 << 4),
M = (A | C),
R = (G | A),
S = (G | C),
V = (G | C | A),
W = (A | T),
Y = (T | C),
H = (A | C | T),
K = (G | T),
D = (G | A | T),
B = (G | T | C),
N = (A | T | C | G)
} nucleic_acid;
};
// Both base types (nucleic acid and amino acid) must fit
// into a variable of size base_type
typedef unsigned char base_type;
base_type na_to_bits(const char &m_base);
class SeqOverlap{
public:
typedef enum {SmithWaterman, Overlap} AlignmentMode;
private:
struct SO_Elem{
// Use the notation of "Biological sequence analysis" by Durbin, Eddy, Krogh and Mitchison
sse_elem M;
#ifdef ALLOW_GAPS
sse_elem I_query; // insertion in query
sse_elem I_target; // insertion in target
#endif // ALLOW_GAPS
// The actual alignment is not needed, just the score and the
// coordinates of the begining and end of the alignment. For this purpose, propagate
// the alignment start during the dynamic programming.
// The starting row and column of the alignment
sse_elem M_start_i;
#ifdef INCLUDE_TARGET_RANGE
sse_elem M_start_j;
#endif // INCLUDE_TARGET_RANGE
#ifdef ALLOW_GAPS
sse_elem I_query_start_i;
sse_elem I_target_start_i;
#ifdef INCLUDE_TARGET_RANGE
sse_elem I_query_start_j;
sse_elem I_target_start_j;
#endif // INCLUDE_TARGET_RANGE
#endif // ALLOW_GAPS
};
AlignmentMode mode;
SO_Elem *last_row;
SO_Elem *curr_row;
SO_Elem max_elem;
size_t dp_num_elem;
// The location of the max element in the *query*
sse_elem stop_i;
#ifdef INCLUDE_TARGET_RANGE
// The location of the max element in the *target*
sse_elem stop_j;
#endif // INCLUDE_TARGET_RANGE
sse_elem match;
#ifdef TREAT_N_AS_MASK
sse_elem mask;
#endif // TREAT_N_AS_MASK
sse_elem mismatch;
#ifdef ALLOW_GAPS
sse_elem gap_existance;
sse_elem gap_extension;
#endif // ALLOW_GAPS
// The input sequences in 5'-3' orientation
sse_elem *query;
sse_elem *target;
sse_elem query_len;
sse_elem target_len;
// The maximum query and target length for the
// currently packed sequences.
SO_Score max_query_len;
SO_Score max_target_len;
// The current number of elements allocated to the query and target arrays.
// We only reallocate when we need to *grow* these arrays (so these values may
// be different from max_query_len and max_target_len, which dynamically grow
// and shrink as needed).
unsigned int query_buffer_len;
unsigned int target_buffer_len;
#ifdef TREAT_N_AS_MASK
// If mask_N_na == true, then treat 'N' as a masking character that gets a score of mask
bool mask_N_na;
#endif // TREAT_N_AS_MASK
void reverse_complement(std::vector<base_type> &m_seq) const
{
size_t len = m_seq.size();
len = len/2 + (len%2);
std::vector<base_type>::iterator f = m_seq.begin();
std::vector<base_type>::reverse_iterator r = m_seq.rbegin();
for(size_t i = 0;i < len;i++, f++, r++){
const base_type tmp = complement(*f);
*f = complement(*r);
*r = tmp;
}
};
inline base_type complement(const base_type &m_base) const
{
switch(m_base){
case NA::A:
return NA::T;
case NA::C:
return NA::G;
case NA::G:
return NA::C;
case NA::T:
return NA::A;
case NA::M:
return NA::K;
case NA::R:
return NA::Y;
case NA::S:
return NA::S;
case NA::V:
return NA::B;
case NA::W:
return NA::W;
case NA::Y:
return NA::R;
case NA::H:
return NA::D;
case NA::K:
return NA::M;
case NA::D:
return NA::H;
case NA::B:
return NA::V;
case NA::N:
return NA::N;
case NA::GAP:
return NA::GAP;
};
throw __FILE__ ":complement: Unknown base";
return NA::GAP; // Keep the compiler happy
};
void align_overlap();
void align_smith_waterman();
SO_Score trace_back();
public:
SeqOverlap(const AlignmentMode &m_mode);
~SeqOverlap()
{
if(query != NULL){
_mm_free(query);
}
if(target != NULL){
_mm_free(target);
}
if(last_row != NULL){
_mm_free(last_row);
}
if(curr_row != NULL){
_mm_free(curr_row);
}
};
void align()
{
switch(mode){
case Overlap:
align_overlap();
break;
case SmithWaterman:
align_smith_waterman();
break;
default:
throw __FILE__ ":align: Unknown mode";
};
}
inline void set_mode(const AlignmentMode &m_mode)
{
mode = m_mode;
};
#ifdef TREAT_N_AS_MASK
inline void enable_mask_N_na()
{
mask_N_na = true;
};
inline void disable_mask_N_na()
{
mask_N_na = false;
};
#endif // TREAT_N_AS_MASK
// Pack the supplied query into slot m_index
inline void pack_query(const unsigned int &m_index, const std::string &m_query)
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":pack_query: m_index >= SO_LEN";
}
#endif // _DEBUG
const unsigned int len = m_query.size();
if( ( len > query_buffer_len ) || (query == NULL) ){
// We need to allocate a new query buffer
sse_elem *temp = (sse_elem*)_mm_malloc(len*sizeof(sse_elem), SSE_ALIGNMENT_SIZE);
if(query != NULL){
// Copy the contents of the current query buffer into the new query buffer
memcpy( temp, query, query_buffer_len*sizeof(sse_elem) );
_mm_free(query);
}
query_buffer_len = len;
query = temp;
}
for(unsigned int i = 0;i < len;i++){
query[i].v[m_index] = na_to_bits(m_query[i]);
}
query_len.v[m_index] = len;
// Recompute the maximum query length in case m_index *used* to store the largest sequence,
// in which case we will need to shrink max_query_len.
max_query_len = 0;
for(unsigned int i = 0;i < SO_LEN;i++){
max_query_len = std::max(max_query_len, query_len.v[i]);
}
};
// Pack the supplied query into *all* slots
inline void pack_query(const std::string &m_query)
{
const unsigned int len = m_query.size();
if( ( len > query_buffer_len ) || (query == NULL) ){
// We need to allocate a new query buffer
sse_elem *temp = (sse_elem*)_mm_malloc(len*sizeof(sse_elem), SSE_ALIGNMENT_SIZE);
if(query != NULL){
// Copy the contents of the current query buffer into the new query buffer
memcpy( temp, query, query_buffer_len*sizeof(sse_elem) );
_mm_free(query);
}
query_buffer_len = len;
query = temp;
}
// Update the maximum query length
max_query_len = len;
for(unsigned int i = 0;i < len;i++){
query[i].sse = _MM_SET1( na_to_bits(m_query[i]) );
}
query_len.sse = _MM_SET1(len);
};
// Clear all queries in all slots
inline void clear_query()
{
// Reset the maximum query length
max_query_len = 0;
query_len.sse = _MM_SET1(0);
};
// Clear the query in slot m_index
inline void clear_query(const unsigned int &m_index)
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":clear_query: m_index >= SO_LEN";
}
#endif // _DEBUG
query_len.v[m_index] = 0;
// Recompute the maximum query length
max_query_len = 0;
for(unsigned int i = 0;i < SO_LEN;i++){
max_query_len = std::max(max_query_len, query_len.v[i]);
}
};
// Pack the supplied target into slot m_index
inline void pack_target(const unsigned int &m_index, const std::string &m_target)
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":pack_target: m_index >= SO_LEN";
}
#endif // _DEBUG
const unsigned int len = m_target.size();
if( ( len > target_buffer_len ) || (target == NULL) ){
// We need to allocate a new query buffer
sse_elem *temp = (sse_elem*)_mm_malloc(len*sizeof(sse_elem), SSE_ALIGNMENT_SIZE);
if(target != NULL){
// Copy the contents of the current query buffer into the new query buffer
memcpy( temp, target, target_buffer_len*sizeof(sse_elem) );
_mm_free(target);
}
target_buffer_len = len;
target = temp;
}
for(unsigned int i = 0;i < len;i++){
target[i].v[m_index] = na_to_bits(m_target[i]);
}
target_len.v[m_index] = len;
// Recompute the maximum target length in case m_index *used* to store the largest sequence,
// in which case we will need to shrink max_target_len.
max_target_len = 0;
for(unsigned int i = 0;i < SO_LEN;i++){
max_target_len = std::max(max_target_len, target_len.v[i]);
}
};
// Pack the supplied target into *all* slots
inline void pack_target(const std::string &m_target)
{
const unsigned int len = m_target.size();
if( ( len > target_buffer_len ) || (target == NULL) ){
// We need to allocate a new query buffer
sse_elem *temp = (sse_elem*)_mm_malloc(len*sizeof(sse_elem), SSE_ALIGNMENT_SIZE);
if(target != NULL){
// Copy the contents of the current query buffer into the new query buffer
memcpy( temp, target, target_buffer_len*sizeof(sse_elem) );
_mm_free(target);
}
target_buffer_len = len;
target = temp;
}
// Update the maximum target length
max_target_len = len;
for(unsigned int i = 0;i < len;i++){
target[i].sse = _MM_SET1( na_to_bits(m_target[i]) );
}
target_len.sse = _MM_SET1(len);
};
// Clear all targets in all slots
inline void clear_target()
{
// Reset the maximum target length
max_target_len = 0;
target_len.sse = _MM_SET1(0);
};
// Clear the target in slot m_index
inline void clear_target(const unsigned int &m_index)
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":clear_target: m_index >= SO_LEN";
}
#endif // _DEBUG
target_len.v[m_index] = 0;
// Recompute the maximum target length
max_target_len = 0;
for(unsigned int i = 0;i < SO_LEN;i++){
max_target_len = std::max(max_target_len, target_len.v[i]);
}
};
// The coordinates of the first and last aligned base in the query
inline std::pair<int, int> alignment_range_query(const unsigned int &m_index) const
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":alignment_range_query: Index out of bounds";
}
#endif // _DEBUG
return std::make_pair(max_elem.M_start_i.v[m_index], stop_i.v[m_index]);
};
#ifdef INCLUDE_TARGET_RANGE
// The coordinates of the first and last aligned base in the query
inline std::pair<int, int> alignment_range_target(const unsigned int &m_index) const
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":alignment_range_target: Index out of bounds";
}
#endif // _DEBUG
return std::make_pair(max_elem.M_start_j.v[m_index], stop_j.v[m_index]);
};
inline void alignment_range(std::pair<int, int> &m_query_range,
std::pair<int, int> &m_target_range, const unsigned int &m_index) const
{
m_query_range = alignment_range_query(m_index);
m_target_range = alignment_range_target(m_index);
};
#endif // INCLUDE_TARGET_RANGE
inline SO_Score score(const unsigned int &m_index) const
{
#ifdef _DEBUG
if(m_index >= SO_LEN){
throw __FILE__ ":score: Index out of bounds";
}
#endif // _DEBUG
return max_elem.M.v[m_index];
};
};
} // namespace::SO
#endif // __SEQ_OVERLAP