|
10 | 10 |
|
11 | 11 | import matplotlib.pyplot as plt
|
12 | 12 | from Bio import AlignIO
|
13 |
| -from Bio.Align.AlignInfo import SummaryInfo |
14 | 13 | from Bio.AlignIO import MultipleSeqAlignment as MSA
|
15 | 14 | from Bio.Phylo.BaseTree import Tree
|
16 | 15 | from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
|
@@ -100,7 +99,7 @@ def __init__(
|
100 | 99 | self._msa: MSA = AlignIO.read(msa, format)
|
101 | 100 | if sort:
|
102 | 101 | self._msa = self._sorted_msa_by_njtree(self._msa)
|
103 |
| - self._consensus_seq = str(SummaryInfo(self._msa).dumb_consensus(threshold=0)) |
| 102 | + self._consensus_seq = self._get_consensus_seq(self._msa) |
104 | 103 | self._color_scheme_name = color_scheme
|
105 | 104 |
|
106 | 105 | # Check & Set start, end position
|
@@ -613,6 +612,47 @@ def _plot_consensus(
|
613 | 612 | color_list = self._get_interpolate_colors(self._consensus_color, ident_list)
|
614 | 613 | ax.bar(xticks, ident_list, width=1, color=color_list, ec="white", lw=0.5)
|
615 | 614 |
|
| 615 | + def _get_consensus_seq(self, msa: MSA) -> str: |
| 616 | + """Get consensus sequence |
| 617 | +
|
| 618 | + Parameters |
| 619 | + ---------- |
| 620 | + msa : MSA |
| 621 | + Multiple sequence alignment |
| 622 | +
|
| 623 | + Returns |
| 624 | + ------- |
| 625 | + consensus_seq : str |
| 626 | + Consensus suquence |
| 627 | + """ |
| 628 | + consensus_seq = "" |
| 629 | + ambiguous_char = "X" |
| 630 | + aln_len = msa.get_alignment_length() |
| 631 | + |
| 632 | + for idx in range(aln_len): |
| 633 | + chars = "" |
| 634 | + for record in self._msa: |
| 635 | + char = str(record.seq)[idx] |
| 636 | + if char != "-" and char != ".": |
| 637 | + chars += str(record.seq)[idx] |
| 638 | + if len(chars) == 0: |
| 639 | + consensus_seq += ambiguous_char |
| 640 | + continue |
| 641 | + |
| 642 | + char2count = Counter(chars) |
| 643 | + most_freq_chars = [] |
| 644 | + most_freq_count = char2count.most_common()[0][1] |
| 645 | + for char, count in char2count.most_common(): |
| 646 | + if count == most_freq_count: |
| 647 | + most_freq_chars.append(char) |
| 648 | + |
| 649 | + if len(most_freq_chars) == 1: |
| 650 | + consensus_seq += most_freq_chars[0] |
| 651 | + else: |
| 652 | + consensus_seq += ambiguous_char |
| 653 | + |
| 654 | + return consensus_seq |
| 655 | + |
616 | 656 | def _get_consensus_identity_list(
|
617 | 657 | self, start: int | None = None, end: int | None = None
|
618 | 658 | ) -> list[float]:
|
|
0 commit comments