Skip to content

Commit 8dcb847

Browse files
committed
Fix consensus seq code to suppress warn msg
1 parent 638d1e4 commit 8dcb847

File tree

1 file changed

+42
-2
lines changed

1 file changed

+42
-2
lines changed

src/pymsaviz/msaviz.py

+42-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
import matplotlib.pyplot as plt
1212
from Bio import AlignIO
13-
from Bio.Align.AlignInfo import SummaryInfo
1413
from Bio.AlignIO import MultipleSeqAlignment as MSA
1514
from Bio.Phylo.BaseTree import Tree
1615
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
@@ -100,7 +99,7 @@ def __init__(
10099
self._msa: MSA = AlignIO.read(msa, format)
101100
if sort:
102101
self._msa = self._sorted_msa_by_njtree(self._msa)
103-
self._consensus_seq = str(SummaryInfo(self._msa).dumb_consensus(threshold=0))
102+
self._consensus_seq = self._get_consensus_seq(self._msa)
104103
self._color_scheme_name = color_scheme
105104

106105
# Check & Set start, end position
@@ -613,6 +612,47 @@ def _plot_consensus(
613612
color_list = self._get_interpolate_colors(self._consensus_color, ident_list)
614613
ax.bar(xticks, ident_list, width=1, color=color_list, ec="white", lw=0.5)
615614

615+
def _get_consensus_seq(self, msa: MSA) -> str:
616+
"""Get consensus sequence
617+
618+
Parameters
619+
----------
620+
msa : MSA
621+
Multiple sequence alignment
622+
623+
Returns
624+
-------
625+
consensus_seq : str
626+
Consensus suquence
627+
"""
628+
consensus_seq = ""
629+
ambiguous_char = "X"
630+
aln_len = msa.get_alignment_length()
631+
632+
for idx in range(aln_len):
633+
chars = ""
634+
for record in self._msa:
635+
char = str(record.seq)[idx]
636+
if char != "-" and char != ".":
637+
chars += str(record.seq)[idx]
638+
if len(chars) == 0:
639+
consensus_seq += ambiguous_char
640+
continue
641+
642+
char2count = Counter(chars)
643+
most_freq_chars = []
644+
most_freq_count = char2count.most_common()[0][1]
645+
for char, count in char2count.most_common():
646+
if count == most_freq_count:
647+
most_freq_chars.append(char)
648+
649+
if len(most_freq_chars) == 1:
650+
consensus_seq += most_freq_chars[0]
651+
else:
652+
consensus_seq += ambiguous_char
653+
654+
return consensus_seq
655+
616656
def _get_consensus_identity_list(
617657
self, start: int | None = None, end: int | None = None
618658
) -> list[float]:

0 commit comments

Comments
 (0)