Skip to content

Commit

Permalink
blackify; rename to pyIsland
Browse files (browse the repository at this point in the history)
  • Loading branch information
sean-bam committed Feb 21, 2024
1 parent ec4dbbf commit 82d0079
Show file tree
Hide file tree
Showing 16 changed files with 421 additions and 382 deletions.
3 changes: 2 additions & 1 deletion PyIsland/Drivers/hhsuite.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,6 +67,7 @@ def get_hhsuite_neff(msa):
neff = float(p1.stdout.strip().split()[10])
return round(neff, 2)


def align_clusters_with_hhalign(input_dir, output_dir, df_clustering):
"""
accepts a table of profile, length and cluster assignment
Expand Down Expand Up @@ -115,4 +116,4 @@ def align_clusters_with_hhalign(input_dir, output_dir, df_clustering):
f"hhalign -i {query_profile} {templates} -M 50 -glob -id 100 -diff inf -oa3m {output_a3m}",
shell=True,
check=True,
)
)
33 changes: 20 additions & 13 deletions PyIsland/Drivers/mmseqs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
import tempfile

from pyIsland import Parsers
from PyIsland import Parsers


def run_mmseqs_cls(
Expand All @@ -26,7 +26,7 @@ def run_mmseqs_cls(

# seqdb = Path(f'{dbname}seqDB')
# clsdb = Path(f'{dbname}clsDB')
tmp_dir = tempfile.TemporaryDirectory(dir='.')
tmp_dir = tempfile.TemporaryDirectory(dir=".")
tmp_dir_path = Path(tmp_dir.name)
tsv = Path(tsv)

Expand Down Expand Up @@ -62,21 +62,21 @@ def run_mmseqs_cls(
# file.unlink()
# except IsADirectoryError:
# pass
#shutil.rmtree("tmpmmseqsdb/")

# shutil.rmtree("tmpmmseqsdb/")


def get_reps_from_clustering(seqDB, clsDB, fasta):

tmp_dir = tempfile.TemporaryDirectory(dir='.')
tmp_dir = tempfile.TemporaryDirectory(dir=".")
tmp_dir_path = Path(tmp_dir.name)
repdb = tmp_dir_path / Path('repDB')
repdb = tmp_dir_path / Path("repDB")

subprocess.run(
f"mmseqs createsubdb {clsDB} {seqDB} {repdb}",
shell=True,
check=True,
stdout=subprocess.DEVNULL,
)

subprocess.run(
f"mmseqs convert2fasta {repdb} {fasta}",
shell=True,
Expand All @@ -85,7 +85,6 @@ def get_reps_from_clustering(seqDB, clsDB, fasta):
)



def cls2pro(
seqDB, clsDB, proDB, evalue=0.001, qid=0.0, qsc=-20, diff=1000, max_seq_id=0.9
):
Expand All @@ -94,11 +93,17 @@ def cls2pro(
)

subprocess.run(
f"mmseqs createsubdb {clsDB} {seqDB} {clsDB}rep", shell=True, check=True, stdout=subprocess.DEVNULL
f"mmseqs createsubdb {clsDB} {seqDB} {clsDB}rep",
shell=True,
check=True,
stdout=subprocess.DEVNULL,
)

subprocess.run(
f"mmseqs createsubdb {clsDB} {seqDB}_h {clsDB}rep_h", shell=True, check=True, stdout=subprocess.DEVNULL
f"mmseqs createsubdb {clsDB} {seqDB}_h {clsDB}rep_h",
shell=True,
check=True,
stdout=subprocess.DEVNULL,
)

subprocess.run(
Expand Down Expand Up @@ -298,7 +303,9 @@ def get_missing_seqs_as_fasta_using_tsv(db1, tsv, output):
assert db1_lookup.is_file(), f"Can't find {db1_lookup}"

# get headers in DB1 missing from DB2
db1_dict = Parsers.mmseqs.get_mmseqs_internal_ids_as_dict(db1_lookup) # internal id : header
db1_dict = Parsers.mmseqs.get_mmseqs_internal_ids_as_dict(
db1_lookup
) # internal id : header
tsv_dict = Parsers.mmseqs.mmseqs_tsv_to_dict(tsv) # member : representative

missing_ids = db1_dict.values() - tsv_dict.keys()
Expand Down
Loading

0 comments on commit 82d0079

Please sign in to comment.