Format with black; rename package import to PyIsland

sean-bam · Feb 21, 2024 · 82d0079
1 parent ec4dbbf
commit 82d0079
Show file tree

Hide file tree

Showing 16 changed files with 421 additions and 382 deletions.
diff --git a/PyIsland/Drivers/hhsuite.py b/PyIsland/Drivers/hhsuite.py
@@ -67,6 +67,7 @@ def get_hhsuite_neff(msa):
     neff = float(p1.stdout.strip().split()[10])
     return round(neff, 2)
 
+
 def align_clusters_with_hhalign(input_dir, output_dir, df_clustering):
     """
     accepts a table of profile, length and cluster assignment
@@ -115,4 +116,4 @@ def align_clusters_with_hhalign(input_dir, output_dir, df_clustering):
                 f"hhalign -i {query_profile} {templates} -M 50 -glob -id 100 -diff inf -oa3m {output_a3m}",
                 shell=True,
                 check=True,
-            )
+            )
diff --git a/PyIsland/Drivers/mmseqs.py b/PyIsland/Drivers/mmseqs.py
@@ -3,7 +3,7 @@
 import shutil
 import tempfile
 
-from pyIsland import Parsers
+from PyIsland import Parsers
 
 
 def run_mmseqs_cls(
@@ -26,7 +26,7 @@ def run_mmseqs_cls(
 
     # seqdb = Path(f'{dbname}seqDB')
     # clsdb = Path(f'{dbname}clsDB')
-    tmp_dir = tempfile.TemporaryDirectory(dir='.')
+    tmp_dir = tempfile.TemporaryDirectory(dir=".")
     tmp_dir_path = Path(tmp_dir.name)
     tsv = Path(tsv)
 
@@ -62,21 +62,21 @@ def run_mmseqs_cls(
     #        file.unlink()
     #    except IsADirectoryError:
     #        pass
-    #shutil.rmtree("tmpmmseqsdb/")
-
+    # shutil.rmtree("tmpmmseqsdb/")
+
+
 def get_reps_from_clustering(seqDB, clsDB, fasta):
-
-    tmp_dir = tempfile.TemporaryDirectory(dir='.')
+    tmp_dir = tempfile.TemporaryDirectory(dir=".")
     tmp_dir_path = Path(tmp_dir.name)
-    repdb = tmp_dir_path / Path('repDB')
-    
+    repdb = tmp_dir_path / Path("repDB")
+
     subprocess.run(
         f"mmseqs createsubdb {clsDB} {seqDB} {repdb}",
         shell=True,
         check=True,
         stdout=subprocess.DEVNULL,
     )
-    
+
     subprocess.run(
         f"mmseqs convert2fasta {repdb} {fasta}",
         shell=True,
@@ -85,7 +85,6 @@ def get_reps_from_clustering(seqDB, clsDB, fasta):
     )
 
 
-
 def cls2pro(
     seqDB, clsDB, proDB, evalue=0.001, qid=0.0, qsc=-20, diff=1000, max_seq_id=0.9
 ):
@@ -94,11 +93,17 @@ def cls2pro(
     )
 
     subprocess.run(
-        f"mmseqs createsubdb {clsDB} {seqDB} {clsDB}rep", shell=True, check=True, stdout=subprocess.DEVNULL
+        f"mmseqs createsubdb {clsDB} {seqDB} {clsDB}rep",
+        shell=True,
+        check=True,
+        stdout=subprocess.DEVNULL,
     )
 
     subprocess.run(
-        f"mmseqs createsubdb {clsDB} {seqDB}_h {clsDB}rep_h", shell=True, check=True, stdout=subprocess.DEVNULL
+        f"mmseqs createsubdb {clsDB} {seqDB}_h {clsDB}rep_h",
+        shell=True,
+        check=True,
+        stdout=subprocess.DEVNULL,
     )
 
     subprocess.run(
@@ -298,7 +303,9 @@ def get_missing_seqs_as_fasta_using_tsv(db1, tsv, output):
     assert db1_lookup.is_file(), f"Can't find {db1_lookup}"
 
     # get headers in DB1 missing from DB2
-    db1_dict = Parsers.mmseqs.get_mmseqs_internal_ids_as_dict(db1_lookup)  # internal id : header
+    db1_dict = Parsers.mmseqs.get_mmseqs_internal_ids_as_dict(
+        db1_lookup
+    )  # internal id : header
     tsv_dict = Parsers.mmseqs.mmseqs_tsv_to_dict(tsv)  # member : representative
 
     missing_ids = db1_dict.values() - tsv_dict.keys()