Merge pull request #6 from gavinmdouglas/Prep_for_conda

gavinmdouglas · web-flow · commit 450b0b60f39f · 2019-04-03T12:04:30.000+03:00
Updated to be compatible with QIIME 2 2019.1
diff --git a/q2_picrust2/_custom_tree_pipeline.py b/q2_picrust2/_custom_tree_pipeline.py
@@ -12,7 +12,6 @@ def custom_tree_pipeline(table: biom.Table,
                          threads: int = 1,
                          hsp_method: str = "mp",
                          max_nsti: float = 2.0)  -> (biom.Table,
-                                                     biom.Table,
                                                      biom.Table,
                                                      biom.Table):
 
@@ -34,50 +33,59 @@ def custom_tree_pipeline(table: biom.Table,
 
         # Run hidden-state prediction step (on 16S, EC, and KO tables
         # separately.
-        hsp_out_16S = path.join(picrust2_out, "16S_predicted")
-        system_call_check("hsp.py -i 16S -t " + newick_infile + " -p 1 -n "
-                          "-o " + hsp_out_16S + " -m " + hsp_method)
-
-        hsp_out_EC = path.join(picrust2_out, "EC_predicted")
-        system_call_check("hsp.py -i EC -t " + newick_infile + " -p " +
-                          str(threads) + " -o " + hsp_out_EC + " -m " +
-                          hsp_method)
-
-        hsp_out_KO = path.join(picrust2_out, "KO_predicted")
-        system_call_check("hsp.py -i KO -t " + newick_infile + " -p " +
-                          str(threads) + " -o " + hsp_out_KO + " -m " +
-                          hsp_method)
+        hsp_out_16S = path.join(picrust2_out, "16S_predicted.tsv")
+        system_call_check("hsp.py -i 16S " +
+                          " -t " + newick_infile +
+                          " -p 1 " +
+                          " -n " +
+                          "-o " + hsp_out_16S +
+                          " -m " + hsp_method)
+
+        hsp_out_EC = path.join(picrust2_out, "EC_predicted.tsv")
+        system_call_check("hsp.py -i EC " +
+                          " -t " + newick_infile +
+                          " -p " + str(threads) +
+                          " -o " + hsp_out_EC +
+                          " -m " + hsp_method)
+
+        hsp_out_KO = path.join(picrust2_out, "KO_predicted.tsv")
+        system_call_check("hsp.py -i KO " + 
+                          " -t " + newick_infile +
+                          " -p " + str(threads) +
+                          " -o " + hsp_out_KO +
+                          " -m " + hsp_method)
 
         # Run metagenome pipeline step.
         EC_metagenome_out = path.join(picrust2_out, "EC_metagenome_out")
-        system_call_check("metagenome_pipeline.py -i " + biom_infile + " -m " +
-                          hsp_out_16S + ".tsv -f " + hsp_out_EC + ".tsv -p " +
-                          str(threads) + " -o " + EC_metagenome_out +
+        system_call_check("metagenome_pipeline.py -i " + biom_infile +
+                          " -m " + hsp_out_16S +
+                          " -f " + hsp_out_EC +
+                          " -o " + EC_metagenome_out +
                           " --max_nsti " + str(max_nsti))
 
         KO_metagenome_out = path.join(picrust2_out, "KO_metagenome_out")
-        system_call_check("metagenome_pipeline.py -i " + biom_infile + " -m " +
-                          hsp_out_16S + ".tsv -f " + hsp_out_KO + ".tsv -p " +
-                          str(threads) + " -o " + KO_metagenome_out +
+        system_call_check("metagenome_pipeline.py -i " + biom_infile +
+                          " -m " + hsp_out_16S +
+                          " -f " + hsp_out_KO +
+                          " -o " + KO_metagenome_out +
                           " --max_nsti " + str(max_nsti))
 
         # Run pathway inference step.
         pathways_out = path.join(picrust2_out, "pathways_out")
 
         EC_out = path.join(EC_metagenome_out, "pred_metagenome_unstrat.tsv")
 
-        system_call_check("run_minpath.py -i " + EC_out + " -o " +
-                          pathways_out + " -p " + str(threads))
+        system_call_check("pathway_pipeline.py -i " + EC_out +
+                          " -o " + pathways_out +
+                          " -p " + str(threads))
 
         # Read in output unstratified metagenome tables and return as BIOM
         # objects.
         KO_out = path.join(KO_metagenome_out, "pred_metagenome_unstrat.tsv")
         pathabun_out = path.join(pathways_out, "path_abun_unstrat.tsv")
-        pathcov_out = path.join(pathways_out, "path_cov_unstrat.tsv")
 
         ko_biom = biom.load_table(KO_out)
         ec_biom = biom.load_table(EC_out)
         pathabun_biom = biom.load_table(pathabun_out)
-        pathcov_biom = biom.load_table(pathcov_out)
 
-        return ko_biom, ec_biom, pathabun_biom, pathcov_biom
+        return ko_biom, ec_biom, pathabun_biom
diff --git a/q2_picrust2/_full_pipeline.py b/q2_picrust2/_full_pipeline.py
@@ -7,51 +7,27 @@
 import subprocess
 import sys
 import picrust2.pipeline
-from picrust2.default import (default_fasta, default_tree, default_hmm,
-                              default_tables, default_map, default_regroup_map,
-                              default_pathway_map)
+from picrust2.default import (default_ref_dir, default_tables, default_map,
+                              default_regroup_map, default_pathway_map)
 
 def full_pipeline(table: biom.Table,
-                  seq : pd.Series,
+                  seq: pd.Series,
                   threads: int = 1,
                   hsp_method: str = "mp",
-                  max_nsti: float = 2.0)  -> (biom.Table,
-                                               biom.Table,
-                                               biom.Table,
-                                               biom.Table):
+                  max_nsti: float = 2.0) -> (biom.Table,
+                                             biom.Table,
+                                             biom.Table):
 
-    # Check whether EPA-NG and GAPPA are installed. Exit with explanation if
-    # not.
-    missing_prog = []
-    try:
-        subprocess.check_call(['epa-ng', '--help'])
-    except OSError:
-        missing_prog.append("epa-ng")
-
-    try:
-        subprocess.check_call(['gappa', '--help'])
-    except OSError:
-        missing_prog.append("gappa")
-
-    if len(missing_prog) > 0:
-        sys.exit("Missing the following tools from your $PATH variable: " +
-                 " and ".join(missing_prog) + ". Note that this QIIME2 " +
-                 "command is for running the default PICRUSt2 pipeline, " +
-                 "which includes steps for running sequence placement with " +
-                 "EPA-NG. If you do not want to install the missing programs " +
-                 "you can run sequence placement with q2-fragment-insertion " +
-                 "and input this tree to the \"qiime picrust2 " +
-                 "custom_tree_pipeline\" command")
-
-    # Need to write out BIOM table and fasta to be used in pipeline.
+    # Write out BIOM table and FASTA to be used in pipeline.
     with TemporaryDirectory() as temp_dir:
-            
-        # Write out biom table:
+
+        # Write out BIOM table:
         biom_infile = path.join(temp_dir, "intable.biom")
         with biom.util.biom_open(biom_infile, 'w') as out_biom:  
-            table.to_hdf5(h5grp=out_biom, generated_by="PICRUSt2 QIIME2 Plugin")
+            table.to_hdf5(h5grp=out_biom,
+                          generated_by="PICRUSt2 QIIME2 Plugin")
 
-        # Write out Pandas series as fasta:
+        # Write out Pandas series as FASTA:
         seq_outfile = path.join(temp_dir, "seqs.fna")
 
         with open(seq_outfile, "w") as outfile_fh:
@@ -65,9 +41,7 @@ def full_pipeline(table: biom.Table,
                                                                         input_table=biom_infile,
                                                                         output_folder=picrust2_out,
                                                                         threads=threads,
-                                                                        ref_msa=default_fasta,
-                                                                        tree=default_tree,
-                                                                        hmm=default_hmm,
+                                                                        ref_dir=default_ref_dir,
                                                                         in_traits="EC,KO",
                                                                         custom_trait_tables=None,
                                                                         marker_gene_table=default_tables["16S"],
@@ -76,23 +50,22 @@ def full_pipeline(table: biom.Table,
                                                                         regroup_map=default_regroup_map,
                                                                         no_regroup=False,
                                                                         stratified=False,
-                                                                        alignment_tool="hmmalign",
                                                                         max_nsti=max_nsti,
                                                                         min_reads=1,
                                                                         min_samples=1,
                                                                         hsp_method=hsp_method,
-                                                                        calculate_NSTI=True,
-                                                                        confidence=False,
-                                                                        seed=198,
+                                                                        skip_nsti=False,
                                                                         no_gap_fill=False,
+                                                                        skip_minpath=False,
+                                                                        coverage=False,
                                                                         per_sequence_contrib=False,
-                                                                        no_descrip=True,
-                                                                        verbose=False)
+                                                                        verbose=True)
 
-        # Convert the returned unstratified tables to biom tables.
-        ko_biom = biom.load_table(func_outputs["KO"])
-        ec_biom = biom.load_table(func_outputs["EC"])
+        # Convert the returned unstratified tables to BIOM tables.
+        # Note that element 0 of each func_outputs entry is the path to the
+        # corresponding unstratified table.
+        ko_biom = biom.load_table(func_outputs["KO"][0])
+        ec_biom = biom.load_table(func_outputs["EC"][0])
         pathabun_biom = biom.load_table(pathway_outputs["unstrat_abun"])
-        pathcov_biom = biom.load_table(pathway_outputs["unstrat_cov"])
 
-        return ko_biom, ec_biom, pathabun_biom, pathcov_biom
+        return ko_biom, ec_biom, pathabun_biom
diff --git a/q2_picrust2/plugin_setup.py b/q2_picrust2/plugin_setup.py
@@ -13,7 +13,7 @@
 
 plugin = Plugin(
     name='picrust2',
-    version="0.0.0",
+    version="0.0.3",
     website='https://github.com/gavinmdouglas/q2-picrust2',
     package='q2_picrust2',
     description=('This QIIME 2 plugin wraps the default 16S PICRUSt2 pipeline to run '
@@ -32,12 +32,9 @@
                 'hsp_method': Str % Choices(HSP_METHODS),
                 'max_nsti': Float % Range(0.0, None)},
 
-    outputs=[
-       ('ko_metagenome', FeatureTable[Frequency]),
-       ('ec_metagenome', FeatureTable[Frequency]),
-       ('pathway_abundance', FeatureTable[Frequency]),
-       ('pathway_coverage', FeatureTable[Frequency])
-    ],
+    outputs=[('ko_metagenome', FeatureTable[Frequency]),
+             ('ec_metagenome', FeatureTable[Frequency]),
+             ('pathway_abundance', FeatureTable[Frequency])],
 
     input_descriptions={
         'table': ('The feature table containing sequence abundances per sample.'),
@@ -53,8 +50,7 @@
 
     output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
                          'ec_metagenome': 'Predicted metagenome for E.C. numbers',
-                         'pathway_abundance': 'Predicted MetaCyc pathway abundances',
-                         'pathway_coverage': 'Predicted MetaCyc pathway coverages'},
+                         'pathway_abundance': 'Predicted MetaCyc pathway abundances'},
 
     name='Default 16S PICRUSt2 Pipeline',
 
@@ -77,8 +73,7 @@
     outputs=[
        ('ko_metagenome', FeatureTable[Frequency]),
        ('ec_metagenome', FeatureTable[Frequency]),
-       ('pathway_abundance', FeatureTable[Frequency]),
-       ('pathway_coverage', FeatureTable[Frequency])
+       ('pathway_abundance', FeatureTable[Frequency])
     ],
 
     input_descriptions={
@@ -94,8 +89,7 @@
 
     output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
                          'ec_metagenome': 'Predicted metagenome for E.C. numbers',
-                         'pathway_abundance': 'Predicted MetaCyc pathway abundances',
-                         'pathway_coverage': 'Predicted MetaCyc pathway coverages'},
+                         'pathway_abundance': 'Predicted MetaCyc pathway abundances'},
 
     name='16S PICRUSt2 pipeline with custom tree',
 
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="q2-picrust2",
-    version="0.0.2",
+    version="0.0.3",
     packages=find_packages(),
     package_data={'q2_picrust2': ['citations.bib']},
     author="Gavin Douglas",