Skip to content

Commit 3f78a7e

Browse files
committed
Bumped for 2019.10
1 parent b49dcd9 commit 3f78a7e

File tree

3 files changed

+169
-71
lines changed

3 files changed

+169
-71
lines changed

q2_picrust2/_custom_tree_pipeline.py

Lines changed: 92 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
1-
import qiime2
21
import skbio
32
import biom
43
from os import path
54
import sys
6-
import pandas as pd
75
from tempfile import TemporaryDirectory
8-
from q2_types.feature_table import FeatureTable, Frequency
96
from picrust2.util import system_call_check
107

8+
119
def custom_tree_pipeline(table: biom.Table,
1210
tree: skbio.TreeNode,
1311
threads: int = 1,
1412
hsp_method: str = "mp",
15-
max_nsti: float = 2.0) -> (biom.Table,
16-
biom.Table,
17-
biom.Table):
13+
max_nsti: float = 2.0,
14+
skip_minpath: bool = False,
15+
no_gap_fill: bool = False,
16+
skip_norm: bool = False,
17+
highly_verbose: bool = False) -> (biom.Table,
18+
biom.Table,
19+
biom.Table):
1820

1921
# Run pipeline in temporary directory so that files are not saved locally.
2022
with TemporaryDirectory() as temp_dir:
@@ -23,8 +25,9 @@ def custom_tree_pipeline(table: biom.Table,
2325

2426
# Write out biom table:
2527
biom_infile = path.join(temp_dir, "intable.biom")
26-
with biom.util.biom_open(biom_infile, 'w') as out_biom:
27-
table.to_hdf5(h5grp=out_biom, generated_by="PICRUSt2 QIIME2 Plugin")
28+
with biom.util.biom_open(biom_infile, 'w') as out_biom:
29+
table.to_hdf5(h5grp=out_biom,
30+
generated_by="PICRUSt2 QIIME 2 Plugin")
2831

2932
# Write out newick tree.
3033
newick_infile = path.join(temp_dir, "placed_seqs.tre")
@@ -37,57 +40,102 @@ def custom_tree_pipeline(table: biom.Table,
3740
# Run hidden-state prediction step (on 16S, EC, and KO tables
3841
# separately).
3942
hsp_out_16S = path.join(picrust2_out, "16S_predicted.tsv.gz")
40-
system_call_check("hsp.py -i 16S " +
41-
" -t " + newick_infile +
42-
" -p 1 " +
43-
" -n " +
44-
"-o " + hsp_out_16S +
45-
" -m " + hsp_method,
46-
print_out=True)
43+
hsp_out_16S_cmd = "hsp.py -i 16S " + \
44+
" -t " + newick_infile + \
45+
" -p 1 " + \
46+
" -n " + \
47+
" -o " + hsp_out_16S + \
48+
" -m " + hsp_method
4749

4850
hsp_out_EC = path.join(picrust2_out, "EC_predicted.tsv.gz")
49-
system_call_check("hsp.py -i EC " +
50-
" -t " + newick_infile +
51-
" -p " + str(threads) +
52-
" -o " + hsp_out_EC +
53-
" -m " + hsp_method,
54-
print_out=True)
51+
hsp_out_EC_cmd = "hsp.py -i EC " + \
52+
" -t " + newick_infile + \
53+
" -p " + str(threads) + \
54+
" -n " + \
55+
" -o " + hsp_out_EC + \
56+
" -m " + hsp_method
5557

5658
hsp_out_KO = path.join(picrust2_out, "KO_predicted.tsv.gz")
57-
system_call_check("hsp.py -i KO " +
58-
" -t " + newick_infile +
59-
" -p " + str(threads) +
60-
" -o " + hsp_out_KO +
61-
" -m " + hsp_method,
62-
print_out=True)
59+
hsp_out_KO_cmd = "hsp.py -i KO " + \
60+
" -t " + newick_infile + \
61+
" -p " + str(threads) + \
62+
" -n " + \
63+
" -o " + hsp_out_KO + \
64+
" -m " + hsp_method
65+
66+
if highly_verbose:
67+
hsp_out_16S_cmd += " --verbose"
68+
hsp_out_EC_cmd += " --verbose"
69+
hsp_out_KO_cmd += " --verbose"
70+
71+
if not skip_norm:
72+
system_call_check(hsp_out_16S_cmd,
73+
print_command=True,
74+
print_stdout=highly_verbose,
75+
print_stderr=True)
76+
77+
78+
system_call_check(hsp_out_EC_cmd,
79+
print_command=True,
80+
print_stdout=highly_verbose,
81+
print_stderr=True)
82+
83+
system_call_check(hsp_out_KO_cmd,
84+
print_command=True,
85+
print_stdout=highly_verbose,
86+
print_stderr=True)
6387

6488
# Run metagenome pipeline step.
6589
EC_metagenome_out = path.join(picrust2_out, "EC_metagenome_out")
66-
system_call_check("metagenome_pipeline.py -i " + biom_infile +
67-
" -m " + hsp_out_16S +
68-
" -f " + hsp_out_EC +
69-
" -o " + EC_metagenome_out +
70-
" --max_nsti " + str(max_nsti),
71-
print_out=True)
72-
7390
KO_metagenome_out = path.join(picrust2_out, "KO_metagenome_out")
74-
system_call_check("metagenome_pipeline.py -i " + biom_infile +
75-
" -m " + hsp_out_16S +
76-
" -f " + hsp_out_KO +
77-
" -o " + KO_metagenome_out +
78-
" --max_nsti " + str(max_nsti),
79-
print_out=True)
91+
92+
EC_metagenome_cmd = "metagenome_pipeline.py -i " + biom_infile + \
93+
" -f " + hsp_out_EC + \
94+
" -o " + EC_metagenome_out + \
95+
" --max_nsti " + str(max_nsti)
96+
97+
KO_metagenome_cmd = "metagenome_pipeline.py -i " + biom_infile + \
98+
" -f " + hsp_out_KO + \
99+
" -o " + KO_metagenome_out + \
100+
" --max_nsti " + str(max_nsti)
101+
102+
if skip_norm:
103+
EC_metagenome_cmd += " --skip_norm"
104+
KO_metagenome_cmd += " --skip_norm"
105+
else:
106+
EC_metagenome_cmd += " -m " + hsp_out_16S
107+
KO_metagenome_cmd += " -m " + hsp_out_16S
108+
109+
system_call_check(EC_metagenome_cmd, print_command=True,
110+
print_stdout=highly_verbose,
111+
print_stderr=True)
112+
system_call_check(KO_metagenome_cmd, print_command=True,
113+
print_stdout=highly_verbose,
114+
print_stderr=True)
80115

81116
EC_out = path.join(EC_metagenome_out, "pred_metagenome_unstrat.tsv.gz")
82117
KO_out = path.join(KO_metagenome_out, "pred_metagenome_unstrat.tsv.gz")
83118

84119
# Run pathway inference step.
85120
pathways_out = path.join(picrust2_out, "pathways_out")
86121
pathabun_out = path.join(pathways_out, "path_abun_unstrat.tsv.gz")
87-
system_call_check("pathway_pipeline.py -i " + EC_out +
88-
" -o " + pathways_out +
89-
" -p " + str(threads),
90-
print_out=True)
122+
123+
pathway_pipeline_cmd = "pathway_pipeline.py -i " + EC_out + \
124+
" -o " + pathways_out + \
125+
" -p " + str(threads)
126+
127+
if skip_minpath:
128+
pathway_pipeline_cmd += " --skip_minpath"
129+
130+
if no_gap_fill:
131+
pathway_pipeline_cmd += " --no_gap_fill"
132+
133+
if highly_verbose:
134+
pathway_pipeline_cmd += " --verbose"
135+
136+
system_call_check(pathway_pipeline_cmd, print_command=True,
137+
print_stdout=highly_verbose,
138+
print_stderr=True)
91139

92140
# Read in output unstratified metagenome tables and return as BIOM
93141
# objects.

q2_picrust2/_full_pipeline.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,34 @@
1-
import qiime2
21
import biom
32
from os import path
3+
import sys
44
import pandas as pd
55
from tempfile import TemporaryDirectory
6-
from q2_types.feature_table import FeatureTable, Frequency
7-
import subprocess
8-
import sys
96
import picrust2.pipeline
10-
from picrust2.default import (default_ref_dir, default_tables, default_map,
7+
from picrust2.default import (default_ref_dir, default_tables,
118
default_regroup_map, default_pathway_map)
129

10+
1311
def full_pipeline(table: biom.Table,
1412
seq: pd.Series,
1513
threads: int = 1,
1614
hsp_method: str = "mp",
17-
max_nsti: float = 2.0) -> (biom.Table,
18-
biom.Table,
19-
biom.Table):
15+
min_align: float = 0.8,
16+
max_nsti: float = 2.0,
17+
skip_minpath: bool = False,
18+
no_gap_fill: bool = False,
19+
skip_norm: bool = False,
20+
highly_verbose: bool = False) -> (biom.Table,
21+
biom.Table,
22+
biom.Table):
2023

2124
# Write out BIOM table and FASTA to be used in pipeline.
2225
with TemporaryDirectory() as temp_dir:
2326

2427
# Write out BIOM table:
2528
biom_infile = path.join(temp_dir, "intable.biom")
26-
with biom.util.biom_open(biom_infile, 'w') as out_biom:
29+
with biom.util.biom_open(biom_infile, 'w') as out_biom:
2730
table.to_hdf5(h5grp=out_biom,
28-
generated_by="PICRUSt2 QIIME2 Plugin")
31+
generated_by="PICRUSt2 QIIME 2 Plugin")
2932

3033
# Write out Pandas series as FASTA:
3134
seq_outfile = path.join(temp_dir, "seqs.fna")
@@ -55,15 +58,16 @@ def full_pipeline(table: biom.Table,
5558
min_reads=1,
5659
min_samples=1,
5760
hsp_method=hsp_method,
61+
min_align=min_align,
5862
skip_nsti=False,
59-
skip_minpath=False,
60-
no_gap_fill=False,
63+
skip_minpath=skip_minpath,
64+
no_gap_fill=no_gap_fill,
6165
coverage=False,
6266
per_sequence_contrib=False,
6367
wide_table=False,
64-
skip_norm=False,
68+
skip_norm=skip_norm,
6569
remove_intermediate=False,
66-
verbose=True)
70+
verbose=highly_verbose)
6771

6872
# Convert the returned unstratified tables to BIOM tables.
6973
# Note that the 0-index in the func table returned objects corresponds

q2_picrust2/plugin_setup.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
from qiime2.plugin import (Plugin, Str, Properties, Choices, Int, Bool, Range,
2-
Float, Set, Visualization, Metadata, MetadataColumn,
3-
Categorical, Numeric, Citations)
1+
from qiime2.plugin import (Plugin, Str, Choices, Int, Bool, Range, Float,
2+
Citations)
43
from q2_types.feature_table import FeatureTable, Frequency
54
from q2_types.feature_data import FeatureData, Sequence
6-
from q2_types.sample_data import SampleData
75
from q2_types.tree import Phylogeny, Rooted
86
import q2_picrust2
97

@@ -28,10 +26,15 @@
2826

2927
inputs={'table': FeatureTable[Frequency],
3028
'seq': FeatureData[Sequence]},
31-
29+
3230
parameters={'threads': Int % Range(1, None),
3331
'hsp_method': Str % Choices(HSP_METHODS),
34-
'max_nsti': Float % Range(0.0, None)},
32+
'min_align': Float % Range(0.0, 1.0),
33+
'max_nsti': Float % Range(0.0, None),
34+
'skip_minpath': Bool,
35+
'no_gap_fill': Bool,
36+
'skip_norm': Bool,
37+
'highly_verbose': Bool},
3538

3639
outputs=[('ko_metagenome', FeatureTable[Frequency]),
3740
('ec_metagenome', FeatureTable[Frequency]),
@@ -46,16 +49,39 @@
4649
parameter_descriptions={
4750
'threads': 'Number of threads/processes to use during workflow.',
4851
'hsp_method': 'Which hidden-state prediction method to use.',
52+
'min_align': ('Proportion of the total length of an input query '
53+
'sequence that must align with reference sequences. '
54+
'Any sequences with lengths below this value after '
55+
'making an alignment with reference sequences will '
56+
'be excluded from the placement and all subsequent '
57+
'steps.'),
4958
'max_nsti': ('Max nearest-sequenced taxon index for an input ASV to '
50-
'be output.')},
59+
'be output.'),
60+
'skip_minpath': ('Do not run MinPath to identify which pathways are '
61+
'present as a first pass (on by default).'),
62+
'no_gap_fill': ('Do not perform gap filling before predicting '
63+
'pathway abundances (gap filling is on otherwise by '
64+
'default).'),
65+
'skip_norm': ('Skip normalizing sequence abundances by predicted '
66+
'marker gene copy numbers (typically 16S rRNA '
67+
'genes). The normalization step will be performed '
68+
'automatically unless this option is specified.'),
69+
'highly_verbose': ('Print all commands being written as well as all '
70+
'standard output of wrapped tools. This can be '
71+
'especially useful for debugging. Note that this '
72+
'option requires that the --verbose option is also '
73+
'set (which is an internal QIIME 2 option that '
74+
'indicates that STDOUT and STDERR should be printed '
75+
'out).')
76+
},
5177

5278
output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
5379
'ec_metagenome': 'Predicted metagenome for EC numbers',
5480
'pathway_abundance': 'Predicted MetaCyc pathway abundances'},
5581

5682
name='Default 16S PICRUSt2 Pipeline',
5783

58-
description=("QIIME2 Plugin for default 16S PICRUSt2 pipeline"),
84+
description=("QIIME 2 plugin for default 16S PICRUSt2 pipeline"),
5985

6086
citations=[citations['Douglas2019bioRxiv']]
6187
)
@@ -66,10 +92,14 @@
6692

6793
inputs={'table': FeatureTable[Frequency],
6894
'tree': Phylogeny[Rooted]},
69-
95+
7096
parameters={'threads': Int % Range(1, None),
7197
'hsp_method': Str % Choices(HSP_METHODS),
72-
'max_nsti': Float % Range(0.0, None)},
98+
'max_nsti': Float % Range(0.0, None),
99+
'skip_minpath': Bool,
100+
'no_gap_fill': Bool,
101+
'skip_norm': Bool,
102+
'highly_verbose': Bool},
73103

74104
outputs=[
75105
('ko_metagenome', FeatureTable[Frequency]),
@@ -86,19 +116,35 @@
86116
'threads': 'Number of threads/processes to use during workflow.',
87117
'hsp_method': 'Which hidden-state prediction method to use.',
88118
'max_nsti': ('Max nearest-sequenced taxon index for an input ASV to '
89-
'be output.')},
119+
'be output.'),
120+
'skip_minpath': ('Do not run MinPath to identify which pathways are '
121+
'present as a first pass (on by default).'),
122+
'no_gap_fill': ('Do not perform gap filling before predicting '
123+
'pathway abundances (gap filling is on otherwise by '
124+
'default).'),
125+
'skip_norm': ('Skip normalizing sequence abundances by predicted '
126+
'marker gene copy numbers (typically 16S rRNA '
127+
'genes). The normalization step will be performed '
128+
'automatically unless this option is specified.'),
129+
'highly_verbose': ('Print all commands being written as well as all '
130+
'standard output of wrapped tools. This can be '
131+
'especially useful for debugging. Note that this '
132+
'option requires that the --verbose option is also '
133+
'set (which is an internal QIIME 2 option that '
134+
'indicates that STDOUT and STDERR should be printed '
135+
'out).')
136+
},
90137

91138
output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
92139
'ec_metagenome': 'Predicted metagenome for E.C. numbers',
93140
'pathway_abundance': 'Predicted MetaCyc pathway abundances'},
94141

95142
name='16S PICRUSt2 pipeline with custom tree',
96143

97-
description=("QIIME2 plugin for running PICRUSt2 pipeline based on a " +
144+
description=("QIIME 2 plugin for running PICRUSt2 pipeline based on a " +
98145
"tree from a different pipeline. This was written to be " +
99146
"used with the output of SEPP (q2-fragment-insertion) as a " +
100147
"starting point."),
101148

102149
citations=[citations['Douglas2019bioRxiv']]
103150
)
104-

0 commit comments

Comments
 (0)