Skip to content

Commit

Permalink
Bumped for 2019.10
Browse files Browse the repository at this point in the history
  • Loading branch information
gavinmdouglas committed Jan 23, 2020
1 parent b49dcd9 commit 3f78a7e
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 71 deletions.
136 changes: 92 additions & 44 deletions q2_picrust2/_custom_tree_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import qiime2
import skbio
import biom
from os import path
import sys
import pandas as pd
from tempfile import TemporaryDirectory
from q2_types.feature_table import FeatureTable, Frequency
from picrust2.util import system_call_check


def custom_tree_pipeline(table: biom.Table,
tree: skbio.TreeNode,
threads: int = 1,
hsp_method: str = "mp",
max_nsti: float = 2.0) -> (biom.Table,
biom.Table,
biom.Table):
max_nsti: float = 2.0,
skip_minpath: bool = False,
no_gap_fill: bool = False,
skip_norm: bool = False,
highly_verbose: bool = False) -> (biom.Table,
biom.Table,
biom.Table):

# Run pipeline in temporary directory so that files are not saved locally.
with TemporaryDirectory() as temp_dir:
Expand All @@ -23,8 +25,9 @@ def custom_tree_pipeline(table: biom.Table,

# Write out biom table:
biom_infile = path.join(temp_dir, "intable.biom")
with biom.util.biom_open(biom_infile, 'w') as out_biom:
table.to_hdf5(h5grp=out_biom, generated_by="PICRUSt2 QIIME2 Plugin")
with biom.util.biom_open(biom_infile, 'w') as out_biom:
table.to_hdf5(h5grp=out_biom,
generated_by="PICRUSt2 QIIME 2 Plugin")

# Write out newick tree.
newick_infile = path.join(temp_dir, "placed_seqs.tre")
Expand All @@ -37,57 +40,102 @@ def custom_tree_pipeline(table: biom.Table,
# Run hidden-state prediction step (on 16S, EC, and KO tables
# separately).
hsp_out_16S = path.join(picrust2_out, "16S_predicted.tsv.gz")
system_call_check("hsp.py -i 16S " +
" -t " + newick_infile +
" -p 1 " +
" -n " +
"-o " + hsp_out_16S +
" -m " + hsp_method,
print_out=True)
hsp_out_16S_cmd = "hsp.py -i 16S " + \
" -t " + newick_infile + \
" -p 1 " + \
" -n " + \
" -o " + hsp_out_16S + \
" -m " + hsp_method

hsp_out_EC = path.join(picrust2_out, "EC_predicted.tsv.gz")
system_call_check("hsp.py -i EC " +
" -t " + newick_infile +
" -p " + str(threads) +
" -o " + hsp_out_EC +
" -m " + hsp_method,
print_out=True)
hsp_out_EC_cmd = "hsp.py -i EC " + \
" -t " + newick_infile + \
" -p " + str(threads) + \
" -n " + \
" -o " + hsp_out_EC + \
" -m " + hsp_method

hsp_out_KO = path.join(picrust2_out, "KO_predicted.tsv.gz")
system_call_check("hsp.py -i KO " +
" -t " + newick_infile +
" -p " + str(threads) +
" -o " + hsp_out_KO +
" -m " + hsp_method,
print_out=True)
hsp_out_KO_cmd = "hsp.py -i KO " + \
" -t " + newick_infile + \
" -p " + str(threads) + \
" -n " + \
" -o " + hsp_out_KO + \
" -m " + hsp_method

if highly_verbose:
hsp_out_16S_cmd += " --verbose"
hsp_out_EC_cmd += " --verbose"
hsp_out_KO_cmd += " --verbose"

if not skip_norm:
system_call_check(hsp_out_16S_cmd,
print_command=True,
print_stdout=highly_verbose,
print_stderr=True)


system_call_check(hsp_out_EC_cmd,
print_command=True,
print_stdout=highly_verbose,
print_stderr=True)

system_call_check(hsp_out_KO_cmd,
print_command=True,
print_stdout=highly_verbose,
print_stderr=True)

# Run metagenome pipeline step.
EC_metagenome_out = path.join(picrust2_out, "EC_metagenome_out")
system_call_check("metagenome_pipeline.py -i " + biom_infile +
" -m " + hsp_out_16S +
" -f " + hsp_out_EC +
" -o " + EC_metagenome_out +
" --max_nsti " + str(max_nsti),
print_out=True)

KO_metagenome_out = path.join(picrust2_out, "KO_metagenome_out")
system_call_check("metagenome_pipeline.py -i " + biom_infile +
" -m " + hsp_out_16S +
" -f " + hsp_out_KO +
" -o " + KO_metagenome_out +
" --max_nsti " + str(max_nsti),
print_out=True)

EC_metagenome_cmd = "metagenome_pipeline.py -i " + biom_infile + \
" -f " + hsp_out_EC + \
" -o " + EC_metagenome_out + \
" --max_nsti " + str(max_nsti)

KO_metagenome_cmd = "metagenome_pipeline.py -i " + biom_infile + \
" -f " + hsp_out_KO + \
" -o " + KO_metagenome_out + \
" --max_nsti " + str(max_nsti)

if skip_norm:
EC_metagenome_cmd += " --skip_norm"
KO_metagenome_cmd += " --skip_norm"
else:
EC_metagenome_cmd += " -m " + hsp_out_16S
KO_metagenome_cmd += " -m " + hsp_out_16S

system_call_check(EC_metagenome_cmd, print_command=True,
print_stdout=highly_verbose,
print_stderr=True)
system_call_check(KO_metagenome_cmd, print_command=True,
print_stdout=highly_verbose,
print_stderr=True)

EC_out = path.join(EC_metagenome_out, "pred_metagenome_unstrat.tsv.gz")
KO_out = path.join(KO_metagenome_out, "pred_metagenome_unstrat.tsv.gz")

# Run pathway inference step.
pathways_out = path.join(picrust2_out, "pathways_out")
pathabun_out = path.join(pathways_out, "path_abun_unstrat.tsv.gz")
system_call_check("pathway_pipeline.py -i " + EC_out +
" -o " + pathways_out +
" -p " + str(threads),
print_out=True)

pathway_pipeline_cmd = "pathway_pipeline.py -i " + EC_out + \
" -o " + pathways_out + \
" -p " + str(threads)

if skip_minpath:
pathway_pipeline_cmd += " --skip_minpath"

if no_gap_fill:
pathway_pipeline_cmd += " --no_gap_fill"

if highly_verbose:
pathway_pipeline_cmd += " --verbose"

system_call_check(pathway_pipeline_cmd, print_command=True,
print_stdout=highly_verbose,
print_stderr=True)

# Read in output unstratified metagenome tables and return as BIOM
# objects.
Expand Down
32 changes: 18 additions & 14 deletions q2_picrust2/_full_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,34 @@
import qiime2
import biom
from os import path
import sys
import pandas as pd
from tempfile import TemporaryDirectory
from q2_types.feature_table import FeatureTable, Frequency
import subprocess
import sys
import picrust2.pipeline
from picrust2.default import (default_ref_dir, default_tables, default_map,
from picrust2.default import (default_ref_dir, default_tables,
default_regroup_map, default_pathway_map)


def full_pipeline(table: biom.Table,
seq: pd.Series,
threads: int = 1,
hsp_method: str = "mp",
max_nsti: float = 2.0) -> (biom.Table,
biom.Table,
biom.Table):
min_align: float = 0.8,
max_nsti: float = 2.0,
skip_minpath: bool = False,
no_gap_fill: bool = False,
skip_norm: bool = False,
highly_verbose: bool = False) -> (biom.Table,
biom.Table,
biom.Table):

# Write out BIOM table and FASTA to be used in pipeline.
with TemporaryDirectory() as temp_dir:

# Write out BIOM table:
biom_infile = path.join(temp_dir, "intable.biom")
with biom.util.biom_open(biom_infile, 'w') as out_biom:
with biom.util.biom_open(biom_infile, 'w') as out_biom:
table.to_hdf5(h5grp=out_biom,
generated_by="PICRUSt2 QIIME2 Plugin")
generated_by="PICRUSt2 QIIME 2 Plugin")

# Write out Pandas series as FASTA:
seq_outfile = path.join(temp_dir, "seqs.fna")
Expand Down Expand Up @@ -55,15 +58,16 @@ def full_pipeline(table: biom.Table,
min_reads=1,
min_samples=1,
hsp_method=hsp_method,
min_align=min_align,
skip_nsti=False,
skip_minpath=False,
no_gap_fill=False,
skip_minpath=skip_minpath,
no_gap_fill=no_gap_fill,
coverage=False,
per_sequence_contrib=False,
wide_table=False,
skip_norm=False,
skip_norm=skip_norm,
remove_intermediate=False,
verbose=True)
verbose=highly_verbose)

# Convert the returned unstratified tables to BIOM tables.
# Note that the 0-index in the func table returned objects corresponds
Expand Down
72 changes: 59 additions & 13 deletions q2_picrust2/plugin_setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from qiime2.plugin import (Plugin, Str, Properties, Choices, Int, Bool, Range,
Float, Set, Visualization, Metadata, MetadataColumn,
Categorical, Numeric, Citations)
from qiime2.plugin import (Plugin, Str, Choices, Int, Bool, Range, Float,
Citations)
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.feature_data import FeatureData, Sequence
from q2_types.sample_data import SampleData
from q2_types.tree import Phylogeny, Rooted
import q2_picrust2

Expand All @@ -28,10 +26,15 @@

inputs={'table': FeatureTable[Frequency],
'seq': FeatureData[Sequence]},

parameters={'threads': Int % Range(1, None),
'hsp_method': Str % Choices(HSP_METHODS),
'max_nsti': Float % Range(0.0, None)},
'min_align': Float % Range(0.0, 1.0),
'max_nsti': Float % Range(0.0, None),
'skip_minpath': Bool,
'no_gap_fill': Bool,
'skip_norm': Bool,
'highly_verbose': Bool},

outputs=[('ko_metagenome', FeatureTable[Frequency]),
('ec_metagenome', FeatureTable[Frequency]),
Expand All @@ -46,16 +49,39 @@
parameter_descriptions={
'threads': 'Number of threads/processes to use during workflow.',
'hsp_method': 'Which hidden-state prediction method to use.',
'min_align': ('Proportion of the total length of an input query '
'sequence that must align with reference sequences. '
'Any sequences with lengths below this value after '
'making an alignment with reference sequences will '
'be excluded from the placement and all subsequent '
'steps.'),
'max_nsti': ('Max nearest-sequenced taxon index for an input ASV to '
'be output.')},
'be output.'),
'skip_minpath': ('Do not run MinPath to identify which pathways are '
'present as a first pass (on by default).'),
'no_gap_fill': ('Do not perform gap filling before predicting '
'pathway abundances (gap filling is on otherwise by '
'default).'),
'skip_norm': ('Skip normalizing sequence abundances by predicted '
'marker gene copy numbers (typically 16S rRNA '
'genes). The normalization step will be performed '
'automatically unless this option is specified.'),
'highly_verbose': ('Print all commands being written as well as all '
'standard output of wrapped tools. This can be '
'especially useful for debugging. Note that this '
'option requires that the --verbose option is also '
'set (which is an internal QIIME 2 option that '
'indicates that STDOUT and STDERR should be printed '
'out).')
},

output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
'ec_metagenome': 'Predicted metagenome for EC numbers',
'pathway_abundance': 'Predicted MetaCyc pathway abundances'},

name='Default 16S PICRUSt2 Pipeline',

description=("QIIME2 Plugin for default 16S PICRUSt2 pipeline"),
description=("QIIME 2 plugin for default 16S PICRUSt2 pipeline"),

citations=[citations['Douglas2019bioRxiv']]
)
Expand All @@ -66,10 +92,14 @@

inputs={'table': FeatureTable[Frequency],
'tree': Phylogeny[Rooted]},

parameters={'threads': Int % Range(1, None),
'hsp_method': Str % Choices(HSP_METHODS),
'max_nsti': Float % Range(0.0, None)},
'max_nsti': Float % Range(0.0, None),
'skip_minpath': Bool,
'no_gap_fill': Bool,
'skip_norm': Bool,
'highly_verbose': Bool},

outputs=[
('ko_metagenome', FeatureTable[Frequency]),
Expand All @@ -86,19 +116,35 @@
'threads': 'Number of threads/processes to use during workflow.',
'hsp_method': 'Which hidden-state prediction method to use.',
'max_nsti': ('Max nearest-sequenced taxon index for an input ASV to '
'be output.')},
'be output.'),
'skip_minpath': ('Do not run MinPath to identify which pathways are '
'present as a first pass (on by default).'),
'no_gap_fill': ('Do not perform gap filling before predicting '
'pathway abundances (gap filling is on otherwise by '
'default).'),
'skip_norm': ('Skip normalizing sequence abundances by predicted '
'marker gene copy numbers (typically 16S rRNA '
'genes). The normalization step will be performed '
'automatically unless this option is specified.'),
'highly_verbose': ('Print all commands being written as well as all '
'standard output of wrapped tools. This can be '
'especially useful for debugging. Note that this '
'option requires that the --verbose option is also '
'set (which is an internal QIIME 2 option that '
'indicates that STDOUT and STDERR should be printed '
'out).')
},

output_descriptions={'ko_metagenome': 'Predicted metagenome for KEGG orthologs',
'ec_metagenome': 'Predicted metagenome for E.C. numbers',
'pathway_abundance': 'Predicted MetaCyc pathway abundances'},

name='16S PICRUSt2 pipeline with custom tree',

description=("QIIME2 plugin for running PICRUSt2 pipeline based on a " +
description=("QIIME 2 plugin for running PICRUSt2 pipeline based on a " +
"tree from a different pipeline. This was written to be " +
"used with the output of SEPP (q2-fragment-insertion) as a " +
"starting point."),

citations=[citations['Douglas2019bioRxiv']]
)

0 comments on commit 3f78a7e

Please sign in to comment.