Skip to content

Commit

Permalink
pipeline: support genomic sequence input
Browse files Browse the repository at this point in the history
  • Loading branch information
azmtag committed Sep 12, 2021
1 parent 76134e0 commit 8e685ab
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 6 deletions.
33 changes: 27 additions & 6 deletions nerpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@

def parse_args(log):
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
genomic_group = parser.add_argument_group('Genomic input', 'antiSMASH-processed genomes of NRP-producing organisms (i.e. BGC predictions)')
genomic_group = parser.add_argument_group('Genomic input', 'Genomes of NRP-producing organisms (i.e. BGC predictions)')
genomic_group.add_argument("--antismash_output_list", dest="antismash_out",
help="file with list of paths to antiSMASH output directories", type=str)
genomic_group.add_argument("--antismash", "-a", dest="antismash", action='append',
help="single antiSMASH output directory or directory with many antiSMASH outputs")
genomic_group.add_argument("--sequences", dest="seqs", help="GenBank/EMBL/FASTA file containing DNA sequences")

struct_group = parser.add_argument_group('Chemical input', 'Structures of NRP molecules')
struct_input_group = struct_group.add_mutually_exclusive_group()
Expand Down Expand Up @@ -65,6 +66,8 @@ def parse_args(log):
help='file with custom monomers in rBAN compatible format')
parser.add_argument("--process-hybrids", dest="process_hybrids", action="store_true", default=False,
help="process NRP-PK hybrid monomers (requires use of rBAN)")
parser.add_argument('--antismash-path', dest='antismash_path', type=str, default=None,
help='path to antismash source directory')
parser.add_argument("--threads", default=1, type=int, help="number of threads for running Nerpa", action="store")
parser.add_argument("--output_dir", "-o", help="output dir [default: nerpa_results/results_<datetime>]",
type=str, default=None)
Expand Down Expand Up @@ -95,12 +98,12 @@ def validate(expr, msg=''):

def validate_arguments(args, parser, log):
try:
if not (args.predictions or args.antismash or args.antismash_out):
if not (args.predictions or args.antismash or args.antismash_out or args.seqs):
raise ValidationError(f'one of the arguments --predictions --antismash/-a --antismash_output_list '
f'is required')
if args.predictions and (args.antismash or args.antismash_out):
f'--sequences is required')
if args.predictions and (args.antismash or args.antismash_out or args.seqs):
raise ValidationError(f'argument --predictions: not allowed with argument --antismash/-a '
f'or --antismash_output_list')
f'or --antismash_output_list or --sequences')
if not (args.structures or args.smiles or args.smiles_tsv or args.rban_output):
raise ValidationError(f'one of the arguments --rban-json --smiles-tsv --smiles --structures/-s'
f'is required')
Expand Down Expand Up @@ -281,8 +284,26 @@ def run(args, log):
if args.predictions is not None:
path_predictions = copy_prediction_list(args, output_dir)
else:
antismash_out_dirs = args.antismash if args.antismash is not None else []
if args.seqs:
cur_antismash_out = os.path.join(output_dir, 'antismash_output')
if args.antismash_path:
antismash_exe = nerpa_utils.get_path_to_program('run_antismash.py', dirpath=args.antismash_path, min_version='5.0')
else:
antismash_exe = nerpa_utils.get_path_to_program('antismash', min_version='5.0')
if antismash_exe is None:
log.error("Can't find antismash 5.x executable. Please make sure that you have antismash 5.x installed "
"in your system or provide path to antismash source directory via --antismash-path option.")
command = [antismash_exe,
'--genefinding-tool', 'prodigal',
'--output-dir', cur_antismash_out,
'--minimal', '--skip-zip-file', '--enable-nrps-pks',
'--cpus', str(args.threads), args.seqs]
nerpa_utils.sys_call(command, log, cwd=output_dir)
antismash_out_dirs.append(cur_antismash_out)

path_predictions = predictions_preprocessor.create_predictions_by_antiSAMSHout(get_antismash_v3_compatible_input_paths(
listing_fpath=args.antismash_out, list_of_paths=args.antismash,
listing_fpath=args.antismash_out, list_of_paths=antismash_out_dirs,
output_dir=output_dir, log=log), output_dir, log)

input_configs_dir = args.configs_dir if args.configs_dir else nerpa_init.configs_dir
Expand Down
39 changes: 39 additions & 0 deletions src/nerpa_pipeline/nerpa_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
import shutil
import datetime
import shlex
Expand Down Expand Up @@ -111,3 +112,41 @@ def is_exe(fpath):
if is_exe(exe_file):
return exe_file
return None


def get_path_to_program(program, dirpath=None, min_version=None):
    """Return the path to an executable, or None if it can't be found.

    :param program: file name of the executable to look for.
    :param dirpath: if given, only this directory is checked; otherwise every
        directory listed in the PATH environment variable is tried in order.
    :param min_version: optional 'MAJOR.MINOR' string (a bare 'MAJOR' is read
        as 'MAJOR.0'). When given, a candidate is accepted only if the last
        line of its `--version` output reports the SAME major version and a
        minor version that is at least the requested one.
    :return: path to the first acceptable executable, or None.
    """
    version_pattern = re.compile(r'(?P<major_version>\d+)\.(?P<minor_version>\d+)')

    def check_version(fpath, min_version):
        """Run `fpath --version` and compare the reported version with min_version."""
        try:
            p = subprocess.Popen([fpath, '--version'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            stdout, _ = p.communicate()
        except OSError:
            # marked as executable but cannot actually be launched
            return False

        searchstring = stdout.decode('utf8', errors='replace').strip()

        # ad hoc workaround to AS 5.2.0 printing FutureWarning to stdout
        searchstring = searchstring.split('\n')[-1]

        v = version_pattern.search(searchstring)
        if v is None:
            # no recognizable 'MAJOR.MINOR' in the output (e.g. empty output)
            return False

        required = [int(part) for part in min_version.split('.')]
        required_major = required[0]
        required_minor = required[1] if len(required) > 1 else 0

        # the major version must match exactly; only the minor one is a lower bound
        return (int(v.group('major_version')) == required_major
                and int(v.group('minor_version')) >= required_minor)

    def is_exe(fpath):
        """True if fpath is an executable file that satisfies min_version (if any)."""
        if not (os.path.isfile(fpath) and os.access(fpath, os.X_OK)):
            return False
        return not min_version or check_version(fpath, min_version)

    if dirpath:
        search_dirs = [dirpath]
    else:
        # fall back to the platform default search path when PATH is unset
        search_dirs = os.environ.get("PATH", os.defpath).split(os.pathsep)

    for directory in search_dirs:
        exe_file = os.path.join(directory, program)
        if is_exe(exe_file):
            return exe_file
    return None

0 comments on commit 8e685ab

Please sign in to comment.