diff --git a/CHANGELOG.md b/CHANGELOG.md index e6f3c17..b692942 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ ## development version - You can now cite XAVIER with the DOI [10.5281/zenodo.12727315](https://doi.org/10.5281/zenodo.12727315). (#88, @kelly-sovacool) -- Minor documentation improvements. (#92, @kelly-sovacool) -- Minor documentation rendering improvements (#93, @samarth8392) +- Minor documentation improvements. (#92, @kelly-sovacool; #93, @samarth8392) - The docs website now has a dropdown menu to select which version to view. The latest release is shown by default. (#150, @kelly-sovacool) +- Add `xavier gui` subcommand to launch the graphical user interface. (#99, @kelly-sovacool) + - Previously, `xavier_gui` (with an underscore) was a command in the `ccbrpipeliner` module. ## XAVIER 3.0.3 diff --git a/config/genomes/hg38.biowulf.json b/config/genomes/biowulf/hg38.json similarity index 100% rename from config/genomes/hg38.biowulf.json rename to config/genomes/biowulf/hg38.json diff --git a/config/genomes/mm10.biowulf.json b/config/genomes/biowulf/mm10.json similarity index 100% rename from config/genomes/mm10.biowulf.json rename to config/genomes/biowulf/mm10.json diff --git a/config/genomes/hg38.frce.json b/config/genomes/frce/hg38.json old mode 100755 new mode 100644 similarity index 100% rename from config/genomes/hg38.frce.json rename to config/genomes/frce/hg38.json diff --git a/config/genomes/mm10.frce.json b/config/genomes/frce/mm10.json similarity index 100% rename from config/genomes/mm10.frce.json rename to config/genomes/frce/mm10.json diff --git a/docs/usage/gui.md b/docs/usage/gui.md index 5c73ddf..c20bc0f 100644 --- a/docs/usage/gui.md +++ b/docs/usage/gui.md @@ -73,7 +73,7 @@ xavier --version To run the XAVIER pipeline from the GUI, simply enter: ```bash -xavier_gui +xavier gui ``` and it will launch the XAVIER window. @@ -177,6 +177,6 @@ and start an interactive session. ![gui_nx_config2](images/gui_nx_config2.png) -Similar to the instructions above, load `ccbrpipeliner` module and enter `xavier_gui` to launch the XAVIER gui. +Similar to the instructions above, load `ccbrpipeliner` module and enter `xavier gui` to launch the XAVIER gui. ![gui_nx_xavier](images/gui_nx_xavier.png) diff --git a/resources/CCBRlogo.png b/resources/CCBRlogo.png new file mode 100644 index 0000000..256ea29 Binary files /dev/null and b/resources/CCBRlogo.png differ diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index df6c478..5d8bceb 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -44,137 +44,26 @@ import argparse # potential python3 3rd party package, added in python/3.5 # Local imports -from .run import init, setup, bind, dryrun, runner +from .run import init, setup, bind, dryrun, runner, run from .shells import bash from .options import genome_options -from .util import err, exists, fatal, permissions, check_cache, require, get_version +from .util import ( + err, + exists, + fatal, + permissions, + check_cache, + require, + get_version, + get_genomes_list, +) +from .gui import launch_gui __version__ = get_version() __email__ = "ccbr@mail.nih.gov" __home__ = os.path.dirname(os.path.abspath(__file__)) -def run(sub_args): - """Initialize, setup, and run the XAVIER pipeline. - Calls initialize() to create output directory and copy over pipeline resources, - setup() to create the pipeline config file, dryrun() to ensure their are no issues - before running the pipeline, and finally run() to execute the Snakemake workflow. - @param sub_args : - Parsed arguments for run sub-command - """ - # Step 0. Check for required dependencies - # The pipelines has only two requirements: - # snakemake and singularity - require(["snakemake", "singularity"], ["snakemake", "singularity"]) - - # Optional Step. Initialize working directory, - # copy over required resources to run - # the pipeline - git_repo = __home__ - if sub_args.runmode == "init": - print("--Initializing") - input_files = init( - repo_path=git_repo, output_path=sub_args.output, links=sub_args.input - ) - - # Required Step. Setup pipeline for execution, - # dynamically create config.json config - # file from user inputs and base config - # determine "nidap folder" - create_nidap_folder_YN = "no" - if sub_args.create_nidap_folder: - create_nidap_folder_YN = "yes" - - # templates - config = setup( - sub_args, - repo_path=git_repo, - output_path=sub_args.output, - create_nidap_folder_YN=create_nidap_folder_YN, - links=sub_args.input, - ) - - # Required Step. Resolve docker/singularity bind - # paths from the config file. - bindpaths = bind(sub_args, config=config) - - # Optional Step: Dry-run pipeline - # if sub_args.dry_run: - if sub_args.runmode == "dryrun" or sub_args.runmode == "run": - print("--Dry-Run") - # Dryrun pipeline - dryrun_output = dryrun( - outdir=sub_args.output - ) # python3 returns byte-string representation - print( - "\nDry-running XAVIER pipeline:\n{}".format(dryrun_output.decode("utf-8")) - ) - - # Optional Step. Orchestrate pipeline execution, - # run pipeline in locally on a compute node - # for debugging purposes or submit the master - # job to the job scheduler, SLURM, and create - # logging file - if sub_args.runmode == "run": - print("--Run full pipeline") - if not exists(os.path.join(sub_args.output, "logfiles")): - # Create directory for logfiles - os.makedirs(os.path.join(sub_args.output, "logfiles")) - if sub_args.mode == "local": - log = os.path.join(sub_args.output, "logfiles", "snakemake.log") - else: - log = os.path.join(sub_args.output, "logfiles", "master.log") - logfh = open(log, "w") - wait = "" - if sub_args.wait: - wait = "--wait" - mjob = runner( - mode=sub_args.mode, - outdir=sub_args.output, - # additional_bind_paths = all_bind_paths, - alt_cache=sub_args.singularity_cache, - threads=int(sub_args.threads), - jobname=sub_args.job_name, - submission_script="runner", - logger=logfh, - additional_bind_paths=",".join(bindpaths), - tmp_dir=sub_args.tmp_dir, - wait=wait, - ) - - # Step 5. Wait for subprocess to complete, - # this is blocking and not asynchronous - if not sub_args.silent: - print("\nRunning XAVIER pipeline in '{}' mode...".format(sub_args.mode)) - mjob.wait() - logfh.close() - - # Step 6. Relay information about submission - # of the master job or the exit code of the - # pipeline that ran in local mode - if sub_args.mode == "local": - if int(mjob.returncode) == 0: - print("XAVIER has successfully completed") - else: - fatal( - "XAVIER failed. Please see {} for more information.".format( - os.path.join(sub_args.output, "logfiles", "snakemake.log") - ) - ) - elif sub_args.mode == "slurm": - jobid = ( - open(os.path.join(sub_args.output, "logfiles", "mjobid.log")) - .read() - .strip() - ) - if not sub_args.silent: - if int(mjob.returncode) == 0: - print("Successfully submitted master job: ", end="") - else: - fatal("Error occurred when submitting the master job.") - print(jobid) - - def unlock(sub_args): """Unlocks a previous runs output directory. If snakemake fails ungracefully, it maybe required to unlock the working directory before proceeding again. @@ -407,13 +296,19 @@ def parsed_arguments(): # Suppressing help message of required args to overcome no sub-parser named groups subparser_run = subparsers.add_parser( "run", - help="Run the XAVIER pipeline with input files.", + help="Run the XAVIER pipeline with input files.", usage=argparse.SUPPRESS, formatter_class=argparse.RawDescriptionHelpFormatter, description=required_run_options, epilog=run_epilog, ) + subparser_gui = subparsers.add_parser( + "gui", + help="Launch the pipeline with a Graphical User Interface (GUI)", + description="", + ) + # Required Arguments # Input FastQ files subparser_run.add_argument( @@ -772,6 +667,7 @@ def parsed_arguments(): subparser_run.set_defaults(func=run) subparser_unlock.set_defaults(func=unlock) subparser_cache.set_defaults(func=cache) + subparser_gui.set_defaults(func=launch_gui) # Parse command-line args args = parser.parse_args() diff --git a/src/xavier/cache.py b/src/xavier/cache.py new file mode 100644 index 0000000..a908634 --- /dev/null +++ b/src/xavier/cache.py @@ -0,0 +1,63 @@ +import json +import os +import sys + + +def get_singularity_cachedir(output_dir, cache_dir=None): + """Returns the singularity cache directory. + If no user-provided cache directory is provided, + the default singularity cache is in the output directory. + """ + if not cache_dir: + cache_dir = os.path.join(output_dir, ".singularity") + return cache_dir + + +def get_sif_cache_dir(hpc=None): + sif_dir = None + if hpc == "biowulf": + sif_dir = "/data/CCBR_Pipeliner/SIFS" + elif hpc == "frce": + sif_dir = "/mnt/projects/CCBR-Pipelines/SIFs" + return sif_dir + + +def image_cache(sub_args, config): + """Adds Docker Image URIs, or SIF paths to config if singularity cache option is provided. + If singularity cache option is provided and a local SIF does not exist, a warning is + displayed and the image will be pulled from URI in 'config/containers/images.json'. + @param sub_args : + Parsed arguments for run sub-command + @params config : + Docker Image config file + @return config : + Updated config dictionary containing user information (username and home directory) + """ + images = os.path.join(sub_args.output, "config", "containers", "images.json") + + # Read in config for docker image uris + with open(images, "r") as fh: + data = json.load(fh) + # Check if local sif exists + for image, uri in data["images"].items(): + if sub_args.sif_cache: + sif = os.path.join( + sub_args.sif_cache, + "{}.sif".format(os.path.basename(uri).replace(":", "_")), + ) + if not os.path.exists(sif): + # If local sif does not exist on in cache, print warning + # and default to pulling from URI in config/containers/images.json + print( + 'Warning: Local image "{}" does not exist in singularity cache'.format( + sif + ), + file=sys.stderr, + ) + else: + # Change pointer to image from Registry URI to local SIF + data["images"][image] = sif + + config.update(data) + + return config diff --git a/src/xavier/gui.py b/src/xavier/gui.py new file mode 100644 index 0000000..72ee41e --- /dev/null +++ b/src/xavier/gui.py @@ -0,0 +1,375 @@ +#!/usr/bin/env python3 +import argparse +import os +import sys +import glob +import PySimpleGUI as sg + +from .util import ( + get_genomes_dict, + get_tmp_dir, + xavier_base, + get_version, + get_hpcname, + check_python_version, +) +from .run import run_in_context +from .cache import get_sif_cache_dir + +def launch_gui(DEBUG=True): + check_python_version() + # get drop down genome options + jsons = get_genomes_dict() + genome_annotation_combinations = list(jsons.keys()) + genome_annotation_combinations.sort() + if DEBUG: + print(jsons) + if DEBUG: + print(genome_annotation_combinations) + + # Create different layouts + tumorPair_layout = [ + [ + sg.Text("Pairs file", size=(20, 1)), + sg.InputText(key="-PAIRS-"), + sg.FileBrowse(target="-PAIRS-"), + ], + [ + sg.Text("Copy Number Variants (CNV):"), + sg.Radio("No", "CNVRADIO", enable_events=True, default=True, key="-NOCNV-"), + sg.Radio("Yes", "CNVRADIO", enable_events=True, key="-CNV-"), + ], + ] + + tumorOnly_layout = [ + [sg.T("Copy Number Variants (CNVs) can only be analyzed in Tumor-Normal mode.")] + ] + + analysis_layout = [ + [ + sg.Radio( + "Tumor-normal pair", "TUMORADIO", enable_events=True, key="-TUMNORM-" + ), + sg.Radio("Tumor-only", "TUMORADIO", enable_events=True, key="-TUMONLY-"), + ], + [ + sg.Frame( + "Tumor-Normal", + tumorPair_layout, + font=("Helvetica", 12, "bold"), + key="-PAIROPTS-", + visible=False, + ) + ], + [ + sg.Frame( + "Tumor-Only", + tumorOnly_layout, + font=("Helvetica", 12, "bold"), + key="-ONLYOPTS-", + visible=False, + ) + ], + ] + + targets_layout = [ + [ + sg.Text("Targets .BED file", size=(20, 1)), + sg.InputText(key="-TARGETS-"), + sg.FileBrowse(target="-TARGETS-"), + ] + ] + + settings_layout = [ + [sg.T("Please read the Documentation before changing")], + [ + sg.T("Apply FFPE correction?"), + sg.Radio( + "No", "FFPERADIO", enable_events=True, default=True, key="-NOFFPE-" + ), + sg.Radio("Yes", "FFPERADIO", enable_events=True, key="-FFPE-"), + ], + [sg.T("Targets (.BED file):")], + [ + sg.Radio( + "Default", "BEDRADIO", enable_events=True, default=True, key="-DEFTARG-" + ), + sg.Radio("Custom", "BEDRADIO", enable_events=True, key="-CUSTARG-"), + ], + [sg.Frame("Custom Targets", targets_layout, visible=False, key="-BED-")], + [sg.Button(button_text="Discard", key="-DISCSET-", button_color="#3864AB")], + ] + + default_values = { + "-NOCNV-": True, + "-CNV-": False, + "-NOFFPE-": True, + "-FFPE-": False, + "-DEFTARG-": True, + "-CUSTARG-": False, + } + textKeys = [ + "-INDIR-", + "-OUTDIR-", + "-PAIRS-", + "-TARGETS-", + "-JOBNAME-", + "-ANNOTATION-", + ] + # create main layout + logo = sg.Image(xavier_base(os.path.join("resources", "CCBRlogo.png"))) + layout = [ + [sg.Column([[logo]], justification="center")], + [ + sg.Text( + "XAVIER - eXome Analysis and Variant explorER", + font=("Helvetica", 12, "bold"), + ) + ], + [ + sg.Text("Input Fastqs folder", size=(20, 1)), + sg.InputText(key="-INDIR-"), + sg.FolderBrowse(target="-INDIR-"), + ], + [ + sg.Text("Output folder", size=(20, 1)), + sg.InputText(key="-OUTDIR-"), + sg.FolderBrowse(target="-OUTDIR-"), + ], + [ + sg.Text("Genome", size=(20, 1)), + sg.Combo( + values=genome_annotation_combinations, + key="-ANNOTATION-", + tooltip="hg38: Homo sapiens GRCh38.p14; mm10: Mus musculus GRCm38.p6", + ), + ], + [ + sg.Text("Job name", size=(20, 1)), + sg.InputText( + key="-JOBNAME-", + tooltip="Name of the job for this run. All output files will be stored under this folder name in the output folder.", + ), + ], + [sg.Frame("Analysis Mode", analysis_layout, visible=True)], + [sg.Button(button_text="Additional Settings", key="-SETTINGS-")], + [sg.Frame("", settings_layout, key="-SET-", visible=False)], + [ + sg.Submit(key="-SUBMIT-"), + sg.Button(button_text="Documentation", key="--DOC--"), + sg.Button(button_text="Help", key="--HELP--"), + sg.Cancel(key="--CANCEL--", button_color="tomato"), + ], + ] + + if DEBUG: + print("layout is ready!") + + window = sg.Window(f"XAVIER {get_version()}", layout, location=(0, 500), finalize=True) + if DEBUG: + print("window created!") + + # Event loop: + while True: + event, values = window.read() + # if DEBUG: print(event,values) ## Turn on for debugging + + # if any((event != 'Submit')): + if event in ("--CANCEL--", sg.WINDOW_CLOSED): + sg.popup_auto_close( + "Thank you for running XAVIER. GoodBye!", location=(0, 500), title="" + ) + sys.exit(69) + if event == "-TUMNORM-": + window["-PAIROPTS-"].update(visible=True) + window["-ONLYOPTS-"].update(visible=False) + elif event == "-TUMONLY-": + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=True) + values["-CNV-"] = False + if event == "--DOC--": + copy_to_clipboard("https://ccbr.github.io/XAVIER/") + sg.Popup( + "Visit https://ccbr.github.io/XAVIER/ for links to complete documentation. The link has been copied to your clipboard. Please paste it in your favorite web browser.", + font=("Arial", 12, "bold"), + title="", + location=(0, 500), + ) + continue + if event == "--HELP--": + copy_to_clipboard("ccbr_pipeliner@mail.nih.gov") + sg.Popup( + "Email ccbr_pipeliner@mail.nih.gov for help. The email id has been copied to your clipboard. Please paste it in your emailing software.", + font=("Arial", 12, "bold"), + title="", + location=(0, 500), + ) + continue + if event == "-SETTINGS-": + window["-SET-"].update(visible=True) + if event == "-DEFTARG-": + window["-BED-"].update(visible=False) + if event == "-CUSTARG-": + window["-BED-"].update(visible=True) + if event == "-DISCSET-": + window["-SET-"].update(visible=False) + window["-BED-"].update(visible=False) + for key, value in default_values.items(): + window[key].Update(value) + if event == "-SUBMIT-": + # check for correct inputs + if values["-INDIR-"] == "": + sg.PopupError("Input folder must be provided!!", location=(0, 500)) + continue + elif not os.path.exists(values["-INDIR-"]): + sg.PopupError("Input folder doesn't exist!!", location=(0, 500)) + continue + elif len(get_fastqs(values["-INDIR-"])) == 0: + sg.PopupError("Input folder has no fastqs!!", location=(0, 500)) + continue + else: + inputfastqs = get_fastqs(values["-INDIR-"]) + if DEBUG: + print(inputfastqs) + if len(inputfastqs) == 0: + sg.PopupError( + "Input folder has no fastqs!!", + location=(0, 500), + title="ERROR!", + font=("Arial", 12, "bold"), + ) + window.Element("-INDIR-").update("") + continue + if values["-OUTDIR-"] == "": + sg.PopupError("Output folder must be provided!!", location=(0, 500)) + continue + outputfolder = values["-OUTDIR-"] + "/" + values["-JOBNAME-"] + if os.path.exists(outputfolder): + ch = sg.popup_yes_no( + "Output folder name exists... this is probably a re-run ... proceed?", + title="Rerun??", + location=(0, 500), + ) + if ch == "No": + window.Element("-OUTDIR-").update("") + continue + if values["-CUSTARG-"] == True: + if values["-TARGETS-"] == "": + sg.PopupError( + "Custom Targets BED file selected but not provided!!", + location=(0, 500), + ) + continue + if values["-TUMNORM-"] == "" and values["-TUMONLY-"] == "": + sg.PopupError("Select an analysis mode", location=(0, 500)) + continue + if values["-TUMNORM-"] == True: + if values["-PAIRS-"] == "": + sg.PopupError( + "Tumor-normal mode selected. Need Pairs file to continue", + location=(0, 500), + ) + continue + genome = values["-ANNOTATION-"] + output_dir = os.path.join(values["-OUTDIR-"], values["-JOBNAME-"]) + run_args = argparse.Namespace( + runmode="init", + input=list(glob.glob(os.path.join(values["-INDIR-"], "*.fastq.gz"))), + output=output_dir, + genome=genome, + targets=values["-TARGETS-"] if values["-TARGETS-"] else xavier_base('resources', 'Agilent_SSv7_allExons_hg38.bed'), # TODO should this be part of the genome config file? + mode="slurm", + job_name="pl:xavier", + callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], + pairs=values.get("-PAIRS-", None), + ffpe=values["-FFPE-"], + cnv=values["-CNV-"], + wait=False, + create_nidap_folder=False, + silent=False, + singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), + sif_cache=get_sif_cache_dir(), + tmp_dir=get_tmp_dir(None, output_dir), + threads=2, + ) + allout_init = run_in_context(run_args) + run_args.runmode = "dryrun" + allout_dryrun = run_in_context(run_args) + allout = "\n".join([allout_init, allout_dryrun]) + if DEBUG: + print(allout) + sg.popup_scrolled( + allout, title="Dryrun:STDOUT/STDERR", location=(0, 500), size=(80, 30) + ) + if "error" in allout or "Error" in allout or "ERROR" in allout: + continue + ch = sg.popup_yes_no( + "Submit run to slurm?", title="Submit??", location=(0, 500) + ) + if ch == "Yes": + run_args.runmode = "run" + allout = run_in_context(run_args) + sg.popup_scrolled( + allout, + title="Slurmrun:STDOUT/STDERR", + location=(0, 500), + size=(80, 30), + ) + rerun = sg.popup_yes_no( + "Submit another XAVIER job?", title="", location=(0, 500) + ) + if rerun == "Yes": + for key in window.read(): + window[key].Update(value="") + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=False) + window["-TUMNORM-"].update(value=False) + window["-TUMONLY-"].update(value=False) + if rerun == "No": + sg.popup_auto_close( + "Thank you for running XAVIER. GoodBye!", + location=(0, 500), + title="", + ) + break + elif ch == "No": + for key in textKeys: + window[key].Update(value="") + for key, value in default_values.items(): + window[key].Update(value) + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=False) + window["-TUMNORM-"].update(value=False) + window["-TUMONLY-"].update(value=False) + continue + window.close() + +def copy_to_clipboard(string): + r = Tk() + r.withdraw() + r.clipboard_clear() + r.clipboard_append(string) + r.update() + r.destroy() + + +def fixpath(p): + return os.path.abspath(os.path.expanduser(p)) + + +def get_fastqs(inputdir): + inputdir = fixpath(inputdir) + inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") + inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") + inputfastqs.extend(inputfqs) + return inputfastqs + + +def delete_files(files): + for f in files: + if os.path.exists(f): + os.remove(f) + + +if __name__ == "__main__": + main() diff --git a/src/xavier/run.py b/src/xavier/run.py index de92d95..0ee67bd 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -3,15 +3,149 @@ # Python standard library from __future__ import print_function -from shutil import copytree, copyfile +import contextlib +import io import os import re import json +import shutil import sys import subprocess # Local imports -from .util import git_commit_hash, join_jsons, fatal, which, exists, err, get_version +from .util import ( + git_commit_hash, + join_jsons, + fatal, + which, + exists, + err, + get_version, + xavier_base, + require, + get_hpcname, +) + + +def run(sub_args): + """Initialize, setup, and run the XAVIER pipeline. + Calls initialize() to create output directory and copy over pipeline resources, + setup() to create the pipeline config file, dryrun() to ensure their are no issues + before running the pipeline, and finally run() to execute the Snakemake workflow. + @param sub_args : + Parsed arguments for run sub-command + """ + # Step 0. Check for required dependencies + # The pipelines has only two requirements: + # snakemake and singularity + require(["snakemake", "singularity"], ["snakemake", "singularity"]) + + # Optional Step. Initialize working directory, + # copy over required resources to run + # the pipeline + git_repo = xavier_base() + if sub_args.runmode == "init": + print("--Initializing") + input_files = init( + repo_path=git_repo, output_path=sub_args.output, links=sub_args.input + ) + + # Required Step. Setup pipeline for execution, + # dynamically create config.json config + # file from user inputs and base config + # determine "nidap folder" + create_nidap_folder_YN = "no" + if sub_args.create_nidap_folder: + create_nidap_folder_YN = "yes" + + # templates + config = setup( + sub_args, + repo_path=git_repo, + output_path=sub_args.output, + create_nidap_folder_YN=create_nidap_folder_YN, + links=sub_args.input, + ) + + # Required Step. Resolve docker/singularity bind + # paths from the config file. + bindpaths = bind(sub_args, config=config) + + # Optional Step: Dry-run pipeline + # if sub_args.dry_run: + if sub_args.runmode == "dryrun" or sub_args.runmode == "run": + print("--Dry-Run") + # Dryrun pipeline + dryrun_output = dryrun( + outdir=sub_args.output + ) # python3 returns byte-string representation + print( + "\nDry-running XAVIER pipeline:\n{}".format(dryrun_output.decode("utf-8")) + ) + + # Optional Step. Orchestrate pipeline execution, + # run pipeline in locally on a compute node + # for debugging purposes or submit the master + # job to the job scheduler, SLURM, and create + # logging file + if sub_args.runmode == "run": + print("--Run full pipeline") + if not exists(os.path.join(sub_args.output, "logfiles")): + # Create directory for logfiles + os.makedirs(os.path.join(sub_args.output, "logfiles")) + if sub_args.mode == "local": + log = os.path.join(sub_args.output, "logfiles", "snakemake.log") + else: + log = os.path.join(sub_args.output, "logfiles", "master.log") + logfh = open(log, "w") + wait = "" + if sub_args.wait: + wait = "--wait" + mjob = runner( + mode=sub_args.mode, + outdir=sub_args.output, + # additional_bind_paths = all_bind_paths, + alt_cache=sub_args.singularity_cache, + threads=int(sub_args.threads), + jobname=sub_args.job_name, + submission_script="runner", + logger=logfh, + additional_bind_paths=",".join(bindpaths), + tmp_dir=sub_args.tmp_dir, + wait=wait, + ) + + # Step 5. Wait for subprocess to complete, + # this is blocking and not asynchronous + if not sub_args.silent: + print("\nRunning XAVIER pipeline in '{}' mode...".format(sub_args.mode)) + mjob.wait() + logfh.close() + + # Step 6. Relay information about submission + # of the master job or the exit code of the + # pipeline that ran in local mode + if sub_args.mode == "local": + if int(mjob.returncode) == 0: + print("XAVIER has successfully completed") + else: + fatal( + "XAVIER failed. Please see {} for more information.".format( + os.path.join(sub_args.output, "logfiles", "snakemake.log") + ) + ) + elif sub_args.mode == "slurm": + jobid = ( + open(os.path.join(sub_args.output, "logfiles", "mjobid.log")) + .read() + .strip() + ) + if not sub_args.silent: + if int(mjob.returncode) == 0: + print("Successfully submitted master job: ", end="") + else: + fatal("Error occurred when submitting the master job.") + print(jobid) def init( @@ -73,7 +207,7 @@ def copy_safe(source, target, resources=[]): destination = os.path.join(target, resource) if not exists(destination): # Required resources do not exist - copytree(os.path.join(source, resource), destination) + shutil.copytree(os.path.join(source, resource), destination) def sym_safe(input_data, target): @@ -181,32 +315,26 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ ifiles = sym_safe(input_data=links, target=output_path) mixed_inputs(ifiles) - hpcget = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ) - hpcname = "" - - if "biowulf" in hpcget.stdout: - shorthostname = "biowulf" - print("Thank you for running XAVIER on Biowulf") - elif "fsitgl" in hpcget.stdout: - shorthostname = "frce" - print("Thank you for running XAVIER on FRCE") - else: + shorthostname = get_hpcname() + if not shorthostname: shorthostname = "biowulf" print( - "%s unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config" - % (hpcget) + f"{shorthostname} unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config" ) + else: + print(f"Thank you for running XAVIER on {shorthostname.upper()}") genome_config = os.path.join( - repo_path, "config", "genomes", sub_args.genome + "." + shorthostname + ".json" + repo_path, "config", "genomes", get_hpcname(), sub_args.genome + ".json" ) if sub_args.genome.endswith(".json"): # Provided a custom reference genome generated by rna-seek build genome_config = os.path.abspath(sub_args.genome) + if not os.path.exists(genome_config): + raise FileNotFoundError(f"Genome config file does not exist: {genome_config}") + required = { # Base configuration file "base": os.path.join(repo_path, "config", "config.json"), @@ -223,7 +351,7 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ repo_path, "config", "cluster" + "." + shorthostname + ".json" ) cluster_output = os.path.join(output_path, "cluster.json") - copyfile(cluster_config, cluster_output) + shutil.copyfile(cluster_config, cluster_output) # Global config file for pipeline, config.json config = join_jsons(required.values()) # uses templates in the rna-seek repo @@ -831,3 +959,13 @@ def runner( ) return masterjob + + +def run_in_context(args): + """Execute the run function in a context manager to capture stdout/stderr""" + with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( + io.StringIO() + ) as err_f: + run(args) + allout = out_f.getvalue() + "\n" + err_f.getvalue() + return allout diff --git a/src/xavier/util.py b/src/xavier/util.py index 3a269f8..3cf87b8 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -13,14 +13,14 @@ import warnings -def xavier_base(rel_path=""): +def xavier_base(*paths): """Get the absolute path to a file in the repository @return abs_path """ basedir = os.path.dirname( os.path.dirname(os.path.dirname(os.path.realpath(__file__))) ) - return os.path.join(basedir, rel_path) + return os.path.join(basedir, *paths) def get_version(): @@ -397,6 +397,23 @@ def join_jsons(templates): return aggregated +def check_python_version(): + # version check + # glob.iglob requires 3.11 for using "include_hidden=True" + MIN_PYTHON = (3, 11) + try: + assert sys.version_info >= MIN_PYTHON + print( + "Python version: {0}.{1}.{2}".format( + sys.version_info.major, sys.version_info.minor, sys.version_info.micro + ) + ) + except AssertionError: + exit( + f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" + ) + + if __name__ == "__main__": # Calculate MD5 checksum of entire file print("{} {}".format(md5sum(sys.argv[0]), sys.argv[0])) diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..43d9f35 --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,42 @@ +import argparse +import glob +import os +import tempfile + +from xavier.src.xavier.util import get_tmp_dir, xavier_base, get_hpcname +from xavier.src.xavier.cache import get_sif_cache_dir +from xavier.src.xavier.run import run, run_in_context + + +def test_dryrun(): + if get_hpcname() == "biowulf": + with tempfile.TemporaryDirectory() as tmp_dir: + run_args = argparse.Namespace( + runmode="init", + input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), + output=tmp_dir, + genome="hg38", + targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"), + mode="local", + job_name="pl:xavier", + callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], + pairs=xavier_base(".tests/pairs.tsv"), + ffpe=False, + cnv=False, + wait=False, + create_nidap_folder=False, + silent=False, + singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), + sif_cache=get_sif_cache_dir(), + tmp_dir=get_tmp_dir(None, tmp_dir), + threads=2, + ) + # init + allout_1 = run_in_context(run_args) + run_args.runmode = "dryrun" + # dryrun + allout_2 = run_in_context(run_args) + assert (all([ + "--Initializing" in allout_1, + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." in allout_2 + ])) diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000..4cccd51 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,10 @@ +import os +import warnings +from xavier.src.xavier.util import ( + xavier_base +) + +def test_xavier_base(): + test_base = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + xavier_base() + assert xavier_base("a","b","c").endswith('/a/b/c')