From 7492856d3d4c9505e62e7de1baa1f1dba6e206e1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 12:26:02 -0400 Subject: [PATCH 01/11] chore: copy gui code as-is from pipeliner --- src/xavier/gui.py | 486 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 486 insertions(+) create mode 100644 src/xavier/gui.py diff --git a/src/xavier/gui.py b/src/xavier/gui.py new file mode 100644 index 0000000..ad55168 --- /dev/null +++ b/src/xavier/gui.py @@ -0,0 +1,486 @@ +#!/usr/bin/env python3 +global DEBUG + +DEBUG = True + +import os +import sys +import stat +import subprocess +import glob +import uuid +from pathlib import Path # core python module + + +# getting the name of the directory +# where the this file is present. +current = os.path.dirname(os.path.realpath(__file__)) + +# Getting the parent directory name +# where the current directory is present. +parent = os.path.dirname(current) + +# adding the parent directory to +# the sys.path. +sys.path.append(parent) +imgdir = os.path.join(parent, "resources", "images") + +# Check if python 3.11 or later is available and running +from src.VersionCheck import version_check + +version_check() + +from src.Utils import * # copy_to_clipboard comes from Utils + +# import pysimplegui +import PySimpleGUI as sg + +global XAVIERDIR +global SIFCACHE +global XAVIER +global XAVIERVER +global RANDOMSTR +global FILES2DELETE +global HOSTNAME + +XAVIERDIR = os.getenv("XAVIERDIR") +SIFCACHE = os.getenv("SIFCACHE") +XAVIERVER = os.getenv("XAVIERVER") +HOSTNAME = os.getenv("HOSTNAME") +XAVIER = os.path.join(XAVIERDIR, XAVIERVER, "bin", "xavier") +RANDOMSTR = str(uuid.uuid4()) +FILES2DELETE = list() + +# sg.SetOptions(button_color=sg.COLOR_SYSTEM_DEFAULT) + + +def get_combos(): + config_dir = os.path.join(XAVIERDIR, XAVIERVER, "config") + if not os.path.exists(config_dir): + sys.exit("ERROR: Folder does not exist : {}".format(config_dir)) + if HOSTNAME == "biowulf.nih.gov": + cluster = "biowulf" + elif HOSTNAME == "fsitgl-head01p.ncifcrf.gov": + cluster = "frce" + else: + sys.exit("ERROR: XAVIER GUI only works on Biowulf or FRCE clusters") + searchterm = config_dir + "/genomes/*" + cluster + ".json" + jsonfiles = glob.glob(searchterm) + if len(jsonfiles) == 0: + sys.exit("ERROR: No Genome JSONs found in : {}".format(config_dir)) + jsons = dict() + for j in jsonfiles: + k = os.path.basename(j) + k = k.replace("." + cluster + ".json", "") + jsons[k] = j + return jsons + + +def fixpath(p): + return os.path.abspath(os.path.expanduser(p)) + + +def get_fastqs(inputdir): + inputdir = fixpath(inputdir) + inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") + inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") + inputfastqs.extend(inputfqs) + return inputfastqs + + +def deletefiles(): + for f in FILES2DELETE: + os.remove(f) + + +def run(cmd, init=False, dry=False, run=False): + if init: + cmd += " --runmode init" + if dry: + cmd += " --runmode dryrun" + if run: + cmd += " --runmode run" + runner_file = os.path.join(os.getenv("HOME"), RANDOMSTR + ".xavier.runner") + with open(runner_file, "w") as runner: + runner.write(cmd) + st = os.stat(runner_file) + os.chmod(runner_file, st.st_mode | stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) + x = subprocess.run(runner_file, capture_output=True, shell=True, text=True) + run_stdout = x.stdout.encode().decode("utf-8") + run_stderr = x.stderr.encode().decode("utf-8") + return run_stdout, run_stderr + + +def main(): + # get drop down genome options + jsons = get_combos() + genome_annotation_combinations = list(jsons.keys()) + genome_annotation_combinations.sort() + if DEBUG: + print(jsons) + if DEBUG: + print(genome_annotation_combinations) + + # Create different layouts + tumorPair_layout = [ + [ + sg.Text("Pairs file", size=(20, 1)), + sg.InputText(key="-PAIRS-"), + sg.FileBrowse(target="-PAIRS-"), + ], + [ + sg.Text("Copy Number Variants (CNV):"), + sg.Radio("No", "CNVRADIO", enable_events=True, default=True, key="-NOCNV-"), + sg.Radio("Yes", "CNVRADIO", enable_events=True, key="-CNV-"), + ], + ] + + tumorOnly_layout = [ + [sg.T("Copy Number Variants (CNVs) can only be analyzed in Tumor-Normal mode.")] + ] + + analysis_layout = [ + [ + sg.Radio( + "Tumor-normal pair", "TUMORADIO", enable_events=True, key="-TUMNORM-" + ), + sg.Radio("Tumor-only", "TUMORADIO", enable_events=True, key="-TUMONLY-"), + ], + [ + sg.Frame( + "Tumor-Normal", + tumorPair_layout, + font=("Helvetica", 12, "bold"), + key="-PAIROPTS-", + visible=False, + ) + ], + [ + sg.Frame( + "Tumor-Only", + tumorOnly_layout, + font=("Helvetica", 12, "bold"), + key="-ONLYOPTS-", + visible=False, + ) + ], + ] + + targets_layout = [ + [ + sg.Text("Targets .BED file", size=(20, 1)), + sg.InputText(key="-TARGETS-"), + sg.FileBrowse(target="-TARGETS-"), + ] + ] + + settings_layout = [ + [sg.T("Please read the Documentation before changing")], + [ + sg.T("Apply FFPE correction?"), + sg.Radio( + "No", "FFPERADIO", enable_events=True, default=True, key="-NOFFPE-" + ), + sg.Radio("Yes", "FFPERADIO", enable_events=True, key="-FFPE-"), + ], + [sg.T("Targets (.BED file):")], + [ + sg.Radio( + "Default", "BEDRADIO", enable_events=True, default=True, key="-DEFTARG-" + ), + sg.Radio("Custom", "BEDRADIO", enable_events=True, key="-CUSTARG-"), + ], + [sg.Frame("Custom Targets", targets_layout, visible=False, key="-BED-")], + [sg.Button(button_text="Discard", key="-DISCSET-", button_color="#3864AB")], + ] + + default_values = { + "-NOCNV-": True, + "-CNV-": False, + "-NOFFPE-": True, + "-FFPE-": False, + "-DEFTARG-": True, + "-CUSTARG-": False, + } + textKeys = [ + "-INDIR-", + "-OUTDIR-", + "-PAIRS-", + "-TARGETS-", + "-JOBNAME-", + "-ANNOTATION-", + ] + # create main layout + logo = sg.Image(os.path.join(imgdir, "CCBRlogo.png")) + layout = [ + [sg.Column([[logo]], justification="center")], + [ + sg.Text( + "XAVIER - eXome Analysis and Variant explorER", + font=("Helvetica", 12, "bold"), + ) + ], + [ + sg.Text("Input Fastqs folder", size=(20, 1)), + sg.InputText(key="-INDIR-"), + sg.FolderBrowse(target="-INDIR-"), + ], + [ + sg.Text("Output folder", size=(20, 1)), + sg.InputText(key="-OUTDIR-"), + sg.FolderBrowse(target="-OUTDIR-"), + ], + [ + sg.Text("Genome", size=(20, 1)), + sg.Combo( + values=genome_annotation_combinations, + key="-ANNOTATION-", + tooltip="hg38: Homo sapiens GRCh38.p14; mm10: Mus musculus GRCm38.p6", + ), + ], + [ + sg.Text("Job name", size=(20, 1)), + sg.InputText( + key="-JOBNAME-", + tooltip="Name of the job for this run. All output files will be stored under this folder name in the output folder.", + ), + ], + [sg.Frame("Analysis Mode", analysis_layout, visible=True)], + [sg.Button(button_text="Additional Settings", key="-SETTINGS-")], + [sg.Frame("", settings_layout, key="-SET-", visible=False)], + [ + sg.Submit(key="-SUBMIT-"), + sg.Button(button_text="Documentation", key="--DOC--"), + sg.Button(button_text="Help", key="--HELP--"), + sg.Cancel(key="--CANCEL--", button_color="tomato"), + ], + ] + + if DEBUG: + print("layout is ready!") + + window = sg.Window("XAVIER " + XAVIERVER, layout, location=(0, 500), finalize=True) + if DEBUG: + print("window created!") + + # Event loop: + while True: + event, values = window.read() + # if DEBUG: print(event,values) ## Turn on for debugging + + # if any((event != 'Submit')): + if event in ("--CANCEL--", sg.WINDOW_CLOSED): + sg.popup_auto_close( + "Thank you for running XAVIER. GoodBye!", location=(0, 500), title="" + ) + sys.exit(69) + if event == "-TUMNORM-": + window["-PAIROPTS-"].update(visible=True) + window["-ONLYOPTS-"].update(visible=False) + elif event == "-TUMONLY-": + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=True) + values["-CNV-"] = False + if event == "--DOC--": + copy_to_clipboard("https://ccbr.github.io/XAVIER/") + sg.Popup( + "Visit https://ccbr.github.io/XAVIER/ for links to complete documentation. The link has been copied to your clipboard. Please paste it in your favorite web browser.", + font=("Arial", 12, "bold"), + title="", + location=(0, 500), + ) + continue + if event == "--HELP--": + copy_to_clipboard("ccbr_pipeliner@mail.nih.gov") + sg.Popup( + "Email ccbr_pipeliner@mail.nih.gov for help. The email id has been copied to your clipboard. Please paste it in your emailing software.", + font=("Arial", 12, "bold"), + title="", + location=(0, 500), + ) + continue + if event == "-SETTINGS-": + window["-SET-"].update(visible=True) + if event == "-DEFTARG-": + window["-BED-"].update(visible=False) + if event == "-CUSTARG-": + window["-BED-"].update(visible=True) + targets_file = values["-TARGETS-"] + if event == "-DISCSET-": + window["-SET-"].update(visible=False) + window["-BED-"].update(visible=False) + for key, value in default_values.items(): + window[key].Update(value) + if event == "-SUBMIT-": + # check for correct inputs + if values["-INDIR-"] == "": + sg.PopupError("Input folder must be provided!!", location=(0, 500)) + continue + elif not os.path.exists(values["-INDIR-"]): + sg.PopupError("Input folder doesn't exist!!", location=(0, 500)) + continue + elif len(get_fastqs(values["-INDIR-"])) == 0: + sg.PopupError("Input folder has no fastqs!!", location=(0, 500)) + continue + else: + inputfastqs = get_fastqs(values["-INDIR-"]) + if DEBUG: + print(inputfastqs) + if len(inputfastqs) == 0: + sg.PopupError( + "Input folder has no fastqs!!", + location=(0, 500), + title="ERROR!", + font=("Arial", 12, "bold"), + ) + window.Element("-INDIR-").update("") + continue + if values["-OUTDIR-"] == "": + sg.PopupError("Output folder must be provided!!", location=(0, 500)) + continue + outputfolder = values["-OUTDIR-"] + "/" + values["-JOBNAME-"] + if os.path.exists(outputfolder): + ch = sg.popup_yes_no( + "Output folder name exists... this is probably a re-run ... proceed?", + title="Rerun??", + location=(0, 500), + ) + if ch == "No": + window.Element("-OUTDIR-").update("") + continue + if values["-CUSTARG-"] == True: + if values["-TARGETS-"] == "": + sg.PopupError( + "Custom Targets BED file selected but not provided!!", + location=(0, 500), + ) + continue + else: + targets_file = values["-TARGETS-"] + if values["-TUMNORM-"] == "" and values["-TUMONLY-"] == "": + sg.PopupError("Select an analysis mode", location=(0, 500)) + continue + if values["-TUMNORM-"] == True: + if values["-PAIRS-"] == "": + sg.PopupError( + "Tumor-normal mode selected. Need Pairs file to continue", + location=(0, 500), + ) + continue + else: + pairs_file = values["-PAIRS-"] + + genome = values["-ANNOTATION-"] + targets_file = ( + os.path.join(XAVIERDIR, XAVIERVER, "resources") + + "/*" + + genome + + "*.bed" + ) + + xavier_cmd = XAVIER + " run " + xavier_cmd += " --input " + values["-INDIR-"] + "/*.R?.fastq.gz" + xavier_cmd += " --output " + values["-OUTDIR-"] + "/" + values["-JOBNAME-"] + xavier_cmd += " --genome " + genome + xavier_cmd += " --targets " + targets_file + xavier_cmd += " --mode slurm " + + if HOSTNAME == "fsitgl-head01p.ncifcrf.gov": + xavier_cmd += " --sif-cache " + SIFCACHE + "/XAVIER" + xavier_cmd += " --tmp-dir /scratch/cluster_scratch/$USER/" + + if values["-TUMNORM-"] == True: + xavier_cmd += " --pairs " + pairs_file + if values["-CNV-"] == True: + xavier_cmd += " --cnv " + + if values["-FFPE-"] == True: + xavier_cmd += " --ffpe " + + run_stdout, run_stderr = run(xavier_cmd, init=True, dry=False, run=False) + run_stdout, run_stderr = run(xavier_cmd, init=False, dry=True, run=False) + if DEBUG: + print(run_stdout) + if DEBUG: + print(run_stderr) + allout = "{}\n{}".format(run_stdout, run_stderr) + sg.popup_scrolled( + allout, title="Dryrun:STDOUT/STDERR", location=(0, 500), size=(80, 30) + ) + if "error" in allout or "Error" in allout or "ERROR" in allout: + continue + ch = sg.popup_yes_no( + "Submit run to slurm?", title="Submit??", location=(0, 500) + ) + if ch == "Yes": + run_stdout, run_stderr = run( + xavier_cmd, init=False, dry=False, run=True + ) + if DEBUG: + print(run_stdout) + if DEBUG: + print(run_stderr) + allout = "{}\n{}".format(run_stdout, run_stderr) + sg.popup_scrolled( + allout, + title="Slurmrun:STDOUT/STDERR", + location=(0, 500), + size=(80, 30), + ) + runner_file = os.path.join( + os.getenv("HOME"), RANDOMSTR + ".xavier.runner" + ) + FILES2DELETE.append(runner_file) + rerun = sg.popup_yes_no( + "Submit another XAVIER job?", title="", location=(0, 500) + ) + if rerun == "Yes": + for key in window.read(): + window[key].Update(value="") + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=False) + window["-TUMNORM-"].update(value=False) + window["-TUMONLY-"].update(value=False) + if rerun == "No": + sg.popup_auto_close( + "Thank you for running XAVIER. GoodBye!", + location=(0, 500), + title="", + ) + break + elif ch == "No": + for key in textKeys: + window[key].Update(value="") + for key, value in default_values.items(): + window[key].Update(value) + window["-PAIROPTS-"].update(visible=False) + window["-ONLYOPTS-"].update(visible=False) + window["-TUMNORM-"].update(value=False) + window["-TUMONLY-"].update(value=False) + continue + + window.close() + if len(FILES2DELETE) != 0: + deletefiles() + + +# $ ./exome-seek run [--help] \ +# [--mode {local, slurm}] \ +# [--job-name JOB_NAME] \ +# [--callers {mutect2,mutect,strelka, ...}] \ +# [--pairs PAIRS] \ +# [--ffpe] \ +# [--cnv] \ +# [--silent] \ +# [--singularity-cache SINGULARITY_CACHE] \ +# [--sif-cache SIF_CACHE] \ +# [--threads THREADS] \ +# --runmode {init, dryrun, run} \ +# --input INPUT [INPUT ...] \ +# --output OUTPUT \ +# --genome {hg38, ...} \ +# --targets TARGETS + + +if __name__ == "__main__": + main() From 6718d95adbe6bcceafc09c0f4e04b97e41d8d4d1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 12:49:05 -0400 Subject: [PATCH 02/11] refactor: reorganize genome configs fixes #98 --- config/genomes/{hg38.biowulf.json => biowulf/hg38.json} | 0 config/genomes/{mm10.biowulf.json => biowulf/mm10.json} | 0 config/genomes/{hg38.frce.json => frce/hg38.json} | 0 config/genomes/{mm10.frce.json => frce/mm10.json} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename config/genomes/{hg38.biowulf.json => biowulf/hg38.json} (100%) rename config/genomes/{mm10.biowulf.json => biowulf/mm10.json} (100%) rename config/genomes/{hg38.frce.json => frce/hg38.json} (100%) mode change 100755 => 100644 rename config/genomes/{mm10.frce.json => frce/mm10.json} (100%) diff --git a/config/genomes/hg38.biowulf.json b/config/genomes/biowulf/hg38.json similarity index 100% rename from config/genomes/hg38.biowulf.json rename to config/genomes/biowulf/hg38.json diff --git a/config/genomes/mm10.biowulf.json b/config/genomes/biowulf/mm10.json similarity index 100% rename from config/genomes/mm10.biowulf.json rename to config/genomes/biowulf/mm10.json diff --git a/config/genomes/hg38.frce.json b/config/genomes/frce/hg38.json old mode 100755 new mode 100644 similarity index 100% rename from config/genomes/hg38.frce.json rename to config/genomes/frce/hg38.json diff --git a/config/genomes/mm10.frce.json b/config/genomes/frce/mm10.json similarity index 100% rename from config/genomes/mm10.frce.json rename to config/genomes/frce/mm10.json From c34f3fdc38123d51d4dd708e917aca1ae7487004 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 13:24:07 -0400 Subject: [PATCH 03/11] refactor: add gui subcommand, move run() function --- src/xavier/__main__.py | 144 ++++++-------------------------------- src/xavier/cache.py | 63 +++++++++++++++++ src/xavier/gui.py | 147 ++++++++++++--------------------------- src/xavier/run.py | 152 +++++++++++++++++++++++++++++++++++++++-- src/xavier/util.py | 17 +++++ tests/test_run.py | 63 +++++++++++++++++ 6 files changed, 355 insertions(+), 231 deletions(-) create mode 100644 src/xavier/cache.py create mode 100644 tests/test_run.py diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index df6c478..5d8bceb 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -44,137 +44,26 @@ import argparse # potential python3 3rd party package, added in python/3.5 # Local imports -from .run import init, setup, bind, dryrun, runner +from .run import init, setup, bind, dryrun, runner, run from .shells import bash from .options import genome_options -from .util import err, exists, fatal, permissions, check_cache, require, get_version +from .util import ( + err, + exists, + fatal, + permissions, + check_cache, + require, + get_version, + get_genomes_list, +) +from .gui import launch_gui __version__ = get_version() __email__ = "ccbr@mail.nih.gov" __home__ = os.path.dirname(os.path.abspath(__file__)) -def run(sub_args): - """Initialize, setup, and run the XAVIER pipeline. - Calls initialize() to create output directory and copy over pipeline resources, - setup() to create the pipeline config file, dryrun() to ensure their are no issues - before running the pipeline, and finally run() to execute the Snakemake workflow. - @param sub_args : - Parsed arguments for run sub-command - """ - # Step 0. Check for required dependencies - # The pipelines has only two requirements: - # snakemake and singularity - require(["snakemake", "singularity"], ["snakemake", "singularity"]) - - # Optional Step. Initialize working directory, - # copy over required resources to run - # the pipeline - git_repo = __home__ - if sub_args.runmode == "init": - print("--Initializing") - input_files = init( - repo_path=git_repo, output_path=sub_args.output, links=sub_args.input - ) - - # Required Step. Setup pipeline for execution, - # dynamically create config.json config - # file from user inputs and base config - # determine "nidap folder" - create_nidap_folder_YN = "no" - if sub_args.create_nidap_folder: - create_nidap_folder_YN = "yes" - - # templates - config = setup( - sub_args, - repo_path=git_repo, - output_path=sub_args.output, - create_nidap_folder_YN=create_nidap_folder_YN, - links=sub_args.input, - ) - - # Required Step. Resolve docker/singularity bind - # paths from the config file. - bindpaths = bind(sub_args, config=config) - - # Optional Step: Dry-run pipeline - # if sub_args.dry_run: - if sub_args.runmode == "dryrun" or sub_args.runmode == "run": - print("--Dry-Run") - # Dryrun pipeline - dryrun_output = dryrun( - outdir=sub_args.output - ) # python3 returns byte-string representation - print( - "\nDry-running XAVIER pipeline:\n{}".format(dryrun_output.decode("utf-8")) - ) - - # Optional Step. Orchestrate pipeline execution, - # run pipeline in locally on a compute node - # for debugging purposes or submit the master - # job to the job scheduler, SLURM, and create - # logging file - if sub_args.runmode == "run": - print("--Run full pipeline") - if not exists(os.path.join(sub_args.output, "logfiles")): - # Create directory for logfiles - os.makedirs(os.path.join(sub_args.output, "logfiles")) - if sub_args.mode == "local": - log = os.path.join(sub_args.output, "logfiles", "snakemake.log") - else: - log = os.path.join(sub_args.output, "logfiles", "master.log") - logfh = open(log, "w") - wait = "" - if sub_args.wait: - wait = "--wait" - mjob = runner( - mode=sub_args.mode, - outdir=sub_args.output, - # additional_bind_paths = all_bind_paths, - alt_cache=sub_args.singularity_cache, - threads=int(sub_args.threads), - jobname=sub_args.job_name, - submission_script="runner", - logger=logfh, - additional_bind_paths=",".join(bindpaths), - tmp_dir=sub_args.tmp_dir, - wait=wait, - ) - - # Step 5. Wait for subprocess to complete, - # this is blocking and not asynchronous - if not sub_args.silent: - print("\nRunning XAVIER pipeline in '{}' mode...".format(sub_args.mode)) - mjob.wait() - logfh.close() - - # Step 6. Relay information about submission - # of the master job or the exit code of the - # pipeline that ran in local mode - if sub_args.mode == "local": - if int(mjob.returncode) == 0: - print("XAVIER has successfully completed") - else: - fatal( - "XAVIER failed. Please see {} for more information.".format( - os.path.join(sub_args.output, "logfiles", "snakemake.log") - ) - ) - elif sub_args.mode == "slurm": - jobid = ( - open(os.path.join(sub_args.output, "logfiles", "mjobid.log")) - .read() - .strip() - ) - if not sub_args.silent: - if int(mjob.returncode) == 0: - print("Successfully submitted master job: ", end="") - else: - fatal("Error occurred when submitting the master job.") - print(jobid) - - def unlock(sub_args): """Unlocks a previous runs output directory. If snakemake fails ungracefully, it maybe required to unlock the working directory before proceeding again. @@ -407,13 +296,19 @@ def parsed_arguments(): # Suppressing help message of required args to overcome no sub-parser named groups subparser_run = subparsers.add_parser( "run", - help="Run the XAVIER pipeline with input files.", + help="Run the XAVIER pipeline with input files.", usage=argparse.SUPPRESS, formatter_class=argparse.RawDescriptionHelpFormatter, description=required_run_options, epilog=run_epilog, ) + subparser_gui = subparsers.add_parser( + "gui", + help="Launch the pipeline with a Graphical User Interface (GUI)", + description="", + ) + # Required Arguments # Input FastQ files subparser_run.add_argument( @@ -772,6 +667,7 @@ def parsed_arguments(): subparser_run.set_defaults(func=run) subparser_unlock.set_defaults(func=unlock) subparser_cache.set_defaults(func=cache) + subparser_gui.set_defaults(func=launch_gui) # Parse command-line args args = parser.parse_args() diff --git a/src/xavier/cache.py b/src/xavier/cache.py new file mode 100644 index 0000000..a908634 --- /dev/null +++ b/src/xavier/cache.py @@ -0,0 +1,63 @@ +import json +import os +import sys + + +def get_singularity_cachedir(output_dir, cache_dir=None): + """Returns the singularity cache directory. + If no user-provided cache directory is provided, + the default singularity cache is in the output directory. + """ + if not cache_dir: + cache_dir = os.path.join(output_dir, ".singularity") + return cache_dir + + +def get_sif_cache_dir(hpc=None): + sif_dir = None + if hpc == "biowulf": + sif_dir = "/data/CCBR_Pipeliner/SIFS" + elif hpc == "frce": + sif_dir = "/mnt/projects/CCBR-Pipelines/SIFs" + return sif_dir + + +def image_cache(sub_args, config): + """Adds Docker Image URIs, or SIF paths to config if singularity cache option is provided. + If singularity cache option is provided and a local SIF does not exist, a warning is + displayed and the image will be pulled from URI in 'config/containers/images.json'. + @param sub_args : + Parsed arguments for run sub-command + @params config : + Docker Image config file + @return config : + Updated config dictionary containing user information (username and home directory) + """ + images = os.path.join(sub_args.output, "config", "containers", "images.json") + + # Read in config for docker image uris + with open(images, "r") as fh: + data = json.load(fh) + # Check if local sif exists + for image, uri in data["images"].items(): + if sub_args.sif_cache: + sif = os.path.join( + sub_args.sif_cache, + "{}.sif".format(os.path.basename(uri).replace(":", "_")), + ) + if not os.path.exists(sif): + # If local sif does not exist on in cache, print warning + # and default to pulling from URI in config/containers/images.json + print( + 'Warning: Local image "{}" does not exist in singularity cache'.format( + sif + ), + file=sys.stderr, + ) + else: + # Change pointer to image from Registry URI to local SIF + data["images"][image] = sif + + config.update(data) + + return config diff --git a/src/xavier/gui.py b/src/xavier/gui.py index ad55168..4ab3838 100644 --- a/src/xavier/gui.py +++ b/src/xavier/gui.py @@ -1,15 +1,20 @@ #!/usr/bin/env python3 -global DEBUG - -DEBUG = True - import os import sys -import stat -import subprocess import glob import uuid -from pathlib import Path # core python module +import PySimpleGUI as sg + +from .util import ( + get_genomes_dict, + get_tmp_dir, + xavier_base, + get_version, + get_hpcname, + check_python_version, +) +from .run import run_in_context +from .cache import get_sif_cache_dir # getting the name of the directory @@ -25,95 +30,22 @@ sys.path.append(parent) imgdir = os.path.join(parent, "resources", "images") -# Check if python 3.11 or later is available and running -from src.VersionCheck import version_check - -version_check() - -from src.Utils import * # copy_to_clipboard comes from Utils - -# import pysimplegui -import PySimpleGUI as sg +# TODO remove all global variables, use tmpdir instead +global FILES2DELETE +FILES2DELETE = list() global XAVIERDIR global SIFCACHE global XAVIER global XAVIERVER -global RANDOMSTR -global FILES2DELETE global HOSTNAME -XAVIERDIR = os.getenv("XAVIERDIR") -SIFCACHE = os.getenv("SIFCACHE") -XAVIERVER = os.getenv("XAVIERVER") -HOSTNAME = os.getenv("HOSTNAME") -XAVIER = os.path.join(XAVIERDIR, XAVIERVER, "bin", "xavier") -RANDOMSTR = str(uuid.uuid4()) -FILES2DELETE = list() - -# sg.SetOptions(button_color=sg.COLOR_SYSTEM_DEFAULT) - - -def get_combos(): - config_dir = os.path.join(XAVIERDIR, XAVIERVER, "config") - if not os.path.exists(config_dir): - sys.exit("ERROR: Folder does not exist : {}".format(config_dir)) - if HOSTNAME == "biowulf.nih.gov": - cluster = "biowulf" - elif HOSTNAME == "fsitgl-head01p.ncifcrf.gov": - cluster = "frce" - else: - sys.exit("ERROR: XAVIER GUI only works on Biowulf or FRCE clusters") - searchterm = config_dir + "/genomes/*" + cluster + ".json" - jsonfiles = glob.glob(searchterm) - if len(jsonfiles) == 0: - sys.exit("ERROR: No Genome JSONs found in : {}".format(config_dir)) - jsons = dict() - for j in jsonfiles: - k = os.path.basename(j) - k = k.replace("." + cluster + ".json", "") - jsons[k] = j - return jsons - - -def fixpath(p): - return os.path.abspath(os.path.expanduser(p)) - - -def get_fastqs(inputdir): - inputdir = fixpath(inputdir) - inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") - inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") - inputfastqs.extend(inputfqs) - return inputfastqs - - -def deletefiles(): - for f in FILES2DELETE: - os.remove(f) - -def run(cmd, init=False, dry=False, run=False): - if init: - cmd += " --runmode init" - if dry: - cmd += " --runmode dryrun" - if run: - cmd += " --runmode run" - runner_file = os.path.join(os.getenv("HOME"), RANDOMSTR + ".xavier.runner") - with open(runner_file, "w") as runner: - runner.write(cmd) - st = os.stat(runner_file) - os.chmod(runner_file, st.st_mode | stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) - x = subprocess.run(runner_file, capture_output=True, shell=True, text=True) - run_stdout = x.stdout.encode().decode("utf-8") - run_stderr = x.stderr.encode().decode("utf-8") - return run_stdout, run_stderr - - -def main(): +def launch_gui(DEBUG=True): + check_python_version() + RANDOMSTR = str(uuid.uuid4()) # get drop down genome options - jsons = get_combos() + jsons = get_genomes_dict() genome_annotation_combinations = list(jsons.keys()) genome_annotation_combinations.sort() if DEBUG: @@ -464,22 +396,31 @@ def main(): deletefiles() -# $ ./exome-seek run [--help] \ -# [--mode {local, slurm}] \ -# [--job-name JOB_NAME] \ -# [--callers {mutect2,mutect,strelka, ...}] \ -# [--pairs PAIRS] \ -# [--ffpe] \ -# [--cnv] \ -# [--silent] \ -# [--singularity-cache SINGULARITY_CACHE] \ -# [--sif-cache SIF_CACHE] \ -# [--threads THREADS] \ -# --runmode {init, dryrun, run} \ -# --input INPUT [INPUT ...] \ -# --output OUTPUT \ -# --genome {hg38, ...} \ -# --targets TARGETS +def copy_to_clipboard(string): + r = Tk() + r.withdraw() + r.clipboard_clear() + r.clipboard_append(string) + r.update() + r.destroy() + + +def fixpath(p): + return os.path.abspath(os.path.expanduser(p)) + + +def get_fastqs(inputdir): + inputdir = fixpath(inputdir) + inputfastqs = glob.glob(inputdir + os.sep + "*.fastq.gz") + inputfqs = glob.glob(inputdir + os.sep + "*.fq.gz") + inputfastqs.extend(inputfqs) + return inputfastqs + + +def delete_files(files): + for f in files: + if os.path.exists(f): + os.remove(f) if __name__ == "__main__": diff --git a/src/xavier/run.py b/src/xavier/run.py index de92d95..39d9e69 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -3,15 +3,149 @@ # Python standard library from __future__ import print_function -from shutil import copytree, copyfile +import contextlib +import io import os import re import json +import shutil import sys import subprocess # Local imports -from .util import git_commit_hash, join_jsons, fatal, which, exists, err, get_version +from .util import ( + git_commit_hash, + join_jsons, + fatal, + which, + exists, + err, + get_version, + xavier_base, + require, +) + + +def run(sub_args): + """Initialize, setup, and run the XAVIER pipeline. + Calls initialize() to create output directory and copy over pipeline resources, + setup() to create the pipeline config file, dryrun() to ensure their are no issues + before running the pipeline, and finally run() to execute the Snakemake workflow. + @param sub_args : + Parsed arguments for run sub-command + """ + print("SUB_ARGS", sub_args) + # Step 0. Check for required dependencies + # The pipelines has only two requirements: + # snakemake and singularity + require(["snakemake", "singularity"], ["snakemake", "singularity"]) + + # Optional Step. Initialize working directory, + # copy over required resources to run + # the pipeline + git_repo = xavier_base() + if sub_args.runmode == "init": + print("--Initializing") + input_files = init( + repo_path=git_repo, output_path=sub_args.output, links=sub_args.input + ) + + # Required Step. Setup pipeline for execution, + # dynamically create config.json config + # file from user inputs and base config + # determine "nidap folder" + create_nidap_folder_YN = "no" + if sub_args.create_nidap_folder: + create_nidap_folder_YN = "yes" + + # templates + config = setup( + sub_args, + repo_path=git_repo, + output_path=sub_args.output, + create_nidap_folder_YN=create_nidap_folder_YN, + links=sub_args.input, + ) + + # Required Step. Resolve docker/singularity bind + # paths from the config file. + bindpaths = bind(sub_args, config=config) + + # Optional Step: Dry-run pipeline + # if sub_args.dry_run: + if sub_args.runmode == "dryrun" or sub_args.runmode == "run": + print("--Dry-Run") + # Dryrun pipeline + dryrun_output = dryrun( + outdir=sub_args.output + ) # python3 returns byte-string representation + print( + "\nDry-running XAVIER pipeline:\n{}".format(dryrun_output.decode("utf-8")) + ) + + # Optional Step. Orchestrate pipeline execution, + # run pipeline in locally on a compute node + # for debugging purposes or submit the master + # job to the job scheduler, SLURM, and create + # logging file + if sub_args.runmode == "run": + print("--Run full pipeline") + if not exists(os.path.join(sub_args.output, "logfiles")): + # Create directory for logfiles + os.makedirs(os.path.join(sub_args.output, "logfiles")) + if sub_args.mode == "local": + log = os.path.join(sub_args.output, "logfiles", "snakemake.log") + else: + log = os.path.join(sub_args.output, "logfiles", "master.log") + logfh = open(log, "w") + wait = "" + if sub_args.wait: + wait = "--wait" + mjob = runner( + mode=sub_args.mode, + outdir=sub_args.output, + # additional_bind_paths = all_bind_paths, + alt_cache=sub_args.singularity_cache, + threads=int(sub_args.threads), + jobname=sub_args.job_name, + submission_script="runner", + logger=logfh, + additional_bind_paths=",".join(bindpaths), + tmp_dir=sub_args.tmp_dir, + wait=wait, + ) + + # Step 5. Wait for subprocess to complete, + # this is blocking and not asynchronous + if not sub_args.silent: + print("\nRunning XAVIER pipeline in '{}' mode...".format(sub_args.mode)) + mjob.wait() + logfh.close() + + # Step 6. Relay information about submission + # of the master job or the exit code of the + # pipeline that ran in local mode + if sub_args.mode == "local": + if int(mjob.returncode) == 0: + print("XAVIER has successfully completed") + else: + fatal( + "XAVIER failed. Please see {} for more information.".format( + os.path.join(sub_args.output, "logfiles", "snakemake.log") + ) + ) + elif sub_args.mode == "slurm": + jobid = ( + open(os.path.join(sub_args.output, "logfiles", "mjobid.log")) + .read() + .strip() + ) + if not sub_args.silent: + if int(mjob.returncode) == 0: + print("Successfully submitted master job: ", end="") + else: + fatal("Error occurred when submitting the master job.") + print(jobid) def init( @@ -73,7 +207,7 @@ def copy_safe(source, target, resources=[]): destination = os.path.join(target, resource) if not exists(destination): # Required resources do not exist - copytree(os.path.join(source, resource), destination) + shutil.copytree(os.path.join(source, resource), destination) def sym_safe(input_data, target): @@ -223,7 +357,7 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ repo_path, "config", "cluster" + "." + shorthostname + ".json" ) cluster_output = os.path.join(output_path, "cluster.json") - copyfile(cluster_config, cluster_output) + shutil.copyfile(cluster_config, cluster_output) # Global config file for pipeline, config.json config = join_jsons(required.values()) # uses templates in the rna-seek repo @@ -831,3 +965,13 @@ def runner( ) return masterjob + + +def run_in_context(args): + """Execute the run function in a context manager to capture stdout/stderr""" + with contextlib.redirect_stdout(io.StringIO()) as out_f, contextlib.redirect_stderr( + io.StringIO() + ) as err_f: + run(args) + allout = out_f.getvalue() + "\n" + err_f.getvalue() + return allout diff --git a/src/xavier/util.py b/src/xavier/util.py index 3a269f8..7069fda 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -397,6 +397,23 @@ def join_jsons(templates): return aggregated +def check_python_version(): + # version check + # glob.iglob requires 3.11 for using "include_hidden=True" + MIN_PYTHON = (3, 11) + try: + assert sys.version_info >= MIN_PYTHON + print( + "Python version: {0}.{1}.{2}".format( + sys.version_info.major, sys.version_info.minor, sys.version_info.micro + ) + ) + except AssertionError: + exit( + f"{sys.argv[0]} requires Python {'.'.join([str(n) for n in MIN_PYTHON])} or newer" + ) + + if __name__ == "__main__": # Calculate MD5 checksum of entire file print("{} {}".format(md5sum(sys.argv[0]), sys.argv[0])) diff --git a/tests/test_run.py b/tests/test_run.py new file mode 100644 index 0000000..fdc7b18 --- /dev/null +++ b/tests/test_run.py @@ -0,0 +1,63 @@ +import argparse +import glob +import os +import tempfile + +from xavier.src.xavier.util import get_tmp_dir, xavier_base, get_hpcname +from xavier.src.xavier.cache import get_sif_cache_dir +from xavier.src.xavier.run import run, run_in_context + + +def test_dryrun(): + if get_hpcname() == "biowulf": + with tempfile.TemporaryDirectory() as tmp_dir: + run_args = argparse.Namespace( + runmode="dryrun", + input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), + output=tmp_dir, + genome="hg38", + targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"), + mode="local", + job_name="pl:xavier", + callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], + pairs=xavier_base(".tests/pairs.tsv"), + ffpe=False, + cnv=False, + wait=False, + create_nidap_folder=False, + silent=False, + singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), + sif_cache=get_sif_cache_dir(), + tmp_dir=get_tmp_dir(None, tmp_dir), + threads=2, + ) + + # execute dry run and capture stdout/stderr + allout = run_in_context(run_args) + assert ( + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." + in allout + ) + + +run_args = argparse.Namespace( + runmode="dryrun", + input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), + output="tmp", + genome="hg38", + targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"), + mode="local", + job_name="pl:xavier", + callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], + pairs=xavier_base(".tests/pairs.tsv"), + ffpe=False, + cnv=False, + wait=False, + create_nidap_folder=False, + silent=False, + singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), + sif_cache=get_sif_cache_dir(), + tmp_dir=get_tmp_dir(None, "tmp"), + threads=2, +) +run(run_args) From 21416c153a148f8c19a10b0c051e475a09075906 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 13:52:04 -0400 Subject: [PATCH 04/11] fix: genome config path --- src/xavier/run.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/xavier/run.py b/src/xavier/run.py index 39d9e69..0ee67bd 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -23,6 +23,7 @@ get_version, xavier_base, require, + get_hpcname, ) @@ -34,7 +35,6 @@ def run(sub_args): @param sub_args : Parsed arguments for run sub-command """ - print("SUB_ARGS", sub_args) # Step 0. Check for required dependencies # The pipelines has only two requirements: # snakemake and singularity @@ -315,32 +315,26 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ ifiles = sym_safe(input_data=links, target=output_path) mixed_inputs(ifiles) - hpcget = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ) - hpcname = "" - - if "biowulf" in hpcget.stdout: - shorthostname = "biowulf" - print("Thank you for running XAVIER on Biowulf") - elif "fsitgl" in hpcget.stdout: - shorthostname = "frce" - print("Thank you for running XAVIER on FRCE") - else: + shorthostname = get_hpcname() + if not shorthostname: shorthostname = "biowulf" print( - "%s unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config" - % (hpcget) + f"{shorthostname} unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config" ) + else: + print(f"Thank you for running XAVIER on {shorthostname.upper()}") genome_config = os.path.join( - repo_path, "config", "genomes", sub_args.genome + "." + shorthostname + ".json" + repo_path, "config", "genomes", get_hpcname(), sub_args.genome + ".json" ) if sub_args.genome.endswith(".json"): # Provided a custom reference genome generated by rna-seek build genome_config = os.path.abspath(sub_args.genome) + if not os.path.exists(genome_config): + raise FileNotFoundError(f"Genome config file does not exist: {genome_config}") + required = { # Base configuration file "base": os.path.join(repo_path, "config", "config.json"), From 82058f0200beae9960ac1bc83aa28705c8d402eb Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 13:52:20 -0400 Subject: [PATCH 05/11] test: dryrun on biowulf --- tests/test_run.py | 41 ++++++++++------------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/tests/test_run.py b/tests/test_run.py index fdc7b18..43d9f35 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -12,7 +12,7 @@ def test_dryrun(): if get_hpcname() == "biowulf": with tempfile.TemporaryDirectory() as tmp_dir: run_args = argparse.Namespace( - runmode="dryrun", + runmode="init", input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), output=tmp_dir, genome="hg38", @@ -31,33 +31,12 @@ def test_dryrun(): tmp_dir=get_tmp_dir(None, tmp_dir), threads=2, ) - - # execute dry run and capture stdout/stderr - allout = run_in_context(run_args) - assert ( - "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." - in allout - ) - - -run_args = argparse.Namespace( - runmode="dryrun", - input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), - output="tmp", - genome="hg38", - targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"), - mode="local", - job_name="pl:xavier", - callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], - pairs=xavier_base(".tests/pairs.tsv"), - ffpe=False, - cnv=False, - wait=False, - create_nidap_folder=False, - silent=False, - singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), - sif_cache=get_sif_cache_dir(), - tmp_dir=get_tmp_dir(None, "tmp"), - threads=2, -) -run(run_args) + # init + allout_1 = run_in_context(run_args) + run_args.runmode = "dryrun" + # dryrun + allout_2 = run_in_context(run_args) + assert (all([ + "--Initializing" in allout_1, + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." in allout_2 + ])) From 2019ad498f6f7ef68564b595b2377a9eb0a4973b Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:33:55 -0400 Subject: [PATCH 06/11] fix: add ccbr logo to repo --- resources/CCBRlogo.png | Bin 0 -> 4717 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 resources/CCBRlogo.png diff --git a/resources/CCBRlogo.png b/resources/CCBRlogo.png new file mode 100644 index 0000000000000000000000000000000000000000..256ea2929cc032498f8d8500f4d4ca8f1037d303 GIT binary patch literal 4717 zcmY*d2UrtZuujBC2pt3j=|u=7p-Bx@dJ~Y2NRTcaLhk_ur3uod_ue}wQUnP|1VmJ% zH>DSmBH&x@z3<-lcE8=3J^Rl;XU?9TGrLc9byP`V3@{J~M5?Z)qz_27%L951I2&#? zssaM+rLU?8su^Yc0bE4dnyA}pYlCXaS z0`$vfBs=S05R8)yyNR|g>m4^wTUIfIAVQE`7RJiTit@CvlhRjG`Ntf%l3{niVBDpU zNMB!Hgs%v~&C?z!Bq=G06ck1Z3kv`U0WW`7jFq2&s~5-LB>&5!Wb0+^>FADebaQ3B z*;6v$BQD&$FP^>f}GcsduBcdp0l1u_WWxX4k=}E4Xn~$|>Cw2cI#M@6QYY;JfEXbDfVZ0=Jq; zSJXcTAI%oEMsRy0m2=A?i zVZ`2~v88rbw2{X51ww`$p=F%^;#2N|j9`KTAXNLVg(o)`J9l1hzf0Q~{ zh~;>VN_xzAPq~!syyHlS-Rb@lQgLfdh|6bpj02)q?hUV91w6#4GI}V5j{$v>1qP}b z0i{ifFf$U(n^@PQpM(ck1|J$0S&hmZeN#O5!pr!g{+t~wrCEmjC^KOGK&CvT6&^>K(7J&_+sIG!X?_&&Y$C&bk`3*+#!E!8G1fh zuKt9!=$_9stXC_XCFGdRwfp?nXqa{*vrf7oTSlFk#N;kZ@G^FyIbL%0#+pm}O6tLG zRpY2e7G~VR1+IpxcNE`ln$UTWZ&$uQlGt`E7nFQ+t)_F^^MQSCun#$X8mDHb#-no9 zW|A4)CjoUyLMF^}yw>-8@d$M2cRkdd-#vsZ{QRU+%|#_JePvALH_uVj!3bxXyM8a| z;XE;^3fIl=!XK}ffyH`8_L0k>D6MXRGBXOGWXpnScpdtrARn`N zXU*_-5(tDS{G9ab$`@dOlI#v(p6n4n%;U7CGVOEx?+@6fS%`h5jEm7r6@shLli2Xp zv^~_PkhYh{9lT==Rj1`wed3OTm$n&j;keoc#{J~nP~(N9)TQM)!(*IrG}pllNu(w;gm1%GcA8=+j6Bx_(~QDuu)koNQP zjr+dhJ+CUV6E%_jsG=MzAak}LJvi0Mbnt|)wrhOP#hjAX1RY+tkK-ep)?c#o^0D-a^H49+IjTk4dLb%|~WvU&P z<6@&-TP@S0&2ht!x|&rFpP{N!P&49nRdvf(+<|sRj_NRjxW{^Vsg4obDeF8FTWA!M zPxNYE0+6Wc2h|%9^P+W9rVnI4EM^7$98;x!O?K>gs+0Gdiy_Uz$CQ_YFw&fY4hAx3 zJiXD|Dmv^-uZ8Y*mp^5X;a~jnNFh$apWZ=Rd zg9cK5?e$R+O=KKBjLysY%~VzANwkHj!AE0oK&eR-bB*kI9>EES%t*i32&P1(=uWn zma_&{9{hdHp8FE<1924I8@xf!zrXfz-Hwp*kRIg`z2aynZ*hZ2{*K}t4W|Nb#E`OF zvv5Prw7@t@GvI+S&1;6Oi{Crs#+Jd&FABhjw)rc@pp%bj6MDf@mZ8BCz*2E0OXbKz zqf4J)(DiPLB5yO)jRjsD#inRP^fmUr^fCYadZ|?Qjuf$B{>G0<7JdtlBi^gPbGNZ? z^VM#nas0ssL-UO$M764VH$F@t!tIW!?3yQyh)I^Cm;)T@&L7T8F^TS{Afv!L>cwCy-;gavSfF?L1+2^_ z2|NuW+OYygFHZDI4>JeU#8({|%kRBFJMM zJzv}j-W&@PuHpU~gLo%P5>^`&^s_&c&Zykfh^Mby>8Vx>l?l|44XdUL&#$Z;77rw3 z334~1h9gVaPFs$MN^=4#e3`W8By(OX6Ug2sr~pUaBkK|IP2rvkI(Q$hEu7aOqrZ2$ z)jBtxq#Du8rxtVP6R~%7(nJKfxg@zVrG5r6tT)j*aQ$(nPIJ0ubTRpQ0f~myAX#0W z%QYUXH$8fqx!d&{iDlfIw-X*q1Cr}Wnus6yd}YQLJr#8fOUA=;TlkD-hyu>0a1#DW#4$Lq(f6gQo8H=Q6$b%lsb58R*g9bu=3& zEz8WbOyD3{Yv4=j9Z?uhaqK_Kk$|E?cJ1~?BQ2k=rR}xrfiE~aL60ID>FU9H%J&@P zjH7)hWjrNc=+be^zuPlRi_0CeQyHA((kJWT(Uh9<11$t{#!Q42Zk1hoJT3uht6^o2 zV)Tr*H#^dpvLEh$L;Z1OWrR4b?%r8f)$4pc+9<_~AuJrw zj1c-5P~U#tdU1%Bzs$U2DsNhOZ&V1AtZ1AGg zNd1&;?2QMp?bA_wuYRh_pD@!uMBXdW6qS_54|FC*+&w6ZU+Ib?tx>ik)sB0TB`R$o z*WsMV_Zc&xeNIh$EvPbIEhMMU2+c!4diSVA^DDg%)otw^jh-xk;F$|{_%zaP0hDf-*`L?iF z-6YX;Yn<%!l}YrsDQ0dwsH)pUqWo4hv05wXmbv$9_j_~pc> z&o-t(LYwAlgN2IP*V)iN=P&kc9Br`#LWq^Rr?$0@-AO(fo80T8HT#u*wFVT?`k>W8 z$QA9!gv@?uh7V0QYuQn{g@s?pZ*v za5JAvLY3a9Bc0(3W8zcgg`&uD)q9X|K8Slx!$wVKiAMT`w@O5VnV~^Nt7YM0hQ=}d z+ltDX%wsXUb44dpjyW&%Q3GvqvN=@=F83f z1c!atTvekFRoL(ZkqGa!YnfCeo}O0D8y8ADN^S(yx@xb#Z@kY~wZHeP zTNa{X^~u##sgrUehx+_?IPQqED9~EqEC{}%@$qe6BHTD0BAKAE!MT}&ErVXO=~$hP zcXmFQd->F(3T7*0;u&$>m#kOVRZv!*J2ZK8XG#}3aXaL;mzJ3574O|C=@mK2@712q zYb`I(M0jXu_uDvcFYd|H0T+uEX5E?#54ktTLZ*Mf+vt~z5#BG*QGObT$dY7q(D03A0iMvV;Zj(es{t!e`f-G(I%a;%kESdwlW}sRLp%!{Ez*u zH1TMjrrPU;&wd*yu)_*LAzViS-u9mrMKBqJ;e>pBUf8g2-|7%4?r^+?Jwlze?p#5A z9{H-xqo+EqLBC>sy{2JGp__+iO~c60h*n}bF<$X_H2kNOlPl{HMj?{hg6*Gxb| Tb{x;;KNodn9iZ0Zs4DN literal 0 HcmV?d00001 From bb3d1290843b57c2a9bf49f305aa448c0db3b7aa Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:34:18 -0400 Subject: [PATCH 07/11] refactor: allow multiple paths for xavier_base() --- src/xavier/util.py | 4 ++-- tests/test_util.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tests/test_util.py diff --git a/src/xavier/util.py b/src/xavier/util.py index 7069fda..3cf87b8 100644 --- a/src/xavier/util.py +++ b/src/xavier/util.py @@ -13,14 +13,14 @@ import warnings -def xavier_base(rel_path=""): +def xavier_base(*paths): """Get the absolute path to a file in the repository @return abs_path """ basedir = os.path.dirname( os.path.dirname(os.path.dirname(os.path.realpath(__file__))) ) - return os.path.join(basedir, rel_path) + return os.path.join(basedir, *paths) def get_version(): diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000..53951d5 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,11 @@ +import os +import warnings +from xavier.src.xavier.util import ( + xavier_base +) + +def test_xavier_base(): + test_base = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + assert all([xavier_base() == test_base, + xavier_base("a","b","c") == os.path.join(test_base, "a", "b", "c") + ]) From ee37ab0a127874271558dacd79655be4f4185f1d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:48:41 -0400 Subject: [PATCH 08/11] refactor: call run() directly from the GUI resolves #81 --- src/xavier/gui.py | 112 +++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 82 deletions(-) diff --git a/src/xavier/gui.py b/src/xavier/gui.py index 4ab3838..72ee41e 100644 --- a/src/xavier/gui.py +++ b/src/xavier/gui.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 +import argparse import os import sys import glob -import uuid import PySimpleGUI as sg from .util import ( @@ -16,34 +16,8 @@ from .run import run_in_context from .cache import get_sif_cache_dir - -# getting the name of the directory -# where the this file is present. -current = os.path.dirname(os.path.realpath(__file__)) - -# Getting the parent directory name -# where the current directory is present. -parent = os.path.dirname(current) - -# adding the parent directory to -# the sys.path. -sys.path.append(parent) -imgdir = os.path.join(parent, "resources", "images") - -# TODO remove all global variables, use tmpdir instead -global FILES2DELETE -FILES2DELETE = list() - -global XAVIERDIR -global SIFCACHE -global XAVIER -global XAVIERVER -global HOSTNAME - - def launch_gui(DEBUG=True): check_python_version() - RANDOMSTR = str(uuid.uuid4()) # get drop down genome options jsons = get_genomes_dict() genome_annotation_combinations = list(jsons.keys()) @@ -143,7 +117,7 @@ def launch_gui(DEBUG=True): "-ANNOTATION-", ] # create main layout - logo = sg.Image(os.path.join(imgdir, "CCBRlogo.png")) + logo = sg.Image(xavier_base(os.path.join("resources", "CCBRlogo.png"))) layout = [ [sg.Column([[logo]], justification="center")], [ @@ -191,7 +165,7 @@ def launch_gui(DEBUG=True): if DEBUG: print("layout is ready!") - window = sg.Window("XAVIER " + XAVIERVER, layout, location=(0, 500), finalize=True) + window = sg.Window(f"XAVIER {get_version()}", layout, location=(0, 500), finalize=True) if DEBUG: print("window created!") @@ -237,7 +211,6 @@ def launch_gui(DEBUG=True): window["-BED-"].update(visible=False) if event == "-CUSTARG-": window["-BED-"].update(visible=True) - targets_file = values["-TARGETS-"] if event == "-DISCSET-": window["-SET-"].update(visible=False) window["-BED-"].update(visible=False) @@ -287,8 +260,6 @@ def launch_gui(DEBUG=True): location=(0, 500), ) continue - else: - targets_file = values["-TARGETS-"] if values["-TUMNORM-"] == "" and values["-TUMONLY-"] == "": sg.PopupError("Select an analysis mode", location=(0, 500)) continue @@ -299,43 +270,34 @@ def launch_gui(DEBUG=True): location=(0, 500), ) continue - else: - pairs_file = values["-PAIRS-"] - genome = values["-ANNOTATION-"] - targets_file = ( - os.path.join(XAVIERDIR, XAVIERVER, "resources") - + "/*" - + genome - + "*.bed" + output_dir = os.path.join(values["-OUTDIR-"], values["-JOBNAME-"]) + run_args = argparse.Namespace( + runmode="init", + input=list(glob.glob(os.path.join(values["-INDIR-"], "*.fastq.gz"))), + output=output_dir, + genome=genome, + targets=values["-TARGETS-"] if values["-TARGETS-"] else xavier_base('resources', 'Agilent_SSv7_allExons_hg38.bed'), # TODO should this be part of the genome config file? + mode="slurm", + job_name="pl:xavier", + callers=["mutect2", "mutect", "strelka", "vardict", "varscan"], + pairs=values.get("-PAIRS-", None), + ffpe=values["-FFPE-"], + cnv=values["-CNV-"], + wait=False, + create_nidap_folder=False, + silent=False, + singularity_cache=os.environ.get("SINGULARITY_CACHEDIR", None), + sif_cache=get_sif_cache_dir(), + tmp_dir=get_tmp_dir(None, output_dir), + threads=2, ) - - xavier_cmd = XAVIER + " run " - xavier_cmd += " --input " + values["-INDIR-"] + "/*.R?.fastq.gz" - xavier_cmd += " --output " + values["-OUTDIR-"] + "/" + values["-JOBNAME-"] - xavier_cmd += " --genome " + genome - xavier_cmd += " --targets " + targets_file - xavier_cmd += " --mode slurm " - - if HOSTNAME == "fsitgl-head01p.ncifcrf.gov": - xavier_cmd += " --sif-cache " + SIFCACHE + "/XAVIER" - xavier_cmd += " --tmp-dir /scratch/cluster_scratch/$USER/" - - if values["-TUMNORM-"] == True: - xavier_cmd += " --pairs " + pairs_file - if values["-CNV-"] == True: - xavier_cmd += " --cnv " - - if values["-FFPE-"] == True: - xavier_cmd += " --ffpe " - - run_stdout, run_stderr = run(xavier_cmd, init=True, dry=False, run=False) - run_stdout, run_stderr = run(xavier_cmd, init=False, dry=True, run=False) + allout_init = run_in_context(run_args) + run_args.runmode = "dryrun" + allout_dryrun = run_in_context(run_args) + allout = "\n".join([allout_init, allout_dryrun]) if DEBUG: - print(run_stdout) - if DEBUG: - print(run_stderr) - allout = "{}\n{}".format(run_stdout, run_stderr) + print(allout) sg.popup_scrolled( allout, title="Dryrun:STDOUT/STDERR", location=(0, 500), size=(80, 30) ) @@ -345,24 +307,14 @@ def launch_gui(DEBUG=True): "Submit run to slurm?", title="Submit??", location=(0, 500) ) if ch == "Yes": - run_stdout, run_stderr = run( - xavier_cmd, init=False, dry=False, run=True - ) - if DEBUG: - print(run_stdout) - if DEBUG: - print(run_stderr) - allout = "{}\n{}".format(run_stdout, run_stderr) + run_args.runmode = "run" + allout = run_in_context(run_args) sg.popup_scrolled( allout, title="Slurmrun:STDOUT/STDERR", location=(0, 500), size=(80, 30), ) - runner_file = os.path.join( - os.getenv("HOME"), RANDOMSTR + ".xavier.runner" - ) - FILES2DELETE.append(runner_file) rerun = sg.popup_yes_no( "Submit another XAVIER job?", title="", location=(0, 500) ) @@ -390,11 +342,7 @@ def launch_gui(DEBUG=True): window["-TUMNORM-"].update(value=False) window["-TUMONLY-"].update(value=False) continue - window.close() - if len(FILES2DELETE) != 0: - deletefiles() - def copy_to_clipboard(string): r = Tk() From 37200bc1d29c8a955592564b108e3460f4d00ab6 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:52:38 -0400 Subject: [PATCH 09/11] test: fix test for xavier_base() installation is not guaranteed to be in same parent folder as test --- tests/test_util.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 53951d5..4cccd51 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -6,6 +6,5 @@ def test_xavier_base(): test_base = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - assert all([xavier_base() == test_base, - xavier_base("a","b","c") == os.path.join(test_base, "a", "b", "c") - ]) + xavier_base() + assert xavier_base("a","b","c").endswith('/a/b/c') From 658f202214e373babbd781960f5f312528e79480 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:53:15 -0400 Subject: [PATCH 10/11] docs: switch xavier_gui to xavier gui --- docs/usage/gui.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage/gui.md b/docs/usage/gui.md index 5c73ddf..c20bc0f 100644 --- a/docs/usage/gui.md +++ b/docs/usage/gui.md @@ -73,7 +73,7 @@ xavier --version To run the XAVIER pipeline from the GUI, simply enter: ```bash -xavier_gui +xavier gui ``` and it will launch the XAVIER window. @@ -177,6 +177,6 @@ and start an interactive session. ![gui_nx_config2](images/gui_nx_config2.png) -Similar to the instructions above, load `ccbrpipeliner` module and enter `xavier_gui` to launch the XAVIER gui. +Similar to the instructions above, load `ccbrpipeliner` module and enter `xavier gui` to launch the XAVIER gui. ![gui_nx_xavier](images/gui_nx_xavier.png) From 916c328abcb2378a267d52adc9a0b0f1a9a8e922 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 8 Aug 2024 14:54:30 -0400 Subject: [PATCH 11/11] docs: update CHANGELOG.md --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6f3c17..b692942 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ ## development version - You can now cite XAVIER with the DOI [10.5281/zenodo.12727315](https://doi.org/10.5281/zenodo.12727315). (#88, @kelly-sovacool) -- Minor documentation improvements. (#92, @kelly-sovacool) -- Minor documentation rendering improvements (#93, @samarth8392) +- Minor documentation improvements. (#92, @kelly-sovacool; #93, @samarth8392) - The docs website now has a dropdown menu to select which version to view. The latest release is shown by default. (#150, @kelly-sovacool) +- Add `xavier gui` subcommand to launch the graphical user interface. (#99, @kelly-sovacool) + - Previously, `xavier_gui` (with an underscore) was a command in the `ccbrpipeliner` module. ## XAVIER 3.0.3