Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use tools package #151

Merged
merged 14 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ classifiers = [
requires-python = ">=3.11"
dependencies = [
"argparse",
"ccbr_tools@git+https://github.com/CCBR/Tools",
"Click >= 8.1.3",
"PySimpleGui < 5",
"snakemake >= 7.32, < 8",
Expand Down
47 changes: 30 additions & 17 deletions src/renee/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
"""

# Python standard library
from __future__ import print_function
from shutil import copy
import json
import os
Expand All @@ -22,22 +21,22 @@

# 3rd party imports from pypi
import argparse

# local imports
from .cache import get_sif_cache_dir
from .run import run
from .dryrun import dryrun
from .gui import launch_gui
from .conditions import fatal
from .util import (
from ccbr_tools.pipeline.util import (
get_hpcname,
get_tmp_dir,
get_genomes_list,
get_version,
check_python_version,
_cp_r_safe_,
orchestrate,
)
from ccbr_tools.pipeline.cache import get_sif_cache_dir

# local imports
from .run import run
from .dryrun import dryrun
from .gui import launch_gui
from .conditions import fatal
from .util import renee_base, get_version
from .orchestrate import orchestrate

# Pipeline Metadata and globals
RENEE_PATH = os.path.dirname(
Expand Down Expand Up @@ -398,9 +397,11 @@ def build(sub_args):
)
)
elif sub_args.mode == "slurm":
jobid = (
open(os.path.join(sub_args.output, "logfiles", "bjobid.log")).read().strip()
)
with open(
os.path.join(sub_args.output, "logfiles", "bjobid.log"), "r"
) as infile:
jobid = infile.read().strip()

if int(masterjob.returncode) == 0:
print("Successfully submitted master job: ", end="")
else:
Expand Down Expand Up @@ -770,7 +771,12 @@ def parsed_arguments(name, description):
{2}{3}Prebuilt genome+annotation combos:{4}
{5}
""".format(
"renee", __version__, c.bold, c.url, c.end, list(get_genomes_list())
"renee",
__version__,
c.bold,
c.url,
c.end,
list(get_genomes_list(repo_base=renee_base)),
)
)

Expand Down Expand Up @@ -817,7 +823,9 @@ def parsed_arguments(name, description):
"--genome",
required=True,
type=lambda option: str(
genome_options(subparser_run, option, get_genomes_list())
genome_options(
subparser_run, option, get_genomes_list(repo_base=renee_base)
)
),
help=argparse.SUPPRESS,
)
Expand Down Expand Up @@ -1126,7 +1134,12 @@ def parsed_arguments(name, description):
{2}{3}Prebuilt genome+annotation combos:{4}
{5}
""".format(
"renee", __version__, c.bold, c.url, c.end, list(get_genomes_list())
"renee",
__version__,
c.bold,
c.url,
c.end,
list(get_genomes_list(repo_base=renee_base)),
)
)

Expand Down
63 changes: 0 additions & 63 deletions src/renee/cache.py

This file was deleted.

19 changes: 9 additions & 10 deletions src/renee/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@
import sys
from tkinter import Tk

from .util import (
from ccbr_tools.pipeline.util import (
get_genomes_dict,
get_tmp_dir,
get_shared_resources_dir,
renee_base,
get_version,
get_singularity_cachedir,
get_hpcname,
)
from .cache import get_sif_cache_dir
from .run import run_in_context
from ccbr_tools.pipeline.cache import get_sif_cache_dir, get_singularity_cachedir
from ccbr_tools.shell import exec_in_context

from .util import get_version, renee_base, get_shared_resources_dir
from .run import run

# TODO: get rid of all the global variables
# TODO: let's use a tmp dir and put these files there instead. See for inspiration: https://github.com/CCBR/RENEE/blob/16d13dca1d5f0f43c7dfda379efb882a67635d17/tests/test_cache.py#L14-L28
Expand All @@ -27,7 +26,7 @@

def launch_gui(sub_args, debug=True):
# get drop down genome+annotation options
jsons = get_genomes_dict(error_on_warnings=True)
jsons = get_genomes_dict(repo_base=renee_base, error_on_warnings=True)
genome_annotation_combinations = list(jsons.keys())
genome_annotation_combinations.sort()
if debug:
Expand Down Expand Up @@ -191,7 +190,7 @@ def launch_gui(sub_args, debug=True):
threads=2,
)
# execute dry run and capture stdout/stderr
allout = run_in_context(run_args)
allout = exec_in_context(run, run_args)
sg.popup_scrolled(
allout,
title="Dryrun:STDOUT/STDERR",
Expand All @@ -211,7 +210,7 @@ def launch_gui(sub_args, debug=True):
if ch == "Yes":
run_args.dry_run = False
# execute live run
allout = run_in_context(run_args)
allout = exec_in_context(run, run_args)
sg.popup_scrolled(
allout,
title="Dryrun:STDOUT/STDERR",
Expand Down
95 changes: 1 addition & 94 deletions src/renee/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
import re
import sys

from .util import (
_cp_r_safe_,
)
from ccbr_tools.pipeline.util import _cp_r_safe_, _sym_safe_


def initialize(sub_args, repo_path, output_path):
Expand Down Expand Up @@ -51,94 +49,3 @@ def initialize(sub_args, repo_path, output_path):
inputs = _sym_safe_(input_data=sub_args.input, target=output_path)

return inputs


def _sym_safe_(input_data, target):
    """Symlinks every provided FastQ file into the target directory under its
    normalized name (see `rename`). A link is only created when nothing exists
    yet at the destination path; sources are fully resolved first, so a
    relative or symlinked input ends up as an absolute, real PATH.
    @param input_data <list[<str>]>:
        List of input files to symlink to target location
    @param target <str>:
        Target path to copy templates and required resources
    @return input_fastqs list[<str>]:
        List of renamed input FastQs (destination paths, in input order)
    """
    linked_paths = []
    for source in input_data:
        # Destination = target directory + normalized basename of the source
        link_path = os.path.join(target, rename(os.path.basename(source)))
        linked_paths.append(link_path)

        if os.path.exists(link_path):
            # Something is already there; leave it untouched
            continue

        # Resolve source symlinks before linking to avoid binding issues
        resolved_source = os.path.abspath(os.path.realpath(source))
        os.symlink(resolved_source, link_path)

    return linked_paths


def rename(filename):
    """Dynamically renames FastQ file to have one of the following extensions:
    *.R1.fastq.gz, *.R2.fastq.gz
    A filename that already ends in one of those extensions is returned as-is.
    Otherwise the known suffix patterns are tried in order and the first one
    that matches is rewritten; if the pattern captured a three-character lane
    token, it is retained in front of the new extension.
    @param filename <str>:
        Original name of file to be renamed
    @return filename <str>:
        A renamed FastQ filename
    @raises NameError:
        If none of the known suffix patterns matches the filename
    """
    # Covers common extensions from SF, SRA, EBI, TCGA, and external sequencing providers
    # Each pair is (regex to match, how it will be renamed); order matters.
    # The unescaped '.' is deliberate: it accepts any single separator character.
    extensions = (
        # Matches: _R[12]_fastq.gz, _R[12].fastq.gz, _R[12]_fq.gz, etc.
        (r".R1.f(ast)?q.gz$", ".R1.fastq.gz"),
        (r".R2.f(ast)?q.gz$", ".R2.fastq.gz"),
        # Matches: _R[12]_001_fastq_gz, _R[12].001.fastq.gz, _R[12]_001.fq.gz, etc.
        # Capture lane information as named group
        (r".R1.(?P<lane>...).f(ast)?q.gz$", ".R1.fastq.gz"),
        (r".R2.(?P<lane>...).f(ast)?q.gz$", ".R2.fastq.gz"),
        # Matches: _[12].fastq.gz, _[12].fq.gz, _[12]_fastq_gz, etc.
        (r"_1.f(ast)?q.gz$", ".R1.fastq.gz"),
        (r"_2.f(ast)?q.gz$", ".R2.fastq.gz"),
    )

    if filename.endswith((".R1.fastq.gz", ".R2.fastq.gz")):
        # Filename is already in the correct format
        return filename

    for regex, new_ext in extensions:
        matched = re.search(regex, filename)
        if matched is None:
            continue

        # Retain the lane token when the pattern defines one. It is unclear
        # whether this token is static (i.e. always the same); see
        # https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/NamingConvention_FASTQ-files-swBS.htm#
        lane = matched.groupdict().get("lane")
        if lane is not None:
            new_ext = "_{}{}".format(lane, new_ext)

        # Splice by offsets instead of re.sub(): the captured lane text must
        # be inserted literally, never parsed as a replacement template
        # (a backslash in it would otherwise raise re.error or be expanded).
        # Only rename once: return on the first matching pattern.
        return filename[: matched.start()] + new_ext + filename[matched.end():]

    raise NameError(
        (
            "\n\tFatal: Failed to rename provided input '{}'!\n"
            "Cannot determine the extension of the user provided input file.\n"
            "Please rename the file list above before trying again.\n"
            "Here is example of acceptable input file extensions:\n"
            "sampleName.R1.fastq.gz sampleName.R2.fastq.gz\n"
            "sampleName_R1_001.fastq.gz sampleName_R2_001.fastq.gz\n"
            "sampleName_1.fastq.gz sampleName_2.fastq.gz\n"
            "Please also check that your input files are gzipped?\n"
            "If they are not, please gzip them before proceeding again.\n"
        ).format(filename)
    )
Loading