-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from ratschlab/[email protected]
[release]
- Loading branch information
Showing
70 changed files
with
21,479 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.gitignore | ||
.nextflow* | ||
tests | ||
work | ||
outdir |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
FROM ratschlab/wf-base-miniconda4.8.3:v1.0.1 | ||
|
||
LABEL org.opencontainers.image.source https://github.com/ratschlab/wftools-alignment-rna | ||
LABEL org.opencontainers.image.authors Andre Kahles ([email protected]) | ||
LABEL org.opencontainers.image.title Raetschlab RNA-Seq alignment image - STAR | ||
|
||
ENV PATH="/opt/local:${PATH}" | ||
|
||
RUN apt-get update | ||
RUN apt-get -y install wget zip make gcc g++ libcurl4-openssl-dev libgomp1 bzip2 curl libncurses5-dev libbz2-dev liblzma-dev autotools-dev automake libz-dev pkg-config libtool m4 | ||
|
||
# install STAR | ||
RUN mkdir -p /opt/local/src/STAR && \ | ||
cd /opt/local/src/STAR && \ | ||
wget https://github.com/alexdobin/STAR/archive/2.7.2d.tar.gz && \ | ||
tar -xzvf 2.7.2d.tar.gz && \ | ||
ln -s /opt/local/src/STAR/STAR-2.7.2d/bin/Linux_x86_64/STAR /usr/local/bin/STAR && \ | ||
ln -s /opt/local/src/STAR/STAR-2.7.2d/bin/Linux_x86_64/STARlong /usr/local/bin/STARlong | ||
|
||
# install samtools 1.10 | ||
RUN cd /tmp \ | ||
&& curl -sSL -o samtools-1.10.tar.bz2 --retry 10 https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 \ | ||
&& bunzip2 -c samtools-1.10.tar.bz2 |tar xf - \ | ||
&& cd samtools-1.10 \ | ||
&& ./configure --prefix=/usr/local \ | ||
&& make \ | ||
&& make install | ||
|
||
# Install OpenJDK-8 | ||
RUN apt-get update && \ | ||
apt-get install -y openjdk-8-jdk && \ | ||
apt-get install -y ant && \ | ||
apt-get clean; | ||
|
||
RUN mkdir -p /tools | ||
|
||
# Picard installation | ||
RUN curl -sSL -o /tools/picard.jar --retry 10 https://github.com/broadinstitute/picard/releases/download/2.21.8/picard.jar | ||
|
||
### get run script | ||
COPY main.py /tools | ||
|
||
### set up environment | ||
ENV PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:" | ||
ENV LC_ALL=C | ||
|
||
RUN groupadd -g 1000 ubuntu &&\ | ||
useradd -l -u 1000 -g ubuntu ubuntu &&\ | ||
install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu | ||
|
||
USER ubuntu | ||
|
||
ENTRYPOINT ["/usr/bin/env"] | ||
|
||
CMD ["/bin/bash"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
Copyright (c) 2021, ratschlab | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. | ||
Authors: | ||
Andre Kahles | ||
*/ | ||
|
||
/********************************************************************/ | ||
/* this block is auto-generated based on info from pkg.json where */ | ||
/* changes can be made if needed, do NOT modify this block manually */ | ||
nextflow.enable.dsl = 2 | ||
version = '0.1.0' // package version | ||
|
||
container = [ | ||
'ghcr.io': 'ghcr.io/ratschlab/wftools-alignment-rna.genome-alignment-star' | ||
] | ||
default_container_registry = 'ghcr.io' | ||
/********************************************************************/ | ||
|
||
|
||
// universal params go here | ||
params.container_registry = "" | ||
params.container_version = "" | ||
params.container = "" | ||
|
||
params.cpus = 1 | ||
params.mem = 1 // GB | ||
params.publish_dir = "" // set to empty string will disable publishDir | ||
|
||
|
||
// tool specific parmas go here, add / change as needed | ||
params.index = "NO_FILE_1" //input/test_genome.index_STAR" | ||
params.gtf = "NO_FILE_2" //input/test_annotation.gtf" | ||
params.input_files = ["NO_FILE_3"]//input/TEST-PRO.donor1.donor1_sample1_id.sample_01.b22541e45ff72d9a042e877a0531af0b.lane.bam" | ||
params.input_format = "ubam" | ||
params.sample = "" | ||
params.sjdboverhang = 100 | ||
params.pair_status = "paired" | ||
|
||
process genomeAlignmentSTAR { | ||
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}" | ||
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir | ||
|
||
cpus params.cpus | ||
memory "${params.mem} GB" | ||
|
||
input: | ||
file index | ||
file gtf | ||
path input_files | ||
val input_format | ||
val pair_status | ||
val sample | ||
val sjdboverhang | ||
|
||
output: // output, make update as needed | ||
path("${sample}_Aligned.out.bam"), emit: bam | ||
path("${sample}_SJ.out.tab"), emit: junctions | ||
path("${sample}_Log*") | ||
|
||
script: | ||
""" | ||
python /tools/main.py \\ | ||
--sample ${sample} \\ | ||
--index ${index} \\ | ||
--annotation ${gtf} \\ | ||
--threads ${params.cpus} \\ | ||
--sjdbOverhang ${sjdboverhang} \\ | ||
--pair-status ${pair_status} \\ | ||
--input-files ${input_files} \\ | ||
--input-format ${input_format} \\ | ||
--mem ${params.mem * 1000} > ${sample}_align.log 2>&1 | ||
""" | ||
} | ||
|
||
// this provides an entry point for this main script, so it can be run directly without clone the repo | ||
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx | ||
workflow { | ||
genomeAlignmentSTAR( | ||
file(params.index), | ||
file(params.gtf), | ||
params.input_files.collect({it -> file(it)}), | ||
params.input_format, | ||
params.pair_status, | ||
params.sample, | ||
params.sjdboverhang | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import subprocess | ||
import glob | ||
|
||
def replace_bam_header(bam, header, out, mem=None): | ||
|
||
jvm_Xmx = "-Xmx%iM" % int(mem) if mem else "" | ||
try: | ||
cmd = [ | ||
'java', jvm_Xmx, '-Dpicard.useLegacyParser=false', '-jar', '/tools/picard.jar', 'ReplaceSamHeader', '-I', bam, | ||
'--HEADER', header, '--OUTPUT', out | ||
] | ||
|
||
subprocess.run(cmd, check=True) | ||
except Exception as e: | ||
sys.exit("Error: %s. ReplaceSamHeader failed: %s\n" % (e, bam)) | ||
|
||
|
||
def generate_fastq_from_ubam(ubam, outdir, mem=None): | ||
|
||
jvm_Xmx = "-Xmx%iM" % int(mem) if mem else "" | ||
try: | ||
cmd = [ | ||
'java', jvm_Xmx, '-Dpicard.useLegacyParser=false', '-jar', '/tools/picard.jar', 'SamToFastq', '-I', ubam, | ||
'--OUTPUT_PER_RG', 'true', '--RG_TAG', 'ID', '--COMPRESS_OUTPUTS_PER_RG', 'true', '--OUTPUT_DIR', outdir | ||
] | ||
|
||
subprocess.run(cmd, check=True) | ||
except Exception as e: | ||
sys.exit("Error: %s. SamToFastq failed: %s\n" % (e, ubam)) | ||
|
||
def main(): | ||
""" | ||
Python wrapper for calling STAR aligner on various input configurations, | ||
assuring that read group information is provided appropriately. | ||
""" | ||
|
||
parser = argparse.ArgumentParser(description='Tool: STAR aligner') | ||
parser.add_argument('--sample', dest='sample', type=str, | ||
help='Input sample name / ID.', required=True) | ||
parser.add_argument('--index', dest='index', type=str, | ||
help='Path to STAR genome index.', required=True) | ||
parser.add_argument('--annotation', dest='annotation', type=str, | ||
help='Gene annotation (preferably in GTF format).', required=True) | ||
parser.add_argument('--sjdbOverhang', dest='sjdboverhang', type=int, | ||
help='Overhang for splice junction DB. [100]', default=100, required=False) | ||
parser.add_argument('--threads', dest='threads', type=int, | ||
help='Number of threads. [1]', default=1, required=False) | ||
parser.add_argument('--pair-status', dest='pair_status', type=str, | ||
help='Paired-end status of input samples. Choices are: single, paired.', required=True) | ||
parser.add_argument('--input-files', dest='input_files', type=str, nargs='+', | ||
help='Input read files in fastq or bam format. For paired end fastq, first mate comes first.', default=[]) | ||
parser.add_argument('--input-format', dest='input_format', type=str, | ||
help='Format of read in put: fastq or bam', required=True) | ||
parser.add_argument('--mem', dest='mem', help="Maximal allocated memory in MB", type=float, default=None) | ||
|
||
args = parser.parse_args() | ||
|
||
if not os.path.exists(args.index): | ||
sys.exit('Error: specified index path %s does not exist or is not accessible!' % args.index) | ||
|
||
if not os.path.exists(args.annotation): | ||
sys.exit('Error: specified annotation file %s does not exist or is not accessible!' % args.annotation) | ||
|
||
### handle ubam input | ||
outdir = '.' | ||
if args.input_format == 'bam': | ||
if len(args.input_files) != 1: | ||
sys.exit('Error: number of input files %s needs to be exactly 1!.' % str(args.input_files)) | ||
generate_fastq_from_ubam(args.input_files[0], outdir, mem=args.mem) | ||
fqr1 = glob.glob(os.path.join(outdir, '*_1.fastq.gz')) | ||
print(fqr1) | ||
fqr2 = [] | ||
if args.pair_status == 'paired': | ||
for fq in fqr1: | ||
fqr2.append(fq[:-10] + '2.fastq.gz') | ||
assert os.path.exists(fqr2[-1]) | ||
rgs = [os.path.basename(_)[:-11] for _ in fqr1] | ||
### handle fastq input | ||
elif args.input_format == 'fastq': | ||
fqr1 = [args.input_files[0]] | ||
if args.pair_status == 'paired': | ||
fqr2 = [args.input_files[1]] | ||
if len(args.input_files) != 2: | ||
sys.exit('Error: Paired-end status was given as %s. But files provided were: %s' % (args.pair_status, str(args.input_files))) | ||
elif args.pair_status == 'single' and len(args.input_files != 1): | ||
sys.exit('Error: Paired-end status was given as %s. But files provided were: %s' % (args.pair_status, str(args.input_files))) | ||
### get read group names | ||
rgs = [] | ||
for fq in fqr1: | ||
rg = fq | ||
if rg.lower().endswith('.gz'): | ||
rg = rg[:-3] | ||
if rg.lower().endswith('.bz2'): | ||
rg = rg[:-4] | ||
if rg.lower().endswith('.fastq'): | ||
rg = rg[:-6] | ||
rgs.append(rg[:-2]) | ||
### this should not happen | ||
else: | ||
sys.exit('Error: The input type needs to be either bam or fastq. Currently given: %s' % args.input_format) | ||
|
||
### figure out correct read command | ||
read_command = 'cat' | ||
for fq in fqr1: | ||
if fq.lower().endswith('.gz'): | ||
read_command = 'zcat' | ||
break | ||
if fq.lower().endswith('.bz2'): | ||
read_command = 'bzcat' | ||
|
||
### assemble STAR command | ||
cmd = ['STAR', | ||
'--genomeDir', args.index, | ||
'--sjdbGTFfile', args.annotation, | ||
'--runThreadN', str(args.threads), | ||
'--sjdbOverhang', str(args.sjdboverhang), | ||
'--outFileNamePrefix', args.sample + '_', | ||
'--readFilesIn', ','.join(fqr1), ','.join(fqr2), | ||
'--outSAMattrRGline', ' , '.join(['ID:%s\tSM:%s' % (_, args.sample) for _ in rgs]), | ||
'--readFilesCommand', read_command, | ||
'--twopassMode Basic', | ||
'--outFilterMultimapScoreRange', '1', | ||
'--outFilterMultimapNmax', '20', | ||
'--outFilterMismatchNmax', '10', | ||
'--alignIntronMax', '500000', | ||
'--alignMatesGapMax', '1000000', | ||
'--sjdbScore', '2', | ||
'--alignSJDBoverhangMin', '1', | ||
'--genomeLoad NoSharedMemory', | ||
'--outFilterMatchNminOverLread', '0.33', | ||
'--outFilterScoreMinOverLread', '0.33', | ||
'--outSAMstrandField intronMotif', | ||
'--outSAMmode Full', | ||
'--limitBAMsortRAM', '7000000000', | ||
'--outSAMattributes NH HI NM MD AS XS', | ||
'--outSAMunmapped Within', | ||
'--limitSjdbInsertNsj', '2000000', | ||
'--outSAMtype BAM Unsorted', | ||
'--outSAMheaderHD', '@HD VN:1.4', | ||
'--outSAMmultNmax', '1', | ||
] | ||
subprocess.run(' '.join(cmd), shell=True, check=True) | ||
|
||
### replace original read group line from ubam | ||
if args.input_format == 'bam': | ||
bam = args.sample + '_Aligned.out.bam' | ||
### get current header and drop RG info | ||
subprocess.run('samtools view -H %s --no-PG | grep -v "@RG" > new_header.sam' % bam, shell=True, check=True) | ||
### append ald read group info | ||
for fq in fqr1: | ||
subprocess.run('samtools view -H %s --no-PG | grep -e @RG | grep %s >> new_header.sam' % (args.input_files[0], os.path.basename(fq)[:-11]), shell=True, check=True) # capture_output=True, shell=True, check=True).stdout.decode('utf-8').strip()) | ||
### replace header | ||
bam_rg = args.sample + '_Aligned.out.rg.bam' | ||
replace_bam_header(bam, 'new_header.sam', bam_rg, mem=args.mem) | ||
### clean up | ||
subprocess.run('mv %s %s && rm new_header.sam' % (bam_rg, bam), shell=True, check=True) | ||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
docker { | ||
enabled = true | ||
runOptions = '-u \$(id -u):\$(id -g)' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{ | ||
"name": "genome-alignment-star", | ||
"version": "0.1.0", | ||
"description": "Genome alignment with STAR", | ||
"main": "main.nf", | ||
"deprecated": false, | ||
"keywords": [ | ||
"bioinformatics", | ||
"rna-seq", | ||
"alignment", | ||
"star" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/ratschlab/wftools-alignment-rna.git" | ||
}, | ||
"container": { | ||
"registries": [ | ||
{ | ||
"registry": "ghcr.io", | ||
"type": "docker", | ||
"org": "ratschlab", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"dependencies": [], | ||
"devDependencies": [], | ||
"contributors": [ | ||
{ | ||
"name": "Andre Kahles", | ||
"email": "[email protected]" | ||
} | ||
], | ||
"license": "MIT", | ||
"bugReport": "https://github.com/ratschlab/wftools-alignment-rna/issues", | ||
"homepage": "https://github.com/ratschlab/wftools-alignment-rna#readme" | ||
} |
Oops, something went wrong.