From 2db5ceae3ab5b89091c12bfa3a1c50a3a5ac9d34 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:20:46 +0200 Subject: [PATCH 01/66] Changes to check_samplesheet --- bin/check_samplesheet.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 47d1b446..59d39ca5 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -84,10 +84,12 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: + ## Check header MIN_COLS = 2 MIN_HEADER = ["sample", "fastq_1", "fastq_2"] - OPT_HEADER = ["expected_cells", "seq_center"] + OPT_HEADER = ["expected_cells", "seq_center", "fastq_barcode", "sample_type"] + SAMPLE_TYPES = ["gex", "atac"] header = [x.strip('"') for x in fin.readline().strip().split(",")] unknown_header = 0 @@ -101,8 +103,7 @@ def check_samplesheet(file_in, file_out): min_header_count = min_header_count + 1 colmap[h] = i i = i + 1 - if min_header_count < len(MIN_HEADER): - # code was checking for unknown_header or min_header_count however looking at the ifelse, unknown_header does not seem that it should be tested + if unknown_header or min_header_count < len(MIN_HEADER): given = ",".join(header) wanted = ",".join(MIN_HEADER) print(f"ERROR: Please check samplesheet header -> {given} != {wanted}") @@ -147,7 +148,20 @@ def check_samplesheet(file_in, file_out): seq_center = seq_center.replace(" ", "_") ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: + fastq_list = [fastq_1, fastq_2] + + fastq_barcode = "" + if "fastq_barcode" in header: + fastq_barcode = lspl[colmap["fastq_barcode"]] + fastq_list.append(fastq_barcode) + + sample_type = "" + if "sample_type" in header: + sample_type = lspl[colmap["sample_type"]] + if (sample_type not in SAMPLE_TYPES): + print_error("Sample type {} is not supported! 
Please specify either {}".format(sample_type, " or ".join(SAMPLE_TYPES)), "Line", line) + + for fastq in fastq_list: if fastq: if fastq.find(" ") != -1: print_error("FastQ file contains spaces!", "Line", line) @@ -161,9 +175,9 @@ def check_samplesheet(file_in, file_out): ## Auto-detect paired-end/single-end sample_info = [] ## [single_end, fastq_1, fastq_2] if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center] + sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type] else: print_error("Invalid combination of columns provided!", "Line", line) @@ -180,8 +194,9 @@ def check_samplesheet(file_in, file_out): ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center"]) + "\n") + fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center" , "fastq_barcode", "sample_type"]) + "\n") for sample in sorted(sample_mapping_dict.keys()): + ## Check that multiple runs of the same sample are of the same datatype if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( From d197871014f9fc0998b414162b4616a03079d72e Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:20:56 +0200 Subject: [PATCH 02/66] Changes to nextflow.config --- nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow.config b/nextflow.config index 3367748d..16425abf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,9 @@ params { // Cellranger parameters cellranger_index = null + // 
Cellranger ARC parameters + motifs = null + // UniverSC paramaters universc_index = null universc_technology = '10x' From f36d7fb010d7aa61db2a83cc3a330a4a9cb13dde Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:07 +0200 Subject: [PATCH 03/66] Changes to input_check --- subworkflows/local/input_check.nf | 80 ++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index f5a11b18..55eec787 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -10,21 +10,44 @@ workflow INPUT_CHECK { samplesheet // file: /path/to/samplesheet.csv main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .groupTuple(by: [0]) // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .map { meta, reads -> [ meta, reads.flatten() ] } // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by nf-core modules: [ val(meta), [ reads ] ] - .set { reads } + + reads = null + versions = null + + if (params.aligner == "cellranger-arc"){ + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies channel to + // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: [0]) + // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by + // nf-core modules: [ val(meta), [multi_meta], [ reads ] ] + .map { meta, multi_meta, reads -> [ meta, multi_meta.flatten(), reads.flatten() ] } + .set { reads } + versions = SAMPLESHEET_CHECK.out.versions + } else { + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies 
channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: [0]) + // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by + // nf-core modules: [ val(meta), [ reads ] ] + .map { meta, reads -> [ meta, reads.flatten() ] } + .set { reads } + versions = SAMPLESHEET_CHECK.out.versions + } emit: - reads // channel: [ val(meta), [ reads ] ] + reads // channel: [ val(meta), [multi_meta], [ reads ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +// Function to get list of [ meta, [ multimeta ] , [ fastq_1, fastq_2 ] ] def create_fastq_channel(LinkedHashMap row) { // create meta map def meta = [:] @@ -35,16 +58,49 @@ def create_fastq_channel(LinkedHashMap row) { // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] + def fastqs = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] + fastqs = [ file(row.fastq_1) ] } else { if (!file(row.fastq_2).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastqs = [ file(row.fastq_1), file(row.fastq_2) ] + if (row.sample_type == "atac") { + if (row.fastq_barcode == "") { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file is missing!\n" + } + if (!file(row.fastq_barcode).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file does not exist!" + + "\n${row.fastq_barcode}" + } + fastqs.add(file(row.fastq_barcode)) + } + } + + // define meta_data for multiome + def multi_meta = [] + multi_meta = row.sample_type ? 
[row.sample_type] : [param.sample_type] + + if (params.aligner == "cellranger-arc"){ + sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") + fastqs.each{ + if(!it.name.contains(sub_sample)){ + exit 1, "ERROR: Please check input samplesheet -> Some files do not have the same sample name " + + "${sub_sample} in common!\n${it}" + } + } + multi_meta.add(sub_sample) } + + fastq_meta = [ meta, fastqs ] + + if (params.aligner == "cellranger-arc"){ + fastq_meta = [ meta, multi_meta, fastqs ] + } + return fastq_meta -} +} \ No newline at end of file From beeca28d60309b5954ce63488c6353a30cdd1d2b Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:24 +0200 Subject: [PATCH 04/66] Changes to scranseq.nf --- workflows/scrnaseq.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 70265642..4407ca2e 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -44,6 +44,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellranger_arc" include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' @@ -116,6 +117,7 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // Run FastQC + /* ch_multiqc_fastqc = Channel.empty() if (!params.skip_fastqc) { FASTQC_CHECK ( ch_fastq ) @@ -124,6 +126,7 @@ workflow SCRNASEQ { } else { ch_multiqc_fastqc = Channel.empty() } + */ ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf @@ -206,6 +209,20 @@ workflow SCRNASEQ { ch_mtx_matrices = 
ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out) } + // Run cellranger pipeline + if (params.aligner == "cellranger-arc") { + CELLRANGER_ARC_ALIGN( + ch_genome_fasta, + ch_filter_gtf, + ch_motifs, + ch_cellranger_index, + ch_fastq + ) + ch_versions = ch_versions.mix(CELLRANGER_ARC_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ARC_ALIGN.out.cellranger_arc_out) + } + + /* // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices, @@ -213,6 +230,7 @@ workflow SCRNASEQ { ch_txp2gene, ch_star_index ) + */ //Add Versions from MTX Conversion workflow too ch_versions.mix(MTX_CONVERSION.out.ch_versions) From ee520a589d3dd504055cac7c2483dbb2445b10e8 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:21:58 +0200 Subject: [PATCH 05/66] Adding script for config and lib.csv generation --- bin/generate_config.py | 29 +++++++++++++++++++++++++++++ bin/generate_lib_csv.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100755 bin/generate_config.py create mode 100755 bin/generate_lib_csv.py diff --git a/bin/generate_config.py b/bin/generate_config.py new file mode 100755 index 00000000..93077a1f --- /dev/null +++ b/bin/generate_config.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import argparse + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + + parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") + parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") + parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") + parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") + + args = vars(parser.parse_args()) + + print(args) + + config = open("config", "w") + config.write("{\n") + config.write("\torganism: \"scrnaseq\"\n") + config.write("\tgenome: [\"cellranger_arc_reference\"]\n") + 
config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) + config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) + config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) + if(args["add"] != "none"): + config.write(args["add"] + "\n") + config.write("}") + config.close() + + print("Wrote config file") diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py new file mode 100755 index 00000000..f93a23eb --- /dev/null +++ b/bin/generate_lib_csv.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +import argparse +import os + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + + parser.add_argument("-t", "--sample_types", dest="sample_types", help="Comma seperated list of sample types.") + parser.add_argument("-n", "--sample_names", dest="sample_names", help="Comma seperated list of sample names.") + parser.add_argument("-f", "--fastq_folder", dest="fastq_folder", help="Folder of FASTQ files.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + + args = vars(parser.parse_args()) + + print(args) + + sample_types = args["sample_types"].split(",") + sample_names = args["sample_names"].split(",") + unique_samples_names = set(sample_names) + + lib_csv = open(args["out"], "w") + lib_csv.write("fastqs,sample,library_type") + + for i in range(0,len(sample_types)): + if (sample_names[i] in unique_samples_names): + unique_samples_names.remove(sample_names[i]) # this has to be done to account for different Lane files (e.g., L002) + if(sample_types[i] == "gex"): + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Gene Expression")) + else: + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Chromatin Accessibility")) + + lib_csv.close() + + print("Wrote lib.csv file to {}".format(args["out"])) From 87716edd295a4a4cdf197cd4cec7dfc5c72d5365 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:22:35 +0200 Subject: 
[PATCH 06/66] Adding cellranger-arc modules --- modules/local/cellrangerarc/Dockerfile | 28 +++++++++ modules/local/cellrangerarc/README.md | 19 ++++++ modules/local/cellrangerarc/count/main.nf | 72 ++++++++++++++++++++++ modules/local/cellrangerarc/count/meta.yml | 39 ++++++++++++ modules/local/cellrangerarc/mkgtf/main.nf | 36 +++++++++++ modules/local/cellrangerarc/mkgtf/meta.yml | 31 ++++++++++ modules/local/cellrangerarc/mkref/main.nf | 39 ++++++++++++ modules/local/cellrangerarc/mkref/meta.yml | 45 ++++++++++++++ 8 files changed, 309 insertions(+) create mode 100644 modules/local/cellrangerarc/Dockerfile create mode 100644 modules/local/cellrangerarc/README.md create mode 100644 modules/local/cellrangerarc/count/main.nf create mode 100644 modules/local/cellrangerarc/count/meta.yml create mode 100644 modules/local/cellrangerarc/mkgtf/main.nf create mode 100644 modules/local/cellrangerarc/mkgtf/meta.yml create mode 100644 modules/local/cellrangerarc/mkref/main.nf create mode 100644 modules/local/cellrangerarc/mkref/meta.yml diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile new file mode 100644 index 00000000..ccf55582 --- /dev/null +++ b/modules/local/cellrangerarc/Dockerfile @@ -0,0 +1,28 @@ +# Dockerfile to create container with Cell Ranger v2.0.2 +# Push to nfcore/cellranger-arc: + +FROM continuumio/miniconda3:4.8.2 +LABEL authors="Gisela Gabernet , Florian Heyl" \ + description="Docker image containing Cell Ranger Arc" +# Disclaimer: this container is not provided nor supported by Illumina or 10x Genomics. 
+ +# Install procps and clean apt cache +RUN apt-get update --allow-releaseinfo-change \ + && apt-get install -y \ + cpio \ + procps \ + rpm2cpio \ + unzip \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* + +# Copy pre-downloaded cellranger-arc file +ENV CELLRANGER_ARC_VER=2.0.2 +COPY cellranger-arc-$CELLRANGER_ARC_VER.tar.gz /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz + +# Install cellranger-arc +RUN \ + cd /opt && \ + tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ + export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ + ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md new file mode 100644 index 00000000..d4192553 --- /dev/null +++ b/modules/local/cellrangerarc/README.md @@ -0,0 +1,19 @@ +# Updating the docker container and making a new module release + +Cell Ranger Arc is a commercial tool from 10X Genomics. The container provided for the cellranger-arc nf-core module is not provided nor supported by 10x Genomics. Updating the Cell Ranger Arc versions in the container and pushing the update to Dockerhub needs to be done manually. + +1. Navigate to the appropriate download page. - [Cell Ranger Arc](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/installation): download the tar ball of the desired Cell Ranger Arc version with `curl` or `wget`. Place this file in the same folder where the Dockerfile lies. + +2. Edit the Dockerfile. Update the Cell Ranger Arc versions in this line: + +```bash +ENV CELLRANGER_ARC_VER=<VERSION> +``` + +3. Create and test the container: + +```bash +docker build . -t nfcore/cellranger-arc:<VERSION> +``` + +4. 
**Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so.** \ No newline at end of file diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf new file mode 100644 index 00000000..a858a2eb --- /dev/null +++ b/modules/local/cellrangerarc/count/main.nf @@ -0,0 +1,72 @@ +process CELLRANGERARC_COUNT { + tag "$meta.id" + label 'process_high' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + input: + tuple val(meta), val(multi_meta), path(reads) + path reference + + output: + tuple val(meta), path("${meta.id}/outs/*"), emit: outs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference_name = reference.name + + def multi_meta_info = multi_meta.collate(2).transpose() + def sample_types = multi_meta_info[0].join(",") + def sample_names = multi_meta_info[1].join(",") + def lib_csv = meta.id + "_lib.csv" + + """ + # The following ugly three commands (mkdir, mv, generate_lib_csv) + # are required because cellranger-arc only deals with absolute paths + if [ ! 
-d "fastqs" ]; then + mkdir fastqs + fi + + mv *.fastq.gz fastqs/ + + generate_lib_csv.py \\ + --sample_types $sample_types \\ + --sample_names $sample_names \\ + --fastq_folder \$(readlink -f fastqs)\\ + --out $lib_csv + + cellranger-arc \\ + count \\ + --id='${meta.id}' \\ + --libraries=$lib_csv \\ + --reference=$reference_name \\ + --localcores=$task.cpus \\ + --localmem=${task.memory.toGiga()} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ + + stub: + """ + mkdir -p "${meta.id}/outs/" + touch ${meta.id}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/count/meta.yml b/modules/local/cellrangerarc/count/meta.yml new file mode 100644 index 00000000..f69bc1fa --- /dev/null +++ b/modules/local/cellrangerarc/count/meta.yml @@ -0,0 +1,39 @@ +name: cellrangerarc_count +description: Module to use Cell Ranger's ARC pipelines analyze sequencing data produced from Chromium Single Cell ARC. Uses the cellranger-arc count command. +keywords: + - align + - count + - reference +tools: + - cellrangerarc: + description: Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell ARC data. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lib_csv: + type: file + description: | + Path to a 3-column CSV file declaring FASTQ paths, sample names and library types of input ATAC and GEX FASTQs. + - reference: + type: directory + description: Directory containing all the reference indices needed by Cell Ranger ARC +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger ARC + pattern: "${meta.id}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf new file mode 100644 index 00000000..f304c6bc --- /dev/null +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -0,0 +1,36 @@ +process CELLRANGERARC_MKGTF { + tag "$gtf" + label 'process_low' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + path gtf + + output: + path "*.filtered.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + cellranger-arc \\ + mkgtf \\ + $gtf \\ + ${gtf.baseName}.filtered.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/mkgtf/meta.yml b/modules/local/cellrangerarc/mkgtf/meta.yml new file mode 100644 index 00000000..7ce211eb --- /dev/null +++ b/modules/local/cellrangerarc/mkgtf/meta.yml @@ -0,0 +1,31 @@ +name: cellrangerarc_mkgtf +description: Module to build a filtered gtf needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkgtf command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - gtf: + type: file + description: The reference GTF transcriptome file + pattern: "*.gtf" +output: + - gtf: + type: directory + description: The filtered GTF transcriptome file + pattern: "*.filtered.gtf" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@heylf" diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf new file mode 100644 index 00000000..41e9db30 --- /dev/null +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -0,0 +1,39 @@ +process CELLRANGERARC_MKREF { + tag "$reference_config" + label 'process_medium' + + container "nf-core/cellranger-arc:2.0.2" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "CELLRANGERARC_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + input: + path fasta + path gtf + path motifs + path reference_config + val reference_name + + output: + path "${reference_name}", emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + cellranger-arc \\ + mkref \\ + --config=$reference_config \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/local/cellrangerarc/mkref/meta.yml new file mode 100644 index 00000000..a68adf96 --- /dev/null +++ b/modules/local/cellrangerarc/mkref/meta.yml @@ -0,0 +1,45 @@ +name: cellrangerarc_mkref +description: Module to build the reference needed by the 10x Genomics Cell Ranger Arc tool. Uses the cellranger-arc mkref command. +keywords: + - reference + - mkref + - index +tools: + - cellrangerarc: + description: Cell Ranger Arc is a set of analysis pipelines that process Chromium Single Cell Arc data. 
+ homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc + licence: 10x Genomics EULA +input: + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: Reference transcriptome GTF file + pattern: "*.gtf" + - motifs: + type: file + description: Sequence motif file (e.g., from transcription factors) + pattern: "*.txt" + - reference_config: + type: file + description: JSON-like file holding organism, genome, reference fasta path, reference annotation gtf path, contigs that should be excluded and sequence format motif file path + pattern: config + - reference_name: + type: val + description: The name to give the new reference folder + pattern: str +output: + - reference: + type: folder + description: Folder called like the reference_name containing all the reference indices needed by Cell Ranger Arc + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@heylf" From 57e590c2f77407f8626272d9c24e1154e4d31e12 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:23:34 +0200 Subject: [PATCH 07/66] Adding cellranger-arc subworkflow --- subworkflows/local/align_cellranger_arc.nf | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 subworkflows/local/align_cellranger_arc.nf diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf new file mode 100644 index 00000000..2bbc8a27 --- /dev/null +++ b/subworkflows/local/align_cellranger_arc.nf @@ -0,0 +1,52 @@ +/* + * Alignment with Cellranger Arc + */ + +include {CELLRANGERARC_MKGTF} 
from "../../modules/local/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" +include {GENERATELIBCSV} from "../../modules/local/generate_cellranger_lib_csv.nf" +include {GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" +include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" + +// Define workflow to subset and index a genome region fasta file +workflow CELLRANGERARC_ALIGN { + take: + fasta + gtf + motifs + cellranger_index + ch_fastq + + main: + ch_versions = Channel.empty() + + assert cellranger_index || (fasta && gtf && motifs): + "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + + if (!cellranger_index) { + // Filter GTF based on gene biotypes passed in params.modules + CELLRANGERARC_MKGTF( gtf ) + filtered_gtf = CELLRANGERARC_MKGTF.out.gtf + ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) + + // Generate the config for mkref + GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) + ch_versions.mix(GENERATECONFIG.out.versions) + + // Make reference genome + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, GENERATECONFIG.out.config, "cellranger_arc_reference" ) + ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) + cellranger_index = CELLRANGERARC_MKREF.out.reference + } + + // Obtain read counts + CELLRANGERARC_COUNT ( + ch_fastq, + cellranger_index + ) + ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) + + emit: + ch_versions + cellranger_arc_out = CELLRANGERARC_COUNT.out.outs +} \ No newline at end of file From 6ba304249f16314c129d6d0e933fae7827832c4b Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:31:41 +0200 Subject: [PATCH 08/66] Adding cellrangerarc to nextflow schema --- nextflow_schema.json | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 2061e7c0..c7d155bd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -57,7 +57,7 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "universc"] + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] }, "protocol": { "type": "string", @@ -232,6 +232,22 @@ } } }, + "cellrangerarc_options": { + "title": "Cellranger ARC Options", + "type": "object", + "description": "Params related to the Cellranger pipeline", + "default": "", + "properties": { + "cellranger_index": { + "type": "string", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + }, + "motifs": { + "type": "string", + "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
+ } + } + }, "universc_options": { "title": "UniverSC Options", "type": "object", @@ -461,6 +477,9 @@ { "$ref": "#/definitions/cellranger_options" }, + { + "$ref": "#/definitions/cellrangerarc_options" + }, { "$ref": "#/definitions/universc_options" }, From ef4b284930890a9a4ac4d56ea0d30878c4d96a39 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:40:16 +0200 Subject: [PATCH 09/66] Adding cellrangerarc to modules.config --- conf/modules.config | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index b9d907cf..cdce4f1c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,6 +73,39 @@ if(params.aligner == "cellranger") { } } +if(params.aligner == "cellranger-arc") { + process { + withName: CELLRANGERARC_MKGTF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkgtf", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" + } + withName: GENERATECONFIG { + publishDir = [ + path: "${params.outdir}/${params.aligner}/config", + mode: params.publish_dir_mode + ] + ext.args = "--add none" + } + withName: CELLRANGERARC_MKREF { + publishDir = [ + path: "${params.outdir}/${params.aligner}/mkref", + mode: params.publish_dir_mode + ] + } + withName: CELLRANGERARC_COUNT { + publishDir = [ + path: "${params.outdir}/${params.aligner}/count", + mode: params.publish_dir_mode + ] + ext.args = {meta.expected_cells ? 
"--expect-cells ${meta.expected_cells}" : ''} + } + } +} + if(params.aligner == "universc") { process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } From 5c51b1d58a329e001a89a905586bbe5bf3186f7a Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 11:40:37 +0200 Subject: [PATCH 10/66] Removing genrate lib csv module --- subworkflows/local/align_cellranger_arc.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf index 2bbc8a27..a58f4748 100644 --- a/subworkflows/local/align_cellranger_arc.nf +++ b/subworkflows/local/align_cellranger_arc.nf @@ -4,7 +4,6 @@ include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {GENERATELIBCSV} from "../../modules/local/generate_cellranger_lib_csv.nf" include {GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" From 0b86e18097eb5970bfb6c9ccbc4d8ca2d44a3986 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:56:30 +0200 Subject: [PATCH 11/66] Rename cellrangerarc subworkflow --- subworkflows/local/align_cellranger_arc.nf | 51 ---------------------- 1 file changed, 51 deletions(-) delete mode 100644 subworkflows/local/align_cellranger_arc.nf diff --git a/subworkflows/local/align_cellranger_arc.nf b/subworkflows/local/align_cellranger_arc.nf deleted file mode 100644 index a58f4748..00000000 --- a/subworkflows/local/align_cellranger_arc.nf +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Alignment with Cellranger Arc - */ - -include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" -include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {GENERATECONFIG} from 
"../../modules/local/generate_cellranger_mkref_config.nf" -include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" - -// Define workflow to subset and index a genome region fasta file -workflow CELLRANGERARC_ALIGN { - take: - fasta - gtf - motifs - cellranger_index - ch_fastq - - main: - ch_versions = Channel.empty() - - assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - - if (!cellranger_index) { - // Filter GTF based on gene biotypes passed in params.modules - CELLRANGERARC_MKGTF( gtf ) - filtered_gtf = CELLRANGERARC_MKGTF.out.gtf - ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) - - // Generate the config for mkref - GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) - ch_versions.mix(GENERATECONFIG.out.versions) - - // Make reference genome - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, GENERATECONFIG.out.config, "cellranger_arc_reference" ) - ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellranger_index = CELLRANGERARC_MKREF.out.reference - } - - // Obtain read counts - CELLRANGERARC_COUNT ( - ch_fastq, - cellranger_index - ) - ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) - - emit: - ch_versions - cellranger_arc_out = CELLRANGERARC_COUNT.out.outs -} \ No newline at end of file From 4b343d71bd6be577120d73baa7d73f258ee1b6d7 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:57:01 +0200 Subject: [PATCH 12/66] update cellrangerarc modules --- modules/local/cellrangerarc/count/main.nf | 4 ++-- modules/local/cellrangerarc/mkgtf/main.nf | 4 ++-- modules/local/cellrangerarc/mkref/main.nf | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index a858a2eb..df8e8fd1 100644 --- a/modules/local/cellrangerarc/count/main.nf 
+++ b/modules/local/cellrangerarc/count/main.nf @@ -1,8 +1,8 @@ process CELLRANGERARC_COUNT { tag "$meta.id" - label 'process_high' + label 'process_low' //TOFLO turn to high - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index f304c6bc..bb3cbc8f 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKGTF { tag "$gtf" label 'process_low' - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -33,4 +33,4 @@ process CELLRANGERARC_MKGTF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 41e9db30..27efb80a 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,8 +1,8 @@ process CELLRANGERARC_MKREF { tag "$reference_config" - label 'process_medium' + label 'process_low' //TOFLO change to medium - container "nf-core/cellranger-arc:2.0.2" + container "heylf/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { @@ -36,4 +36,4 @@ process CELLRANGERARC_MKREF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} +} \ No newline at end of file From 
68a265485b666e8bc1c5419bb2233741f740e5e4 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:58:04 +0200 Subject: [PATCH 13/66] Adding generate config workflow and align cellrangerarc subworkflow --- .../local/generate_cellranger_mkref_config.nf | 36 +++++++++++++ subworkflows/local/align_cellrangerarc.nf | 51 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 modules/local/generate_cellranger_mkref_config.nf create mode 100644 subworkflows/local/align_cellrangerarc.nf diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf new file mode 100644 index 00000000..f28f7d92 --- /dev/null +++ b/modules/local/generate_cellranger_mkref_config.nf @@ -0,0 +1,36 @@ +process CELLRANGERARC_GENERATECONFIG { + tag "$samplesheet" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + val(fasta) + val(gtf) + val(motifs) + + output: + path '*config' , emit: config + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ + def args = task.ext.args ?: '' + """ + generate_config.py \\ + --fasta $fasta \\ + --gtf $gtf \\ + --motifs $motifs \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf new file mode 100644 index 00000000..b8acd2dd --- /dev/null +++ b/subworkflows/local/align_cellrangerarc.nf @@ -0,0 +1,51 @@ +/* + * Alignment with Cellranger Arc + */ + +include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" +include {CELLRANGERARC_GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" +include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" + +// Define workflow to subset and index a genome region fasta file +workflow CELLRANGERARC_ALIGN { + take: + fasta + gtf + motifs + cellranger_index + ch_fastq + + main: + ch_versions = Channel.empty() + + assert cellranger_index || (fasta && gtf && motifs): + "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." 
+ + if (!cellranger_index) { + // Filter GTF based on gene biotypes passed in params.modules + CELLRANGERARC_MKGTF( gtf ) + filtered_gtf = CELLRANGERARC_MKGTF.out.gtf + ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) + + // Generate the config for mkref + CELLRANGERARC_GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) + ch_versions = ch_versions.mix(CELLRANGERARC_GENERATECONFIG.out.versions) + + // Make reference genome + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) + ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) + cellranger_index = CELLRANGERARC_MKREF.out.reference + } + + // Obtain read counts + CELLRANGERARC_COUNT ( + ch_fastq, + cellranger_index + ) + ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) + + emit: + ch_versions + cellranger_arc_out = CELLRANGERARC_COUNT.out.outs +} \ No newline at end of file From a81849c28e48d10ca744a915818cf05a2c8ea658 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:59:19 +0200 Subject: [PATCH 14/66] Updating mtx conversion scripts --- modules/local/mtx_to_h5ad.nf | 9 +++++---- modules/local/mtx_to_seurat.nf | 13 +++++++------ subworkflows/local/mtx_conversion.nf | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 7961e057..bc329e4c 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,11 +1,12 @@ process MTX_TO_H5AD { tag "$meta.id" - label 'process_medium' + label 'process_low' //TOFLO set to medium + //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -41,11 +42,11 @@ process MTX_TO_H5AD { // // run script // - if (params.aligner == 'cellranger') + if (params.aligner == 'cellranger' || params.aligner == 'cellrangerarc') """ # convert file types mtx_to_h5ad.py \\ - --aligner ${params.aligner} \\ + --aligner cellranger \\ --input filtered_feature_bc_matrix.h5 \\ --sample ${meta.id} \\ --out ${meta.id}/${meta.id}_matrix.h5ad diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 4351f4b3..c1f40640 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -1,9 +1,10 @@ process MTX_TO_SEURAT { tag "$meta.id" - label 'process_medium' + label 'process_low' //TOFLO set to medium conda "r-seurat" - container "nf-core/seurat:4.3.0" + //TOFLO remove quay.io + container "quay.io/nf-core/seurat:4.3.0" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -19,10 +20,10 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner - if (params.aligner == "cellranger") { - matrix = "matrix.mtx.gz" - barcodes = "barcodes.tsv.gz" - features = "features.tsv.gz" + if (params.aligner == "cellranger" || params.aligner == "cellrangerarc") { + matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" + barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" + features = "filtered_feature_bc_matrix/features.tsv.gz" } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 5286a1b5..956285c0 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,7 @@ workflow 
MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. - if ( params.aligner == "cellranger" ) { + if ( params.aligner == "cellranger" || params.aligner == "cellrangerarc" ) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } From f7c07a44cb788153deaf7e4db6c7c524ec0ee7a2 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 16:59:54 +0200 Subject: [PATCH 15/66] Changing module.config and nextflow.config --- conf/modules.config | 4 ++-- nextflow.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cdce4f1c..f2051e41 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ if(params.aligner == "cellranger") { } } -if(params.aligner == "cellranger-arc") { +if(params.aligner == "cellrangerarc") { process { withName: CELLRANGERARC_MKGTF { publishDir = [ @@ -83,7 +83,7 @@ if(params.aligner == "cellranger-arc") { ] ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" } - withName: GENERATECONFIG { + withName: CELLRANGERARC_GENERATECONFIG { publishDir = [ path: "${params.outdir}/${params.aligner}/config", mode: params.publish_dir_mode diff --git a/nextflow.config b/nextflow.config index 16425abf..01c72714 100644 --- a/nextflow.config +++ b/nextflow.config @@ -230,7 +230,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers singularity.registry = 'quay.io' -docker.registry = 'quay.io' +docker.registry = '' podman.registry = 'quay.io' def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') From bf180dadc299d2e6c04a77c0cdab7182ee9c4705 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 
2023 17:00:36 +0200 Subject: [PATCH 16/66] Changing scripts for the input check --- modules/local/samplesheet_check.nf | 3 ++- subworkflows/local/input_check.nf | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index feaf3dfc..f8dabac6 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,10 +2,11 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' + //TOFLO quay.io/ conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" + 'quay.io/biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 55eec787..80b3cbae 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -14,7 +14,7 @@ workflow INPUT_CHECK { reads = null versions = null - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) @@ -85,7 +85,7 @@ def create_fastq_channel(LinkedHashMap row) { def multi_meta = [] multi_meta = row.sample_type ? 
[row.sample_type] : [param.sample_type] - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") fastqs.each{ if(!it.name.contains(sub_sample)){ @@ -98,7 +98,7 @@ def create_fastq_channel(LinkedHashMap row) { fastq_meta = [ meta, fastqs ] - if (params.aligner == "cellranger-arc"){ + if (params.aligner == "cellrangerarc"){ fastq_meta = [ meta, multi_meta, fastqs ] } From 4d4ed84e2986eab0e9343651cb13c7345f14b3d6 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:01:00 +0200 Subject: [PATCH 17/66] Bugfix for generate config python script --- bin/generate_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 93077a1f..e9c9d45e 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -16,8 +16,8 @@ config = open("config", "w") config.write("{\n") - config.write("\torganism: \"scrnaseq\"\n") - config.write("\tgenome: [\"cellranger_arc_reference\"]\n") + config.write("\torganism: \"{}\"\n".format(args["fasta"].split(".")[0])) + config.write("\tgenome: [\"cellrangerarc_reference\"]\n") config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) From cad902400595ca3d68f3a8a3e83469e95da1f5cf Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:01:34 +0200 Subject: [PATCH 18/66] Changing multiqc and dumsoftware scripts for containers --- modules/nf-core/custom/dumpsoftwareversions/main.nf | 3 ++- modules/nf-core/multiqc/main.nf | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..536b282c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ 
b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,12 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' + //TOFLO remove https://quay.io/ // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..c673ee05 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,11 @@ process MULTIQC { label 'process_single' + //TOFLO remove https://quay.io/ conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" From 37e61d6676561f5fe5d7a38c26bbffb3f057b20d Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:01 +0200 Subject: [PATCH 19/66] Changing concat_h5ad.nf --- modules/local/concat_h5ad.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 96920f9e..5f0870f8 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,10 +1,11 @@ process CONCAT_H5AD { - label 'process_medium' + label 'process_low' //TOFLO set to medium + //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: path h5ad From f884d00d30e6a9334d16f8f944039ba97de0c1ec Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:20 +0200 Subject: [PATCH 20/66] Changing gtf_gene_filter.nf --- modules/local/gtf_gene_filter.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index 063bd228..bd775711 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -2,10 +2,11 @@ process GTF_GENE_FILTER { tag "$fasta" label 'process_low' + //TOFLO remove https://quay.io/ conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" + 'quay.io/biocontainers/python:3.9--1' }" input: path fasta From fe187a63d6ef7f027f6db8a25856425fd7ea4fd2 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 30 Jun 2023 17:02:35 +0200 Subject: [PATCH 21/66] Chaning scrnaseq.nf --- workflows/scrnaseq.nf | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 4407ca2e..09e16127 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -44,7 +44,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellranger_arc" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" 
include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' @@ -77,6 +77,7 @@ ch_input = file(params.input) ch_genome_fasta = params.fasta ? file(params.fasta) : [] ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] +ch_motifs = params.motifs ? file(params.motifs) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] ch_multiqc_alevin = Channel.empty() ch_multiqc_star = Channel.empty() @@ -117,7 +118,6 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) // Run FastQC - /* ch_multiqc_fastqc = Channel.empty() if (!params.skip_fastqc) { FASTQC_CHECK ( ch_fastq ) @@ -126,7 +126,6 @@ workflow SCRNASEQ { } else { ch_multiqc_fastqc = Channel.empty() } - */ ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf @@ -210,19 +209,18 @@ workflow SCRNASEQ { } // Run cellranger pipeline - if (params.aligner == "cellranger-arc") { - CELLRANGER_ARC_ALIGN( + if (params.aligner == "cellrangerarc") { + CELLRANGERARC_ALIGN( ch_genome_fasta, ch_filter_gtf, ch_motifs, ch_cellranger_index, ch_fastq ) - ch_versions = ch_versions.mix(CELLRANGER_ARC_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ARC_ALIGN.out.cellranger_arc_out) + ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) } - /* // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( ch_mtx_matrices, @@ -230,7 +228,6 @@ workflow SCRNASEQ { ch_txp2gene, ch_star_index ) - */ //Add Versions from MTX Conversion workflow too ch_versions.mix(MTX_CONVERSION.out.ch_versions) From e6cd7989bc311869e4da286bd9b7b982351efe73 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:05:18 +0100 Subject: [PATCH 22/66] Changes to Dockerfile and README for cellrangerarc --- modules/local/cellrangerarc/Dockerfile | 4 
++-- modules/local/cellrangerarc/README.md | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile index ccf55582..081dc7bd 100644 --- a/modules/local/cellrangerarc/Dockerfile +++ b/modules/local/cellrangerarc/Dockerfile @@ -1,5 +1,5 @@ # Dockerfile to create container with Cell Ranger v2.0.2 -# Push to nfcore/cellranger-arc: +# Push to quay.io/nf-core/cellranger-arc: FROM continuumio/miniconda3:4.8.2 LABEL authors="Gisela Gabernet , Florian Heyl" \ @@ -25,4 +25,4 @@ RUN \ tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ - rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz \ No newline at end of file diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md index d4192553..9f4358f1 100644 --- a/modules/local/cellrangerarc/README.md +++ b/modules/local/cellrangerarc/README.md @@ -13,7 +13,11 @@ ENV CELLRANGER_ARC_VER= 3. Create and test the container: ```bash -docker build . -t nfcore/cellranger-arc: +docker build . -t quay.io/nf-core/cellranger-arc: ``` -4. **Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so.** \ No newline at end of file +4. Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so. 
+ +```bash +docker push quay.io/nf-core/cellranger-arc: +``` \ No newline at end of file From 2c4a83238b4385b0e17fd0f8c79cd9a7cf168293 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:05:39 +0100 Subject: [PATCH 23/66] Changes to meta.yml for mkref --- modules/local/cellrangerarc/mkref/meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/local/cellrangerarc/mkref/meta.yml index a68adf96..1eac878a 100644 --- a/modules/local/cellrangerarc/mkref/meta.yml +++ b/modules/local/cellrangerarc/mkref/meta.yml @@ -29,12 +29,12 @@ input: description: JSON-like file holding organism, genome, reference fasta path, reference annotation gtf path, contigs that should be excluded and sequence format motif file path pattern: config - reference_name: - type: val + type: string description: The name to give the new reference folder pattern: str output: - reference: - type: folder + type: directory description: Folder called like the reference_name containing all the reference indices needed by Cell Ranger Arc - versions: type: file From 7ed40fdd6a315516453250588985289d43447597 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:11:02 +0100 Subject: [PATCH 24/66] Remove TODOS --- modules/local/cellrangerarc/count/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- modules/local/concat_h5ad.nf | 5 ++--- modules/local/gtf_gene_filter.nf | 3 +-- modules/local/mtx_to_h5ad.nf | 5 ++--- modules/local/mtx_to_seurat.nf | 6 +++--- modules/local/samplesheet_check.nf | 3 +-- modules/nf-core/custom/dumpsoftwareversions/main.nf | 3 +-- modules/nf-core/multiqc/main.nf | 3 +-- nextflow.config | 2 +- 10 files changed, 14 insertions(+), 20 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index df8e8fd1..56057a61 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -1,6 
+1,6 @@ process CELLRANGERARC_COUNT { tag "$meta.id" - label 'process_low' //TOFLO turn to high + label 'process_high' container "heylf/cellranger-arc:2.0.2" diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 27efb80a..45c2f09f 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,6 +1,6 @@ process CELLRANGERARC_MKREF { tag "$reference_config" - label 'process_low' //TOFLO change to medium + label 'process_medium' container "heylf/cellranger-arc:2.0.2" diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 5f0870f8..96920f9e 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,11 +1,10 @@ process CONCAT_H5AD { - label 'process_low' //TOFLO set to medium + label 'process_medium' - //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: path h5ad diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index bd775711..063bd228 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -2,11 +2,10 @@ process GTF_GENE_FILTER { tag "$fasta" label 'process_low' - //TOFLO remove https://quay.io/ conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'quay.io/biocontainers/python:3.9--1' }" + 'biocontainers/python:3.9--1' }" input: path fasta diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index bc329e4c..ca9b1d48 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,12 +1,11 @@ process MTX_TO_H5AD { tag "$meta.id" - label 'process_low' //TOFLO set to medium + label 'process_medium' - //TOFLO quay.io/ conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: // inputs from cellranger nf-core module does not come in a single sample dir diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index c1f40640..b85864ee 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -1,10 +1,10 @@ process MTX_TO_SEURAT { tag "$meta.id" - label 'process_low' //TOFLO set to medium + label 'process_medium' conda "r-seurat" - //TOFLO remove quay.io - container "quay.io/nf-core/seurat:4.3.0" + + container "nf-core/seurat:4.3.0" input: // inputs from cellranger nf-core module does not come in a single sample dir diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index f8dabac6..feaf3dfc 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,11 +2,10 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_low' - //TOFLO quay.io/ conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 536b282c..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,12 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' - //TOFLO remove https://quay.io/ // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index c673ee05..1fc387be 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,11 +1,10 @@ process MULTIQC { label 'process_single' - //TOFLO remove https://quay.io/ conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 01c72714..16425abf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -230,7 +230,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers singularity.registry = 'quay.io' -docker.registry = '' +docker.registry = 'quay.io' podman.registry = 'quay.io' def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') From d8a4e8e78c273b2cdaa0c432097e66dc1d4418b4 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 18:14:04 +0100 Subject: [PATCH 25/66] Changing containers for cellrangerarc modules --- modules/local/cellrangerarc/count/main.nf | 2 +- modules/local/cellrangerarc/mkgtf/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index 56057a61..a858a2eb 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_COUNT { tag "$meta.id" label 'process_high' - container "heylf/cellranger-arc:2.0.2" + container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index bb3cbc8f..bb96af64 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKGTF { tag "$gtf" label 'process_low' - container "heylf/cellranger-arc:2.0.2" + 
container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 45c2f09f..4dab010c 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -2,7 +2,7 @@ process CELLRANGERARC_MKREF { tag "$reference_config" label 'process_medium' - container "heylf/cellranger-arc:2.0.2" + container "nf-core/cellranger-arc:2.0.2" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { From c1d4e47059a2ad4e2104c4f63187778aa6e74ab7 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 30 Oct 2023 19:07:07 +0100 Subject: [PATCH 26/66] Adding cellrangerarc_index to nextflow.config nextflow_schema.json align_cellrangerarc.nf and scrnaseq.nf --- nextflow.config | 1 + nextflow_schema.json | 2 +- subworkflows/local/align_cellrangerarc.nf | 10 +++++----- workflows/scrnaseq.nf | 7 +++++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8e344df2..c6c4fa11 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,6 +41,7 @@ params { cellranger_index = null // Cellranger ARC parameters + cellrangerarc_index = null motifs = null // UniverSC paramaters diff --git a/nextflow_schema.json b/nextflow_schema.json index c0353ce4..351cee8a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -240,7 +240,7 @@ "description": "Params related to the Cellranger pipeline", "default": "", "properties": { - "cellranger_index": { + "cellrangerarc_index": { "type": "string", "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. 
" }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b8acd2dd..c2dffde6 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -13,16 +13,16 @@ workflow CELLRANGERARC_ALIGN { fasta gtf motifs - cellranger_index + cellrangerarc_index ch_fastq main: ch_versions = Channel.empty() - assert cellranger_index || (fasta && gtf && motifs): + assert cellrangerarc_index || (fasta && gtf && motifs): "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - if (!cellranger_index) { + if (!cellrangerarc_index) { // Filter GTF based on gene biotypes passed in params.modules CELLRANGERARC_MKGTF( gtf ) filtered_gtf = CELLRANGERARC_MKGTF.out.gtf @@ -35,13 +35,13 @@ workflow CELLRANGERARC_ALIGN { // Make reference genome CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellranger_index = CELLRANGERARC_MKREF.out.reference + cellrangerarc_index = CELLRANGERARC_MKREF.out.reference } // Obtain read counts CELLRANGERARC_COUNT ( ch_fastq, - cellranger_index + cellrangerarc_index ) ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 7039478e..65895dc6 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -12,7 +12,7 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.gtf, params.transcript_fasta, params.salmon_index, params.kallisto_index, params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index, - params.universc_index + params.cellrangerarc_index, params.universc_index ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -104,6 +104,9 @@ star_feature = params.star_feature //cellranger 
params ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] +//cellrangerarc params +ch_cellrangerarc_index = params.cellrangerarc_index ? file(params.cellrangerarc_index) : [] + //universc params ch_universc_index = params.universc_index ? file(params.universc_index) : [] @@ -220,7 +223,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_motifs, - ch_cellranger_index, + ch_cellrangerarc_index, ch_fastq ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) From 78b06b1c11b33db4711f0be4c507c291abe819e4 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:32:28 +0100 Subject: [PATCH 27/66] Linting fix --- modules/local/cellrangerarc/Dockerfile | 2 +- modules/local/cellrangerarc/mkgtf/main.nf | 2 +- modules/local/cellrangerarc/mkref/main.nf | 2 +- modules/local/generate_cellranger_mkref_config.nf | 2 +- modules/local/mtx_to_seurat.nf | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/local/cellrangerarc/Dockerfile b/modules/local/cellrangerarc/Dockerfile index 081dc7bd..812b64ba 100644 --- a/modules/local/cellrangerarc/Dockerfile +++ b/modules/local/cellrangerarc/Dockerfile @@ -25,4 +25,4 @@ RUN \ tar -xzvf cellranger-arc-$CELLRANGER_ARC_VER.tar.gz && \ export PATH=/opt/cellranger-arc-$CELLRANGER_ARC_VER:$PATH && \ ln -s /opt/cellranger-arc-$CELLRANGER_ARC_VER/cellranger-arc /usr/bin/cellranger-arc && \ - rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz \ No newline at end of file + rm -rf /opt/cellranger-arc-$CELLRANGER_ARC_VER.tar.gz diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/local/cellrangerarc/mkgtf/main.nf index bb96af64..f304c6bc 100644 --- a/modules/local/cellrangerarc/mkgtf/main.nf +++ b/modules/local/cellrangerarc/mkgtf/main.nf @@ -33,4 +33,4 @@ process CELLRANGERARC_MKGTF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} \ No newline at end of file +} 
diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 4dab010c..41e9db30 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -36,4 +36,4 @@ process CELLRANGERARC_MKREF { cellrangerarc: \$(echo \$( cellranger-arc --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf index f28f7d92..4bc474c8 100644 --- a/modules/local/generate_cellranger_mkref_config.nf +++ b/modules/local/generate_cellranger_mkref_config.nf @@ -8,7 +8,7 @@ process CELLRANGERARC_GENERATECONFIG { 'quay.io/biocontainers/python:3.8.3' }" input: - val(fasta) + val(fasta) val(gtf) val(motifs) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index b85864ee..8d344035 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -3,7 +3,6 @@ process MTX_TO_SEURAT { label 'process_medium' conda "r-seurat" - container "nf-core/seurat:4.3.0" input: From 00ccaaa3bbec2f15823c45cba99883e041943e30 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:42:43 +0100 Subject: [PATCH 28/66] Fixing run for test data --- nextflow.config | 1 + nextflow_schema.json | 4 ++++ subworkflows/local/input_check.nf | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c6c4fa11..8c2488e3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { // Cellranger ARC parameters cellrangerarc_index = null motifs = null + sample_type = 'gex' // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 351cee8a..31f95e5f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -247,6 +247,10 @@ "motifs": { "type": "string", "description": "Specify a motif file to 
create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + }, + "sample_type": { + "type": "string", + "description": "Specify the type of data (gex or atac)." } } }, diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 80b3cbae..0de0ac59 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -83,7 +83,7 @@ def create_fastq_channel(LinkedHashMap row) { // define meta_data for multiome def multi_meta = [] - multi_meta = row.sample_type ? [row.sample_type] : [param.sample_type] + multi_meta = row.sample_type ? [row.sample_type] : [params.sample_type] if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") From 44d8cab3989eec415cfb23cb997beec78b9598c8 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:44:07 +0100 Subject: [PATCH 29/66] Prettier --- modules/local/cellrangerarc/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/cellrangerarc/README.md b/modules/local/cellrangerarc/README.md index 9f4358f1..6089d994 100644 --- a/modules/local/cellrangerarc/README.md +++ b/modules/local/cellrangerarc/README.md @@ -20,4 +20,4 @@ docker build . 
-t quay.io/nf-core/cellranger-arc: ```bash docker push quay.io/nf-core/cellranger-arc: -``` \ No newline at end of file +``` From 75eb1637e4eb84f223e70d65be435128338fe28e Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:44:53 +0100 Subject: [PATCH 30/66] PythonBlack --- bin/check_samplesheet.py | 26 +++++++++++++++++++++++--- bin/generate_config.py | 12 ++++++------ bin/generate_lib_csv.py | 14 ++++++++------ 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 59d39ca5..3544dab9 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -158,8 +158,14 @@ def check_samplesheet(file_in, file_out): sample_type = "" if "sample_type" in header: sample_type = lspl[colmap["sample_type"]] - if (sample_type not in SAMPLE_TYPES): - print_error("Sample type {} is not supported! Please specify either {}".format(sample_type, " or ".join(SAMPLE_TYPES)), "Line", line) + if sample_type not in SAMPLE_TYPES: + print_error( + "Sample type {} is not supported! 
Please specify either {}".format( + sample_type, " or ".join(SAMPLE_TYPES) + ), + "Line", + line, + ) for fastq in fastq_list: if fastq: @@ -194,7 +200,21 @@ def check_samplesheet(file_in, file_out): ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "expected_cells", "seq_center" , "fastq_barcode", "sample_type"]) + "\n") + fout.write( + ",".join( + [ + "sample", + "single_end", + "fastq_1", + "fastq_2", + "expected_cells", + "seq_center", + "fastq_barcode", + "sample_type", + ] + ) + + "\n" + ) for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype diff --git a/bin/generate_config.py b/bin/generate_config.py index e9c9d45e..85647826 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -16,12 +16,12 @@ config = open("config", "w") config.write("{\n") - config.write("\torganism: \"{}\"\n".format(args["fasta"].split(".")[0])) - config.write("\tgenome: [\"cellrangerarc_reference\"]\n") - config.write("\tinput_fasta: [\"{}\"]\n".format(args["fasta"])) - config.write("\tinput_gtf: [\"{}\"]\n".format(args["gtf"])) - config.write("\tinput_motifs: \"{}\"\n".format(args["motifs"])) - if(args["add"] != "none"): + config.write('\torganism: "{}"\n'.format(args["fasta"].split(".")[0])) + config.write('\tgenome: ["cellrangerarc_reference"]\n') + config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) + config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) + config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) + if args["add"] != "none": config.write(args["add"] + "\n") config.write("}") config.close() diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py index f93a23eb..07ab9661 100755 --- a/bin/generate_lib_csv.py +++ b/bin/generate_lib_csv.py @@ -22,13 +22,15 @@ lib_csv = open(args["out"], "w") 
lib_csv.write("fastqs,sample,library_type") - for i in range(0,len(sample_types)): - if (sample_names[i] in unique_samples_names): - unique_samples_names.remove(sample_names[i]) # this has to be done to account for different Lane files (e.g., L002) - if(sample_types[i] == "gex"): - lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Gene Expression")) + for i in range(0, len(sample_types)): + if sample_names[i] in unique_samples_names: + unique_samples_names.remove( + sample_names[i] + ) # this has to be done to account for different Lane files (e.g., L002) + if sample_types[i] == "gex": + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Gene Expression")) else: - lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i],"Chromatin Accessibility")) + lib_csv.write("\n{},{},{}".format(args["fastq_folder"], sample_names[i], "Chromatin Accessibility")) lib_csv.close() From 129ad8c11dd5520552755707a2fc8c1f4c77b76c Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 10:53:53 +0100 Subject: [PATCH 31/66] Python Black --- modules.json | 28 +++--- nextflow_schema.json | 233 +++++++++++++++++-------------------------- 2 files changed, 108 insertions(+), 153 deletions(-) diff --git a/modules.json b/modules.json index 5b4e4a3f..06849b69 100644 --- a/modules.json +++ b/modules.json @@ -8,65 +8,65 @@ "cellranger/count": { "branch": "master", "git_sha": "5df79e0383386a9e43462a6e81bf978ce0a6db09", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "cellranger/mkgtf": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "cellranger/mkref": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", - "installed_by": ["modules"] 
+ "installed_by": ["modules"], }, "fastqc": { "branch": "master", "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "kallistobustools/count": { "branch": "master", "git_sha": "de204d3c950f091336539ad74f0e47ddffe69ed4", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "kallistobustools/ref": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "multiqc": { "branch": "master", "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "star/genomegenerate": { "branch": "master", "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", - "installed_by": ["modules"] + "installed_by": ["modules"], }, "universc": { "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", - "installed_by": ["modules"] - } + "installed_by": ["modules"], + }, } } } - } + }, } diff --git a/nextflow_schema.json b/nextflow_schema.json index 31f95e5f..c850796d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,27 +19,27 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)." 
+ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", }, "multiqc_title": { "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" - } - } + "fa_icon": "fas fa-file-signature", + }, + }, }, "mandatory_arguments": { "title": "Mandatory arguments", @@ -50,7 +50,7 @@ "barcode_whitelist": { "type": "string", "description": "If not using the 10X Genomics platform, a custom barcode whitelist can be used with `--barcode_whitelist`.", - "fa_icon": "fas fa-barcode" + "fa_icon": "fas fa-barcode", }, "aligner": { "type": "string", @@ -58,17 +58,17 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"], }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] - } + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"], + }, }, - "fa_icon": "fas fa-terminal" + "fa_icon": "fas fa-terminal", }, "skip_tools": { "title": "Skip Tools", @@ -77,15 +77,9 @@ "default": "", "fa_icon": "fas fa-forward", "properties": { - "skip_multiqc": { - "type": "boolean", - "description": "Skip MultiQC Report" - }, - "skip_fastqc": { - "type": "boolean", - "description": "Skip FastQC" - } - } + "skip_multiqc": {"type": "boolean", "description": "Skip MultiQC Report"}, + "skip_fastqc": {"type": "boolean", "description": "Skip FastQC"}, + }, }, "reference_genome_options": { "title": "Reference genome options", @@ -97,7 +91,7 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", }, "fasta": { "type": "string", @@ -107,29 +101,25 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "fa_icon": "far fa-file-code", }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - }, - "transcript_fasta": { - "type": "string", - "description": "A cDNA FASTA file", - "fa_icon": "fas fa-dna" + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", }, + "transcript_fasta": {"type": "string", "description": "A cDNA FASTA file", "fa_icon": "fas fa-dna"}, "gtf": { "type": "string", "description": "Reference GTF annotation file", - "fa_icon": "fas fa-code-branch" + "fa_icon": "fas fa-code-branch", }, "save_reference": { "type": "boolean", "description": "Specify this parameter to save the indices created (STAR, Kallisto, Salmon) to the results.", - "fa_icon": "fas fa-bookmark" + "fa_icon": "fas fa-bookmark", }, "igenomes_base": { "type": "string", @@ -137,9 +127,9 @@ "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - } - } + "hidden": true, + }, + }, }, "alevin_options": { "title": "Alevin Options", @@ -150,21 +140,21 @@ "salmon_index": { "type": "string", "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "txp2gene": { "type": "string", "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", - "fa_icon": "fas fa-map-marked-alt" + "fa_icon": "fas fa-map-marked-alt", }, "simpleaf_rlen": { "type": "integer", "default": 91, "description": "It is the target read length the index will be built for, using simpleaf.", - "fa_icon": "fas fa-map-marked-alt" - } - } + "fa_icon": "fas fa-map-marked-alt", + }, + }, }, "starsolo_options": { "title": "STARSolo Options", @@ -176,25 +166,19 @@ "type": "string", "description": "Specify a path to the precomputed STAR index.", "help_text": "> NB: This has to be computed with STAR Version 2.7 or later, as STARsolo was only first supported by STAR Version 2.7.", - "fa_icon": "fas fa-asterisk" - }, - "star_ignore_sjdbgtf": { - "type": "string", - "description": "Ignore the SJDB GTF file." - }, - "seq_center": { - "type": "string", - "description": "Name of sequencing center for BAM read group tag." + "fa_icon": "fas fa-asterisk", }, + "star_ignore_sjdbgtf": {"type": "string", "description": "Ignore the SJDB GTF file."}, + "seq_center": {"type": "string", "description": "Name of sequencing center for BAM read group tag."}, "star_feature": { "type": "string", "default": "Gene", "enum": ["Gene", "GeneFull", "Gene Velocyto"], "description": "Quantification type of different transcriptomic feature. Use `GeneFull` on pre-mRNA count for single-nucleus RNA-seq reads. Use `Gene Velocyto` to generate RNA velocity matrix.", - "fa_icon": "fas fa-asterisk" - } + "fa_icon": "fas fa-asterisk", + }, }, - "fa_icon": "fas fa-star" + "fa_icon": "fas fa-star", }, "kallisto_bus_options": { "title": "Kallisto/BUS Options", @@ -206,21 +190,21 @@ "kallisto_gene_map": { "type": "string", "description": "Specify a Kallisto gene mapping file here. 
If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", }, "kb_workflow": { "type": "string", "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] - } - } + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"], + }, + }, }, "cellranger_options": { "title": "Cellranger Options", @@ -230,9 +214,9 @@ "properties": { "cellranger_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", } - } + }, }, "cellrangerarc_options": { "title": "Cellranger ARC Options", @@ -242,17 +226,14 @@ "properties": { "cellrangerarc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", }, "motifs": { "type": "string", - "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + "description": "Specify a motif file to create a cellranger-arc index. 
Can be taken, e.g., from the JASPAR database.", }, - "sample_type": { - "type": "string", - "description": "Specify the type of data (gex or atac)." - } - } + "sample_type": {"type": "string", "description": "Specify the type of data (gex or atac)."}, + }, }, "universc_options": { "title": "UniverSC Options", @@ -262,14 +243,14 @@ "properties": { "universc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website.", }, "universc_technology": { "type": "string", "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x" - } - } + "default": "10x", + }, + }, }, "institutional_config_options": { "title": "Institutional config options", @@ -283,7 +264,7 @@ "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "custom_config_base": { "type": "string", @@ -291,33 +272,33 @@ "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_name": { "type": "string", "description": "Institutional config name.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_description": { "type": "string", "description": "Institutional config description.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_contact": { "type": "string", "description": "Institutional config contact information.", "hidden": true, - "fa_icon": "fas fa-users-cog" + "fa_icon": "fas fa-users-cog", }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } + "fa_icon": "fas fa-users-cog", + }, + }, }, "max_job_request_options": { "title": "Max job request options", @@ -332,7 +313,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`", }, "max_memory": { "type": "string", @@ -341,7 +322,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`", }, "max_time": { "type": "string", @@ -350,9 +331,9 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`", + }, + }, }, "generic_options": { "title": "Generic options", @@ -365,13 +346,13 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "hidden": true + "hidden": true, }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "hidden": true + "hidden": true, }, "publish_dir_mode": { "type": "string", @@ -380,7 +361,7 @@ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true + "hidden": true, }, "email_on_fail": { "type": "string", @@ -388,13 +369,13 @@ "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true + "hidden": true, }, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true + "hidden": true, }, "max_multiqc_email_size": { "type": "string", @@ -402,109 +383,83 @@ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "hidden": true + "hidden": true, }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "hidden": true, }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true + "hidden": true, }, "multiqc_config": { "type": "string", "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", - "hidden": true + "hidden": true, }, "multiqc_logo": { "type": "string", "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true + "hidden": true, }, "multiqc_methods_description": { "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" + "fa_icon": "fas fa-cog", }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", - "hidden": true + "hidden": true, }, "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.", }, "validationFailUnrecognisedParams": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig.", }, "validationLenientMode": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - } + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).", + }, + }, + }, }, "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/mandatory_arguments" - }, - { - "$ref": "#/definitions/skip_tools" - }, - { - "$ref": "#/definitions/reference_genome_options" - }, - { - "$ref": "#/definitions/alevin_options" - }, - { - "$ref": "#/definitions/starsolo_options" - }, - { - "$ref": "#/definitions/kallisto_bus_options" - }, - { - "$ref": "#/definitions/cellranger_options" - }, - { - "$ref": "#/definitions/cellrangerarc_options" - }, - { - "$ref": "#/definitions/universc_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/generic_options" - } - ] + {"$ref": "#/definitions/input_output_options"}, + {"$ref": "#/definitions/mandatory_arguments"}, + {"$ref": "#/definitions/skip_tools"}, + {"$ref": "#/definitions/reference_genome_options"}, + {"$ref": "#/definitions/alevin_options"}, + {"$ref": "#/definitions/starsolo_options"}, + {"$ref": "#/definitions/kallisto_bus_options"}, + {"$ref": "#/definitions/cellranger_options"}, + {"$ref": "#/definitions/cellrangerarc_options"}, + {"$ref": "#/definitions/universc_options"}, + {"$ref": "#/definitions/institutional_config_options"}, + {"$ref": "#/definitions/max_job_request_options"}, + {"$ref": "#/definitions/generic_options"}, + ], } From afbfd040b043c00566c57d94eab5834ad739cf42 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:01:45 +0100 Subject: [PATCH 32/66] Remove pythonblack correction --- modules.json | 24 ++--- nextflow_schema.json | 233 ++++++++++++++++++++++++++----------------- 2 files changed, 151 insertions(+), 106 deletions(-) diff --git a/modules.json b/modules.json index 06849b69..c44a2a15 100644 --- a/modules.json +++ b/modules.json @@ -8,62 +8,62 @@ "cellranger/count": { "branch": "master", "git_sha": 
"5df79e0383386a9e43462a6e81bf978ce0a6db09", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", "git_sha": "716ef3019b66772a817b417078edce2f7b337858", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "kallistobustools/count": { "branch": "master", "git_sha": "de204d3c950f091336539ad74f0e47ddffe69ed4", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "kallistobustools/ref": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", - "installed_by": ["modules"], + "installed_by": ["modules"] }, "universc": { "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", - "installed_by": ["modules"], + "installed_by": ["modules"] }, } } diff --git a/nextflow_schema.json b/nextflow_schema.json index c850796d..31f95e5f 100644 --- a/nextflow_schema.json +++ 
b/nextflow_schema.json @@ -19,27 +19,27 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)." }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", + "fa_icon": "fas fa-folder-open" }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, "multiqc_title": { "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature", - }, - }, + "fa_icon": "fas fa-file-signature" + } + } }, "mandatory_arguments": { "title": "Mandatory arguments", @@ -50,7 +50,7 @@ "barcode_whitelist": { "type": "string", "description": "If not using the 10X Genomics platform, a custom barcode whitelist can be used with `--barcode_whitelist`.", - "fa_icon": "fas fa-barcode", + "fa_icon": "fas fa-barcode" }, "aligner": { "type": "string", @@ -58,17 +58,17 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"], + "enum": ["kallisto", "star", "alevin", "cellranger", "cellrangerarc", "universc"] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"], - }, + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + } }, - "fa_icon": "fas fa-terminal", + "fa_icon": "fas fa-terminal" }, "skip_tools": { "title": "Skip Tools", @@ -77,9 +77,15 @@ "default": "", "fa_icon": "fas fa-forward", "properties": { - "skip_multiqc": {"type": "boolean", "description": "Skip MultiQC Report"}, - "skip_fastqc": {"type": "boolean", "description": "Skip FastQC"}, - }, + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC Report" + }, + "skip_fastqc": { + "type": "boolean", + "description": "Skip FastQC" + } + } }, "reference_genome_options": { "title": "Reference genome options", @@ -91,7 +97,7 @@ "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", @@ -101,25 +107,29 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code", + "fa_icon": "far fa-file-code" }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "transcript_fasta": { + "type": "string", + "description": "A cDNA FASTA file", + "fa_icon": "fas fa-dna" }, - "transcript_fasta": {"type": "string", "description": "A cDNA FASTA file", "fa_icon": "fas fa-dna"}, "gtf": { "type": "string", "description": "Reference GTF annotation file", - "fa_icon": "fas fa-code-branch", + "fa_icon": "fas fa-code-branch" }, "save_reference": { "type": "boolean", "description": "Specify this parameter to save the indices created (STAR, Kallisto, Salmon) to the results.", - "fa_icon": "fas fa-bookmark", + "fa_icon": "fas fa-bookmark" }, "igenomes_base": { "type": "string", @@ -127,9 +137,9 @@ "description": "Directory / URL base for iGenomes references.", "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", - "hidden": true, - }, - }, + "hidden": true + } + } }, "alevin_options": { "title": "Alevin Options", @@ -140,21 +150,21 @@ "salmon_index": { "type": "string", "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "txp2gene": { "type": "string", "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", - "fa_icon": "fas fa-map-marked-alt", + "fa_icon": "fas fa-map-marked-alt" }, "simpleaf_rlen": { "type": "integer", "default": 91, "description": "It is the target read length the index will be built for, using simpleaf.", - "fa_icon": "fas fa-map-marked-alt", - }, - }, + "fa_icon": "fas fa-map-marked-alt" + } + } }, "starsolo_options": { "title": "STARSolo Options", @@ -166,19 +176,25 @@ "type": "string", "description": "Specify a path to the precomputed STAR index.", "help_text": "> NB: This has to be computed with STAR Version 2.7 or later, as STARsolo was only first supported by STAR Version 2.7.", - "fa_icon": "fas fa-asterisk", + "fa_icon": "fas fa-asterisk" + }, + "star_ignore_sjdbgtf": { + "type": "string", + "description": "Ignore the SJDB GTF file." + }, + "seq_center": { + "type": "string", + "description": "Name of sequencing center for BAM read group tag." }, - "star_ignore_sjdbgtf": {"type": "string", "description": "Ignore the SJDB GTF file."}, - "seq_center": {"type": "string", "description": "Name of sequencing center for BAM read group tag."}, "star_feature": { "type": "string", "default": "Gene", "enum": ["Gene", "GeneFull", "Gene Velocyto"], "description": "Quantification type of different transcriptomic feature. Use `GeneFull` on pre-mRNA count for single-nucleus RNA-seq reads. Use `Gene Velocyto` to generate RNA velocity matrix.", - "fa_icon": "fas fa-asterisk", - }, + "fa_icon": "fas fa-asterisk" + } }, - "fa_icon": "fas fa-star", + "fa_icon": "fas fa-star" }, "kallisto_bus_options": { "title": "Kallisto/BUS Options", @@ -190,21 +206,21 @@ "kallisto_gene_map": { "type": "string", "description": "Specify a Kallisto gene mapping file here. 
If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-fish" }, "kb_workflow": { "type": "string", "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"], - }, - }, + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + } + } }, "cellranger_options": { "title": "Cellranger Options", @@ -214,9 +230,9 @@ "properties": { "cellranger_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " } - }, + } }, "cellrangerarc_options": { "title": "Cellranger ARC Options", @@ -226,14 +242,17 @@ "properties": { "cellrangerarc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " }, "motifs": { "type": "string", - "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database.", + "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
}, - "sample_type": {"type": "string", "description": "Specify the type of data (gex or atac)."}, - }, + "sample_type": { + "type": "string", + "description": "Specify the type of data (gex or atac)." + } + } }, "universc_options": { "title": "UniverSC Options", @@ -243,14 +262,14 @@ "properties": { "universc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website.", + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." }, "universc_technology": { "type": "string", "description": "Specify a single-cell technology, vendor, or platform. See the UniverSC documentation or GitHub repository for more details.", - "default": "10x", - }, - }, + "default": "10x" + } + } }, "institutional_config_options": { "title": "Institutional config options", @@ -264,7 +283,7 @@ "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "custom_config_base": { "type": "string", @@ -272,33 +291,33 @@ "default": "https://raw.githubusercontent.com/nf-core/configs/master", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_name": { "type": "string", "description": "Institutional config name.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_description": { "type": "string", "description": "Institutional config description.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_contact": { "type": "string", "description": "Institutional config contact information.", "hidden": true, - "fa_icon": "fas fa-users-cog", + "fa_icon": "fas fa-users-cog" }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", "hidden": true, - "fa_icon": "fas fa-users-cog", - }, - }, + "fa_icon": "fas fa-users-cog" + } + } }, "max_job_request_options": { "title": "Max job request options", @@ -313,7 +332,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`", + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" }, "max_memory": { "type": "string", @@ -322,7 +341,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`", + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`" }, "max_time": { "type": "string", @@ -331,9 +350,9 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`", - }, - }, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + } + } }, "generic_options": { "title": "Generic options", @@ -346,13 +365,13 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "hidden": true, + "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "hidden": true, + "hidden": true }, "publish_dir_mode": { "type": "string", @@ -361,7 +380,7 @@ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true, + "hidden": true }, "email_on_fail": { "type": "string", @@ -369,13 +388,13 @@ "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true, + "hidden": true }, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true, + "hidden": true }, "max_multiqc_email_size": { "type": "string", @@ -383,83 +402,109 @@ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "hidden": true, + "hidden": true }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true, + "hidden": true }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true, + "hidden": true }, "multiqc_config": { "type": "string", "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", - "hidden": true, + "hidden": true }, "multiqc_logo": { "type": "string", "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", - "hidden": true, + "hidden": true }, "multiqc_methods_description": { "type": "string", "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog", + "fa_icon": "fas fa-cog" }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", - "hidden": true, + "hidden": true }, "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.", + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, "validationFailUnrecognisedParams": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig.", + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, "validationLenientMode": { "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).", - }, - }, - }, + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + } }, "allOf": [ - {"$ref": "#/definitions/input_output_options"}, - {"$ref": "#/definitions/mandatory_arguments"}, - {"$ref": "#/definitions/skip_tools"}, - {"$ref": "#/definitions/reference_genome_options"}, - {"$ref": "#/definitions/alevin_options"}, - {"$ref": "#/definitions/starsolo_options"}, - {"$ref": "#/definitions/kallisto_bus_options"}, - {"$ref": "#/definitions/cellranger_options"}, - {"$ref": "#/definitions/cellrangerarc_options"}, - {"$ref": "#/definitions/universc_options"}, - {"$ref": "#/definitions/institutional_config_options"}, - {"$ref": "#/definitions/max_job_request_options"}, - {"$ref": "#/definitions/generic_options"}, - ], + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/mandatory_arguments" + }, + { + "$ref": "#/definitions/skip_tools" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/alevin_options" + }, + { + "$ref": "#/definitions/starsolo_options" + }, + { + "$ref": "#/definitions/kallisto_bus_options" + }, + { + "$ref": "#/definitions/cellranger_options" + }, + { + "$ref": "#/definitions/cellrangerarc_options" + }, + { + "$ref": "#/definitions/universc_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] } From 6884791d02eb7d100b6a3a77bee4192640bde059 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:03:08 +0100 Subject: [PATCH 33/66] Prettier modules json --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index c44a2a15..5b4e4a3f 100644 --- a/modules.json +++ b/modules.json @@ -64,9 +64,9 @@ "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", "installed_by": ["modules"] - }, + } } } } - }, + } } From 
c2004eb1fba63774cf6d00fdfa2ad29d4ec3a880 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:24:06 +0100 Subject: [PATCH 34/66] Python black for bin/check_samplesheet.py bin/generate_config.py bin/generate_lib_csv.py --- bin/check_samplesheet.py | 2 -- bin/generate_config.py | 1 - bin/generate_lib_csv.py | 1 - 3 files changed, 4 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3544dab9..bd713438 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -84,7 +84,6 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: - ## Check header MIN_COLS = 2 MIN_HEADER = ["sample", "fastq_1", "fastq_2"] @@ -216,7 +215,6 @@ def check_samplesheet(file_in, file_out): + "\n" ) for sample in sorted(sample_mapping_dict.keys()): - ## Check that multiple runs of the same sample are of the same datatype if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( diff --git a/bin/generate_config.py b/bin/generate_config.py index 85647826..62434aef 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,7 +2,6 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") diff --git a/bin/generate_lib_csv.py b/bin/generate_lib_csv.py index 07ab9661..5c1c0c4f 100755 --- a/bin/generate_lib_csv.py +++ b/bin/generate_lib_csv.py @@ -3,7 +3,6 @@ import os if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") parser.add_argument("-t", "--sample_types", dest="sample_types", help="Comma seperated list of sample types.") From cfc00fb151cb70a8ddd7db612edc47887b9a8660 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 31 Oct 2023 11:45:59 +0100 Subject: [PATCH 35/66] Forgot to output the lib.csv from count --- 
modules/local/cellrangerarc/count/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index a858a2eb..988caec5 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -15,6 +15,7 @@ process CELLRANGERARC_COUNT { output: tuple val(meta), path("${meta.id}/outs/*"), emit: outs + path("${meta.id}_lib.csv") , emit: lib path "versions.yml" , emit: versions when: From a97b8586321d6709a849b41a8fad232296e7dd99 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 10 Nov 2023 16:21:25 +0100 Subject: [PATCH 36/66] Adding more documentation for the generat config python script --- bin/generate_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 62434aef..2eef125e 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,7 +2,10 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the lib.csv for cellranger-arc.") + parser = argparse.ArgumentParser(description="Generate the config for cellranger-arc mkref. \ + cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. 
\ + You can also create a config file on your own, please find more information here:\ + https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references") parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") From 1d2b48a39d0ec8285ecd2f2a5f44bb49f9ed9683 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 10 Nov 2023 16:27:24 +0100 Subject: [PATCH 37/66] Changes to cellrangarc count main-nf from review --- modules/local/cellrangerarc/count/main.nf | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index 988caec5..ed42527d 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -10,7 +10,7 @@ process CELLRANGERARC_COUNT { } input: - tuple val(meta), val(multi_meta), path(reads) + tuple val(meta), val(multi_meta), path(reads, stageAs: "fastqs/*") path reference output: @@ -31,14 +31,6 @@ process CELLRANGERARC_COUNT { def lib_csv = meta.id + "_lib.csv" """ - # The following ugly three commands (mkdir, mv, generate_lib_csv) - # are required because cellranger-arc only deals with abolsute paths - if [ ! 
-d "fastqs" ]; then - mkdir fastqs - fi - - mv *.fastq.gz fastqs/ - generate_lib_csv.py \\ --sample_types $sample_types \\ --sample_names $sample_names \\ From d4411f8ceeb462a8d67ba8d12d055966ac30c978 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 17:50:39 +0100 Subject: [PATCH 38/66] Changing multi_meta to sub_sample and sample_type --- modules/local/cellrangerarc/count/main.nf | 8 ++++---- subworkflows/local/input_check.nf | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index ed42527d..dbba4815 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -10,7 +10,7 @@ process CELLRANGERARC_COUNT { } input: - tuple val(meta), val(multi_meta), path(reads, stageAs: "fastqs/*") + tuple val(meta), val(sample_type), val(sub_sample), path(reads, stageAs: "fastqs/*") path reference output: @@ -25,9 +25,9 @@ process CELLRANGERARC_COUNT { def args = task.ext.args ?: '' def reference_name = reference.name - def multi_meta_info = multi_meta.collate(2).transpose() - def sample_types = multi_meta_info[0].join(",") - def sample_names = multi_meta_info[1].join(",") + //def multi_meta_info = multi_meta.collate(2).transpose() + def sample_types = sample_type.join(",") + def sample_names = sub_sample.join(",") def lib_csv = meta.id + "_lib.csv" """ diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 0de0ac59..619787d6 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -23,8 +23,9 @@ workflow INPUT_CHECK { // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] .groupTuple(by: [0]) // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [multi_meta], [ reads ] ] - .map { meta, multi_meta, reads -> [ meta, multi_meta.flatten(), 
reads.flatten() ] } + // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] + .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), + reads.flatten() ] } .set { reads } versions = SAMPLESHEET_CHECK.out.versions } else { @@ -42,7 +43,7 @@ workflow INPUT_CHECK { } emit: - reads // channel: [ val(meta), [multi_meta], [ reads ] ] + reads // channel: [ val(meta), [*], [ reads ] ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } @@ -82,9 +83,9 @@ def create_fastq_channel(LinkedHashMap row) { } // define meta_data for multiome - def multi_meta = [] - multi_meta = row.sample_type ? [row.sample_type] : [params.sample_type] + def sample_type = row.sample_type ? [row.sample_type] : [params.sample_type] + def sub_sample = "" if (params.aligner == "cellrangerarc"){ sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") fastqs.each{ @@ -93,13 +94,12 @@ def create_fastq_channel(LinkedHashMap row) { "${sub_sample} in common!\n${it}" } } - multi_meta.add(sub_sample) } fastq_meta = [ meta, fastqs ] if (params.aligner == "cellrangerarc"){ - fastq_meta = [ meta, multi_meta, fastqs ] + fastq_meta = [ meta, sample_type, sub_sample, fastqs ] } return fastq_meta From 0e049ce0587a6cd614132db9744a0b03fbe5e0bb Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:08:32 +0100 Subject: [PATCH 39/66] black cleanup --- bin/generate_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 2eef125e..f2d818d1 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,10 +2,12 @@ import argparse if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate the config for cellranger-arc mkref. \ + parser = argparse.ArgumentParser( + description="Generate the config for cellranger-arc mkref. 
\ cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. \ You can also create a config file on your own, please find more information here:\ - https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references") + https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" + ) parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") From a8923c615f9d3c4b2976ab0e43f0de0ff052f6d5 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:27:30 +0100 Subject: [PATCH 40/66] Adapting fastqc for cellrangerarc main.nf of fastqc and scrnaseq.nf --- subworkflows/local/fastqc.nf | 13 ++++++++++--- workflows/scrnaseq.nf | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index f18214a1..53e20857 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -8,9 +8,16 @@ workflow FASTQC_CHECK { ch_fastq main: - ch_fastq - .map { ch -> [ ch[0], ch[1] ] } - .set { ch_fastq } + + if( params.aligner == 'cellrangerarc'){ + ch_fastq + .map { ch -> [ ch[0], ch[3] ] } + .set { ch_fastq } + } else { + ch_fastq + .map { ch -> [ ch[0], ch[1] ] } + .set { ch_fastq } + } /* * FastQ QC using FASTQC diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 65895dc6..8b64e0f2 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -47,6 +47,7 @@ include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE 
MODULES/SUBWORKFLOWS From 1d020ba8ddf490106eade2d7418d0991c6fc3239 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 22 Nov 2023 18:27:48 +0100 Subject: [PATCH 41/66] Comment change to input_check.nf --- subworkflows/local/input_check.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 619787d6..30b9cf2e 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,7 +20,7 @@ workflow INPUT_CHECK { .splitCsv ( header:true, sep:',' ) .map { create_fastq_channel(it) } // group replicate files together, modifies channel to - // [ val(meta), [ multimeta_s1, multimeta_s1 ], [ [reads_rep1], [reads_repN] ] ] + // [ val(meta), [sample_type], [sub_sample], [ [reads_rep1], [reads_repN] ] ] .groupTuple(by: [0]) // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] From 5fc6c051b23ef5efa355c138dcbe63003d406954 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:24:36 +0100 Subject: [PATCH 42/66] Changes to mtx to h5ad --- modules/local/mtx_to_h5ad.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index ca9b1d48..84d98608 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -41,7 +41,7 @@ process MTX_TO_H5AD { // // run script // - if (params.aligner == 'cellranger' || params.aligner == 'cellrangerarc') + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) """ # convert file types mtx_to_h5ad.py \\ From 050de407bf66d771876057a1c96a2b48f30b5c29 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:24:54 +0100 Subject: [PATCH 43/66] Changes to mtx to seurat --- modules/local/mtx_to_seurat.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_seurat.nf 
b/modules/local/mtx_to_seurat.nf index 8d344035..73e260d2 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -19,7 +19,7 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner - if (params.aligner == "cellranger" || params.aligner == "cellrangerarc") { + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" features = "filtered_feature_bc_matrix/features.tsv.gz" From cbe54933960d68d1cbb4be5a35c2f5d3ec2c0ce1 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:41:04 +0100 Subject: [PATCH 44/66] Pretty fastqc.main --- subworkflows/local/fastqc.nf | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index 53e20857..6825a9e0 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -9,15 +9,8 @@ workflow FASTQC_CHECK { main: - if( params.aligner == 'cellrangerarc'){ - ch_fastq - .map { ch -> [ ch[0], ch[3] ] } - .set { ch_fastq } - } else { - ch_fastq - .map { ch -> [ ch[0], ch[1] ] } - .set { ch_fastq } - } + def n = (params.aligner == 'cellrangerarc') ? 
3 : 1 + ch_fastq.map { ch -> [ ch[0], ch[n] ] }.set { ch_fastq } /* * FastQ QC using FASTQC From fce8e26f26ce892a9c5dc8684dd4c1ee1f8ccb50 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:41:28 +0100 Subject: [PATCH 45/66] Remove cellrangerarc_index --- nextflow.config | 2 +- nextflow_schema.json | 4 ---- subworkflows/local/align_cellrangerarc.nf | 10 +++++----- workflows/scrnaseq.nf | 7 ++----- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8c2488e3..9cc0a2d0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,7 +41,6 @@ params { cellranger_index = null // Cellranger ARC parameters - cellrangerarc_index = null motifs = null sample_type = 'gex' @@ -208,6 +207,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_multiome { includeConfig 'conf/test_multiome.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index 31f95e5f..ea6df842 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -240,10 +240,6 @@ "description": "Params related to the Cellranger pipeline", "default": "", "properties": { - "cellrangerarc_index": { - "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " - }, "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." 
diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index c2dffde6..b8acd2dd 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -13,16 +13,16 @@ workflow CELLRANGERARC_ALIGN { fasta gtf motifs - cellrangerarc_index + cellranger_index ch_fastq main: ch_versions = Channel.empty() - assert cellrangerarc_index || (fasta && gtf && motifs): + assert cellranger_index || (fasta && gtf && motifs): "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." - if (!cellrangerarc_index) { + if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules CELLRANGERARC_MKGTF( gtf ) filtered_gtf = CELLRANGERARC_MKGTF.out.gtf @@ -35,13 +35,13 @@ workflow CELLRANGERARC_ALIGN { // Make reference genome CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) - cellrangerarc_index = CELLRANGERARC_MKREF.out.reference + cellranger_index = CELLRANGERARC_MKREF.out.reference } // Obtain read counts CELLRANGERARC_COUNT ( ch_fastq, - cellrangerarc_index + cellranger_index ) ch_versions = ch_versions.mix(CELLRANGERARC_COUNT.out.versions) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 8b64e0f2..053865a6 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -12,7 +12,7 @@ def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.gtf, params.transcript_fasta, params.salmon_index, params.kallisto_index, params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index, - params.cellrangerarc_index, params.universc_index + params.universc_index ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -105,9 +105,6 @@ star_feature = params.star_feature //cellranger params 
ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] -//cellrangerarc params -ch_cellrangerarc_index = params.cellrangerarc_index ? file(params.cellrangerarc_index) : [] - //universc params ch_universc_index = params.universc_index ? file(params.universc_index) : [] @@ -224,7 +221,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_motifs, - ch_cellrangerarc_index, + ch_cellranger_index, ch_fastq ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) From a46f63d090e1f4a996a0f00df5f727de5c3c6199 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:56:42 +0100 Subject: [PATCH 46/66] Pretty input:_check --- subworkflows/local/input_check.nf | 40 ++++++++++++------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 30b9cf2e..56fd4ddf 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -14,32 +14,22 @@ workflow INPUT_CHECK { reads = null versions = null - if (params.aligner == "cellrangerarc"){ - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - // group replicate files together, modifies channel to - // [ val(meta), [sample_type], [sub_sample], [ [reads_rep1], [reads_repN] ] ] - .groupTuple(by: [0]) - // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [sample_type], [sub_sample], [ reads ] ] - .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), - reads.flatten() ] } - .set { reads } - versions = SAMPLESHEET_CHECK.out.versions + grouped_ch = + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] + .groupTuple(by: 
[0]) + + if (params.aligner == 'cellrangerarc' ) { + grouped_ch + .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), reads.flatten() ] } + .set { reads } } else { - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .groupTuple(by: [0]) - // needs to flatten due to last "groupTuple", so we now have reads as a single array as expected by - // nf-core modules: [ val(meta), [ reads ] ] - .map { meta, reads -> [ meta, reads.flatten() ] } - .set { reads } - versions = SAMPLESHEET_CHECK.out.versions + grouped_ch + .map { meta, reads -> [ meta, reads.flatten() ] } + .set { reads } } emit: From 509450ad71defd437d2026bdc744b0d2b915bb63 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 24 Nov 2023 17:57:03 +0100 Subject: [PATCH 47/66] Pretty mtx_converstion --- subworkflows/local/mtx_conversion.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index d1f8345a..958da400 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,7 @@ workflow MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. 
- if ( params.aligner == "cellranger" || params.aligner == "cellrangerarc" ) { + if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } From d2c0eee23150a8155edb8b2829d3828773f23b25 Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 27 Nov 2023 15:47:13 +0100 Subject: [PATCH 48/66] Removing global parameter sample_type --- nextflow.config | 1 - nextflow_schema.json | 4 ---- subworkflows/local/align_cellrangerarc.nf | 2 +- subworkflows/local/input_check.nf | 2 +- 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9cc0a2d0..9b105f27 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,7 +42,6 @@ params { // Cellranger ARC parameters motifs = null - sample_type = 'gex' // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ea6df842..493ca47c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,10 +243,6 @@ "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." - }, - "sample_type": { - "type": "string", - "description": "Specify the type of data (gex or atac)." } } }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b8acd2dd..8313fd47 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -20,7 +20,7 @@ workflow CELLRANGERARC_ALIGN { ch_versions = Channel.empty() assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger-atac index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." 
if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 56fd4ddf..2e06e889 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -73,7 +73,7 @@ def create_fastq_channel(LinkedHashMap row) { } // define meta_data for multiome - def sample_type = row.sample_type ? [row.sample_type] : [params.sample_type] + def sample_type = row.sample_type ? [row.sample_type] : ['gex'] def sub_sample = "" if (params.aligner == "cellrangerarc"){ From 7ffe8b07a15064377766913798bbd8086c67bc94 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:21:24 +0100 Subject: [PATCH 49/66] Addig the config generation into the mkref module and also adding the option to procide your config file --- bin/generate_config.py | 2 +- modules.json | 5 +++ modules/local/cellrangerarc/mkref/main.nf | 26 ++++++++++++-- .../local/generate_cellranger_mkref_config.nf | 36 ------------------- nextflow.config | 2 ++ nextflow_schema.json | 8 +++++ subworkflows/local/align_cellrangerarc.nf | 19 ++++++---- workflows/scrnaseq.nf | 4 ++- 8 files changed, 56 insertions(+), 46 deletions(-) delete mode 100644 modules/local/generate_cellranger_mkref_config.nf diff --git a/bin/generate_config.py b/bin/generate_config.py index f2d818d1..aae903a4 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -25,7 +25,7 @@ config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) - if args["add"] != "none": + if args["add"] != None: config.write(args["add"] + "\n") config.write("}") config.close() diff --git a/modules.json b/modules.json index 5b4e4a3f..0d7aefd8 100644 --- a/modules.json +++ b/modules.json @@ -64,6 +64,11 @@ "branch": "master", "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", "installed_by": ["modules"] + }, + 
"unzip": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] } } } diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 41e9db30..88029ebd 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -1,5 +1,5 @@ process CELLRANGERARC_MKREF { - tag "$reference_config" + tag "$reference_name" label 'process_medium' container "nf-core/cellranger-arc:2.0.2" @@ -18,17 +18,39 @@ process CELLRANGERARC_MKREF { output: path "${reference_name}", emit: reference + path "config" , emit: config path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def fast_name = fasta.name + def gtf_name = gtf.name + def motifs_name = motifs.name + def reference_config = reference_config.name def args = task.ext.args ?: '' + + if ( !reference_name ){ + reference_name = "cellrangerarc_reference" + } + """ + if [ $reference_config == [] ]; then + generate_config.py \\ + --fasta $fast_name \\ + --gtf $gtf_name \\ + --motifs $motifs_name \\ + $args + else + if [ ! -f config ]; then + mv -i $reference_config config + fi + fi + cellranger-arc \\ mkref \\ - --config=$reference_config \\ + --config=config \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/generate_cellranger_mkref_config.nf b/modules/local/generate_cellranger_mkref_config.nf deleted file mode 100644 index 4bc474c8..00000000 --- a/modules/local/generate_cellranger_mkref_config.nf +++ /dev/null @@ -1,36 +0,0 @@ -process CELLRANGERARC_GENERATECONFIG { - tag "$samplesheet" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - val(fasta) - val(gtf) - val(motifs) - - output: - path '*config' , emit: config - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ - def args = task.ext.args ?: '' - """ - generate_config.py \\ - --fasta $fasta \\ - --gtf $gtf \\ - --motifs $motifs \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/nextflow.config b/nextflow.config index 9b105f27..6d35d957 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,8 @@ params { // Cellranger ARC parameters motifs = null + cellrangerarc_config = null + cellrangerarc_reference = null // UniverSC paramaters universc_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 493ca47c..b94728fc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -243,6 +243,14 @@ "motifs": { "type": "string", "description": "Specify a motif file to create a cellranger-arc index. Can be taken, e.g., from the JASPAR database." + }, + "cellrangerarc_config": { + "type": "string", + "description": "Specify a config file to create the cellranger-arc index." + }, + "cellrangerarc_reference": { + "type": "string", + "description": "Specify the genome reference name in the config file to create a cellranger-arc index." 
} } }, diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index 8313fd47..8355e0a3 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -4,7 +4,6 @@ include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {CELLRANGERARC_GENERATECONFIG} from "../../modules/local/generate_cellranger_mkref_config.nf" include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" // Define workflow to subset and index a genome region fasta file @@ -15,6 +14,7 @@ workflow CELLRANGERARC_ALIGN { motifs cellranger_index ch_fastq + cellrangerarc_config main: ch_versions = Channel.empty() @@ -28,12 +28,19 @@ workflow CELLRANGERARC_ALIGN { filtered_gtf = CELLRANGERARC_MKGTF.out.gtf ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) - // Generate the config for mkref - CELLRANGERARC_GENERATECONFIG(fasta.name, filtered_gtf.name, motifs.name) - ch_versions = ch_versions.mix(CELLRANGERARC_GENERATECONFIG.out.versions) - // Make reference genome - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, CELLRANGERARC_GENERATECONFIG.out.config, "cellrangerarc_reference" ) + if ( ( params.cellrangerarc_reference && !cellrangerarc_config ) || + ( !params.cellrangerarc_reference && cellrangerarc_config ) ) { + exit 1, "ERROR: If you provide a config file you also have to specific the reference name and vice versa." 
+ } else { + + cellrangerarc_reference = 'cellrangerarc_reference' + if ( params.cellrangerarc_reference ){ + cellrangerarc_reference = params.cellrangerarc_reference + } + + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) + } ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) cellranger_index = CELLRANGERARC_MKREF.out.reference } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 053865a6..ffdb8421 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -78,6 +78,7 @@ ch_genome_fasta = Channel.value(params.fasta ? file(params.fasta) : []) ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] ch_motifs = params.motifs ? file(params.motifs) : [] +ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] ch_multiqc_alevin = Channel.empty() ch_multiqc_star = Channel.empty() @@ -222,7 +223,8 @@ workflow SCRNASEQ { ch_filter_gtf, ch_motifs, ch_cellranger_index, - ch_fastq + ch_fastq, + ch_cellrangerarc_config ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) From 3f9218994974fd8900545984bf66097f5ec0f361 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:41:32 +0100 Subject: [PATCH 50/66] Documentation added to output.md --- docs/output.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/output.md b/docs/output.md index c1e2b013..1fca8b5c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) - [Cellranger](#cellranger) + - [Cellranger ARC](#cellrangerarc) - [UniverSC](#universc) - [Other output data](#other-output-data) - 
[MultiQC](#multiqc) @@ -103,6 +104,14 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell - Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger +## Cellranger ARC + +Cell Ranger ARC is a set of analysis pipelines that process Chromium Single Cell Multiome ATAC + Gene Expression sequencing data to generate a variety of analyses pertaining to gene expression (GEX), chromatin accessibility, and their linkage. Furthermore, since the ATAC and GEX measurements are on the very same cell, we are able to perform analyses that link chromatin accessibility and GEX. See [Cellranger ARC](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc) for more information on Cellranger. + +**Output directory: `results/cellrangerarc`** + +- Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by Cellranger ARC + ## UniverSC UniverSC is a wrapper that calls an open-source implementation of Cell Ranger v3.0.2 and adjusts run parameters for compatibility with a wide ranger of technologies. 
From 2852db356e1373995b0d926d4f73b14fbee44a03 Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 28 Nov 2023 18:42:47 +0100 Subject: [PATCH 51/66] Documentation added to output.md --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 1fca8b5c..7e9f0cd8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,7 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) - [Cellranger](#cellranger) - - [Cellranger ARC](#cellrangerarc) + - [Cellranger ARC](#cellranger-arc) - [UniverSC](#universc) - [Other output data](#other-output-data) - [MultiQC](#multiqc) From a39c4d9dd71e22ce66930a8b9e76fb53a4ad4ad0 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 11:53:31 +0100 Subject: [PATCH 52/66] Update usage.md for cellranger-arc --- docs/usage.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index f90dc242..0de496a8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,6 +97,55 @@ UniverSC automatically updates the barcode whitelist and chemistry parameters. U Currently only 3\' scRNA-Seq parameters are supported in nextflow, although chemistry parameters for 5\' scRNA-Seq and full-length scRNA-Seq libraries are supported by teh container. 
+### If using cellranger-arc + +#### Automatic file name detection + +This pipeline currently **does not** automatically renames input FASTQ files to follow the +[naming convention by 10x](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input): + +``` +[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz +``` + +For more details, see + +- [this issue](https://github.com/nf-core/scrnaseq/issues/241), discussing various mechanisms to deal with non-conformant filenames +- [the README of the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/README.md) which demonstrates that renaming files does not affect the results. +- [the code for renaming files in the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/templates/cellranger_count.py) +- [the code for renaming files in UniverSC](https://github.com/minoda-lab/universc/blob/99a20652430c1dc9f962536a2793536f643810b7/launch_universc.sh#L1411-L1609) + +#### Sample sheet definition + +If you are using cellranger-arc you have to add the column *sample_type* (atac for scATAC or gex for scRNA) and *fastq_barcode* (part of the scATAC data) to your samplesheet as an input. + +**Beware of the following points:** + - It is important that you give your scRNA and scATAC different [Sample Name]s. + - Check first which file is your barcode fastq file for your scATAC data [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input). + - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start). 
+ +An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run +from two lanes for the scRNA could look like this: + +sample,fastq_1,fastq_2,fastq_barcode,sample_type +test_scARC,path/test_scARC_atac_rep1_S1_L001_R1_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_R2_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_atac_rep2_S2_L001_R1_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_R2_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_I2_001.fastq.gz,atac +test_scARC,path/test_scARC_gex_S1_L001_R1_001.fastq.gz,path/test_scARC_gex_S1_L001_R2_001.fastq.gz,,gex +test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L002_R2_001.fastq.gz,,gex + +#### Config file and index + +Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use +for cellranger-arc the same index you use for cellranger [see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines). +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please check first if +you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for +more information) + +If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline +can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can +also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` +the reference genome name that you have used and stated as genome: [""] in your config file. 
+ ## Running the pipeline The minimum typical command for running the pipeline is as follows: From e24df897ab104dd4fd4a64c33ae9b5e69baeb2a4 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:00:37 +0100 Subject: [PATCH 53/66] Update usage.md for cellranger-arc --- docs/usage.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 0de496a8..6c393861 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,12 +108,7 @@ This pipeline currently **does not** automatically renames input FASTQ files to [Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz ``` -For more details, see - -- [this issue](https://github.com/nf-core/scrnaseq/issues/241), discussing various mechanisms to deal with non-conformant filenames -- [the README of the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/README.md) which demonstrates that renaming files does not affect the results. -- [the code for renaming files in the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/templates/cellranger_count.py) -- [the code for renaming files in UniverSC](https://github.com/minoda-lab/universc/blob/99a20652430c1dc9f962536a2793536f643810b7/launch_universc.sh#L1411-L1609) +Thus please make sure your files follow this naming convention. #### Sample sheet definition @@ -121,8 +116,8 @@ If you are using cellranger-arc you have to add the column *sample_type* (atac f **Beware of the following points:** - It is important that you give your scRNA and scATAC different [Sample Name]s. - - Check first which file is your barcode fastq file for your scATAC data [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input). 
- - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] [see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start). + - Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). + - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: @@ -136,15 +131,15 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use -for cellranger-arc the same index you use for cellranger [see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines). -Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please check first if +for cellranger-arc the same index you use for cellranger ((see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` -the reference genome name that you have used and stated as genome: [""] in your config file. +the reference genome name that you have used and stated as *genome:* in your config file. ## Running the pipeline From 561720cb1121e123c77073b3e37e47a5ce4b4bfd Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:00:58 +0100 Subject: [PATCH 54/66] Update usage.md for cellranger-arc --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 6c393861..07a6124e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,7 +131,7 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use -for cellranger-arc the same index you use for cellranger ((see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). +for cellranger-arc the same index you use for cellranger ([see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) From d7f100cc173db30752d86e24b442595f89feadad Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 12:58:01 +0100 Subject: [PATCH 55/66] Making motif file optional --- bin/generate_config.py | 7 ++++--- subworkflows/local/align_cellrangerarc.nf | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index aae903a4..91d0a209 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -9,8 +9,8 @@ https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" ) - parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.") - parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.") + parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.", required=True) + parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.", required=True) parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") @@ -24,7 +24,8 @@ config.write('\tgenome: ["cellrangerarc_reference"]\n') config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) + if args["motifs"] != '[]': + config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) if args["add"] != None: config.write(args["add"] + "\n") config.write("}") diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index 8355e0a3..b33cd61b 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ 
b/subworkflows/local/align_cellrangerarc.nf @@ -19,8 +19,8 @@ workflow CELLRANGERARC_ALIGN { main: ch_versions = Channel.empty() - assert cellranger_index || (fasta && gtf && motifs): - "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf') + motif file (--motifs)." + assert cellranger_index || (fasta && gtf): + "Must provide either a cellranger index or a bundle of a fasta file ('--fasta') + gtf file ('--gtf')." if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules @@ -29,18 +29,16 @@ workflow CELLRANGERARC_ALIGN { ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) // Make reference genome - if ( ( params.cellrangerarc_reference && !cellrangerarc_config ) || - ( !params.cellrangerarc_reference && cellrangerarc_config ) ) { - exit 1, "ERROR: If you provide a config file you also have to specific the reference name and vice versa." - } else { + assert ( ( !params.cellrangerarc_reference && !cellrangerarc_config ) || + ( params.cellrangerarc_reference && cellrangerarc_config ) ) : + "If you provide a config file you also have to specific the reference name and vice versa." 
- cellrangerarc_reference = 'cellrangerarc_reference' - if ( params.cellrangerarc_reference ){ - cellrangerarc_reference = params.cellrangerarc_reference - } - - CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) + cellrangerarc_reference = 'cellrangerarc_reference' + if ( params.cellrangerarc_reference ){ + cellrangerarc_reference = params.cellrangerarc_reference } + + CELLRANGERARC_MKREF( fasta, filtered_gtf, motifs, cellrangerarc_config, cellrangerarc_reference ) ch_versions = ch_versions.mix(CELLRANGERARC_MKREF.out.versions) cellranger_index = CELLRANGERARC_MKREF.out.reference } From 67aa26da63ab072ba5644a4f2f4eb529a1c08f6d Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 13:01:08 +0100 Subject: [PATCH 56/66] Prettier for usage.md --- docs/usage.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 07a6124e..9b37ee6f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -112,12 +112,13 @@ Thus please make sure your files follow this naming convention. #### Sample sheet definition -If you are using cellranger-arc you have to add the column *sample_type* (atac for scATAC or gex for scRNA) and *fastq_barcode* (part of the scATAC data) to your samplesheet as an input. +If you are using cellranger-arc you have to add the column _sample_type_ (atac for scATAC or gex for scRNA) and _fastq_barcode_ (part of the scATAC data) to your samplesheet as an input. **Beware of the following points:** - - It is important that you give your scRNA and scATAC different [Sample Name]s. - - Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). 
- - If you have more than one sequencing run then you have to give them another suffix (e.g., rep*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). + +- It is important that you give your scRNA and scATAC different [Sample Name]s. +- Check first which file is your barcode fastq file for your scATAC data ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/using/fastq-input)). +- If you have more than one sequencing run then you have to give them another suffix (e.g., rep\*) to your [Sample Name] ([see](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/using/fastq-input#atac_quick_start)). An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: @@ -130,16 +131,16 @@ test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L0 #### Config file and index -Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use +Cellranger-arc needs a reference index directory that you can provide with `--cellranger_index`. Be aware, you can use for cellranger-arc the same index you use for cellranger ([see](https://kb.10xgenomics.com/hc/en-us/articles/4408281606797-Are-the-references-interchangeable-between-pipelines)). -Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). Therefore, please first check if -you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for +Yet, a cellranger-arc index might include additional data (e.g., TF binding motifs). 
Therefore, please first check if +you have to create a new cellranger-arc index ([see here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references) for more information) If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can -also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` -the reference genome name that you have used and stated as *genome:* in your config file. +also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` +the reference genome name that you have used and stated as _genome:_ in your config file. ## Running the pipeline From df964d81d539e0202b85df5340fee2952073b3f0 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Nov 2023 13:01:51 +0100 Subject: [PATCH 57/66] Black generate_config.py --- bin/generate_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/generate_config.py b/bin/generate_config.py index 91d0a209..c01260dd 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -24,7 +24,7 @@ config.write('\tgenome: ["cellrangerarc_reference"]\n') config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - if args["motifs"] != '[]': + if args["motifs"] != "[]": config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) if args["add"] != None: config.write(args["add"] + "\n") From a74de9edb80a50e0625fda7ff3b1580fea480f7f Mon Sep 17 00:00:00 2001 From: heylf Date: Sun, 3 Dec 2023 16:41:33 +0100 Subject: [PATCH 58/66] Update docs/usage.md Co-authored-by: Gregor Sturm --- docs/usage.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/usage.md 
b/docs/usage.md index 9b37ee6f..c9e96eb0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -123,11 +123,13 @@ If you are using cellranger-arc you have to add the column _sample_type_ (atac f An example samplesheet for a dataset called test_scARC that has two sequencing runs for the scATAC and one seqeuncing run from two lanes for the scRNA could look like this: +```csv sample,fastq_1,fastq_2,fastq_barcode,sample_type test_scARC,path/test_scARC_atac_rep1_S1_L001_R1_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_R2_001.fastq.gz,path/test_scARC_atac_rep1_S1_L001_I2_001.fastq.gz,atac test_scARC,path/test_scARC_atac_rep2_S2_L001_R1_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_R2_001.fastq.gz,path/test_scARC_atac_rep2_S2_L001_I2_001.fastq.gz,atac test_scARC,path/test_scARC_gex_S1_L001_R1_001.fastq.gz,path/test_scARC_gex_S1_L001_R2_001.fastq.gz,,gex test_scARC,path/test_scARC_gex_S1_L002_R1_001.fastq.gz,path/test_scARC_gex_S1_L002_R2_001.fastq.gz,,gex +``` #### Config file and index From 7dba557b599f5e43f77d13efe978bcb11559cb97 Mon Sep 17 00:00:00 2001 From: heylf Date: Sun, 3 Dec 2023 16:41:55 +0100 Subject: [PATCH 59/66] Update nextflow_schema.json Co-authored-by: Gregor Sturm --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b94728fc..b9758014 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -250,7 +250,7 @@ }, "cellrangerarc_reference": { "type": "string", - "description": "Specify the genome reference name in the config file to create a cellranger-arc index." + "description": "Specify the genome reference name used in the config file to create a cellranger-arc index." 
} } }, From 6c9151aca9dd68465d3ec4573ab4d79494e6f606 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 2023 18:16:02 +0100 Subject: [PATCH 60/66] Delete unnecessary files and update file naming convention --- bin/generate_config.py | 34 -------- .../count/templates/cellranger_count.py | 84 ------------------- 2 files changed, 118 deletions(-) delete mode 100755 bin/generate_config.py delete mode 100644 modules/nf-core/cellranger/count/templates/cellranger_count.py diff --git a/bin/generate_config.py b/bin/generate_config.py deleted file mode 100755 index c01260dd..00000000 --- a/bin/generate_config.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -import argparse - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate the config for cellranger-arc mkref. \ - cellranger-arc mkref takes as input a configuration file that bundles various inputs to the tool. \ - You can also create a config file on your own, please find more information here:\ - https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/advanced/references" - ) - - parser.add_argument("-f", "--fasta", dest="fasta", help="Name of the fasta file.", required=True) - parser.add_argument("-g", "--gtf", dest="gtf", help="Name of the gtf file.", required=True) - parser.add_argument("-m", "--motifs", dest="motifs", help="Name of the motifs file.") - parser.add_argument("-a", "--add", dest="add", help="Additional filter line.") - - args = vars(parser.parse_args()) - - print(args) - - config = open("config", "w") - config.write("{\n") - config.write('\torganism: "{}"\n'.format(args["fasta"].split(".")[0])) - config.write('\tgenome: ["cellrangerarc_reference"]\n') - config.write('\tinput_fasta: ["{}"]\n'.format(args["fasta"])) - config.write('\tinput_gtf: ["{}"]\n'.format(args["gtf"])) - if args["motifs"] != "[]": - config.write('\tinput_motifs: "{}"\n'.format(args["motifs"])) - if args["add"] != None: - config.write(args["add"] + "\n") - 
config.write("}") - config.close() - - print("Wrote config file") diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py deleted file mode 100644 index 4bfb9f4f..00000000 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -""" -Automatically rename staged files for input into cellranger count. - -Copyright (c) Gregor Sturm 2023 - MIT License -""" -from subprocess import run -from pathlib import Path -from textwrap import dedent -import shlex -import re - - -def chunk_iter(seq, size): - """iterate over `seq` in chunks of `size`""" - return (seq[pos : pos + size] for pos in range(0, len(seq), size)) - - -sample_id = "${meta.id}" - -# get fastqs, ordered by path. Files are staged into -# - "fastq_001/{original_name.fastq.gz}" -# - "fastq_002/{oritinal_name.fastq.gz}" -# - ... -# Since we require fastq files in the input channel to be ordered such that a R1/R2 pair -# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...] -fastqs = sorted(Path(".").glob("fastq_*/*")) -assert len(fastqs) % 2 == 0 - -# target directory in which the renamed fastqs will be placed -fastq_all = Path("./fastq_all") -fastq_all.mkdir(exist_ok=True) - -# Match R1 in the filename, but only if it is followed by a non-digit or non-character -# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but -# do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" - -for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): - # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: - raise AssertionError( - dedent( - f"""\ - We expect R1 and R2 of the same sample to have the same filename except for R1/R2. 
- This has been checked by replacing "R1" with "R2" in the first filename and comparing it to the second filename. - If you believe this check shouldn't have failed on your filenames, please report an issue on GitHub! - - Files involved: - - {r1} - - {r2} - """ - ) - ) - r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") - r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") - -run( - # fmt: off - [ - "cellranger", "count", - "--id", "${prefix}", - "--fastqs", str(fastq_all), - "--transcriptome", "${reference.name}", - "--localcores", "${task.cpus}", - "--localmem", "${task.memory.toGiga()}", - *shlex.split("""${args}""") - ], - # fmt: on - check=True, -) - -# Output version information -version = run( - ["cellranger", "-V"], - text=True, - check=True, - capture_output=True, -).stdout.replace("cellranger cellranger-", "") - -# alas, no `pyyaml` pre-installed in the cellranger container -with open("versions.yml", "w") as f: - f.write('"${task.process}":\\n') - f.write(f' cellranger: "{version}"\\n') From c0bad15494221af9c3c58cb05dccc8c66e012996 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 2023 18:17:10 +0100 Subject: [PATCH 61/66] Remove CELLRANGERARC_GENERATECONFIG module --- conf/modules.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8a8c7b0d..b6e4e8c8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -93,13 +93,6 @@ if(params.aligner == "cellrangerarc") { ] ext.args = "--attribute=gene_biotype:protein_coding --attribute=gene_biotype:lncRNA --attribute=gene_biotype:pseudogene" } - withName: CELLRANGERARC_GENERATECONFIG { - publishDir = [ - path: "${params.outdir}/${params.aligner}/config", - mode: params.publish_dir_mode - ] - ext.args = "--add none" - } withName: CELLRANGERARC_MKREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkref", From 83830142b4ec2f95bb6a5fd16a78f9755456d8c8 Mon Sep 17 00:00:00 2001 From: heylf Date: Wed, 20 Dec 
2023 18:17:38 +0100 Subject: [PATCH 62/66] Wirte python script into count module --- modules/local/cellrangerarc/count/main.nf | 33 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/local/cellrangerarc/count/main.nf index dbba4815..2bf0193a 100644 --- a/modules/local/cellrangerarc/count/main.nf +++ b/modules/local/cellrangerarc/count/main.nf @@ -24,18 +24,37 @@ process CELLRANGERARC_COUNT { script: def args = task.ext.args ?: '' def reference_name = reference.name - - //def multi_meta_info = multi_meta.collate(2).transpose() def sample_types = sample_type.join(",") def sample_names = sub_sample.join(",") def lib_csv = meta.id + "_lib.csv" """ - generate_lib_csv.py \\ - --sample_types $sample_types \\ - --sample_names $sample_names \\ - --fastq_folder \$(readlink -f fastqs)\\ - --out $lib_csv + fastq_folder=\$(readlink -f fastqs) + + python3 < Date: Wed, 20 Dec 2023 18:17:56 +0100 Subject: [PATCH 63/66] Write python script into mkref module --- modules/local/cellrangerarc/mkref/main.nf | 45 +++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index 88029ebd..c8cc3d2b 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -36,17 +36,39 @@ process CELLRANGERARC_MKREF { } """ - if [ $reference_config == [] ]; then - generate_config.py \\ - --fasta $fast_name \\ - --gtf $gtf_name \\ - --motifs $motifs_name \\ - $args - else - if [ ! 
-f config ]; then - mv -i $reference_config config - fi - fi + + python3 < Date: Wed, 20 Dec 2023 18:20:00 +0100 Subject: [PATCH 64/66] eclint mkref main --- modules/local/cellrangerarc/mkref/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/local/cellrangerarc/mkref/main.nf index c8cc3d2b..079776ba 100644 --- a/modules/local/cellrangerarc/mkref/main.nf +++ b/modules/local/cellrangerarc/mkref/main.nf @@ -41,7 +41,7 @@ process CELLRANGERARC_MKREF { from os.path import exists import shutil - + fasta = "${fast_name}" gtf = "${gtf_name}" motifs = "${motifs_name}" @@ -81,4 +81,3 @@ process CELLRANGERARC_MKREF { END_VERSIONS """ } - From 46a076381f92ce6ef5358293e46067b37d8409a2 Mon Sep 17 00:00:00 2001 From: heylf Date: Thu, 21 Dec 2023 14:02:48 +0100 Subject: [PATCH 65/66] Adding nf-core cellrangerarc and remove local cellrangearc --- modules/{local => nf-core}/cellrangerarc/Dockerfile | 0 modules/{local => nf-core}/cellrangerarc/README.md | 0 modules/{local => nf-core}/cellrangerarc/count/main.nf | 0 modules/{local => nf-core}/cellrangerarc/count/meta.yml | 3 ++- modules/{local => nf-core}/cellrangerarc/mkgtf/main.nf | 0 modules/{local => nf-core}/cellrangerarc/mkgtf/meta.yml | 3 ++- modules/{local => nf-core}/cellrangerarc/mkref/main.nf | 0 modules/{local => nf-core}/cellrangerarc/mkref/meta.yml | 3 ++- 8 files changed, 6 insertions(+), 3 deletions(-) rename modules/{local => nf-core}/cellrangerarc/Dockerfile (100%) rename modules/{local => nf-core}/cellrangerarc/README.md (100%) rename modules/{local => nf-core}/cellrangerarc/count/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/count/meta.yml (97%) rename modules/{local => nf-core}/cellrangerarc/mkgtf/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/mkgtf/meta.yml (96%) rename modules/{local => nf-core}/cellrangerarc/mkref/main.nf (100%) rename modules/{local => nf-core}/cellrangerarc/mkref/meta.yml (97%) diff 
--git a/modules/local/cellrangerarc/Dockerfile b/modules/nf-core/cellrangerarc/Dockerfile similarity index 100% rename from modules/local/cellrangerarc/Dockerfile rename to modules/nf-core/cellrangerarc/Dockerfile diff --git a/modules/local/cellrangerarc/README.md b/modules/nf-core/cellrangerarc/README.md similarity index 100% rename from modules/local/cellrangerarc/README.md rename to modules/nf-core/cellrangerarc/README.md diff --git a/modules/local/cellrangerarc/count/main.nf b/modules/nf-core/cellrangerarc/count/main.nf similarity index 100% rename from modules/local/cellrangerarc/count/main.nf rename to modules/nf-core/cellrangerarc/count/main.nf diff --git a/modules/local/cellrangerarc/count/meta.yml b/modules/nf-core/cellrangerarc/count/meta.yml similarity index 97% rename from modules/local/cellrangerarc/count/meta.yml rename to modules/nf-core/cellrangerarc/count/meta.yml index f69bc1fa..919de4dc 100644 --- a/modules/local/cellrangerarc/count/meta.yml +++ b/modules/nf-core/cellrangerarc/count/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - meta: type: map diff --git a/modules/local/cellrangerarc/mkgtf/main.nf b/modules/nf-core/cellrangerarc/mkgtf/main.nf similarity index 100% rename from modules/local/cellrangerarc/mkgtf/main.nf rename to modules/nf-core/cellrangerarc/mkgtf/main.nf diff --git a/modules/local/cellrangerarc/mkgtf/meta.yml b/modules/nf-core/cellrangerarc/mkgtf/meta.yml similarity index 96% rename from modules/local/cellrangerarc/mkgtf/meta.yml rename to modules/nf-core/cellrangerarc/mkgtf/meta.yml index 
7ce211eb..923c3e18 100644 --- a/modules/local/cellrangerarc/mkgtf/meta.yml +++ b/modules/nf-core/cellrangerarc/mkgtf/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - gtf: type: file diff --git a/modules/local/cellrangerarc/mkref/main.nf b/modules/nf-core/cellrangerarc/mkref/main.nf similarity index 100% rename from modules/local/cellrangerarc/mkref/main.nf rename to modules/nf-core/cellrangerarc/mkref/main.nf diff --git a/modules/local/cellrangerarc/mkref/meta.yml b/modules/nf-core/cellrangerarc/mkref/meta.yml similarity index 97% rename from modules/local/cellrangerarc/mkref/meta.yml rename to modules/nf-core/cellrangerarc/mkref/meta.yml index 1eac878a..cf98e60c 100644 --- a/modules/local/cellrangerarc/mkref/meta.yml +++ b/modules/nf-core/cellrangerarc/mkref/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc documentation: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc tool_dev_url: https://support.10xgenomics.com/single-cell-multiome-atac-gex/software/pipelines/latest/what-is-cell-ranger-arc - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - fasta: type: file From f0e34c13e12cfae7f1017321d6ae9aef4ca648cb Mon Sep 17 00:00:00 2001 From: heylf Date: Thu, 21 Dec 2023 14:27:42 +0100 Subject: [PATCH 66/66] Changes from local to nf-core cellrangerarc --- subworkflows/local/align_cellrangerarc.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index b33cd61b..3232a020 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -2,9 +2,9 @@ * Alignment with Cellranger Arc */ -include {CELLRANGERARC_MKGTF} from "../../modules/local/cellrangerarc/mkgtf/main.nf" -include {CELLRANGERARC_MKREF} from "../../modules/local/cellrangerarc/mkref/main.nf" -include {CELLRANGERARC_COUNT} from "../../modules/local/cellrangerarc/count/main.nf" +include {CELLRANGERARC_MKGTF} from "../../modules/nf-core/cellrangerarc/mkgtf/main.nf" +include {CELLRANGERARC_MKREF} from "../../modules/nf-core/cellrangerarc/mkref/main.nf" +include {CELLRANGERARC_COUNT} from "../../modules/nf-core/cellrangerarc/count/main.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGERARC_ALIGN {