diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py index cf27f13c..24efe7e0 100755 --- a/modules/local/templates/mtx_to_h5ad_kallisto.py +++ b/modules/local/templates/mtx_to_h5ad_kallisto.py @@ -13,14 +13,16 @@ import glob def _mtx_to_adata( - input: str, - sample: str, - t2g: str + matrix: str, + barcodes: str, + features: str, + t2g: str, + sample: str ): - adata = sc.read_mtx(glob.glob(f"{input}/*.mtx")[0]) - adata.obs_names = pd.read_csv(glob.glob(f"{input}/*.barcodes.txt")[0], header=None, sep="\\t")[0].values - adata.var_names = pd.read_csv(glob.glob(f"{input}/*.genes.txt")[0], header=None, sep="\\t")[0].values + adata = sc.read_mtx(matrix) + adata.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values + adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values adata.obs["sample"] = sample txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2]) @@ -60,15 +62,17 @@ def dump_versions(): f.write(format_yaml_like(versions)) def input_to_adata( - input_data: str, + matrix: str, + barcodes: str, + features: str, + t2g: str, output: str, sample: str, - t2g: str ): - print(f"Reading in {input_data}") + print(f"Reading in {matrix}") # open main data - adata = _mtx_to_adata(input_data, sample, t2g) + adata = _mtx_to_adata(matrix=matrix, barcodes=barcodes, features=features, sample=sample, t2g=t2g) # standard format # index are gene IDs and symbols are a column @@ -82,11 +86,6 @@ def input_to_adata( adata.write_h5ad(f"{output}", compression="gzip") print(f"Wrote h5ad file to {output}") - # dump versions - dump_versions() - - return adata - # # Run main script # @@ -95,9 +94,26 @@ def input_to_adata( os.makedirs("${meta.id}", exist_ok=True) # input_type comes from NF module -adata = input_to_adata( - input_data="${inputs}", - output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad", - sample="${meta.id}", - t2g="${txp2gene}" -) +if "${params.kb_workflow}" == "standard": + input_to_adata( + matrix=glob.glob("${inputs}/*.mtx")[0], + barcodes=glob.glob("${inputs}/*.barcodes.txt")[0], + features=glob.glob("${inputs}/*.genes.txt")[0], + output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}", + t2g="${txp2gene}" + ) + +else: + for type in ['spliced', 'unspliced']: + input_to_adata( + matrix=glob.glob("${inputs}/" + f"{type}*.mtx")[0], + barcodes=glob.glob("${inputs}/" + f"{type}*.barcodes.txt")[0], + features=glob.glob("${inputs}/" + f"{type}*.genes.txt")[0], + output="${meta.id}/${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad", + sample="${meta.id}", + t2g="${txp2gene}" + ) + +# dump versions +dump_versions() diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf index 7171c3f3..2ccacc26 100644 --- a/subworkflows/local/emptydrops_removal.nf +++ b/subworkflows/local/emptydrops_removal.nf @@ -16,7 +16,7 @@ workflow EMPTY_DROPLET_REMOVAL { .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) .map { meta, h5ad, csv -> def meta_clone = meta.clone() - meta_clone.input_type = 'emptydrops_filter' + meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter') [ meta_clone, h5ad, csv ] } diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 3ecd1b36..55bd68dc 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -25,7 +25,23 @@ workflow MTX_CONVERSION { star_index ) ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first()) - ch_h5ads = MTX_TO_H5AD.out.h5ad + + // fix channel size when kallisto non-standard workflow + if (params.aligner == 'kallisto' && !(params.kb_workflow == 'standard')) { + ch_h5ads = + MTX_TO_H5AD.out.h5ad + .transpose() + .map { meta, h5ad -> + def meta_clone = meta.clone() + def spc_prefix = h5ad.toString().contains('unspliced') ? 'un' : '' + + meta_clone["input_type"] = "${meta.input_type}_${spc_prefix}spliced" + + [ meta_clone, h5ad ] + } + } else { + ch_h5ads = MTX_TO_H5AD.out.h5ad + } // // SUBWORKFLOW: Run cellbender emptydrops filter @@ -34,7 +50,7 @@ workflow MTX_CONVERSION { // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it EMPTY_DROPLET_REMOVAL ( - MTX_TO_H5AD.out.h5ad.filter { meta, mtx_files -> meta.input_type == 'raw' } + ch_h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') } ) ch_h5ads = ch_h5ads.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )