Skip to content

Commit

Permalink
Account for non-standard kallisto workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
zxBIB Almeida,Felipe (GCBDS) EXTERNAL committed Oct 4, 2024
1 parent d0d1f81 commit aa99cd1
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 24 deletions.
58 changes: 37 additions & 21 deletions modules/local/templates/mtx_to_h5ad_kallisto.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
import glob

def _mtx_to_adata(
input: str,
sample: str,
t2g: str
matrix: str,
barcodes: str,
features: str,
t2g: str,
sample: str
):

adata = sc.read_mtx(glob.glob(f"{input}/*.mtx")[0])
adata.obs_names = pd.read_csv(glob.glob(f"{input}/*.barcodes.txt")[0], header=None, sep="\\t")[0].values
adata.var_names = pd.read_csv(glob.glob(f"{input}/*.genes.txt")[0], header=None, sep="\\t")[0].values
adata = sc.read_mtx(matrix)
adata.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values
adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values
adata.obs["sample"] = sample

txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
Expand Down Expand Up @@ -60,15 +62,17 @@ def dump_versions():
f.write(format_yaml_like(versions))

def input_to_adata(
input_data: str,
matrix: str,
barcodes: str,
features: str,
t2g: str,
output: str,
sample: str,
t2g: str
):
print(f"Reading in {input_data}")
print(f"Reading in {matrix}")

# open main data
adata = _mtx_to_adata(input_data, sample, t2g)
adata = _mtx_to_adata(matrix=matrix, barcodes=barcodes, features=features, sample=sample, t2g=t2g)

# standard format
# index are gene IDs and symbols are a column
Expand All @@ -82,11 +86,6 @@ def input_to_adata(
adata.write_h5ad(f"{output}", compression="gzip")
print(f"Wrote h5ad file to {output}")

# dump versions
dump_versions()

return adata

#
# Run main script
#
Expand All @@ -95,9 +94,26 @@ def input_to_adata(
os.makedirs("${meta.id}", exist_ok=True)

# input_type comes from NF module
adata = input_to_adata(
input_data="${inputs}",
output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
sample="${meta.id}",
t2g="${txp2gene}"
)
if "${params.kb_workflow}" == "standard":
input_to_adata(
matrix=glob.glob("${inputs}/*.mtx")[0],
barcodes=glob.glob("${inputs}/*.barcodes.txt")[0],
features=glob.glob("${inputs}/*.genes.txt")[0],
output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
sample="${meta.id}",
t2g="${txp2gene}"
)

else:
for type in ['spliced', 'unspliced']:
input_to_adata(
matrix=glob.glob("${inputs}/" + f"{type}*.mtx")[0],
barcodes=glob.glob("${inputs}/" + f"{type}*.barcodes.txt")[0],
features=glob.glob("${inputs}/" + f"{type}*.genes.txt")[0],
output="${meta.id}/${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad",
sample="${meta.id}",
t2g="${txp2gene}"
)

# dump versions
dump_versions()
2 changes: 1 addition & 1 deletion subworkflows/local/emptydrops_removal.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow EMPTY_DROPLET_REMOVAL {
.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
.map { meta, h5ad, csv ->
def meta_clone = meta.clone()
meta_clone.input_type = 'emptydrops_filter'
meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter')

[ meta_clone, h5ad, csv ]
}
Expand Down
20 changes: 18 additions & 2 deletions subworkflows/local/mtx_conversion.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,23 @@ workflow MTX_CONVERSION {
star_index
)
ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
ch_h5ads = MTX_TO_H5AD.out.h5ad

// fix channel size when kallisto non-standard workflow
if (params.aligner == 'kallisto' && !(params.kb_workflow == 'standard')) {
ch_h5ads =
MTX_TO_H5AD.out.h5ad
.transpose()
.map { meta, h5ad ->
def meta_clone = meta.clone()
def spc_prefix = h5ad.toString().contains('unspliced') ? 'un' : ''

meta_clone["input_type"] = "${meta.input_type}_${spc_prefix}spliced"

[ meta_clone, h5ad ]
}
} else {
ch_h5ads = MTX_TO_H5AD.out.h5ad
}

//
// SUBWORKFLOW: Run cellbender emptydrops filter
Expand All @@ -34,7 +50,7 @@ workflow MTX_CONVERSION {

// emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
EMPTY_DROPLET_REMOVAL (
MTX_TO_H5AD.out.h5ad.filter { meta, mtx_files -> meta.input_type == 'raw' }
ch_h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') }
)
ch_h5ads = ch_h5ads.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )

Expand Down

0 comments on commit aa99cd1

Please sign in to comment.