Skip to content

Commit

Permalink
Merge branch 'locotact_p18' of github.com:snakemake-workflows/rna-seq…
Browse files Browse the repository at this point in the history
…-kallisto-sleuth into locotact_p18
  • Loading branch information
Addimator committed Jul 8, 2024
2 parents f6cc352 + baac24e commit 0e56286
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 29 deletions.
33 changes: 9 additions & 24 deletions workflow/rules/datavzrd.smk
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,18 @@ rule postprocess_go_enrichment:


# Postprocessing Differential Expression Data
# Does not work for level = genes-aggregated since it does not contain beta values.
rule postprocess_diffexp:
input:
genes_representative="results/tables/diffexp/{model}.genes-representative.diffexp.tsv",
genes_representative="results/tables/diffexp/{model}.{level}.diffexp.tsv",
output:
"results/tables/diffexp/{model}.genes-representative.diffexp_postprocessed.tsv",
"results/tables/diffexp/{model}.{level}.diffexp_postprocessed.tsv",
conda:
"../envs/pandas.yaml"
params:
model=get_model,
log:
"logs/yte/postprocess_diffexp/{model}.log",
script:
"../scripts/postprocess_diffexp.py"


# Postprocessing Differential Expression Data
rule postprocess_transcripts:
input:
"results/tables/diffexp/{model}.transcripts.diffexp.tsv",
output:
"results/tables/diffexp/{model}.transcripts.diffexp_postprocessed.tsv",
conda:
"../envs/pandas.yaml"
params:
model=get_model,
log:
"logs/yte/postprocess_diffexp/{model}.log",
"logs/yte/postprocess_diffexp/{model}/{level}.log",
script:
"../scripts/postprocess_diffexp.py"

Expand All @@ -49,7 +34,7 @@ rule postprocess_transcripts:
rule postprocess_logcount_matrix:
input:
logcount="results/tables/logcount-matrix/{model}.logcount-matrix.tsv",
genes_representative="results/tables/diffexp/{model}.genes-representative.diffexp_postprocessed.tsv",
diffexp="results/tables/diffexp/{model}.transcripts.diffexp_postprocessed.tsv",
output:
"results/tables/logcount-matrix/{model}.logcount-matrix_postprocessed.tsv",
conda:
Expand Down Expand Up @@ -86,7 +71,7 @@ rule spia_datavzrd:
offer_excel=lookup(within=config, dpath="report/offer_excel", default=False),
pathway_db=config["enrichment"]["spia"]["pathway_database"],
wrapper:
"v3.13.2/utils/datavzrd"
"v3.13.4/utils/datavzrd"


# Generating Differential Expression Datavzrd Report
Expand Down Expand Up @@ -116,7 +101,7 @@ rule diffexp_datavzrd:
offer_excel=lookup(within=config, dpath="report/offer_excel", default=False),
samples=get_model_samples,
wrapper:
"v3.13.2/utils/datavzrd"
"v3.13.4/utils/datavzrd"


# Generating GO Enrichment Datavzrd Report
Expand Down Expand Up @@ -152,7 +137,7 @@ rule go_enrichment_datavzrd:
offer_excel=lookup(within=config, dpath="report/offer_excel", default=False),
samples=get_model_samples,
wrapper:
"v3.13.2/utils/datavzrd"
"v3.13.4/utils/datavzrd"


# Generating Meta Comparison Datavzrd Reports
Expand All @@ -178,4 +163,4 @@ rule meta_compare_datavzrd:
log:
"logs/datavzrd-report/meta_comp_{method}.{meta_comp}.log",
wrapper:
"v3.13.2/utils/datavzrd"
"v3.13.4/utils/datavzrd"
2 changes: 1 addition & 1 deletion workflow/scripts/postprocess_diffexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def sort_rows(df, primary_variable):
"""Sort DataFrame by the absolute value of signed_pi_value of primary variable in descending order."""
print(df)
df = df.reindex(
df['signed_pi_value_' + primary_variable + '+'].abs().sort_values().index)
df['signed_pi_value_' + primary_variable + '+'].abs().sort_values(ascending=False).index)
return df


Expand Down
8 changes: 4 additions & 4 deletions workflow/scripts/postprocess_logcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

# Read the TSV files
logcount_matrix = pd.read_csv(snakemake.input['logcount'], sep='\t')
genes_representative = pd.read_csv(
snakemake.input['genes_representative'], sep='\t')
diffexp = pd.read_csv(snakemake.input['diffexp'], sep='\t')

# Filter logcount_matrix to only include rows whose 'transcript' appears in the 'target_id' column of the postprocessed diffexp table
filtered_logcount_matrix = logcount_matrix[logcount_matrix['transcript'].isin(
genes_representative['target_id'])]
filtered_logcount_matrix = logcount_matrix[
logcount_matrix['transcript'].isin(diffexp['target_id'])
]

# Save the filtered dataframe to a new TSV file
filtered_logcount_matrix.to_csv(snakemake.output[0], sep='\t', index=False)

0 comments on commit 0e56286

Please sign in to comment.