Skip to content

Commit

Permalink
sort diffexp by signed_pi_value prefix only
Browse files Browse the repository at this point in the history
  • Loading branch information
Addimator committed Jul 8, 2024
1 parent 9ebfe31 commit fe53c1f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 39 deletions.
28 changes: 14 additions & 14 deletions workflow/scripts/postprocess_diffexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,28 @@ def sort_columns(df, matching_columns):
return df[other_columns + b_column_order]


def sort_rows(df, primary_variable=None):
    """Sort rows by descending absolute signed pi value of the primary variable.

    The sort column is the single column whose name starts with
    ``'signed_pi_value_' + primary_variable``.

    Args:
        df: DataFrame containing the signed pi value column.
        primary_variable: Name of the primary variable. When omitted, it is
            read from ``snakemake.params['model']['primary_variable']``
            (``snakemake`` is a global that is not defined in this function).

    Returns:
        A reordered copy of ``df`` in which the rows with the largest
        absolute signed pi value come first. ``df`` itself is not modified.

    Raises:
        ValueError: If the number of columns starting with the expected
            prefix is not exactly one.
    """
    if primary_variable is None:
        primary_variable = snakemake.params['model']['primary_variable']

    signed_pi_start = 'signed_pi_value_' + primary_variable
    columns_with_prefix = [
        col for col in df.columns if col.startswith(signed_pi_start)]

    # A prefix match is ambiguous with several hits and useless with none,
    # so fail loudly instead of silently picking a column.
    if len(columns_with_prefix) != 1:
        raise ValueError(
            f"Expected exactly one column starting with '{signed_pi_start}', found {len(columns_with_prefix)}")

    signed_pi_col = columns_with_prefix[0]

    # Rank by magnitude only; the sign of the pi value is ignored for ordering.
    sorted_index = df[signed_pi_col].abs().sort_values(ascending=False).index
    return df.reindex(sorted_index)


# Load the tab-separated table given as the first Snakemake input.
df = pd.read_csv(snakemake.input[0], sep='\t')
df, matching_columns = process_columns(df)
df = sort_columns(df, matching_columns)
# sort_rows resolves the primary variable from snakemake.params on its own,
# so only the DataFrame is passed.
df = sort_rows(df)
# Drop rows in which every one of the matching columns is missing.
df = df.dropna(subset=matching_columns, how='all')

# Write the result as a tab-separated file without the index column.
df.to_csv(snakemake.output[0], sep='\t', index=False)
25 changes: 0 additions & 25 deletions workflow/scripts/postprocess_go_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,8 @@ def sort_group(group):
df_enr = pd.read_csv(snakemake.input["enrichment"], sep='\t')
df_sig = pd.read_csv(snakemake.input["significant_terms"], sep='\t')

# Only keep data if GO term exists in both tables
# common_ids = df_sig[df_sig['GO'].isin(df_enr['GO'])]['GO']
# df_enr_filtered = df_enr[df_enr['GO'].isin(common_ids)]
# df_sig_filtered = df_sig[df_sig['GO'].isin(common_ids)]

# # Add study items from significant terms to dataset
# df_enr_filtered['study_items_sig_terms'] = df_enr_filtered['GO'].map(
# df_sig_filtered.set_index('GO')['study_items'])

# # Sort and calculate enrichment ratios
# df_enr_filtered_sorted = df_enr_filtered.groupby(
# 'class', group_keys=False).apply(sort_group)

# if not df_enr_filtered_sorted.empty:
# df_enr_filtered_sorted['enrichment'] = df_enr_filtered_sorted.apply(
# lambda row: calculate_enrichment(row['ratio_in_study'], row['ratio_in_pop']), axis=1)
# else:
# df_enr_filtered_sorted['enrichment'] = None

# # Save the result to a file
# df_enr_filtered_sorted.to_csv(snakemake.output[0], sep='\t', index=False)


# Merge the two dataframes on the 'GO' column to keep only common GO terms
df_merged = df_sig.join(df_enr.set_index('GO'), on='GO', rsuffix='_enr')
print(df_merged, df_merged.columns)

# Add study items from significant terms to the merged dataset
df_merged['study_items_sig_terms'] = df_merged['study_items']

Expand Down

0 comments on commit fe53c1f

Please sign in to comment.