From 098ab550c244c6802cc710fd302bde6278bc4536 Mon Sep 17 00:00:00 2001 From: JohannesKersting Date: Thu, 21 Mar 2024 11:50:33 +0100 Subject: [PATCH 1/5] use has_constant='add' for add_constant() --- src/dysregnet/functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dysregnet/functions.py b/src/dysregnet/functions.py index 29b5543..7fc1219 100644 --- a/src/dysregnet/functions.py +++ b/src/dysregnet/functions.py @@ -80,7 +80,7 @@ def dyregnet_model(data): # prepare control for fitting model x_train = control[ [edge[0]] + covariate_name ] - x_train = sm.add_constant(x_train) # add bias + x_train = sm.add_constant(x_train, has_constant='add') # add bias y_train = control[edge[1]].values # fit the model @@ -89,13 +89,12 @@ def dyregnet_model(data): model_stats[edge] = [results.rsquared] + list(results.params.values) + list(results.pvalues.values) - # get residuals of control resid_control = results.predict(x_train) - y_train # test data (case or condition) x_test = case[ [edge[0]]+ covariate_name ] - x_test = sm.add_constant(x_test) # add bias + x_test = sm.add_constant(x_test, has_constant='add') # add bias y_test = case[edge[1]].values From d9694b110686b3fb24e260e3e7a2ed2b34626397 Mon Sep 17 00:00:00 2001 From: JohannesKersting Date: Thu, 21 Mar 2024 12:56:50 +0100 Subject: [PATCH 2/5] Set default value of direction_condition to False --- src/dysregnet/dysregnet.py | 77 +++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/src/dysregnet/dysregnet.py b/src/dysregnet/dysregnet.py index 3459a73..c554988 100644 --- a/src/dysregnet/dysregnet.py +++ b/src/dysregnet/dysregnet.py @@ -21,62 +21,61 @@ def __init__(self, CatCov=[], ConCov=[], zscoring=False, - bonferroni_alpha= 1e-2, + bonferroni_alpha=1e-2, R2_threshold=None, normaltest=False, normaltest_alpha=1e-3, - direction_condition=True): - + direction_condition=False): """ Raw data processing for further analysis - expression_data: a 
pandas DataFrame (rows=samples, columns=genes) - Gene expression matrix with the format: patients as rows (first column - patients/samples ids), and genes as columns. - Patients/samples ids must match the ones in meta DataFrame. - Gene names or ids must match the ones in GRN DataFrame - - GRN: a pandas DataFrame - Gene Regulatory Network (GRN) with two columns in the following order ['TF', 'target']. - - meta: a pandas DataFrame - Meta data: First column should contain patients/samples ids and other column for covariates/condition. - Please make sure to have condition column in the meta DataFrame with 0 as control and 1 as the condition. - Specify the condition Column name in "conCol". - Optionally : - Specify categorical variable columns in the parameter CatCov. - Specify continuous variable columns in the parameter ConCov. + expression_data: pandas DataFrame (rows=samples, columns=genes) + Gene expression matrix with the format: patients as rows (first column - patients/samples ids), and genes as columns. + Patients/sample IDs must match the ones in the meta DataFrame. + Gene names or IDs must match the ones in the GRN DataFrame. - conCol: str, default=='condition' - Column name for the condition in the meta data. Should be provided in case of desing=="two". + GRN: pandas DataFrame + Gene Regulatory Network (GRN) with two columns in the following order ['TF', 'target']. + meta: pandas DataFrame + The first column has to contain patients/sample IDs. + Further columns can be used to define covariates and the sample condition. + Please make sure to have a condition column in the meta DataFrame with 0 indicating control and 1 indicating case samples. + Specify the condition Column name in 'conCol'. + Optionally : + Specify categorical variable columns in the parameter CatCov. + Specify continuous variable columns in the parameter ConCov. - CatCov: List of strings. - List of categorical variable names. They should match the name of their columns in meta Dataframe. 
+ conCol: str, default: 'condition' + Column name for the condition in the metadata. - ConCov: List of strings. - List of continuous covariates. They should match the name of their columns in meta Dataframe. + CatCov: List of strings, default: [] + List of categorical variable names. They should match the name of their columns in the meta Dataframe. + ConCov: List of strings, default: [] + List of continuous covariates. They should match the name of their columns in the meta Dataframe. - zscoring: boolean, default: False - zscoring of expression data (if needed). + zscoring: bool, default: False + If True, DysRegNet will scale the expression of each gene and all continuous confounders based on their mean and standard deviation in the control samples. + This can make the obtained model coefficients more interpretable. - bonferroni_alpha: Float - P value threshold for multiple testing correction + bonferroni_alpha: float, default: 0.01 + P value threshold for multiple testing correction. - normaltest: Bool - If True. Run a normality test for residuals "scipy.stats.normaltest". If residuals are not normal, the edge will not be considered in the analysis. + normaltest: bool, default: False + If True, DysRegNet runs a normality test for the control residuals with "scipy.stats.normaltest". + If residuals do not follow a normal distribution, the edge will not be considered in the analysis. - normaltest_alpha: Float - normaltest p value threshold. + normaltest_alpha: float, default: 0.001 + P-value threshold for the normal test. - R2_threshold: float from 0 to 1 (optional) + R2_threshold: float, default: None + Coefficient of determination threshold for every edge in the GRN. If the R2 is less than the threshold, the edge will not be considered in the analysis. - Coefficient of determination threshold for every edge in GRN. If the R2 is less that threshold, the edge will not be considered in the analysis. 
- - direction_condition: Bool - If True: only include dysregulation that are relevalant for the interactions: down regulation of an activation or up regulation of a supressions. Please check the paper for more details. - - """ + direction_condition: boolean, default: False + If True, DysRegNet will only consider case samples with positive residuals (target gene overexpressed) for models with a negative TF coefficient + as potentially dysregulated. Similarly, for positive TF coefficients, only case samples with negative residuals are considered. + """ From ee6e9f190c189558d161dc5d7016c0356a588ab9 Mon Sep 17 00:00:00 2001 From: JohannesKersting Date: Thu, 21 Mar 2024 16:39:42 +0100 Subject: [PATCH 3/5] output z-scores will keep their sign, flipped sign of residuals --- src/dysregnet/functions.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/dysregnet/functions.py b/src/dysregnet/functions.py index 7fc1219..812be1d 100644 --- a/src/dysregnet/functions.py +++ b/src/dysregnet/functions.py @@ -90,7 +90,7 @@ def dyregnet_model(data): model_stats[edge] = [results.rsquared] + list(results.params.values) + list(results.pvalues.values) # get residuals of control - resid_control = results.predict(x_train) - y_train + resid_control = y_train - results.predict(x_train) # test data (case or condition) x_test = case[ [edge[0]]+ covariate_name ] @@ -99,7 +99,7 @@ def dyregnet_model(data): # define residue for cases - resid_case = results.predict(x_test) - y_test + resid_case = y_test - results.predict(x_test) # condition of direction @@ -112,7 +112,7 @@ def dyregnet_model(data): sides = 2 if data.direction_condition: - cond = ( direction * resid_case ) > 0 + cond = ( direction * resid_case ) < 0 # if direction_condition is true only calculate one sided p value sides = 1 @@ -150,9 +150,6 @@ def dyregnet_model(data): pvalues= pvalues < data.bonferroni_alpha - # add direction to z scores - zscore=abs(zscore) * direction - # direction condition and a 
p_value valid= cond * pvalues From aa42fe764af9b4728a71da59b9b1f28da0a867a5 Mon Sep 17 00:00:00 2001 From: JohannesKersting Date: Tue, 16 Apr 2024 18:18:15 +0200 Subject: [PATCH 4/5] scale expression data and continuous confounders based on control samples --- src/dysregnet/functions.py | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/dysregnet/functions.py b/src/dysregnet/functions.py index 812be1d..a8c848a 100644 --- a/src/dysregnet/functions.py +++ b/src/dysregnet/functions.py @@ -1,6 +1,6 @@ import pandas as pd from scipy.stats import zscore -from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import StandardScaler from tqdm import tqdm import numpy as np from scipy import stats @@ -13,12 +13,12 @@ def process_data(data): # process covariates and design matrix - all_covariates= data.CatCov + data.ConCov + all_covariates = data.CatCov + data.ConCov if not all_covariates or len(data.meta)==1: # No covariate provided - print('You did not input any covariates in CatCov or ConCov parameters, proceed without them.') + print('You did not input any covariates in CatCov or ConCov parameters, proceeding without them.') cov_df=None else: @@ -28,22 +28,46 @@ def process_data(data): if not set(all_covariates).issubset(data.meta.columns): raise ValueError("Invalid elements in CatCov or ConCov. Please check that all covariates names (continuous or categorials) are in the meta DataFrame. 
") - cov_df=data.meta[all_covariates] + cov_df = data.meta[all_covariates] # process categorial covariate # drop_first is important to avoid multicollinear - cov_df=pd.get_dummies(cov_df, columns=data.CatCov, drop_first=True, dtype=int) + cov_df = pd.get_dummies(cov_df, columns=data.CatCov, drop_first=True, dtype=int) - # z scoring of expression - if data.zscoring: expr=data.expression_data.apply(zscore) - else: expr=data.expression_data + # z scoring + if data.zscoring: + + # expression data + # fit a scaler base on the control samples + scaler = StandardScaler() + scaler.fit(data.expression_data[data.meta[data.conCol]==0]) + + # scale the expression data + expr = pd.DataFrame( + scaler.transform(data.expression_data), + columns=data.expression_data.columns, + index=data.expression_data.index + ) + + # continuous confounders + if cov_df is not None and len(data.ConCov)>0: + + # fit a scaler base on the control samples + scaler = StandardScaler() + scaler.fit(data.meta.loc[data.meta[data.conCol]==0,data.ConCov]) + + # scale the continuous confounders data + cov_df[data.ConCov] = scaler.transform(data.meta[data.ConCov]) + + else: + expr = data.expression_data #get control and case sample - control= data.meta[ data.meta[data.conCol]==0 ].index.values.tolist() - case=data.meta[ data.meta[data.conCol]==1 ].index.values.tolist() + control = data.meta[ data.meta[data.conCol]==0 ].index.values.tolist() + case = data.meta[ data.meta[data.conCol]==1 ].index.values.tolist() return cov_df, expr, control, case From 94d0c34117ddeb7eff6cc0e6327cde4d15f2d734 Mon Sep 17 00:00:00 2001 From: JohannesKersting Date: Thu, 18 Apr 2024 09:15:17 +0200 Subject: [PATCH 5/5] preparing for release 0.1.0 --- README.md | 84 +-- dev_tests.ipynb | 1148 ++---------------------------------- setup.py | 2 +- src/dysregnet/dysregnet.py | 6 +- test.ipynb | 18 +- 5 files changed, 81 insertions(+), 1177 deletions(-) diff --git a/README.md b/README.md index c795c7a..3056de2 100644 --- a/README.md +++ 
b/README.md @@ -1,103 +1,75 @@ [![PyPI version](https://badge.fury.io/py/dysregnet.svg)](https://badge.fury.io/py/dysregnet) # DysRegNet package - - DysRegNet, is a method for inferring patient-specific regulatory alterations (dysregulations) from gene expression profiles. DysRegNet uses linear models to account for confounders and residual-derived z-scores to assess significance. - - ## Installation To install the package from PyPI please run: - -`pip install dysregnet` - +```bash +pip install dysregnet +``` or you can install it from git: - -`git clone https://github.com/biomedbigdata/DysRegNet_package.git && cd DysRegNet_package` - -`python setup.py install` - - +```bash +git clone https://github.com/biomedbigdata/DysRegNet_package.git && cd DysRegNet_package +python setup.py install +``` ## Data input +The inputs of the package are the following Pandas DataFrame objects: -The inputs of the package are the following Pandas DataFrame object: - - -- expression_data - Gene expression matrix with the format: patients as rows (first column - patients/samples ids), and genes as columns. +- expression_data - Gene expression matrix in the format: patients as rows (first column - patients/samples ids), and genes as columns. - GRN - Gene Regulatory Network (GRN) with two columns in the following order ['TF', 'target']. - meta - Metadata with the first column containing patients/samples ids and other columns for the condition and the covariates. - The patients id or samples ids must be the same in the "expression_data" and "meta". Additionally, gene names or ids must match the ones in the "GRN" DataFrame. In the condition column of the meta DataFrame, the control samples should be encoded as 0 and case samples as 1. The gene regulatory network should be provided by the user. You can either use an experimental validated GRN or learn it from control samples. 
We recommend using software like [arboreto](https://github.com/aertslab/arboreto) since you can use its output directly to DysRegNet. - - - - ## Parameters - - Additionally, you can provide the following parameters: - - - conCol: Column name for the condition in the meta DataFrame. - CatCov: List of categorical variable names. They should match the name of their columns in the meta Dataframe. - ConCov: List of continuous covariates. They should match the name of their columns in the meta Dataframe. -- zscoring: Boolean, default: False. zscoring of expression data (if needed). +- zscoring: If True, DysRegNet will scale the expression of each gene and all continuous confounders based on their mean and standard deviation in the control samples. - bonferroni_alpha: P-value threshold for multiple testing correction -- normaltest: Boolean. If True, Run a normality test for residuals "scipy.stats.normaltest". If residuals are not normal, the edge will not be considered in the analysis. +- normaltest: If True, DysRegNet runs a normality test for residuals "scipy.stats.normaltest". If residuals are not normal, the edge will not be considered in the analysis. -- normaltest_alpha: p-value threshold for normaltest (if True). +- normaltest_alpha: P-value threshold for normaltest (if True). - R2_threshold: R-squared (R2) threshold from 0 to 1 (optional). If the fit is weaker, the edge will not be considered in the analysis. -- direction_condition: Boolean. If True: only include dysregulation that are relevant for the interactions (down-regulation of an activation or up-regulation of a supressions). Please check the paper for more details. +- direction_condition: If True, DysRegNet will only consider case samples with positive residuals (target gene overexpressed) for models with a negative TF coefficient as potentially dysregulated. Similarly, for positive TF coefficients, only case samples with negative residuals are considered. Please check the paper for more details. 
+The parameters are also annotated with docstrings for more details. ## Get Started - - -Please note that the functions are annotated with dockstrings for more details. - Import the package and pandas: - - ```python import dysregnet import pandas as pd ``` - - Define the confounding variables or the design matrix - ```python -# The condition column +# define condition column (0 indicates control, 1 indicates case) conCol='condition' -# categorical variable columns in meta dataframe. -# these columns will be transformed to variables for regression +# define categorical confounder columns in meta dataframe CatCov=['race','gender'] -# continuous variable columns in meta dataframe. +# define continuous confounder columns in meta dataframe. ConCov=['birth_days_to'] ``` - Run DysRegNet - ```python data=dysregnet.run(expression_data=expr, meta=meta, @@ -107,46 +79,36 @@ data=dysregnet.run(expression_data=expr, ConCov=ConCov, direction_condition=True, normaltest=True, - R2_threshold=.2 ) + R2_threshold=.2) -# results table +# get the patient-specific dysregulated networks data.get_results() -# or a binary result - +# or with binary edges data.get_results_binary() # get R2 values, coefficients, and coefficient p-values for all models/edges data.get_model_stats() - ``` -The expected run time for the installation and running the demo dataset on a "normal" desktop computer is around 3~5 minutes. - - - ## The output - The package outputs a data frame that represents patient-specific dysregulated edges. The columns represent edges, and the rows are patient IDs. -In the result table, a value of 0 means that the edge is not significantly dysregulated (different from control samples). +In the result table, a value of 0 means that the edge is not significantly dysregulated (different from control samples). 
Otherwise, the z-score is reported. The method "get_results_binary()" outputs binarized dysregulations instead of z-scores. +"get_model_stats()" outputs R2 values, coefficients, and coefficient p-values for all models/edges. ## Example A simple example for running DysRegNet: ([Notebook](https://github.com/biomedbigdata/DysRegNet_package/blob/main/test.ipynb)/[Google Colab](https://colab.research.google.com/github/biomedbigdata/DysRegNet_package/blob/main/test.ipynb)). - You will need to download the demo dataset and extract the files into test dataset/ Link for the demo dataset: https://figshare.com/ndownloader/files/35142652 - - ## Cite - "DysRegNet: Patient-specific and confounder-aware dysregulated network inference" -Olga Lazareva*, Zakaria Louadi*, Johannes Kersting, Jan Baumbach, David B. Blumenthal, Markus List. bioRxiv 2022.04.29.490015; doi: https://doi.org/10.1101/2022.04.29.490015. * equal first-authors +Johannes Kersting*, Olga Lazareva*, Zakaria Louadi*, David B. Blumenthal, Jan Baumbach, Markus List. bioRxiv 2022.04.29.490015; doi: https://doi.org/10.1101/2022.04.29.490015. * equal first-authors \ No newline at end of file diff --git a/dev_tests.ipynb b/dev_tests.ipynb index 1c16fad..ac55d9c 100644 --- a/dev_tests.ipynb +++ b/dev_tests.ipynb @@ -2,10 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 50, "id": "d03d274e-6792-4bbf-93bf-b8c7259c1d7f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -582,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 60, "id": "62b921d3-27e0-4f54-a77b-d3b35d5eedfb", "metadata": {}, "outputs": [ @@ -590,7 +599,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "14979it [00:45, 332.12it/s]\n" + "14979it [00:48, 307.28it/s]\n" ] } ], @@ -600,7 +609,8 @@ " CatCov=CatCov,\n", " ConCov=ConCov,\n", " GRN=grn,\n", - " conCol='condition')" + " conCol='condition',\n", + ")" ] }, { @@ -608,1117 +618,49 @@ "execution_count": 9, "id": "c3aede6a-7044-456e-affd-3804f54eb9d7", "metadata": {}, + "outputs": [], + "source": [ + "data_old = data.get_results()\n", + "data_binary_old = data.get_results_binary()\n", + "model_stats_old = data.get_model_stats()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "f93aab62-29c1-483f-b08c-8b7505917834", + "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
(PARP1, BRCA2)(AHR, CYP1B1)(AHR, FOS)(AHR, SOS1)(AHR, UGT1A6)(AR, AADAC)(AR, ABCA1)(AR, ABCA2)(AR, ABCF1)(AR, ABCA4)...(ZNF419, CDKN2A)(ZNF671, CDKN2A)(THAP7, CDKN2A)(FOXP2, PLAUR)(FOXP2, CNTNAP2)(ZNF653, CDKN2A)(E2F7, SP1)(ZNF417, CDKN2A)(ZNF384, CDKN2A)(ZNF384, COL1A1)
patient id
TCGA-55-7995-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.0-11.10.00.00.00.0
TCGA-69-7761-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-67-6216-010.00.00.00.00.06.90.00.00.00.0...0.00.00.00.00.0-5.30.00.00.00.0
TCGA-44-6148-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-71-8520-010.00.00.00.00.00.00.00.00.00.0...4.80.05.50.00.00.00.04.74.70.0
..................................................................
TCGA-69-7763-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-78-7150-010.00.00.00.00.05.40.00.00.00.0...5.34.15.40.00.00.00.04.95.30.0
TCGA-MP-A4TI-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.0-8.30.00.00.00.0
TCGA-44-6145-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.0-8.50.00.00.00.0
TCGA-05-4427-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.06.0-11.40.00.00.00.0
\n", - "

515 rows × 14979 columns

\n", - "
" - ], - "text/plain": [ - " (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n", - "\n", - " (AHR, UGT1A6) (AR, AADAC) (AR, ABCA1) (AR, ABCA2) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 6.9 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 5.4 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n", - "\n", - " (AR, ABCF1) (AR, ABCA4) ... (ZNF419, CDKN2A) \\\n", - "patient id ... \n", - "TCGA-55-7995-01 0.0 0.0 ... 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 ... 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 ... 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 ... 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 ... 4.8 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 ... 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 ... 5.3 \n", - "TCGA-MP-A4TI-01 0.0 0.0 ... 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 ... 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 ... 0.0 \n", - "\n", - " (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 5.5 0.0 \n", - "... ... ... ... 
\n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 4.1 5.4 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 \n", - "\n", - " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 -11.1 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 -5.3 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 \n", - "... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 -8.3 0.0 \n", - "TCGA-44-6145-01 0.0 -8.5 0.0 \n", - "TCGA-05-4427-01 6.0 -11.4 0.0 \n", - "\n", - " (ZNF417, CDKN2A) (ZNF384, CDKN2A) (ZNF384, COL1A1) \n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 4.7 4.7 0.0 \n", - "... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 4.9 5.3 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 \n", - "\n", - "[515 rows x 14979 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data.get_results()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "e719292a-4d97-426a-b1aa-c64e1f0b1837", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
R2coef_interceptcoef_TFcoef_birth_days_tocoef_gender_MALEpval_interceptpval_TFpval_birth_days_topval_gender_MALE
(PARP1, BRCA2)0.1663371.5157850.3307290.0000680.2853080.0877951.234803e-020.0565020.263960
(AHR, CYP1B1)0.0784801.125920-0.0429420.0000530.3446630.2162617.472208e-010.1474720.203034
(AHR, FOS)0.125917-0.9981600.297254-0.0000410.0469220.2597782.512678e-020.2509620.857749
(AHR, SOS1)0.545301-0.9857230.696733-0.0000400.0826820.1247166.121357e-100.1237210.661683
(AHR, UGT1A6)0.146786-0.752809-0.174668-0.000041-0.5629130.3885051.764701e-010.2391520.033036
..............................
(ZNF653, CDKN2A)0.333566-2.066818-0.020549-0.000100-0.8100590.0092358.554798e-010.0019180.000677
(E2F7, SP1)0.100177-1.7008840.162334-0.0000670.1942400.0585942.147244e-010.0621780.460722
(ZNF417, CDKN2A)0.337528-1.9906190.066682-0.000097-0.8240560.0107855.495458e-010.0021480.000572
(ZNF384, CDKN2A)0.340745-1.8830790.090320-0.000093-0.8338610.0181864.297584e-010.0040010.000505
(ZNF384, COL1A1)0.144069-0.5717880.348782-0.000025-0.0455090.5191969.296426e-030.4888150.860108
\n", - "

14979 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " R2 coef_intercept coef_TF coef_birth_days_to \\\n", - "(PARP1, BRCA2) 0.166337 1.515785 0.330729 0.000068 \n", - "(AHR, CYP1B1) 0.078480 1.125920 -0.042942 0.000053 \n", - "(AHR, FOS) 0.125917 -0.998160 0.297254 -0.000041 \n", - "(AHR, SOS1) 0.545301 -0.985723 0.696733 -0.000040 \n", - "(AHR, UGT1A6) 0.146786 -0.752809 -0.174668 -0.000041 \n", - "... ... ... ... ... \n", - "(ZNF653, CDKN2A) 0.333566 -2.066818 -0.020549 -0.000100 \n", - "(E2F7, SP1) 0.100177 -1.700884 0.162334 -0.000067 \n", - "(ZNF417, CDKN2A) 0.337528 -1.990619 0.066682 -0.000097 \n", - "(ZNF384, CDKN2A) 0.340745 -1.883079 0.090320 -0.000093 \n", - "(ZNF384, COL1A1) 0.144069 -0.571788 0.348782 -0.000025 \n", - "\n", - " coef_gender_MALE pval_intercept pval_TF \\\n", - "(PARP1, BRCA2) 0.285308 0.087795 1.234803e-02 \n", - "(AHR, CYP1B1) 0.344663 0.216261 7.472208e-01 \n", - "(AHR, FOS) 0.046922 0.259778 2.512678e-02 \n", - "(AHR, SOS1) 0.082682 0.124716 6.121357e-10 \n", - "(AHR, UGT1A6) -0.562913 0.388505 1.764701e-01 \n", - "... ... ... ... \n", - "(ZNF653, CDKN2A) -0.810059 0.009235 8.554798e-01 \n", - "(E2F7, SP1) 0.194240 0.058594 2.147244e-01 \n", - "(ZNF417, CDKN2A) -0.824056 0.010785 5.495458e-01 \n", - "(ZNF384, CDKN2A) -0.833861 0.018186 4.297584e-01 \n", - "(ZNF384, COL1A1) -0.045509 0.519196 9.296426e-03 \n", - "\n", - " pval_birth_days_to pval_gender_MALE \n", - "(PARP1, BRCA2) 0.056502 0.263960 \n", - "(AHR, CYP1B1) 0.147472 0.203034 \n", - "(AHR, FOS) 0.250962 0.857749 \n", - "(AHR, SOS1) 0.123721 0.661683 \n", - "(AHR, UGT1A6) 0.239152 0.033036 \n", - "... ... ... 
\n", - "(ZNF653, CDKN2A) 0.001918 0.000677 \n", - "(E2F7, SP1) 0.062178 0.460722 \n", - "(ZNF417, CDKN2A) 0.002148 0.000572 \n", - "(ZNF384, CDKN2A) 0.004001 0.000505 \n", - "(ZNF384, COL1A1) 0.488815 0.860108 \n", - "\n", - "[14979 rows x 9 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "0\n", + "128425\n" + ] } ], "source": [ - "data.get_model_stats()" + "print((data.get_results() != data_old).sum().sum())\n", + "print((data.get_results_binary() != data_binary_old).sum().sum())\n", + "print((data.get_model_stats() != model_stats_old).sum().sum())" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "3aacbc4e-8f13-48ef-849a-7404f45e9573", + "execution_count": 67, + "id": "a77d69fa-9401-466a-a20a-29d738649016", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
(PARP1, BRCA2)(AHR, CYP1B1)(AHR, FOS)(AHR, SOS1)(AHR, UGT1A6)(AR, AADAC)(AR, ABCA1)(AR, ABCA2)(AR, ABCF1)(AR, ABCA4)...(ZNF419, CDKN2A)(ZNF671, CDKN2A)(THAP7, CDKN2A)(FOXP2, PLAUR)(FOXP2, CNTNAP2)(ZNF653, CDKN2A)(E2F7, SP1)(ZNF417, CDKN2A)(ZNF384, CDKN2A)(ZNF384, COL1A1)
patient id
TCGA-55-7995-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
TCGA-69-7761-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-67-6216-010.00.00.00.00.01.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
TCGA-44-6148-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-71-8520-010.00.00.00.00.00.00.00.00.00.0...1.00.01.00.00.00.00.01.01.00.0
..................................................................
TCGA-69-7763-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
TCGA-78-7150-010.00.00.00.00.01.00.00.00.00.0...1.01.01.00.00.00.00.01.01.00.0
TCGA-MP-A4TI-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
TCGA-44-6145-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
TCGA-05-4427-010.00.00.00.00.00.00.00.00.00.0...0.00.00.00.01.01.00.00.00.00.0
\n", - "

515 rows × 14979 columns

\n", - "
" - ], - "text/plain": [ - " (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n", - "\n", - " (AHR, UGT1A6) (AR, AADAC) (AR, ABCA1) (AR, ABCA2) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 1.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 1.0 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n", - "\n", - " (AR, ABCF1) (AR, ABCA4) ... (ZNF419, CDKN2A) \\\n", - "patient id ... \n", - "TCGA-55-7995-01 0.0 0.0 ... 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 ... 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 ... 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 ... 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 ... 1.0 \n", - "... ... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 ... 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 ... 1.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 ... 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 ... 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 ... 0.0 \n", - "\n", - " (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 1.0 0.0 \n", - "... ... ... ... 
\n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 1.0 1.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 \n", - "\n", - " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) \\\n", - "patient id \n", - "TCGA-55-7995-01 0.0 1.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 1.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 0.0 0.0 0.0 \n", - "... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 0.0 0.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 1.0 0.0 \n", - "TCGA-44-6145-01 0.0 1.0 0.0 \n", - "TCGA-05-4427-01 1.0 1.0 0.0 \n", - "\n", - " (ZNF417, CDKN2A) (ZNF384, CDKN2A) (ZNF384, COL1A1) \n", - "patient id \n", - "TCGA-55-7995-01 0.0 0.0 0.0 \n", - "TCGA-69-7761-01 0.0 0.0 0.0 \n", - "TCGA-67-6216-01 0.0 0.0 0.0 \n", - "TCGA-44-6148-01 0.0 0.0 0.0 \n", - "TCGA-71-8520-01 1.0 1.0 0.0 \n", - "... ... ... ... \n", - "TCGA-69-7763-01 0.0 0.0 0.0 \n", - "TCGA-78-7150-01 1.0 1.0 0.0 \n", - "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n", - "TCGA-44-6145-01 0.0 0.0 0.0 \n", - "TCGA-05-4427-01 0.0 0.0 0.0 \n", - "\n", - "[515 rows x 14979 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "1.6502188025642124\n" + ] } ], - "source": [ - "data.get_results_binary()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a77d69fa-9401-466a-a20a-29d738649016", - "metadata": {}, - "outputs": [], "source": [] } ], @@ -1738,7 +680,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.0" } }, "nbformat": 4, diff --git a/setup.py b/setup.py index 5dd849f..691622f 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup(name='dysregnet', - version='0.0.5', + version='0.1.0', description='DysRegNet', long_description=README, long_description_content_type="text/markdown", diff --git 
a/src/dysregnet/dysregnet.py b/src/dysregnet/dysregnet.py index c554988..f5f99d0 100644 --- a/src/dysregnet/dysregnet.py +++ b/src/dysregnet/dysregnet.py @@ -30,7 +30,7 @@ def __init__(self, Raw data processing for further analysis expression_data: pandas DataFrame (rows=samples, columns=genes) - Gene expression matrix with the format: patients as rows (first column - patients/samples ids), and genes as columns. + Gene expression matrix in the format: patients as rows (first column - patients/samples ids), and genes as columns. Patients/sample IDs must match the ones in the meta DataFrame. Gene names or IDs must match the ones in the GRN DataFrame. @@ -70,11 +70,11 @@ def __init__(self, P-value threshold for the normal test. R2_threshold: float, default: None - Coefficient of determination threshold for every edge in the GRN. If the R2 is less than the threshold, the edge will not be considered in the analysis. + R-squared (R2) threshold from 0 to 1. If the fit is weaker, the edge will not be considered in the analysis. direction_condition: boolean, default: False If True, DysRegNet will only consider case samples with positive residuals (target gene overexpressed) for models with a negative TF coefficient - as potentially dysregulated. Similarly, for positive TF coefficients, only case samples with negative residuals are considered. + as potentially dysregulated. Similarly, for positive TF coefficients, only case samples with negative residuals are considered. Please check the paper for more details. 
""" diff --git a/test.ipynb b/test.ipynb index aa080da..7acbc63 100644 --- a/test.ipynb +++ b/test.ipynb @@ -556,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "43ac057e-9d32-4a39-892a-0b728906e68f", "metadata": {}, "outputs": [ @@ -564,7 +564,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "14162it [00:50, 280.14it/s]\n" + "14162it [00:53, 265.22it/s]\n" ] } ], @@ -589,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, "id": "b8791d43-05eb-4f64-a8aa-a7e2be784da5", "metadata": {}, "outputs": [ @@ -1019,7 +1019,7 @@ "[1098 rows x 14147 columns]" ] }, - "execution_count": 9, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1030,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 18, "id": "62d482da-f15d-4b16-8df9-3559030997a7", "metadata": {}, "outputs": [ @@ -1460,7 +1460,7 @@ "[1098 rows x 14147 columns]" ] }, - "execution_count": 10, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1471,7 +1471,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 19, "id": "0aa5c03b-1886-4b06-a6dd-c7933a00046c", "metadata": {}, "outputs": [ @@ -1836,7 +1836,7 @@ "[14147 rows x 17 columns]" ] }, - "execution_count": 11, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1862,7 +1862,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.0" } }, "nbformat": 4,