Skip to content

Commit

Permalink
Add typehints_defaults (#619)
Browse files Browse the repository at this point in the history
* Add typehints_defaults

Signed-off-by: zethson <[email protected]>

* Remove defaults to docstrings

Signed-off-by: zethson <[email protected]>

---------

Signed-off-by: zethson <[email protected]>
  • Loading branch information
Zethson authored May 31, 2024
1 parent 7abca65 commit 4b770ba
Show file tree
Hide file tree
Showing 31 changed files with 286 additions and 1,766 deletions.
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@

language = "en"

# The name of the Pygments (syntax highlighting) style to use.
typehints_defaults = "comma"

pygments_style = "default"
pygments_dark_style = "native"

Expand Down
47 changes: 18 additions & 29 deletions pertpy/metadata/_cell_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,13 @@ def annotate(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with cell line information. Defaults to "DepMap_ID".
query_id: The column of `.obs` with cell line information.
reference_id: The type of cell line identifier in the metadata, e.g. ModelID, CellLineName or StrippedCellLineName.
If fetching cell line metadata from Cancerrxgene, it is recommended to choose
"stripped_cell_line_name". Defaults to "ModelID".
fetch: The metadata to fetch. Defaults to None (=all).
cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene. Defaults to "DepMap".
If fetching cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
fetch: The metadata to fetch.
cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with cell line annotation.
Expand Down Expand Up @@ -318,9 +316,9 @@ def annotate_bulk_rna(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
cell_line_source: The bulk rna expression data from either broad or sanger cell line. Defaults to "sanger".
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all". Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
cell_line_source: The bulk rna expression data from either broad or sanger cell line.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with bulk rna expression annotation.
Expand Down Expand Up @@ -418,16 +416,12 @@ def annotate_protein_expression(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name".
query_id: The column of `.obs` with cell line information.
reference_id: The type of cell line identifier in the meta data, model_name or model_id.
Defaults to "model_name".
protein_information: The type of protein expression data to fetch, protein_intensity or zscore.
Defaults to "protein_intensity".
protein_id: The protein identifier saved in the fetched meta data, uniprot_id or symbol.
Defaults to "uniprot_id".
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with protein expression annotation.
Expand Down Expand Up @@ -502,22 +496,17 @@ def annotate_from_gdsc(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name".
query_id: The column of `.obs` with cell line information.
reference_id: The type of cell line identifier in the metadata, cell_line_name, sanger_model_id or cosmic_id.
Defaults to "cell_line_name".
query_perturbation: The column of `.obs` with perturbation information.
Defaults to "perturbation".
reference_perturbation: The type of perturbation in the metadata, drug_name or drug_id.
Defaults to 'drug_name'.
gdsc_dataset: The GDSC dataset, 1 or 2.
The GDSC1 dataset updates previous releases with additional drug screening data from the
Sanger Institute and Massachusetts General Hospital.
It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
GDSC2 is new and has 243,466 IC50 results from the latest screening at the Sanger Institute.
Defaults to 1.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with drug response annotation.
Expand Down Expand Up @@ -658,8 +647,8 @@ def correlate(
Args:
adata: Input data object.
identifier: Column in `.obs` containing cell line identifiers. Defaults to "DepMap_ID".
metadata_key: Key of the AnnData obsm for comparison with the X matrix. Defaults to "bulk_rna_broad".
identifier: Column in `.obs` containing cell line identifiers.
metadata_key: Key of the AnnData obsm for comparison with the X matrix.
Returns:
Returns pearson correlation coefficients and their corresponding p-values for matched and unmatched cell lines separately.
Expand Down Expand Up @@ -706,6 +695,7 @@ def plot_correlation(
adata: AnnData,
corr: pd.DataFrame,
pval: pd.DataFrame,
*,
identifier: str = "DepMap_ID",
metadata_key: str = "bulk_rna_broad",
category: str = "cell line",
Expand All @@ -717,13 +707,12 @@ def plot_correlation(
adata: Input data object.
corr: Pearson correlation scores.
pval: P-values for pearson correlation.
identifier: Column in `.obs` containing the identifiers. Defaults to 'DepMap_ID'.
metadata_key: Key of the AnnData obsm for comparison with the X matrix. Defaults to 'bulk_rna_broad'.
category: The category for correlation comparison. Defaults to "cell line".
identifier: Column in `.obs` containing the identifiers.
metadata_key: Key of the AnnData obsm for comparison with the X matrix.
category: The category for correlation comparison.
subset_identifier: Selected identifiers for scatter plot visualization between the X matrix and `metadata_key`.
If not None, only the chosen cell line will be plotted, either specified as a value in `identifier` (string) or as an index number.
If None, all cell lines will be plotted.
Defaults to None.
Returns:
Pearson correlation coefficients and their corresponding p-values for matched and unmatched cell lines separately.
"""
Expand Down
7 changes: 3 additions & 4 deletions pertpy/metadata/_compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ def annotate_compounds(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with compound identifiers. Defaults to 'perturbation'.
query_id_type: The type of compound identifiers, 'name' or 'cid'. Defaults to 'name'.
query_id: The column of `.obs` with compound identifiers.
query_id_type: The type of compound identifiers, 'name' or 'cid'.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with compound annotation.
Expand Down
4 changes: 2 additions & 2 deletions pertpy/metadata/_drug.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def annotate(
Args:
adata: AnnData object containing log-normalised data.
source: Source of the metadata, chembl, dgidb or pharmgkb. Defaults to chembl.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
source: Source of the metadata, chembl, dgidb or pharmgkb.
copy: Determines whether a copy of the `adata` is returned.
Returns:
An AnnData object with a new column `drug` in the var slot.
Expand Down
35 changes: 11 additions & 24 deletions pertpy/metadata/_look_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ def __init__(
):
"""
Args:
type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug. Defaults to cell_line.
type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug'.
transfer_metadata: DataFrames used to generate Lookup object.
This is currently set to None for CompoundMetaData which does not require any dataframes for transfer.
Defaults to 'cell_line'.
"""
self.type = type
if type == "cell_line":
Expand Down Expand Up @@ -285,12 +284,11 @@ def available_cell_lines(
"""A brief summary of cell line metadata.
Args:
cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene. Defaults to "DepMap".
cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene.
reference_id: The type of cell line identifier in the meta data, e.g. ModelID, CellLineName or StrippedCellLineName.
If fetch cell line metadata from Cancerrxgene, it is recommended to choose
"stripped_cell_line_name". Defaults to "ModelID".
If fetching cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
query_id_list: Unique cell line identifiers to test the number of matched ids present in the
metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
metadata. If set to None, the query of metadata identifiers will be disabled.
"""
if self.type != "cell_line":
raise ValueError("This is not a LookUp object specifically for CellLineMetaData!")
Expand Down Expand Up @@ -324,9 +322,9 @@ def available_bulk_rna(
"""A brief summary of bulk RNA expression data.
Args:
cell_line_source: the source of RNA-seq data, broad or sanger. Defaults to "sanger".
cell_line_source: the source of RNA-seq data, broad or sanger.
query_id_list: Unique cell line identifiers to test the number of matched ids present in the
metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
metadata. If set to None, the query of metadata identifiers will be disabled.
"""
if self.type != "cell_line":
raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
Expand All @@ -352,9 +350,8 @@ def available_protein_expression(
Args:
reference_id: The type of cell line identifier in the meta data, model_name or model_id.
Defaults to "model_name".
query_id_list: Unique cell line identifiers to test the number of matched ids present in the
metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
metadata. If set to None, the query of metadata identifiers will be disabled.
"""
if self.type != "cell_line":
raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
Expand All @@ -381,20 +378,16 @@ def available_drug_response(
"""A brief summary of drug response data.
Args:
gdsc_dataset: The GDSC dataset, 1 or 2. Defaults to 1.
gdsc_dataset: The GDSC dataset, 1 or 2.
The GDSC1 dataset updates previous releases with additional drug screening data from the Wellcome Sanger Institute and Massachusetts General Hospital.
It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
GDSC2 is new and has 243,466 IC50 results from the latest screening at the Wellcome Sanger Institute using improved experimental procedures.
reference_id: The type of cell line identifier in the meta data, cell_line_name, sanger_model_id or cosmic_id.
Defaults to 'cell_line_name'.
query_id_list: Unique cell line identifiers to test the number of matched ids present in the metadata.
If set to None, the query of metadata identifiers will be disabled.
Defaults to None.
reference_perturbation: The perturbation information in the meta data, drug_name or drug_id.
Defaults to 'drug_name'.
query_perturbation_list: Unique perturbation types to test the number of matched ones present in the metadata.
If set to None, the query of perturbation types will be disabled.
Defaults to None.
"""
if self.type != "cell_line":
raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
Expand Down Expand Up @@ -432,9 +425,7 @@ def available_genes_annotation(
Args:
reference_id: The type of gene identifier in the meta data, gene_id, ensembl_gene_id, hgnc_id, hgnc_symbol.
Defaults to "ensembl_gene_id".
query_id_list: Unique gene identifiers to test the number of matched ids present in the metadata.
Defaults to None.
"""
if self.type != "cell_line":
raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
Expand Down Expand Up @@ -472,10 +463,8 @@ def available_moa(
Args:
query_id_list: Unique perturbagens to test the number of matched ones present in the metadata.
If set to None, the query of metadata perturbagens will be disabled.
Defaults to None.
target_list: Unique molecular targets to test the number of matched ones present in the metadata.
If set to None, the comparison of molecular targets in the query of metadata perturbagens will be disabled.
Defaults to None.
"""
if query_id_list is not None:
if self.type != "moa":
Expand Down Expand Up @@ -503,8 +492,7 @@ def available_compounds(
Args:
query_id_list: Unique compounds to test the number of matched ones present in the metadata.
If set to None, query of compound identifiers will be disabled.
Defaults to None.
query_id_type: The type of compound identifiers, name or cid. Defaults to 'name'.
query_id_type: The type of compound identifiers, name or cid.
"""
if self.type != "compound":
raise ValueError("This is not a LookUp object specific for CompoundData!")
Expand Down Expand Up @@ -535,11 +523,10 @@ def available_drug_annotation(
"""A brief summary of drug annotation.
Args:
drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb. Defaults to "chembl".
drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb.
query_id_list: Unique target or compound names to test the number of matched ones present in the metadata.
If set to None, query of compound identifiers will be disabled.
Defaults to None.
query_id_type: The type of identifiers, target, compound and disease(pharmgkb only). Defaults to 'target'.
query_id_type: The type of identifiers, target, compound and disease (pharmgkb only).
"""
if self.type != "drug":
raise ValueError("This is not a LookUp object specific for DrugMetaData!")
Expand Down
2 changes: 0 additions & 2 deletions pertpy/metadata/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ def _warn_unmatch(
reference_id: The type of cell line identifier in the metadata.
metadata_type: The type of metadata where some identifiers are not matched during annotation such as
cell line, protein expression, bulk RNA expression, drug response, moa or compound.
Defaults to 'cell line'.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
Defaults to 5.
"""
if isinstance(verbosity, str):
if verbosity != "all":
Expand Down
6 changes: 2 additions & 4 deletions pertpy/metadata/_moa.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,10 @@ def annotate(
Args:
adata: The data object to annotate.
query_id: The column of `.obs` with the name of a perturbagen. Defaults to 'perturbation'.
query_id: The column of `.obs` with the name of a perturbagen.
target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
Defaults to None.
verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
Defaults to 5.
copy: Determines whether a copy of the `adata` is returned. Defaults to False.
copy: Determines whether a copy of the `adata` is returned.
Returns:
Returns an AnnData object with MoA annotation.
Expand Down
7 changes: 0 additions & 7 deletions pertpy/plot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +0,0 @@
from pertpy.plot._augur import AugurpyPlot as ag
from pertpy.plot._coda import CodaPlot as coda
from pertpy.plot._guide_rna import GuideRnaPlot as guide
from pertpy.plot._milopy import MilopyPlot as milo
from pertpy.plot._mixscape import MixscapePlot as ms

__all__ = ["AugurpyPlot", "CodaPlot", "GuideRnaPlot", "MilopyPlot", "MixscapePlot"]
Loading

0 comments on commit 4b770ba

Please sign in to comment.