From 2dbd630bab7b5429f77b4b74785f7fa91610a40f Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Fri, 6 Nov 2020 17:33:03 +0000 Subject: [PATCH 1/4] add qtl groups endpoint --- sumstats/explorer.py | 5 +++ sumstats/server/api_endpoints_impl.py | 30 +++++++++++++ sumstats/server/api_utils.py | 7 +++ sumstats/server/app.py | 63 +++++++++++++++++++++++++-- sumstats/utils/sqlite_client.py | 9 ++++ 5 files changed, 111 insertions(+), 3 deletions(-) diff --git a/sumstats/explorer.py b/sumstats/explorer.py index 4810853..5f7109f 100644 --- a/sumstats/explorer.py +++ b/sumstats/explorer.py @@ -57,6 +57,11 @@ def get_tissue_ont_dict(self): tissue_ont_dict = sq.get_tissue_ont_dict() return tissue_ont_dict + def get_qtl_list(self): + sq = sql_client.sqlClient(self.sqlite_db) + qtl_list = sq.get_qtl_list() + return sorted(list(set(qtl_list))) + def get_studies_of_tissue(self, tissue): sq = sql_client.sqlClient(self.sqlite_db) studies = sq.get_studies_for_tissue(tissue) diff --git a/sumstats/server/api_endpoints_impl.py b/sumstats/server/api_endpoints_impl.py index 71f3e70..8c4de50 100644 --- a/sumstats/server/api_endpoints_impl.py +++ b/sumstats/server/api_endpoints_impl.py @@ -19,6 +19,7 @@ def root(): ('molecular_phenotypes', apiu._create_href(method_name='api.get_traits')), ('studies', apiu._create_href(method_name='api.get_studies')), ('tissues', apiu._create_href(method_name='api.get_tissues')), + ('qtl_groups', apiu._create_href(method_name='api.get_qtl_groups')), ('genes', apiu._create_href(method_name='api.get_genes')), ('chromosomes', apiu._create_href(method_name='api.get_chromosomes')) ]) @@ -434,6 +435,35 @@ def tissue(tissue): raise RequestedNotFound(str(error)) +def qtl_groups(): + args = request.args.to_dict() + try: + start, size, p_lower, p_upper, pval_interval, quant_method, snp, _, gene, study, trait, paginate, links, qtl_group = apiu._get_basic_arguments(args) + except ValueError as error: + logging.error("/qtl_groups. " + (str(error))) + raise BadUserRequest(str(error)) + + explorer = ex.Explorer(apiu.properties) + qtls = explorer.get_qtl_list() + qtl_list = apiu._get_qtl_list(qtls=qtls, start=start, size=size, links=links) + response = apiu._create_response(collection_name='qtl_groups', method_name='api.get_qtl_groups', + start=start, size=size, index_marker=size, data_dict=qtl_list) + + return simplejson.dumps(response) + + +def qtl_group(qtl_group): + try: + explorer = ex.Explorer(config_properties=properties) + if explorer.get_studies_of_tissue(tissue): + response = apiu._create_info_for_tissue(tissue) + return simplejson.dumps(response, ignore_nan=True) + else: + raise RequestedNotFound("Tissue: {} not found".format(tissue)) + except NotFoundError as error: + logging.error("/tissue/" + tissue + ". " + (str(error))) + raise RequestedNotFound(str(error)) + def genes(): args = request.args.to_dict() try: diff --git a/sumstats/server/api_utils.py b/sumstats/server/api_utils.py index d089f92..7266c88 100644 --- a/sumstats/server/api_utils.py +++ b/sumstats/server/api_utils.py @@ -46,6 +46,13 @@ def _get_tissue_list(tissues, start, size, links=None): tissue_list.append(_create_info_for_tissue(tissue, tissue_name, links)) return tissue_list +def _get_qtl_list(qtls, start, size, links=None): + qtl_list = [] + end = min(start + size, len(qtls)) + for qtl in qtls[start:end]: + qtl_list.append({'qtl_group': qtl}) + return qtl_list + def _create_study_info_for_trait(studies, trait=None): study_list = [] diff --git a/sumstats/server/app.py b/sumstats/server/app.py index a1cbc38..214e0f6 100644 --- a/sumstats/server/app.py +++ b/sumstats/server/app.py @@ -1517,13 +1517,13 @@ def get_tissues(): }, _links: { self: { - href: "http://wwwdev.ebi.ac.uk/eqtl/api/tissues" + href: "http://www.ebi.ac.uk/eqtl/api/tissues" }, first: { - href: "http://wwwdev.ebi.ac.uk/eqtl/api/tissues?start=0&size=2" + href: "http://www.ebi.ac.uk/eqtl/api/tissues?start=0&size=2" }, next: { - href: "http://wwwdev.ebi.ac.uk/eqtl/api/tissues?start=2&size=2" + href: "http://www.ebi.ac.uk/eqtl/api/tissues?start=2&size=2" } } } @@ -1589,6 +1589,63 @@ def get_tissue(tissue): mimetype="application/json") +@api.route('/qtl_groups') +def get_qtl_groups(): + """QTL groups + + .. :quickref: QTL groups; List all existing qtl groups (datasets) + + Lists all of the existing qtl groups. + + **Example request**: + + .. sourcecode:: http + + GET /qtl_groups HTTP/1.1 + Host: www.ebi.ac.uk + + **Example response**: + + .. sourcecode:: http + + HTTP/1.1 200 OK + Content-Type: application/json + + { + _embedded: { + qtl_groups: [{ + qtl_group: "Adipose_Subcutaneous" + }, + { + qtl_group: "Adipose_Visceral_Omentum" + } + ] + }, + _links: { + self: { + href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups" + }, + first: { + href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups?start=0&size=2" + }, + next: { + href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups?start=2&size=2" + } + } + } + + :query start: offset number. default is 0 + :query size: number of items returned. default is 20 + + :statuscode 200: no error + + """ + resp = endpoints.qtl_groups() + return Response(response=resp, + status=200, + mimetype="application/json") + + @api.route('/genes') def get_genes(): """Genes diff --git a/sumstats/utils/sqlite_client.py b/sumstats/utils/sqlite_client.py index 3835b79..a3ac367 100644 --- a/sumstats/utils/sqlite_client.py +++ b/sumstats/utils/sqlite_client.py @@ -141,6 +141,15 @@ def get_tissue_ont_dict(self): else: return False + def get_qtl_list(self): + data = [] + for row in self.cur.execute("SELECT qtl_group FROM study_info"): + data.append(row[0]) + if data: + return data + else: + return False + def get_tissue_ontos(self): data = [] for row in self.cur.execute("SELECT tissue_ontology FROM study_info"): From 22f3b6ed86d535adfa95f507d08f9a32dfabc111 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Fri, 6 Nov 2020 18:13:40 +0000 Subject: [PATCH 2/4] add neg log10 pvalues --- docs/source/index.rst | 3 +++ sumstats/chr/search/association_search.py | 6 ++++++ sumstats/common_constants.py | 5 +++-- sumstats/server/app.py | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 814eaa6..90f5739 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -155,6 +155,8 @@ Available data fields +-------------------------+--------+--------------------------------------------------------------+ | pvalue | Number | P-value of association between the variant and the phenotype | +-------------------------+--------+--------------------------------------------------------------+ +| neg_log10_pvalue | Number | Negative log10 p-value | ++-------------------------+--------+--------------------------------------------------------------+ | ac | Number | Allele count | +-------------------------+--------+--------------------------------------------------------------+ | alt | String | GRCh38 effect allele (alt allele) | @@ -237,6 +239,7 @@ Links will be provided in the response to navigate the resources. "maf": 0.246914, "median_tpm": 12.272, "pvalue": 0.0166984, + "neg_log10_pvalue": 1.77732514, "molecular_trait_id": "ENSG00000011304", "gene_id": "ENSG00000011304", "tissue": "UBERON_0009834", diff --git a/sumstats/chr/search/association_search.py b/sumstats/chr/search/association_search.py index 055e132..7d03277 100644 --- a/sumstats/chr/search/association_search.py +++ b/sumstats/chr/search/association_search.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np import re import glob import itertools @@ -324,6 +325,7 @@ def paginated_request(self): print(len(chunk)) self.df = self.df.append(chunk) + self.add_neg_log10_pvalue() if len(self.df.index) >= self.size: # break once we have enough @@ -353,6 +355,7 @@ def unpaginated_request(self): chunk = self._update_df_with_metadata(chunk, meta_dict) if self.search_dir == "study" else chunk self.df = pd.concat([self.df, chunk]) + self.add_neg_log10_pvalue() @staticmethod def _update_df_with_metadata(df, meta_dict): @@ -364,6 +367,9 @@ def _update_df_with_metadata(df, meta_dict): df[TISSUE_LABEL_DSET] = meta_dict['tissue_label'] return df + def add_neg_log10_pvalue(self): + self.df[NEG_LOG_PVAL_DSET] = np.negative(np.log10(self.df[PVAL_DSET])) + def _construct_conditional_statement(self): conditions = [] diff --git a/sumstats/common_constants.py b/sumstats/common_constants.py index 6743b28..d274763 100644 --- a/sumstats/common_constants.py +++ b/sumstats/common_constants.py @@ -20,6 +20,7 @@ GENE_DSET = 'gene_id' MTO_DSET = 'molecular_trait_object_id' EXPR_DSET = 'median_tpm' +NEG_LOG_PVAL_DSET = 'neg_log10_pvalue' HM_OR_DSET = 'hm_odds_ratio' HM_RANGE_U_DSET = 'hm_ci_upper' HM_RANGE_L_DSET = 'hm_ci_lower' @@ -40,14 +41,14 @@ DSET_TYPES = {SNP_DSET: str, RSID_DSET: str, MUTATION_DSET: str, AC_DSET: float, AN_DSET: float, PVAL_DSET: float, MANTISSA_DSET: float, EXP_DSET: "int64", STUDY_DSET: str, CHR_DSET: str, BP_DSET: "int64", R2_DSET: float, BETA_DSET: float, SE_DSET: float, GENE_DSET: str, PHEN_DSET: str, MTO_DSET: str, EFFECT_DSET: str, OTHER_DSET: str, FREQ_DSET: float, EXPR_DSET: float, TISSUE_DSET: str, - QTL_GROUP_DSET: str, CONDITION_DSET: str, CONDITION_LABEL_DSET: str, TISSUE_LABEL_DSET: str} + QTL_GROUP_DSET: str, CONDITION_DSET: str, CONDITION_LABEL_DSET: str, TISSUE_LABEL_DSET: str, NEG_LOG_PVAL_DSET: float} REFERENCE_DSET = SNP_DSET HARMONISATION_PREFIX = 'hm_' GWAS_CATALOG_STUDY_PREFIX = 'GCST' -TO_DISPLAY_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, EFFECT_DSET, OTHER_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, R2_DSET, EXPR_DSET, QTL_GROUP_DSET, CONDITION_DSET, CONDITION_LABEL_DSET, TISSUE_LABEL_DSET, SE_DSET} +TO_DISPLAY_DEFAULT = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, EFFECT_DSET, OTHER_DSET, BETA_DSET, RSID_DSET, MUTATION_DSET, AC_DSET, AN_DSET, FREQ_DSET, R2_DSET, EXPR_DSET, QTL_GROUP_DSET, CONDITION_DSET, CONDITION_LABEL_DSET, TISSUE_LABEL_DSET, SE_DSET, NEG_LOG_PVAL_DSET} TO_DISPLAY_RAW = {SNP_DSET, PVAL_DSET, STUDY_DSET, CHR_DSET, BP_DSET, BETA_DSET, EFFECT_DSET, OTHER_DSET} diff --git a/sumstats/server/app.py b/sumstats/server/app.py index 214e0f6..ab0c396 100644 --- a/sumstats/server/app.py +++ b/sumstats/server/app.py @@ -102,6 +102,7 @@ def get_assocs(): "maf": 0.246914, "median_tpm": 12.272, "pvalue": 0.0166984, + "neg_log10_p_value": 1.77732514, "molecular_trait_id": "ENSG00000011304", "gene_id": "ENSG00000011304", "tissue": "UBERON_0009834", @@ -126,6 +127,7 @@ def get_assocs(): "maf": 0.246914, "median_tpm": 27.623, "pvalue": 0.424836, + "neg_log10_p_value": 0.371778689, "molecular_trait_id": "ENSG00000129951", "gene_id": "ENSG00000129951", "tissue": "UBERON_0009834", @@ -228,6 +230,7 @@ def get_variant(variant_id=None, rsid=None): "maf": 0.246914, "median_tpm": 12.272, "pvalue": 0.0166984, + "neg_log10_p_value": 1.77732514, "molecular_trait_id": "ENSG00000011304", "gene_id": "ENSG00000011304", "tissue": "UBERON_0009834", @@ -252,6 +255,7 @@ def get_variant(variant_id=None, rsid=None): "maf": 0.246914, "median_tpm": 27.623, "pvalue": 0.424836, + "neg_log10_p_value": 0.371778689, "molecular_trait_id": "ENSG00000129951", "gene_id": "ENSG00000129951", "tissue": "UBERON_0009834", @@ -461,6 +465,7 @@ def get_trait_assocs(molecular_trait_id): "maf": 0.246914, "median_tpm": 12.272, "pvalue": 0.0166984, + "neg_log10_p_value": 1.77732514, "molecular_trait_id": "ENSG00000011304", "gene_id": "ENSG00000011304", "tissue": "UBERON_0009834", @@ -772,6 +777,7 @@ def get_tissue_assocs(tissue): "type": "SNP", "maf": 0.380952, "pvalue": 0.5004, + "neg_log10_p_value": 0.300682699, "molecular_trait_id": "ENSG00000011304", "gene_id": "ENSG00000011304", "tissue": "CL_0000235", @@ -796,6 +802,7 @@ def get_tissue_assocs(tissue): "type": "SNP", "maf": 0.380952, "pvalue": 0.0528997, + "neg_log10_p_value": 1.276546791, "molecular_trait_id": "ENSG00000099817", "gene_id": "ENSG00000099817", "tissue": "CL_0000235", @@ -930,6 +937,7 @@ def get_tissue_study_assocs(study, tissue=None): "qtl_group": "macrophage_IFNg+Salmonella", "ref": "G", "pvalue": 0.5004, + "neg_log10_p_value": 0.300682699, "position": 229783, "variant": "chr19_229783_G_A", "tissue_label": "macrophage", @@ -954,6 +962,7 @@ def get_tissue_study_assocs(study, tissue=None): "qtl_group": "macrophage_IFNg+Salmonella", "ref": "G", "pvalue": 0.0528997, + "neg_log10_p_value": 1.276546791, "position": 229783, "variant": "chr19_229783_G_A", "tissue_label": "macrophage", @@ -1288,6 +1297,7 @@ def get_chromosome_assocs(chromosome): "alt": "A", "position": 1053768, "pvalue": 0.483624, + "neg_log10_p_value": 0.315492155, "maf": 0.0123457, "type": "SNP", "ac": 12, @@ -1312,6 +1322,7 @@ def get_chromosome_assocs(chromosome): "alt": "A", "position": 1053768, "pvalue": 0.468078, + "neg_log10_p_value": 0.329681771, "maf": 0.0123457, "type": "SNP", "ac": 12, @@ -1408,6 +1419,7 @@ def get_chromosome_variants(chromosome, variant_id): "qtl_group": "macrophage_naive", "ref": "T", "pvalue": 0.24666, + "neg_log10_p_value": 0.607901273, "position": 814583, "variant": "rs56197012", "tissue_label": "macrophage", @@ -1432,6 +1444,7 @@ def get_chromosome_variants(chromosome, variant_id): "qtl_group": "macrophage_naive", "ref": "T", "pvalue": 0.711897, + "neg_log10_p_value": 0.147582837, "position": 814583, "variant": "rs56197012", "tissue_label": "macrophage", @@ -1803,6 +1816,7 @@ def get_gene_assocs(gene_id): "alt": "C", "position": 192658, "pvalue": 0.644378, + "neg_log10_p_value": 0.190859295, "maf": 0.00925926, "type": "SNP", "ac": 9, @@ -1827,6 +1841,7 @@ def get_gene_assocs(gene_id): "alt": "A", "position": 193051, "pvalue": 0.166599, + "neg_log10_p_value": 0.77832761, "maf": 0.0277778, "type": "SNP", "ac": 27, From 8d854653e32556250da9eeea22aa0fd27e5e90c0 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Fri, 6 Nov 2020 18:21:09 +0000 Subject: [PATCH 3/4] tidy docs --- sumstats/server/app.py | 45 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/sumstats/server/app.py b/sumstats/server/app.py index ab0c396..81bd05d 100644 --- a/sumstats/server/app.py +++ b/sumstats/server/app.py @@ -1624,28 +1624,29 @@ def get_qtl_groups(): HTTP/1.1 200 OK Content-Type: application/json - { - _embedded: { - qtl_groups: [{ - qtl_group: "Adipose_Subcutaneous" - }, - { - qtl_group: "Adipose_Visceral_Omentum" - } - ] - }, - _links: { - self: { - href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups" - }, - first: { - href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups?start=0&size=2" - }, - next: { - href: "http://www.ebi.ac.uk/eqtl/api/qtl_groups?start=2&size=2" - } - } - } + { + "_embedded": { + "qtl_groups": [ + { + "qtl_group": "Adipose_Subcutaneous" + }, + { + "qtl_group": "Adipose_Visceral_Omentum" + } + ] + }, + "_links": { + "self": { + "href": "http://localhost:8000/eqtl/api/qtl_groups" + }, + "first": { + "href": "http://localhost:8000/eqtl/api/qtl_groups?start=0&size=2" + }, + "next": { + "href": "http://localhost:8000/eqtl/api/qtl_groups?start=2&size=2" + } + } + } :query start: offset number. default is 0 :query size: number of items returned. default is 20 From bfabb0f60b388c412ceb5e4652b2814f370e7491 Mon Sep 17 00:00:00 2001 From: jdhayhurst Date: Mon, 9 Nov 2020 14:46:15 +0000 Subject: [PATCH 4/4] variant resource endpoint not limitted to one element --- sumstats/server/api_endpoints_impl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumstats/server/api_endpoints_impl.py b/sumstats/server/api_endpoints_impl.py index 8c4de50..d31e54e 100644 --- a/sumstats/server/api_endpoints_impl.py +++ b/sumstats/server/api_endpoints_impl.py @@ -347,8 +347,8 @@ def variants(variant, chromosome=None): args = request.args.to_dict() try: start, size, p_lower, p_upper, pval_interval, quant_method, _, tissue, gene, study, trait, paginate, links, qtl_group = apiu._get_basic_arguments(args) - if study is not None: - return variant_resource(variant=variant, chromosome=chromosome) + #if study is not None: + # return variant_resource(variant=variant, chromosome=chromosome) except ValueError as error: logging.debug("/chromosomes/" + chromosome + "/associations/" + variant + ". " + (str(error))) raise BadUserRequest(str(error)) @@ -398,7 +398,7 @@ def variant_resource(variant, chromosome=None): if chromosome is not None: params['chromosome'] = chromosome response = apiu._create_resource_response(data_dict=data_dict, params=params) - + print(response) return simplejson.dumps(response, ignore_nan=True) except (NotFoundError, SubgroupError) as error: logging.debug(str(error))