-
Notifications
You must be signed in to change notification settings - Fork 0
/
query.py
156 lines (117 loc) · 4.17 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import abc
import json
import threading
from enum import Enum
from pathlib import Path
from typing import Optional, List, Dict
import sys
import typer
import requests
from requests.sessions import HTTPAdapter
from requests.adapters import Retry, Response
import logging
from lib.const import APP_DIRECTORY, LOGFILE
# Looks up (creating it if necessary) the logger named "rich". The returned
# logger object is not bound to a name, and the classes in this module log
# through the module-level ``logging`` functions instead.
# NOTE(review): presumably a handler for "rich" is configured elsewhere - confirm.
logging.getLogger("rich")
class UNIPROT_RESPONSE(Enum):
    """
    String keys used when reading a UniProt response payload.

    Each member's ``value`` is the literal key name.
    """

    # Key holding the accession of the entry.
    ACCESSION = "accession"

    # Key under which structure identifiers are reported.
    STRUCTURE = "structure"

    # Key holding the list of database cross-references.
    DB_REFERENCES = "dbReferences"
class HTMLQuery:
    """
    Base class for fetching resources from an HTTP endpoint.

    Subclasses supply the URL prefix through :attr:`html_base`. Each successful
    response is appended to :attr:`response_history` and its body is written to
    a file inside the output directory.
    """

    def __init__(self, output_directory: Path):
        """
        :param output_directory: directory that response bodies are written to
        """
        self._response_history: List = []
        self._session = self.create_http_session()
        self._output_directory: Path = output_directory

    @property
    def response_history(self):
        """
        :return: list of collected payloads (parsed JSON objects or raw bytes)
        :raises Exception: if the history attribute has been set to ``None``
        """
        if self._response_history is None:
            raise Exception("History is None")
        return self._response_history

    @property
    @abc.abstractmethod
    def html_base(self) -> str:
        """
        :return: URL prefix that a query string is appended to
        """
        # NOTE(review): this class does not derive from ``abc.ABC``, so the
        # abstract marker is not enforced when the class is instantiated.

    @staticmethod
    def create_http_session():
        """
        A requests session configured with retries.

        :return: a ``requests.Session`` whose ``https://`` adapter retries
            (total=5, backoff_factor=1) on HTTP 500, 502, 503 and 504
        """
        session = requests.Session()
        # Retry has been set for all server related errors
        retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
        session.mount('https://', HTTPAdapter(max_retries=retry_policy))
        return session

    def query(self, query: str) -> Optional[Dict]:
        """
        Fetch ``html_base + query`` and store the response.

        :param query: text appended to :attr:`html_base` to form the request URL
        :return: the result of :meth:`_query_cleanup`, which is always ``None``;
            the payload is available through :attr:`response_history`
        """
        url = str(self.html_base) + query
        logging.info("Querying:%s", url)
        response = self._session.get(url, headers={"Accept": "application/json"})
        return self._query_cleanup(response)

    def _query_cleanup(self, response) -> None:
        """
        Validate a response, record its payload and write its body to disk.

        :param response: the response object returned by the session
        :return: None
        :raises FileExistsError: if the server answered 404 (type kept for
            backward compatibility with existing callers)
        :raises requests.HTTPError: for any other non-ok status, raised by
            ``response.raise_for_status()``
        """
        if response.status_code == 404:
            # The original built a UserWarning object without emitting it;
            # log the condition so it is actually visible.
            logging.warning("File was not found: %s", response.status_code)
            raise FileExistsError(f"File was not found {response.status_code}")
        if not response.ok:
            # raise_for_status() raises for every non-ok response, so no
            # process exit is needed (or wanted) after it.
            response.raise_for_status()

        # Compare only the media type: the header may be missing or may carry
        # parameters such as "; charset=utf-8".
        content_type = response.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()

        if media_type == "application/json":
            self._response_history.append(response.json())
            suffix = ".json"
        else:
            self._response_history.append(response.content)
            suffix = ""

        # The file name is the part of the request URL that follows html_base.
        relative_name = response.request.url.split(self.html_base)[1]
        target = self._output_directory.joinpath(relative_name).as_posix() + suffix
        # Context manager guarantees the handle is closed and the data flushed.
        with open(target, 'wb') as handle:
            handle.write(response.content)
        return None
class UniProtIDQuery(HTMLQuery):
    """
    Query against the ``rest.uniprot.org/uniprotkb/`` endpoint that also keeps
    a metadata file name on the instance.
    """

    # Prefix every query string is appended to.
    # Previously used alternative, currently disabled:
    # "https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=100&accession="
    _BASE_URL = "https://rest.uniprot.org/uniprotkb/"

    def __init__(self, meta_data_file_name: str, output_directory: Path):
        """
        :param meta_data_file_name: stored as ``_meta_data_file_name``
        :param output_directory: forwarded to the base-class constructor
        """
        super().__init__(output_directory)
        self._meta_data_file_name: str = meta_data_file_name

    @property
    def html_base(self) -> str:
        """
        :return: the UniProtKB URL prefix
        """
        return self._BASE_URL

    # Disabled response parser, kept for reference:
    # def parse_response(self) -> Dict:
    #     with open(self._meta_data_file_name, 'w') as out:
    #         json.dump(self.response_history[0], out)
    #     accession = self.response_history[0].get(UNIPROT_RESPONSE.ACCESSION.value)
    #     data: List[Dict] = self.response_history[0].get(UNIPROT_RESPONSE.DB_REFERENCES.value)
    #     if data is None:
    #         raise UserWarning(f"No known model! {data}")
    #     pdb_results: List[str] = [entry.get('id') for entry in data if entry.get('type') == "PDB"]
    #     return {UNIPROT_RESPONSE.ACCESSION.value: accession,
    #             UNIPROT_RESPONSE.STRUCTURE.value: pdb_results}
class FastaQuery(HTMLQuery):
    """
    Query whose URLs are built on the ``rest.uniprot.org/uniprotkb/`` prefix.
    """

    # Prefix every query string is appended to.
    _BASE_URL = "https://rest.uniprot.org/uniprotkb/"

    @property
    def html_base(self) -> str:
        """
        :return: the UniProtKB URL prefix
        """
        return self._BASE_URL
class PDBQuery(HTMLQuery):
    """
    Query whose URLs are built on the ``files.rcsb.org/download/`` prefix.
    """

    # Prefix every query string is appended to.
    _BASE_URL = "https://files.rcsb.org/download/"

    @property
    def html_base(self) -> str:
        """
        :return: the RCSB download URL prefix
        """
        return self._BASE_URL
class AlphaFoldQuery(HTMLQuery):
    """
    Query whose URLs are built on the ``alphafold.ebi.ac.uk/files/`` prefix.
    """

    # Prefix every query string is appended to.
    _BASE_URL = "https://alphafold.ebi.ac.uk/files/"

    @property
    def html_base(self) -> str:
        """
        :return: the AlphaFold files URL prefix
        """
        return self._BASE_URL