Skip to content

Commit

Permalink
[#56]: update split function to handle api errors, write to google cloud if error happens
Browse files
Browse the repository at this point in the history
  • Loading branch information
tw-jeff-burroughs committed Oct 3, 2019
1 parent 6ae5fc7 commit 8924f52
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 8 deletions.
2 changes: 1 addition & 1 deletion invisible_flow/api/copa_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def scrape_data_json(self):
def scrape_copa_csv(self):
query_string = ".csv?$where=assignment=\"COPA\""
url = SCRAPE_URL + query_string
return requests.get(url=url).content
return requests.get(url=url)

def scrape_not_copa_csv(self):
query_string = ".csv?$where=assignment!=\"COPA\""
Expand Down
16 changes: 12 additions & 4 deletions invisible_flow/transformers/copa_scrape_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,18 @@ def save_scraped_data(self):
self.storage.store_string('transform_error.csv', error_to_write, f'Scrape-{self.current_date}/errors')

def split(self) -> Dict[str, List]:
return {
'copa': self.scraper.scrape_copa_csv(),
'no_copa': self.scraper.scrape_not_copa_csv()
}
# on error needs to call store_string with a string describing the error
scraper = CopaScrape()
response = scraper.scrape_copa_csv()
if response.status_code == 200:
return {
'copa': response.content,
'no_copa': self.scraper.scrape_not_copa_csv()
}
else:
error_to_write = str(response.status_code) + "\n" + response.text
self.storage.store_string('transform_error.csv', error_to_write, f'Scrape-{self.current_date}/errors')
return {}

def upload_to_gcs(self, conversion_results: Dict):
for result in conversion_results:
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_copa_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_scrape_data_json(self, get_mock):
assert should_be_json_data == {"key1": "value1"}

def test_scrape_copa_csv(self, get_mock):
should_be_bubbles = CopaScrape().scrape_copa_csv()
should_be_bubbles = CopaScrape().scrape_copa_csv().content
assert should_be_bubbles == "bubbles"

def test_scrape_not_copa_csv(self, get_mock):
Expand Down
22 changes: 20 additions & 2 deletions tests/transformers/test_copa_scrape_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,11 @@ def test_split_passes(self, get_mock):
self.copa = False
self.no_copa = False
raw_data = self.transformer.split()
assert not raw_data['copa'].find(b'BIA') > -1
assert not raw_data['no_copa'].find(b'COPA') > -1
if response_code == 200:
assert not raw_data['copa'].find(b'BIA') > -1
assert not raw_data['no_copa'].find(b'COPA') > -1
else:
assert len(raw_data) == 0

def test_upload_to_gcs(self, get_mock):
copa_split_csv = os.path.join(IFTestBase.resource_directory, 'copa_scraped_split.csv')
Expand Down Expand Up @@ -112,3 +115,18 @@ def test_save_scraped_data_with_all_response_codes(self, get_mock):
call(filename, mock.ANY, f'Scrape-{self.current_date}/{pathname}')
]
store_string_mock.assert_has_calls(calls)

@patch('invisible_flow.app.GlobalsFactory.get_current_datetime_utc', lambda: datetime(2019, 3, 25, 5, 30, 50, 0))
def test_split_with_all_response_codes(self, get_mock):
with patch('invisible_flow.app.StorageFactory.get_storage') as get_storage_mock:
with patch('invisible_flow.storage.LocalStorage.store_string') as store_string_mock:
get_storage_mock.return_value = LocalStorage()
split_data = CopaScrapeTransformer().split()
self.current_date = GlobalsFactory.get_current_datetime_utc().isoformat(sep='_').replace(':', '-')
if response_code == 200:
assert split_data is not None
else:
calls = [
call('transform_error.csv', mock.ANY, f'Scrape-{self.current_date}/errors')
]
store_string_mock.assert_has_calls(calls)

0 comments on commit 8924f52

Please sign in to comment.