def _with_user_agent(kwargs, user_agent):
    """Return a copy of the request kwargs with the User-Agent header set.

    The caller's ``headers`` mapping is copied before it is modified, so a
    dict that is reused across several requests is never mutated here.

    Args:
        kwargs (dict): keyword arguments destined for the requests call
        user_agent (str): value to send as the ``User-Agent`` header; it
            replaces any ``User-Agent`` entry already present in the headers

    Returns:
        dict: a new kwargs dict whose ``headers`` entry is a fresh mapping
    """
    request_kwargs = dict(kwargs)
    supplied_headers = request_kwargs.get("headers")
    # ``headers=None`` is treated the same as "no headers supplied".
    headers = supplied_headers.copy() if supplied_headers is not None else {}
    headers["User-Agent"] = user_agent
    request_kwargs["headers"] = headers
    return request_kwargs


@retry(tries=3, delay=15, backoff=2)
def post_url(
    url, user_agent="Big Local News (biglocalnews.org)", session=None, **kwargs
):
    """Send a POST request to the provided URL and return the response object.

    The call is wrapped in ``@retry(tries=3, delay=15, backoff=2)``, so an
    exception raised here (including the failed status check) triggers the
    decorator's retry handling.

    Args:
        url (str): the url to be requested
        user_agent (str): the user-agent header passed with the request (default: biglocalnews.org)
        session: a session object to use when making the request. optional
        **kwargs: passed through to ``session.post`` / ``requests.post``

    Returns:
        The response object returned by the POST call.

    Raises:
        AssertionError: if ``response.ok`` is false (an HTTP error status).
    """
    logger.debug(f"Requesting {url}")

    # Set the headers on a copy so the caller's dict is left untouched
    request_kwargs = _with_user_agent(kwargs, user_agent)

    # Go get it
    if session is not None:
        logger.debug(f"Requesting with session {session}")
        response = session.post(url, **request_kwargs)
    else:
        response = requests.post(url, **request_kwargs)
    logger.debug(f"Response code: {response.status_code}")

    # Verify that the response is not an HTTP error. This is raised
    # explicitly rather than with ``assert`` so the check still runs (and the
    # retry still fires) when Python is started with -O.
    if not response.ok:
        raise AssertionError(
            f"POST {url} failed with status code {response.status_code}"
        )

    # Return the response
    return response


@retry(tries=3, delay=15, backoff=2)
def get_cookies(url, user_agent="Big Local News (biglocalnews.org)", **kwargs):
    """Send a GET request to the provided URL and return its cookies.

    The call is wrapped in ``@retry(tries=3, delay=15, backoff=2)``, so an
    exception raised here (including the failed status check) triggers the
    decorator's retry handling.

    Args:
        url (str): the url to be requested
        user_agent (str): the user-agent header passed with the request (default: biglocalnews.org)
        **kwargs: passed through to ``requests.get``

    Returns:
        The result of ``response.cookies.get_dict()`` for the response.

    Raises:
        AssertionError: if ``response.ok`` is false (an HTTP error status).
    """
    logger.debug(f"Requesting {url}")

    # Set the headers on a copy so the caller's dict is left untouched
    request_kwargs = _with_user_agent(kwargs, user_agent)
    response = requests.get(url, **request_kwargs)

    # Verify that the response is not an HTTP error; explicit raise instead
    # of ``assert`` so the check survives running Python with -O.
    if not response.ok:
        raise AssertionError(
            f"GET {url} failed with status code {response.status_code}"
        )

    # Return the cookies
    return response.cookies.get_dict()