diff --git a/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py b/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py index c111964a44ad..263bd932999f 100644 --- a/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py +++ b/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py @@ -28,7 +28,7 @@ from .helpers import Helpers from .models.spreadsheet import Spreadsheet from .models.spreadsheet_values import SpreadsheetValues -from .utils import safe_name_conversion +from .utils import exception_description_by_status_code, safe_name_conversion # set default batch read size ROW_BATCH_SIZE = 200 @@ -131,23 +131,16 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: return AirbyteCatalog(streams=streams) except errors.HttpError as err: - reason = str(err) + error_description = exception_description_by_status_code(err.resp.status, spreadsheet_id) config_error_status_codes = [status_codes.NOT_FOUND, status_codes.FORBIDDEN] if err.resp.status in config_error_status_codes: - if err.resp.status == status_codes.NOT_FOUND: - reason = f"Requested spreadsheet with id {spreadsheet_id} was not found" - if err.resp.status == status_codes.FORBIDDEN: - reason = f"Forbidden when requesting spreadsheet with id {spreadsheet_id}" - message = ( - f"{reason}. {err.reason}. See docs for more details here: " - f"https://cloud.google.com/service-infrastructure/docs/service-control/reference/rpc/google.api/servicecontrol.v1#code" - ) + message = f"{error_description}. {err.reason}." raise AirbyteTracedException( message=message, internal_message=message, failure_type=FailureType.config_error, ) from err - raise Exception(f"Could not run discovery: {reason}") + raise Exception(f"Could not discover the schema of your spreadsheet. {error_description}. {err.reason}.") def _read( self, @@ -213,15 +206,24 @@ def read( catalog: ConfiguredAirbyteCatalog, state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]] = None, ) -> Generator[AirbyteMessage, None, None]: + spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"]) try: yield from self._read(logger, config, catalog, state) except errors.HttpError as e: - if e.status_code == 429: - logger.info(f"Stopped syncing process due to rate limits. {e.reason}") + error_description = exception_description_by_status_code(e.status_code, spreadsheet_id) + + if e.status_code == status_codes.TOO_MANY_REQUESTS: + logger.info(f"Stopped syncing process due to rate limits. {error_description}") + if e.status_code == status_codes.FORBIDDEN: + raise AirbyteTracedException( + message=f"Stopped syncing process. {error_description}", + internal_message=error_description, + failure_type=FailureType.config_error, + ) from e else: - logger.info(f"{e.status_code}: {e.reason}") + logger.info(f"{e.status_code}: {e.reason}. {error_description}") finally: - logger.info(f"Finished syncing spreadsheet {Helpers.get_spreadsheet_id(config['spreadsheet_id'])}") + logger.info(f"Finished syncing spreadsheet {spreadsheet_id}") @staticmethod def get_credentials(config): diff --git a/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/utils.py b/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/utils.py index 9a39e9f7e33a..a7f0498b6845 100644 --- a/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/utils.py +++ b/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/utils.py @@ -6,6 +6,7 @@ import re import unidecode +from requests.status_codes import codes as status_codes TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P[^a-zA-Z\d]+)") DEFAULT_SEPARATOR = "_" @@ -40,3 +41,26 @@ def safe_name_conversion(text): if not new: raise Exception(f"initial string '{text}' converted to empty") return new + + +def exception_description_by_status_code(code: int, spreadsheet_id) -> str: + if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]: + return ( + "There was an issue with the Google Sheets API. This is usually a temporary issue from Google's side." + " Please try again. If this issue persists, contact support" + ) + if code == status_codes.FORBIDDEN: + return ( + f"The authenticated Google Sheets user does not have permissions to view the spreadsheet with id {spreadsheet_id}. " + "Please ensure the authenticated user has access to the Spreadsheet and reauthenticate. If the issue persists, contact support" + ) + if code == status_codes.NOT_FOUND: + return ( + f"The requested Google Sheets spreadsheet with id {spreadsheet_id} does not exist. " + f"Please ensure the Spreadsheet Link you have set is valid and Spreadsheet exists. If the issue persists, contact support" + ) + + if code == status_codes.TOO_MANY_REQUESTS: + return "Rate limit has been reached. Please try later or request a higher quota for your account." + + return "" diff --git a/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_stream.py index 1ad212b6878e..802d8aa241fc 100644 --- a/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-google-sheets/unit_tests/test_stream.py @@ -51,8 +51,8 @@ def test_discover_404_error(mocker, invalid_config): with pytest.raises(AirbyteTracedException) as e: source.discover(logger=mocker.MagicMock(), config=invalid_config) - expected_message = ("Requested spreadsheet with id invalid_spreadsheet_id was not found. Requested entity was not found. " - "See docs for more details here: https://cloud.google.com/service-infrastructure/docs/service-control/reference/rpc/google.api/servicecontrol.v1#code") + expected_message = ("The requested Google Sheets spreadsheet with id invalid_spreadsheet_id does not exist." + " Please ensure the Spreadsheet Link you have set is valid and Spreadsheet exists. If the issue persists, contact support. Requested entity was not found.") assert e.value.args[0] == expected_message @@ -66,8 +66,10 @@ def test_discover_403_error(mocker, invalid_config): with pytest.raises(AirbyteTracedException) as e: source.discover(logger=mocker.MagicMock(), config=invalid_config) - expected_message = ("Forbidden when requesting spreadsheet with id invalid_spreadsheet_id. The caller does not have right permissions. " - "See docs for more details here: https://cloud.google.com/service-infrastructure/docs/service-control/reference/rpc/google.api/servicecontrol.v1#code") + expected_message = ("The authenticated Google Sheets user does not have permissions to view the " + "spreadsheet with id invalid_spreadsheet_id. Please ensure the authenticated user has access" + " to the Spreadsheet and reauthenticate. If the issue persists, contact support. " + "The caller does not have right permissions.") assert e.value.args[0] == expected_message @@ -127,6 +129,21 @@ def test_discover(mocker, invalid_config): assert len(res.streams) == 1 +def test_discover_incorrect_spreadsheet_name(mocker, invalid_config): + sheet1_first_row = ["1", "2", "3", "4"] + data = [ + GridData(rowData=[RowData(values=[CellData(formattedValue=v) for v in sheet1_first_row]) + ])] + sheet = Sheet(properties=SheetProperties(title='sheet1 test test', gridProperties='true', sheetType="GRID"), data=data) + + spreadsheet = Spreadsheet(spreadsheetId='spreadsheet_id', sheets=[sheet]) + source = SourceGoogleSheets() + mocker.patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes=SCOPES: None) + mocker.patch.object(GoogleSheetsClient, "get", return_value=spreadsheet) + res = source.discover(logger=mocker.MagicMock(), config=invalid_config) + assert len(res.streams) == 1 + + def test_discover_could_not_run_discover(mocker, invalid_config): source = SourceGoogleSheets() resp = requests.Response() @@ -137,7 +154,8 @@ def test_discover_could_not_run_discover(mocker, invalid_config): with pytest.raises(Exception) as e: source.discover(logger=mocker.MagicMock(), config=invalid_config) - expected_message = 'Could not run discovery: ' + expected_message = ("Could not discover the schema of your spreadsheet. There was an issue with the Google Sheets API." + " This is usually a temporary issue from Google's side. Please try again. If this issue persists, contact support. Interval Server error.") assert e.value.args[0] == expected_message @@ -180,4 +198,4 @@ def test_read_429_error(mocker, invalid_config, caplog): ) records = list(source.read(logger=logging.getLogger("airbyte"), config=invalid_config, catalog=catalog)) assert [] == records - assert "Stopped syncing process due to rate limits. Request a higher quota limit" in caplog.text + assert "Request a higher quota limit" in caplog.text