Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Relax healthcheck threshold to 600 seconds (#6)
It appears that the `SSLEOFError`s are temporary. The latest one lasted 2 minutes: ``` 2024-08-19 06:24:03,030 - uvicorn.access - INFO - 10.244.0.239:55856 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:24:04,924 - uvicorn.access - INFO - 10.244.0.239:55856 - "POST /sign-up HTTP/1.1" 200 2024-08-19 06:24:09,942 - uvicorn.access - INFO - 10.224.0.4:34496 - "GET /health HTTP/1.1" 200 2024-08-19 06:24:17,315 - uvicorn.access - INFO - 10.244.0.239:58018 - "GET /confirm/<redacted> HTTP/1.1" 200 2024-08-19 06:24:18,024 - uvicorn.access - INFO - 10.244.0.239:58034 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:24:19,942 - uvicorn.access - INFO - 10.224.0.4:49800 - "GET /health HTTP/1.1" 200 2024-08-19 06:24:29,941 - uvicorn.access - INFO - 10.224.0.4:58664 - "GET /health HTTP/1.1" 200 2024-08-19 06:24:33,021 - uvicorn.access - INFO - 10.244.0.239:52558 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:24:39,942 - uvicorn.access - INFO - 10.224.0.4:36176 - "GET /health HTTP/1.1" 200 2024-08-19 06:24:48,030 - uvicorn.access - INFO - 10.244.0.239:39646 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:24:49,942 - uvicorn.access - INFO - 10.224.0.4:38100 - "GET /health HTTP/1.1" 200 2024-08-19 06:24:59,942 - uvicorn.access - INFO - 10.224.0.4:42906 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:00,000 - apscheduler.executors.default - INFO - Running job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:26:00 UTC)" (scheduled at 2024-08-19 06:25:00+00:00) 2024-08-19 06:25:00,002 - apscheduler.executors.default - INFO - Running job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:26:00 UTC)" (scheduled at 2024-08-19 06:25:00+00:00) 2024-08-19 06:25:00,017 - root - INFO - clean_up: Deleted 0 expired signup(s). 
2024-08-19 06:25:00,017 - apscheduler.executors.default - INFO - Job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:26:00 UTC)" executed successfully 2024-08-19 06:25:00,303 - apscheduler.executors.default - ERROR - Job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:26:00 UTC)" raised an exception Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/apscheduler/executors/base.py", line 125, in run_job retval = job.func(*job.args, **job.kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/app/main.py", line 265, in commit directory_service.insert_member(mailing_list, email) File "/app/google_admin_sdk_utils.py", line 41, in insert_member self.service.members().insert(groupKey=group_key, body={"email": email}).execute() File "/usr/lib/python3.12/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper return wrapped(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 923, in execute resp, content = _retry_request( ^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 222, in _retry_request raise exception File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 191, in _retry_request resp, content = http.request(uri, method, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/google_auth_httplib2.py", line 209, in request self.credentials.before_request(self._request, method, uri, request_headers) File "/usr/lib/python3.12/site-packages/google/auth/credentials.py", line 228, in before_request self._blocking_refresh(request) File "/usr/lib/python3.12/site-packages/google/auth/credentials.py", line 191, in _blocking_refresh self.refresh(request) File "/usr/lib/python3.12/site-packages/google/oauth2/service_account.py", line 446, in refresh access_token, 
expiry, _ = _client.jwt_grant( ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/google/oauth2/_client.py", line 298, in jwt_grant response_data = _token_endpoint_request( ^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/google/oauth2/_client.py", line 258, in _token_endpoint_request response_status_ok, response_data, retryable_error = _token_endpoint_request_no_throw( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/google/oauth2/_client.py", line 191, in _token_endpoint_request_no_throw response = request( ^^^^^^^^ File "/usr/lib/python3.12/site-packages/google_auth_httplib2.py", line 119, in __call__ response, data = self.http.request( ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1724, in request (response, content) = self._request( ^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1444, in _request (response, content) = self._conn_request(conn, request_uri, method, body, headers) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1367, in _conn_request conn.request(method, request_uri, body, headers) File "/usr/lib/python3.12/http/client.py", line 1336, in request self._send_request(method, url, body, headers, encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1382, in _send_request self.endheaders(body, encode_chunked=encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1331, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1091, in _send_output self.send(msg) File "/usr/lib/python3.12/http/client.py", line 1055, in send self.sock.sendall(data) File "/usr/lib/python3.12/ssl.py", line 1211, in sendall v = self.send(byte_view[count:]) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/ssl.py", line 1180, in send return self._sslobj.write(data) 
^^^^^^^^^^^^^^^^^^^^^^^^ ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:2406) 2024-08-19 06:25:03,022 - uvicorn.access - INFO - 10.244.0.239:46708 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:25:09,942 - uvicorn.access - INFO - 10.224.0.4:46756 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:18,024 - uvicorn.access - INFO - 10.244.0.239:32788 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:25:19,942 - uvicorn.access - INFO - 10.224.0.4:44380 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:29,942 - uvicorn.access - INFO - 10.224.0.4:53422 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:33,038 - uvicorn.access - INFO - 10.244.0.239:50182 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:25:39,942 - uvicorn.access - INFO - 10.224.0.4:58434 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:48,022 - uvicorn.access - INFO - 10.244.0.239:59808 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:25:49,942 - uvicorn.access - INFO - 10.224.0.4:42222 - "GET /health HTTP/1.1" 200 2024-08-19 06:25:59,942 - uvicorn.access - INFO - 10.224.0.4:52178 - "GET /health HTTP/1.1" 200 2024-08-19 06:26:00,000 - apscheduler.executors.default - INFO - Running job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:27:00 UTC)" (scheduled at 2024-08-19 06:26:00+00:00) 2024-08-19 06:26:00,002 - apscheduler.executors.default - INFO - Running job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:27:00 UTC)" (scheduled at 2024-08-19 06:26:00+00:00) 2024-08-19 06:26:00,015 - root - INFO - clean_up: Deleted 0 expired signup(s). 
2024-08-19 06:26:00,015 - apscheduler.executors.default - INFO - Job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:27:00 UTC)" executed successfully 2024-08-19 06:26:00,351 - apscheduler.executors.default - ERROR - Job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:27:00 UTC)" raised an exception Traceback (most recent call last): File "/usr/lib/python3.12/site-packages/apscheduler/executors/base.py", line 125, in run_job retval = job.func(*job.args, **job.kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/app/main.py", line 265, in commit directory_service.insert_member(mailing_list, email) File "/app/google_admin_sdk_utils.py", line 41, in insert_member self.service.members().insert(groupKey=group_key, body={"email": email}).execute() File "/usr/lib/python3.12/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper return wrapped(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 923, in execute resp, content = _retry_request( ^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 222, in _retry_request raise exception File "/usr/lib/python3.12/site-packages/googleapiclient/http.py", line 191, in _retry_request resp, content = http.request(uri, method, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/google_auth_httplib2.py", line 218, in request response, content = self.http.request( ^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1724, in request (response, content) = self._request( ^^^^^^^^^^^^^^ File "/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1444, in _request (response, content) = self._conn_request(conn, request_uri, method, body, headers) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File 
"/usr/lib/python3.12/site-packages/httplib2/__init__.py", line 1367, in _conn_request conn.request(method, request_uri, body, headers) File "/usr/lib/python3.12/http/client.py", line 1336, in request self._send_request(method, url, body, headers, encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1382, in _send_request self.endheaders(body, encode_chunked=encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1331, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/usr/lib/python3.12/http/client.py", line 1091, in _send_output self.send(msg) File "/usr/lib/python3.12/http/client.py", line 1055, in send self.sock.sendall(data) File "/usr/lib/python3.12/ssl.py", line 1211, in sendall v = self.send(byte_view[count:]) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/ssl.py", line 1180, in send return self._sslobj.write(data) ^^^^^^^^^^^^^^^^^^^^^^^^ ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:2406) 2024-08-19 06:26:03,025 - uvicorn.access - INFO - 10.244.0.239:56136 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:26:09,942 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:09,952 - uvicorn.access - INFO - 10.224.0.4:34312 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:18,029 - uvicorn.access - INFO - 10.244.0.239:44078 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:26:19,942 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:19,950 - uvicorn.access - INFO - 10.224.0.4:40384 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:29,942 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:29,950 - uvicorn.access - INFO - 10.224.0.4:51358 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:30,000 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:30,099 - uvicorn.access - INFO - 10.224.0.4:51370 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:39,941 - root - ERROR - Last commit was more than 120 seconds ago. 
2024-08-19 06:26:39,949 - uvicorn.access - INFO - 10.224.0.4:47360 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:49,942 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:49,950 - uvicorn.access - INFO - 10.224.0.4:54824 - "GET /health HTTP/1.1" 500 2024-08-19 06:26:59,941 - root - ERROR - Last commit was more than 120 seconds ago. 2024-08-19 06:26:59,950 - uvicorn.access - INFO - 10.224.0.4:55204 - "GET /health HTTP/1.1" 500 2024-08-19 06:27:00,000 - apscheduler.executors.default - INFO - Running job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:28:00 UTC)" (scheduled at 2024-08-19 06:27:00+00:00) 2024-08-19 06:27:00,001 - apscheduler.executors.default - INFO - Running job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:28:00 UTC)" (scheduled at 2024-08-19 06:27:00+00:00) 2024-08-19 06:27:00,014 - root - INFO - clean_up: Deleted 0 expired signup(s). 2024-08-19 06:27:00,014 - apscheduler.executors.default - INFO - Job "clean_up (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:28:00 UTC)" executed successfully 2024-08-19 06:27:00,805 - root - INFO - commit: Committed 1 confirmed signup(s) to the mailing list. 
2024-08-19 06:27:00,805 - apscheduler.executors.default - INFO - Job "commit (trigger: cron[month='*', day='*', day_of_week='*', hour='*', minute='*'], next run at: 2024-08-19 06:28:00 UTC)" executed successfully 2024-08-19 06:27:09,942 - uvicorn.access - INFO - 10.224.0.4:33450 - "GET /health HTTP/1.1" 200 2024-08-19 06:27:18,025 - uvicorn.access - INFO - 10.244.0.239:40376 - "GET /metrics HTTP/1.1" 200 2024-08-19 06:27:19,942 - uvicorn.access - INFO - 10.224.0.4:42302 - "GET /health HTTP/1.1" 200 2024-08-19 06:27:29,942 - uvicorn.access - INFO - 10.224.0.4:60694 - "GET /health HTTP/1.1" 200 ``` These errors occurred when accessing the Google API after a prolonged period of inactivity (possibly about an hour). This PR relaxes the healthcheck threshold from 2 minutes (120 seconds) to 10 minutes (600 seconds), so that we can tolerate temporary Google API errors without failing the health check.
- Loading branch information