From d6964a4cfda60ef7c5f1871b215a9cf222361e7e Mon Sep 17 00:00:00 2001 From: Igor Bernstein Date: Thu, 29 Feb 2024 15:39:21 -0500 Subject: [PATCH] feat: improve batching summary errors (#2509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid data loss, the Batching api throws an exception when the batcher is closed when at least 1 entry failed. To help debugging, the BatchingException tries to be helpful by giving some details about the errors. Since the Batcher lifetime can extend indefinitely, the Batcher implementation tries to keep a bound on the amount of resources it uses to track the errors. Previously it would only track exception types and counts. The idea being that if the customer has the need for fine grained details, the customer can retrieve the details from the ApiFuture that was returned when an entry was added. However this hasn't panned out and creates confusion. This PR stores a sample of the error messages. The sample is by default capped to 50 entry and 50 rpc error messages. This can be adjusted by setting system properties Thank you for opening a Pull Request! For general contributing guidelines, please refer to [contributing guide](https://github.com/googleapis/gapic-generator-java/blob/main/CONTRIBUTING.md) Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/gapic-generator-java/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # ☕️ --- .../google/api/gax/batching/BatcherStats.java | 32 +++++++++++++++++++ .../api/gax/batching/BatcherStatsTest.java | 22 +++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java index 784af6f599..20b0f95954 100644 --- a/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java +++ b/gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java @@ -31,7 +31,9 @@ import com.google.api.gax.rpc.ApiException; import com.google.api.gax.rpc.StatusCode.Code; +import com.google.common.base.Joiner; import com.google.common.base.MoreObjects; +import com.google.common.collect.EvictingQueue; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -52,6 +54,21 @@ class BatcherStats { private final Map entryExceptionCounts = new HashMap<>(); private final Map entryStatusCounts = new HashMap<>(); + /** + * The maximum number of error messages that a Batcher instance will retain. By default, a Batcher + * instance will retain 50 entry error messages and 50 RPC error messages. This limit can be + * temporarily increased by setting the {@code com.google.api.gax.batching.errors.max-samples} + * system property. This should only be needed in very rare situations and should not be + * considered part of the public api. + */ + private final int MAX_ERROR_MSG_SAMPLES = + Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50); + + private final EvictingQueue sampleOfRpcErrors = + EvictingQueue.create(MAX_ERROR_MSG_SAMPLES); + private final EvictingQueue sampleOfEntryErrors = + EvictingQueue.create(MAX_ERROR_MSG_SAMPLES); + /** * Records the count of the exception and it's type when a complete batch failed to apply. * @@ -69,6 +86,8 @@ synchronized void recordBatchFailure(Throwable throwable) { requestStatusCounts.put(code, oldStatusCount + 1); } + sampleOfRpcErrors.add(throwable.toString()); + int oldExceptionCount = MoreObjects.firstNonNull(requestExceptionCounts.get(exceptionClass), 0); requestExceptionCounts.put(exceptionClass, oldExceptionCount + 1); } @@ -96,6 +115,8 @@ synchronized void recordBatchElementsCompletion( Throwable actualCause = throwable.getCause(); Class exceptionClass = actualCause.getClass(); + sampleOfEntryErrors.add(actualCause.toString()); + if (actualCause instanceof ApiException) { Code code = ((ApiException) actualCause).getStatusCode().getCode(); exceptionClass = ApiException.class; @@ -144,6 +165,17 @@ synchronized BatchingException asException() { .append(buildExceptionList(entryExceptionCounts, entryStatusCounts)) .append("."); } + + if (!sampleOfRpcErrors.isEmpty()) { + messageBuilder.append(" Sample of RPC errors: "); + messageBuilder.append(Joiner.on(", ").join(sampleOfRpcErrors)); + messageBuilder.append("."); + } + if (!sampleOfEntryErrors.isEmpty()) { + messageBuilder.append(" Sample of entry errors: "); + messageBuilder.append(Joiner.on(", ").join(sampleOfEntryErrors)); + messageBuilder.append("."); + } return new BatchingException(messageBuilder.toString()); } diff --git a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java index 1a95e4d3cb..1d10917aeb 100644 --- a/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java +++ b/gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java @@ -55,7 +55,10 @@ public void testRequestFailuresOnly() { batcherStats.recordBatchFailure( ApiExceptionFactory.createException( - new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false)); + "fake api error", + new RuntimeException(), + FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), + false)); batcherStats.recordBatchFailure(new RuntimeException("Request failed")); @@ -65,6 +68,10 @@ public void testRequestFailuresOnly() { assertThat(exception).hasMessageThat().contains("1 RuntimeException"); assertThat(exception).hasMessageThat().contains("1 ApiException(1 INVALID_ARGUMENT)"); assertThat(exception).hasMessageThat().contains("and 0 partial failures."); + assertThat(exception) + .hasMessageThat() + .contains( + "com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed."); } @Test @@ -79,7 +86,10 @@ public void testEntryFailureOnly() { SettableApiFuture batchTwoResult = SettableApiFuture.create(); batchTwoResult.setException( ApiExceptionFactory.createException( - new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false)); + "fake entry error", + new RuntimeException(), + FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), + false)); batcherStats.recordBatchElementsCompletion( ImmutableList.of(BatchEntry.create(2, batchTwoResult))); @@ -89,6 +99,10 @@ public void testEntryFailureOnly() { .contains("The 2 partial failures contained 2 entries that failed with:"); assertThat(ex).hasMessageThat().contains("1 ApiException(1 UNAVAILABLE)"); assertThat(ex).hasMessageThat().contains("1 IllegalStateException"); + assertThat(ex) + .hasMessageThat() + .contains( + "Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error."); } @Test @@ -110,6 +124,8 @@ public void testRequestAndEntryFailures() { .contains( "Batching finished with 1 batches failed to apply due to: 1 RuntimeException and 1 " + "partial failures. The 1 partial failures contained 1 entries that failed with:" - + " 1 ApiException(1 ALREADY_EXISTS)."); + + " 1 ApiException(1 ALREADY_EXISTS)." + + " Sample of RPC errors: java.lang.RuntimeException: Batch failure." + + " Sample of entry errors: com.google.api.gax.rpc.AlreadyExistsException: java.lang.RuntimeException."); } }