diff --git a/build.gradle b/build.gradle index 97643dfc2e88..3d107f3e5c47 100644 --- a/build.gradle +++ b/build.gradle @@ -2697,6 +2697,7 @@ project(':streams') { ':streams:upgrade-system-tests-36:test', ':streams:upgrade-system-tests-37:test', ':streams:upgrade-system-tests-38:test', + ':streams:upgrade-system-tests-39:test', ':streams:examples:test' ] ) @@ -3244,6 +3245,21 @@ project(':streams:upgrade-system-tests-38') { } } +project(':streams:upgrade-system-tests-39') { + base { + archivesName = "kafka-streams-upgrade-system-tests-39" + } + + dependencies { + testImplementation libs.kafkaStreams_39 + testRuntimeOnly libs.junitJupiter + } + + systemTestLibs { + dependsOn testJar + } +} + project(':jmh-benchmarks') { apply plugin: 'io.github.goooler.shadow' diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index 458738185355..0bfed4f1df4c 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -90,7 +90,7 @@ files="ClientUtils.java"/> + files="(KafkaConsumer|ConsumerCoordinator|AbstractFetch|KafkaProducer|AbstractRequest|AbstractResponse|TransactionManager|Admin|KafkaAdminClient|MockAdminClient|KafkaRaftClient|KafkaRaftClientTest|KafkaNetworkChannelTest).java"/> diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractMembershipManager.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractMembershipManager.java index 82b4e567d344..c6aa70d805e0 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractMembershipManager.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractMembershipManager.java @@ -851,6 +851,10 @@ void maybeReconcile() { revokedPartitions ); + // Mark partitions as pending revocation to stop fetching from the partitions (no new + // fetches sent out, and no in-flight fetch responses processed). + markPendingRevocationToPauseFetching(revokedPartitions); + // Commit offsets if auto-commit enabled before reconciling a new assignment. Request will // be retried until it succeeds, fails with non-retriable error, or timer expires.
CompletableFuture commitResult; diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumer.java index fcf688b7f8e9..da3f3f2f25b4 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumer.java @@ -76,10 +76,12 @@ import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.Uuid; import org.apache.kafka.common.errors.AuthenticationException; +import org.apache.kafka.common.errors.GroupAuthorizationException; import org.apache.kafka.common.errors.InterruptException; import org.apache.kafka.common.errors.InvalidGroupIdException; import org.apache.kafka.common.errors.InvalidTopicException; import org.apache.kafka.common.errors.TimeoutException; +import org.apache.kafka.common.errors.TopicAuthorizationException; import org.apache.kafka.common.internals.ClusterResourceListeners; import org.apache.kafka.common.metrics.KafkaMetric; import org.apache.kafka.common.metrics.Metrics; @@ -1571,10 +1573,12 @@ public void unsubscribe() { subscriptions.assignedPartitions()); try { - // If users subscribe to an invalid topic name, they will get InvalidTopicException in error events, - // because network thread keeps trying to send MetadataRequest in the background. - // Ignore it to avoid unsubscribe failed. - processBackgroundEvents(unsubscribeEvent.future(), timer, e -> e instanceof InvalidTopicException); + // If users subscribe to a topic with an invalid name or without sufficient permissions, exceptions are raised. + // Because the network thread keeps trying to send MetadataRequest or ConsumerGroupHeartbeatRequest in the background, + // these exceptions surface as error events in the background queue. + // They must be ignored while unsubscribing, otherwise unsubscribe can never complete.
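As an aside on the filtering above: a minimal, self-contained sketch of the ignore-list as a standalone predicate (the class and constant names below are illustrative, not part of AsyncKafkaConsumer), useful for seeing which background errors are swallowed during unsubscribe and which still propagate:

import org.apache.kafka.common.errors.GroupAuthorizationException;
import org.apache.kafka.common.errors.InvalidTopicException;
import org.apache.kafka.common.errors.TopicAuthorizationException;

import java.util.Set;
import java.util.function.Predicate;

public class UnsubscribeErrorFilterSketch {
    // Errors the background thread may keep emitting while an unsubscribe is in flight;
    // they must not fail the unsubscribe future.
    static final Predicate<Throwable> IGNORABLE_DURING_UNSUBSCRIBE =
        e -> e instanceof InvalidTopicException
            || e instanceof TopicAuthorizationException
            || e instanceof GroupAuthorizationException;

    public static void main(String[] args) {
        // Swallowed: the consumer can still unsubscribe despite the unauthorized topic.
        System.out.println(IGNORABLE_DURING_UNSUBSCRIBE.test(new TopicAuthorizationException(Set.of("test-topic"))));
        // Not swallowed: unrelated failures still propagate to the caller.
        System.out.println(IGNORABLE_DURING_UNSUBSCRIBE.test(new IllegalStateException("unrelated")));
    }
}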
+ processBackgroundEvents(unsubscribeEvent.future(), timer, + e -> e instanceof InvalidTopicException || e instanceof TopicAuthorizationException || e instanceof GroupAuthorizationException); log.info("Unsubscribed all topics or patterns and assigned partitions"); } catch (TimeoutException e) { log.error("Failed while waiting for the unsubscribe event to complete"); diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumerTest.java index eaf974b91c43..8eb8ec4c85bd 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AsyncKafkaConsumerTest.java @@ -63,6 +63,7 @@ import org.apache.kafka.common.errors.InvalidTopicException; import org.apache.kafka.common.errors.RetriableException; import org.apache.kafka.common.errors.TimeoutException; +import org.apache.kafka.common.errors.TopicAuthorizationException; import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.protocol.Errors; @@ -282,6 +283,23 @@ public void testCloseWithInvalidTopicException() { assertDoesNotThrow(() -> consumer.close()); } + @Test + public void testUnsubscribeWithTopicAuthorizationException() { + consumer = newConsumer(); + backgroundEventQueue.add(new ErrorEvent(new TopicAuthorizationException(Set.of("test-topic")))); + completeUnsubscribeApplicationEventSuccessfully(); + assertDoesNotThrow(() -> consumer.unsubscribe()); + assertDoesNotThrow(() -> consumer.close()); + } + + @Test + public void testCloseWithTopicAuthorizationException() { + consumer = newConsumer(); + backgroundEventQueue.add(new ErrorEvent(new TopicAuthorizationException(Set.of("test-topic")))); + completeUnsubscribeApplicationEventSuccessfully(); + assertDoesNotThrow(() -> consumer.close()); + } + @Test public void testCommitAsyncWithNullCallback() { consumer = newConsumer(); diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerMembershipManagerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerMembershipManagerTest.java index 7faf4cc55c33..9d09aee2697c 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerMembershipManagerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerMembershipManagerTest.java @@ -45,6 +45,7 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.InOrder; import java.util.ArrayList; import java.util.Arrays; @@ -86,6 +87,7 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.clearInvocations; import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; @@ -1433,6 +1435,7 @@ public void testReconcilePartitionsRevokedNoAutoCommitNoCallbacks() { membershipManager.poll(time.milliseconds()); testRevocationOfAllPartitionsCompleted(membershipManager); + verify(subscriptionState, times(2)).markPendingRevocation(Set.of(new TopicPartition("topic1", 0))); } @Test @@ -1456,6 +1459,10 @@ public void testReconcilePartitionsRevokedWithSuccessfulAutoCommitNoCallbacks() // 
Complete commit request commitResult.complete(null); + InOrder inOrder = inOrder(subscriptionState, commitRequestManager); + inOrder.verify(subscriptionState).markPendingRevocation(Set.of(new TopicPartition("topic1", 0))); + inOrder.verify(commitRequestManager).maybeAutoCommitSyncBeforeRevocation(anyLong()); + inOrder.verify(subscriptionState).markPendingRevocation(Set.of(new TopicPartition("topic1", 0))); testRevocationOfAllPartitionsCompleted(membershipManager); } @@ -1480,6 +1487,7 @@ public void testReconcilePartitionsRevokedWithFailedAutoCommitCompletesRevocatio // Complete commit request commitResult.completeExceptionally(new KafkaException("Commit request failed with " + "non-retriable error")); + verify(subscriptionState, times(2)).markPendingRevocation(Set.of(new TopicPartition("topic1", 0))); testRevocationOfAllPartitionsCompleted(membershipManager); } @@ -1579,11 +1587,11 @@ public void testRevokePartitionsUsesTopicNamesLocalCacheWhenMetadataNotAvailable mockOwnedPartitionAndAssignmentReceived(membershipManager, topicId, topicName, Collections.emptyList()); // Member received assignment to reconcile; - receiveAssignment(topicId, Arrays.asList(0, 1), membershipManager); verifyReconciliationNotTriggered(membershipManager); membershipManager.poll(time.milliseconds()); + verify(subscriptionState).markPendingRevocation(Set.of()); // Member should complete reconciliation assertEquals(MemberState.ACKNOWLEDGING, membershipManager.state()); @@ -1607,6 +1615,7 @@ public void testRevokePartitionsUsesTopicNamesLocalCacheWhenMetadataNotAvailable receiveAssignment(topicId, Collections.singletonList(1), membershipManager); membershipManager.poll(time.milliseconds()); + verify(subscriptionState, times(2)).markPendingRevocation(Set.of(new TopicPartition(topicName, 0))); // Revocation should complete without requesting any metadata update given that the topic // received in target assignment should exist in local topic name cache. 
@@ -2551,7 +2560,6 @@ private void testRevocationCompleted(ConsumerMembershipManager membershipManager assertEquals(assignmentByTopicId, membershipManager.currentAssignment().partitions); assertFalse(membershipManager.reconciliationInProgress()); - verify(subscriptionState).markPendingRevocation(anySet()); List expectedTopicPartitionAssignment = buildTopicPartitions(expectedCurrentAssignment); HashSet expectedSet = new HashSet<>(expectedTopicPartitionAssignment); diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ShareMembershipManagerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ShareMembershipManagerTest.java index 36e482507141..7c4c5684bcce 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ShareMembershipManagerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ShareMembershipManagerTest.java @@ -1100,6 +1100,7 @@ public void testRevokePartitionsUsesTopicNamesLocalCacheWhenMetadataNotAvailable verifyReconciliationNotTriggered(membershipManager); membershipManager.poll(time.milliseconds()); + verify(subscriptionState).markPendingRevocation(Set.of()); // Member should complete reconciliation assertEquals(MemberState.ACKNOWLEDGING, membershipManager.state()); @@ -1123,6 +1124,7 @@ public void testRevokePartitionsUsesTopicNamesLocalCacheWhenMetadataNotAvailable receiveAssignment(topicId, Collections.singletonList(1), membershipManager); membershipManager.poll(time.milliseconds()); + verify(subscriptionState, times(2)).markPendingRevocation(Set.of(new TopicPartition(topicName, 0))); // Revocation should complete without requesting any metadata update given that the topic // received in target assignment should exist in local topic name cache. 
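For context on the markPendingRevocation verifications above: the intent is that a partition marked as pending revocation stops being fetchable until revocation completes, so no new fetches go out for it and in-flight responses are dropped. A toy model of that state transition (a hypothetical class, not the real SubscriptionState):

import org.apache.kafka.common.TopicPartition;

import java.util.HashSet;
import java.util.Set;

class PendingRevocationModel {
    private final Set<TopicPartition> assigned = new HashSet<>();
    private final Set<TopicPartition> pendingRevocation = new HashSet<>();

    void assign(TopicPartition tp) {
        assigned.add(tp);
    }

    // Mirrors the idea behind markPendingRevocation: the partition stays assigned but is no longer fetchable.
    void markPendingRevocation(Set<TopicPartition> partitions) {
        pendingRevocation.addAll(partitions);
    }

    boolean isFetchable(TopicPartition tp) {
        return assigned.contains(tp) && !pendingRevocation.contains(tp);
    }

    public static void main(String[] args) {
        PendingRevocationModel model = new PendingRevocationModel();
        TopicPartition tp = new TopicPartition("topic1", 0);
        model.assign(tp);
        System.out.println(model.isFetchable(tp)); // true
        model.markPendingRevocation(Set.of(tp));
        System.out.println(model.isFetchable(tp)); // false: fetching pauses before the commit and callbacks run
    }
}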
@@ -1423,7 +1425,6 @@ private void testRevocationCompleted(ShareMembershipManager membershipManager, assertEquals(assignmentByTopicId, membershipManager.currentAssignment().partitions); assertFalse(membershipManager.reconciliationInProgress()); - verify(subscriptionState).markPendingRevocation(anySet()); List expectedTopicPartitionAssignment = buildTopicPartitions(expectedCurrentAssignment); HashSet expectedSet = new HashSet<>(expectedTopicPartitionAssignment); diff --git a/core/src/main/java/kafka/server/share/DelayedShareFetch.java b/core/src/main/java/kafka/server/share/DelayedShareFetch.java index 4cb9ce0cf424..9f1ad3ef651b 100644 --- a/core/src/main/java/kafka/server/share/DelayedShareFetch.java +++ b/core/src/main/java/kafka/server/share/DelayedShareFetch.java @@ -25,8 +25,9 @@ import org.apache.kafka.common.protocol.Errors; import org.apache.kafka.common.requests.FetchRequest; import org.apache.kafka.server.purgatory.DelayedOperation; +import org.apache.kafka.server.share.SharePartitionKey; import org.apache.kafka.server.share.fetch.DelayedShareFetchGroupKey; -import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.storage.log.FetchIsolation; import org.apache.kafka.server.storage.log.FetchPartitionData; import org.apache.kafka.storage.internals.log.LogOffsetMetadata; @@ -36,11 +37,11 @@ import org.slf4j.LoggerFactory; import java.util.Collections; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.locks.Lock; import java.util.stream.Collectors; import scala.Tuple2; @@ -55,23 +56,22 @@ public class DelayedShareFetch extends DelayedOperation { private static final Logger log = LoggerFactory.getLogger(DelayedShareFetch.class); - private final ShareFetchData shareFetchData; + private final ShareFetch shareFetch; private final ReplicaManager replicaManager; - - private Map partitionsAcquired; - private Map partitionsAlreadyFetched; private final SharePartitionManager sharePartitionManager; // The topic partitions that need to be completed for the share fetch request are given by sharePartitions. // sharePartitions is a subset of shareFetchData. The order of insertion/deletion of entries in sharePartitions is important. private final LinkedHashMap sharePartitions; + private LinkedHashMap partitionsAcquired; + private LinkedHashMap partitionsAlreadyFetched; DelayedShareFetch( - ShareFetchData shareFetchData, + ShareFetch shareFetch, ReplicaManager replicaManager, SharePartitionManager sharePartitionManager, LinkedHashMap sharePartitions) { - super(shareFetchData.fetchParams().maxWaitMs, Optional.empty()); - this.shareFetchData = shareFetchData; + super(shareFetch.fetchParams().maxWaitMs, Optional.empty()); + this.shareFetch = shareFetch; this.replicaManager = replicaManager; this.partitionsAcquired = new LinkedHashMap<>(); this.partitionsAlreadyFetched = new LinkedHashMap<>(); @@ -90,31 +90,39 @@ public void onExpiration() { */ @Override public void onComplete() { + // We are utilizing lock so that onComplete doesn't do a dirty read for global variables - + // partitionsAcquired and partitionsAlreadyFetched, since these variables can get updated in a different tryComplete thread. 
+ lock.lock(); log.trace("Completing the delayed share fetch request for group {}, member {}, " - + "topic partitions {}", shareFetchData.groupId(), shareFetchData.memberId(), + + "topic partitions {}", shareFetch.groupId(), shareFetch.memberId(), partitionsAcquired.keySet()); - if (shareFetchData.future().isDone()) - return; + try { + LinkedHashMap topicPartitionData; + // tryComplete did not invoke forceComplete, so we need to check if we have any partitions to fetch. + if (partitionsAcquired.isEmpty()) + topicPartitionData = acquirablePartitions(); + // tryComplete invoked forceComplete, so we can use the data from tryComplete. + else + topicPartitionData = partitionsAcquired; - Map topicPartitionData; - // tryComplete did not invoke forceComplete, so we need to check if we have any partitions to fetch. - if (partitionsAcquired.isEmpty()) - topicPartitionData = acquirablePartitions(); - // tryComplete invoked forceComplete, so we can use the data from tryComplete. - else - topicPartitionData = partitionsAcquired; + if (topicPartitionData.isEmpty()) { + // No locks for share partitions could be acquired, so we complete the request with an empty response. + shareFetch.maybeComplete(Collections.emptyMap()); + return; + } + log.trace("Fetchable share partitions data: {} with groupId: {} fetch params: {}", + topicPartitionData, shareFetch.groupId(), shareFetch.fetchParams()); - if (topicPartitionData.isEmpty()) { - // No locks for share partitions could be acquired, so we complete the request with an empty response. - shareFetchData.future().complete(Collections.emptyMap()); - return; + completeShareFetchRequest(topicPartitionData); + } finally { + lock.unlock(); } - log.trace("Fetchable share partitions data: {} with groupId: {} fetch params: {}", - topicPartitionData, shareFetchData.groupId(), shareFetchData.fetchParams()); + } + private void completeShareFetchRequest(LinkedHashMap topicPartitionData) { try { - Map responseData; + LinkedHashMap responseData; if (partitionsAlreadyFetched.isEmpty()) responseData = readFromLog(topicPartitionData); else @@ -122,15 +130,15 @@ public void onComplete() { // updated in a different tryComplete thread. responseData = combineLogReadResponse(topicPartitionData, partitionsAlreadyFetched); - Map fetchPartitionsData = new LinkedHashMap<>(); + LinkedHashMap fetchPartitionsData = new LinkedHashMap<>(); for (Map.Entry entry : responseData.entrySet()) fetchPartitionsData.put(entry.getKey(), entry.getValue().toFetchPartitionData(false)); - shareFetchData.future().complete(ShareFetchUtils.processFetchResponse(shareFetchData, fetchPartitionsData, + shareFetch.maybeComplete(ShareFetchUtils.processFetchResponse(shareFetch, fetchPartitionsData, sharePartitions, replicaManager)); } catch (Exception e) { log.error("Error processing delayed share fetch request", e); - sharePartitionManager.handleFetchException(shareFetchData.groupId(), topicPartitionData.keySet(), shareFetchData.future(), e); + handleFetchException(shareFetch, topicPartitionData.keySet(), e); } finally { // Releasing the lock to move ahead with the next request in queue. 
releasePartitionLocks(topicPartitionData.keySet()); @@ -140,7 +148,7 @@ public void onComplete() { // we directly call delayedShareFetchPurgatory.checkAndComplete replicaManager.addToActionQueue(() -> topicPartitionData.keySet().forEach(topicIdPartition -> replicaManager.completeDelayedShareFetchRequest( - new DelayedShareFetchGroupKey(shareFetchData.groupId(), topicIdPartition.topicId(), topicIdPartition.partition())))); + new DelayedShareFetchGroupKey(shareFetch.groupId(), topicIdPartition.topicId(), topicIdPartition.partition())))); } } @@ -149,14 +157,14 @@ public void onComplete() { */ @Override public boolean tryComplete() { - Map topicPartitionData = acquirablePartitions(); + LinkedHashMap topicPartitionData = acquirablePartitions(); try { if (!topicPartitionData.isEmpty()) { // In case, fetch offset metadata doesn't exist for one or more topic partitions, we do a // replicaManager.readFromLog to populate the offset metadata and update the fetch offset metadata for // those topic partitions. - Map replicaManagerReadResponse = maybeReadFromLog(topicPartitionData); + LinkedHashMap replicaManagerReadResponse = maybeReadFromLog(topicPartitionData); maybeUpdateFetchOffsetMetadata(replicaManagerReadResponse); if (anyPartitionHasLogReadError(replicaManagerReadResponse) || isMinBytesSatisfied(topicPartitionData)) { partitionsAcquired = topicPartitionData; @@ -170,13 +178,13 @@ public boolean tryComplete() { return completedByMe; } else { log.debug("minBytes is not satisfied for the share fetch request for group {}, member {}, " + - "topic partitions {}", shareFetchData.groupId(), shareFetchData.memberId(), + "topic partitions {}", shareFetch.groupId(), shareFetch.memberId(), sharePartitions.keySet()); releasePartitionLocks(topicPartitionData.keySet()); } } else { log.trace("Can't acquire records for any partition in the share fetch request for group {}, member {}, " + - "topic partitions {}", shareFetchData.groupId(), shareFetchData.memberId(), + "topic partitions {}", shareFetch.groupId(), shareFetch.memberId(), sharePartitions.keySet()); } return false; @@ -193,12 +201,12 @@ public boolean tryComplete() { * Prepare fetch request structure for partitions in the share fetch request for which we can acquire records. */ // Visible for testing - Map acquirablePartitions() { + LinkedHashMap acquirablePartitions() { // Initialize the topic partitions for which the fetch should be attempted. - Map topicPartitionData = new LinkedHashMap<>(); + LinkedHashMap topicPartitionData = new LinkedHashMap<>(); sharePartitions.forEach((topicIdPartition, sharePartition) -> { - int partitionMaxBytes = shareFetchData.partitionMaxBytes().getOrDefault(topicIdPartition, 0); + int partitionMaxBytes = shareFetch.partitionMaxBytes().getOrDefault(topicIdPartition, 0); // Add the share partition to the list of partitions to be fetched only if we can // acquire the fetch lock on it. 
if (sharePartition.maybeAcquireFetchLock()) { @@ -230,8 +238,8 @@ Map acquirablePartitions() { return topicPartitionData; } - private Map maybeReadFromLog(Map topicPartitionData) { - Map partitionsMissingFetchOffsetMetadata = new LinkedHashMap<>(); + private LinkedHashMap maybeReadFromLog(LinkedHashMap topicPartitionData) { + LinkedHashMap partitionsMissingFetchOffsetMetadata = new LinkedHashMap<>(); topicPartitionData.forEach((topicIdPartition, partitionData) -> { SharePartition sharePartition = sharePartitions.get(topicIdPartition); if (sharePartition.fetchOffsetMetadata().isEmpty()) { @@ -239,14 +247,14 @@ private Map maybeReadFromLog(Map(); } // We fetch data from replica manager corresponding to the topic partitions that have missing fetch offset metadata. return readFromLog(partitionsMissingFetchOffsetMetadata); } private void maybeUpdateFetchOffsetMetadata( - Map replicaManagerReadResponseData) { + LinkedHashMap replicaManagerReadResponseData) { for (Map.Entry entry : replicaManagerReadResponseData.entrySet()) { TopicIdPartition topicIdPartition = entry.getKey(); SharePartition sharePartition = sharePartitions.get(topicIdPartition); @@ -261,12 +269,21 @@ private void maybeUpdateFetchOffsetMetadata( } // minByes estimation currently assumes the common case where all fetched data is acquirable. - private boolean isMinBytesSatisfied(Map topicPartitionData) { + private boolean isMinBytesSatisfied(LinkedHashMap topicPartitionData) { long accumulatedSize = 0; for (Map.Entry entry : topicPartitionData.entrySet()) { TopicIdPartition topicIdPartition = entry.getKey(); FetchRequest.PartitionData partitionData = entry.getValue(); - LogOffsetMetadata endOffsetMetadata = endOffsetMetadataForTopicPartition(topicIdPartition); + + LogOffsetMetadata endOffsetMetadata; + try { + endOffsetMetadata = endOffsetMetadataForTopicPartition(topicIdPartition); + } catch (Exception e) { + shareFetch.addErroneous(topicIdPartition, e); + sharePartitionManager.handleFencedSharePartitionException( + new SharePartitionKey(shareFetch.groupId(), topicIdPartition), e); + continue; + } if (endOffsetMetadata == LogOffsetMetadata.UNKNOWN_OFFSET_METADATA) continue; @@ -280,14 +297,14 @@ private boolean isMinBytesSatisfied(Map endOffsetMetadata.messageOffset) { log.debug("Satisfying delayed share fetch request for group {}, member {} since it is fetching later segments of " + - "topicIdPartition {}", shareFetchData.groupId(), shareFetchData.memberId(), topicIdPartition); + "topicIdPartition {}", shareFetch.groupId(), shareFetch.memberId(), topicIdPartition); return true; } else if (fetchOffsetMetadata.messageOffset < endOffsetMetadata.messageOffset) { if (fetchOffsetMetadata.onOlderSegment(endOffsetMetadata)) { // This can happen when the fetch operation is falling behind the current segment or the partition // has just rolled a new segment. log.debug("Satisfying delayed share fetch request for group {}, member {} immediately since it is fetching older " + - "segments of topicIdPartition {}", shareFetchData.groupId(), shareFetchData.memberId(), topicIdPartition); + "segments of topicIdPartition {}", shareFetch.groupId(), shareFetch.memberId(), topicIdPartition); return true; } else if (fetchOffsetMetadata.onSameSegment(endOffsetMetadata)) { // we take the partition fetch size as upper bound when accumulating the bytes. 
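The repeated Map-to-LinkedHashMap changes in DelayedShareFetch are not cosmetic: as the class comment notes, the insertion order of sharePartitions is significant, and LinkedHashMap preserves that order across the fetch pipeline. A small demonstration of the ordering guarantee being relied on:

import java.util.LinkedHashMap;
import java.util.Map;

public class InsertionOrderDemo {
    public static void main(String[] args) {
        // LinkedHashMap iterates in insertion order, so partitions are processed in the
        // order they were added, which a plain HashMap does not guarantee.
        Map<String, Integer> partitionMaxBytes = new LinkedHashMap<>();
        partitionMaxBytes.put("topic1-0", 1024);
        partitionMaxBytes.put("topic1-1", 1024);
        partitionMaxBytes.put("topic2-0", 1024);
        partitionMaxBytes.keySet().forEach(System.out::println); // topic1-0, topic1-1, topic2-0
    }
}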
@@ -296,15 +313,15 @@ private boolean isMinBytesSatisfied(Map= shareFetchData.fetchParams().minBytes; + return accumulatedSize >= shareFetch.fetchParams().minBytes; } private LogOffsetMetadata endOffsetMetadataForTopicPartition(TopicIdPartition topicIdPartition) { - Partition partition = replicaManager.getPartitionOrException(topicIdPartition.topicPartition()); + Partition partition = ShareFetchUtils.partition(replicaManager, topicIdPartition.topicPartition()); LogOffsetSnapshot offsetSnapshot = partition.fetchOffsetSnapshot(Optional.empty(), true); // The FetchIsolation type that we use for share fetch is FetchIsolation.HIGH_WATERMARK. In the future, we can // extend it to support other FetchIsolation types. - FetchIsolation isolationType = shareFetchData.fetchParams().isolation; + FetchIsolation isolationType = shareFetch.fetchParams().isolation; if (isolationType == FetchIsolation.LOG_END) return offsetSnapshot.logEndOffset; else if (isolationType == FetchIsolation.HIGH_WATERMARK) @@ -314,17 +331,23 @@ else if (isolationType == FetchIsolation.HIGH_WATERMARK) } - private Map readFromLog(Map topicPartitionData) { + private LinkedHashMap readFromLog(LinkedHashMap topicPartitionData) { + // Filter if there already exists any erroneous topic partition. + Set partitionsToFetch = shareFetch.filterErroneousTopicPartitions(topicPartitionData.keySet()); + if (partitionsToFetch.isEmpty()) { + return new LinkedHashMap<>(); + } + Seq> responseLogResult = replicaManager.readFromLog( - shareFetchData.fetchParams(), + shareFetch.fetchParams(), CollectionConverters.asScala( - topicPartitionData.entrySet().stream().map(entry -> - new Tuple2<>(entry.getKey(), entry.getValue())).collect(Collectors.toList()) + partitionsToFetch.stream().map(topicIdPartition -> + new Tuple2<>(topicIdPartition, topicPartitionData.get(topicIdPartition))).collect(Collectors.toList()) ), QuotaFactory.UNBOUNDED_QUOTA, true); - Map responseData = new HashMap<>(); + LinkedHashMap responseData = new LinkedHashMap<>(); responseLogResult.foreach(tpLogResult -> { responseData.put(tpLogResult._1(), tpLogResult._2()); return BoxedUnit.UNIT; @@ -334,15 +357,38 @@ private Map readFromLog(Map replicaManagerReadResponse) { + private boolean anyPartitionHasLogReadError(LinkedHashMap replicaManagerReadResponse) { return replicaManagerReadResponse.values().stream() .anyMatch(logReadResult -> logReadResult.error().code() != Errors.NONE.code()); } + /** + * The handleFetchException method is used to handle the exception that occurred while reading from log. + * The method will handle the exception for each topic-partition in the request. The share partition + * might get removed from the cache. + *

+ * The replica read request might error out for one share partition + * but as we cannot determine which share partition errored out, we might remove all the share partitions + * in the request. + * + * @param shareFetch The share fetch request. + * @param topicIdPartitions The topic-partitions in the replica read request. + * @param throwable The exception that occurred while fetching messages. + */ + private void handleFetchException( + ShareFetch shareFetch, + Set topicIdPartitions, + Throwable throwable + ) { + topicIdPartitions.forEach(topicIdPartition -> sharePartitionManager.handleFencedSharePartitionException( + new SharePartitionKey(shareFetch.groupId(), topicIdPartition), throwable)); + shareFetch.maybeCompleteWithException(topicIdPartitions, throwable); + } + // Visible for testing. - Map combineLogReadResponse(Map topicPartitionData, - Map existingFetchedData) { - Map missingLogReadTopicPartitions = new LinkedHashMap<>(); + LinkedHashMap combineLogReadResponse(LinkedHashMap topicPartitionData, + LinkedHashMap existingFetchedData) { + LinkedHashMap missingLogReadTopicPartitions = new LinkedHashMap<>(); topicPartitionData.forEach((topicIdPartition, partitionData) -> { if (!existingFetchedData.containsKey(topicIdPartition)) { missingLogReadTopicPartitions.put(topicIdPartition, partitionData); @@ -351,7 +397,7 @@ Map combineLogReadResponse(Map missingTopicPartitionsLogReadResponse = readFromLog(missingLogReadTopicPartitions); + LinkedHashMap missingTopicPartitionsLogReadResponse = readFromLog(missingLogReadTopicPartitions); missingTopicPartitionsLogReadResponse.putAll(existingFetchedData); return missingTopicPartitionsLogReadResponse; } @@ -363,4 +409,9 @@ void releasePartitionLocks(Set topicIdPartitions) { sharePartition.releaseFetchLock(); }); } + + // Visible for testing. + Lock lock() { + return lock; + } } diff --git a/core/src/main/java/kafka/server/share/ShareFetchUtils.java b/core/src/main/java/kafka/server/share/ShareFetchUtils.java index 3515362152b0..e3608128eb5f 100644 --- a/core/src/main/java/kafka/server/share/ShareFetchUtils.java +++ b/core/src/main/java/kafka/server/share/ShareFetchUtils.java @@ -29,7 +29,7 @@ import org.apache.kafka.common.record.FileRecords; import org.apache.kafka.common.requests.ListOffsetsRequest; import org.apache.kafka.server.share.fetch.ShareAcquiredRecords; -import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.storage.log.FetchPartitionData; import org.slf4j.Logger; @@ -55,7 +55,7 @@ public class ShareFetchUtils { * by acquiring records from the share partition. */ static Map processFetchResponse( - ShareFetchData shareFetchData, + ShareFetch shareFetch, Map responseData, LinkedHashMap sharePartitions, ReplicaManager replicaManager @@ -91,7 +91,7 @@ static Map processFetchR partitionData.setErrorMessage(Errors.NONE.message()); } } else { - ShareAcquiredRecords shareAcquiredRecords = sharePartition.acquire(shareFetchData.memberId(), shareFetchData.maxFetchRecords() - acquiredRecordsCount, fetchPartitionData); + ShareAcquiredRecords shareAcquiredRecords = sharePartition.acquire(shareFetch.memberId(), shareFetch.maxFetchRecords() - acquiredRecordsCount, fetchPartitionData); log.trace("Acquired records: {} for topicIdPartition: {}", shareAcquiredRecords, topicIdPartition); // Maybe, in the future, check if no records are acquired, and we want to retry // replica manager fetch. 
Depends on the share partition manager implementation, @@ -151,11 +151,15 @@ static long offsetForLatestTimestamp(TopicIdPartition topicIdPartition, ReplicaM } static int leaderEpoch(ReplicaManager replicaManager, TopicPartition tp) { + return partition(replicaManager, tp).getLeaderEpoch(); + } + + static Partition partition(ReplicaManager replicaManager, TopicPartition tp) { Partition partition = replicaManager.getPartitionOrException(tp); if (!partition.isLeader()) { log.debug("The broker is not the leader for topic partition: {}-{}", tp.topic(), tp.partition()); throw new NotLeaderOrFollowerException(); } - return partition.getLeaderEpoch(); + return partition; } } diff --git a/core/src/main/java/kafka/server/share/SharePartition.java b/core/src/main/java/kafka/server/share/SharePartition.java index 71baea101744..632cb1e31691 100644 --- a/core/src/main/java/kafka/server/share/SharePartition.java +++ b/core/src/main/java/kafka/server/share/SharePartition.java @@ -1082,8 +1082,8 @@ boolean canAcquireRecords() { /** * Prior to fetching records from the leader, the fetch lock is acquired to ensure that the same - * share partition does not enter a fetch queue while another one is being fetched within the queue. - * The fetch lock is released once the records are fetched from the leader. + * share partition is not fetched concurrently by multiple clients. The fetch lock is released once + * the records are fetched and acquired. * * @return A boolean which indicates whether the fetch lock is acquired. */ diff --git a/core/src/main/java/kafka/server/share/SharePartitionManager.java b/core/src/main/java/kafka/server/share/SharePartitionManager.java index 4288dd55703d..1c6c5492372f 100644 --- a/core/src/main/java/kafka/server/share/SharePartitionManager.java +++ b/core/src/main/java/kafka/server/share/SharePartitionManager.java @@ -49,7 +49,7 @@ import org.apache.kafka.server.share.fetch.DelayedShareFetchGroupKey; import org.apache.kafka.server.share.fetch.DelayedShareFetchKey; import org.apache.kafka.server.share.fetch.DelayedShareFetchPartitionKey; -import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.share.persister.Persister; import org.apache.kafka.server.share.session.ShareSession; import org.apache.kafka.server.share.session.ShareSessionCache; @@ -71,10 +71,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; /** * The SharePartitionManager is responsible for managing the SharePartitions and ShareSessions. @@ -250,7 +248,7 @@ public CompletableFuture> fetchMessages( partitionMaxBytes.keySet(), groupId, fetchParams); CompletableFuture> future = new CompletableFuture<>(); - processShareFetch(new ShareFetchData(fetchParams, groupId, memberId, future, partitionMaxBytes, maxFetchRecords)); + processShareFetch(new ShareFetch(fetchParams, groupId, memberId, future, partitionMaxBytes, maxFetchRecords)); return future; } @@ -498,30 +496,6 @@ public void acknowledgeSessionUpdate(String groupId, ShareRequestMetadata reqMet } } - /** - * The handleFetchException method is used to handle the exception that occurred while reading from log. - * The method will handle the exception for each topic-partition in the request. The share partition - * might get removed from the cache. - *

- * The replica read request might error out for one share partition - * but as we cannot determine which share partition errored out, we might remove all the share partitions - * in the request. - * - * @param groupId The group id in the share fetch request. - * @param topicIdPartitions The topic-partitions in the replica read request. - * @param future The future to complete with the exception. - * @param throwable The exception that occurred while fetching messages. - */ - public void handleFetchException( - String groupId, - Set topicIdPartitions, - CompletableFuture> future, - Throwable throwable - ) { - topicIdPartitions.forEach(topicIdPartition -> handleFencedSharePartitionException(sharePartitionKey(groupId, topicIdPartition), throwable)); - maybeCompleteShareFetchWithException(future, topicIdPartitions, throwable); - } - /** * The cachedTopicIdPartitionsInShareSession method is used to get the cached topic-partitions in the share session. * @@ -564,20 +538,18 @@ private static String partitionsToLogString(Collection partiti } // Visible for testing. - void processShareFetch(ShareFetchData shareFetchData) { - if (shareFetchData.partitionMaxBytes().isEmpty()) { + void processShareFetch(ShareFetch shareFetch) { + if (shareFetch.partitionMaxBytes().isEmpty()) { // If there are no partitions to fetch then complete the future with an empty map. - shareFetchData.future().complete(Collections.emptyMap()); + shareFetch.maybeComplete(Collections.emptyMap()); return; } - // Initialize lazily, if required. - Map erroneous = null; List delayedShareFetchWatchKeys = new ArrayList<>(); LinkedHashMap sharePartitions = new LinkedHashMap<>(); - for (TopicIdPartition topicIdPartition : shareFetchData.partitionMaxBytes().keySet()) { + for (TopicIdPartition topicIdPartition : shareFetch.partitionMaxBytes().keySet()) { SharePartitionKey sharePartitionKey = sharePartitionKey( - shareFetchData.groupId(), + shareFetch.groupId(), topicIdPartition ); @@ -585,15 +557,8 @@ void processShareFetch(ShareFetchData shareFetchData) { try { sharePartition = getOrCreateSharePartition(sharePartitionKey); } catch (Exception e) { - // Complete the whole fetch request with an exception if there is an error processing. - // The exception currently can be thrown only if there is an error while initializing - // the share partition. But skip the processing for other share partitions in the request - // as this situation is not expected. - log.error("Error processing share fetch request", e); - if (erroneous == null) { - erroneous = new HashMap<>(); - } - erroneous.put(topicIdPartition, e); + log.debug("Error processing share fetch request", e); + shareFetch.addErroneous(topicIdPartition, e); // Continue iteration for other partitions in the request. continue; } @@ -601,37 +566,42 @@ void processShareFetch(ShareFetchData shareFetchData) { // We add a key corresponding to each share partition in the request in the group so that when there are // acknowledgements/acquisition lock timeout etc., we have a way to perform checkAndComplete for all // such requests which are delayed because of lack of data to acquire for the share partition. 
- delayedShareFetchWatchKeys.add(new DelayedShareFetchGroupKey(shareFetchData.groupId(), topicIdPartition.topicId(), topicIdPartition.partition())); + DelayedShareFetchKey delayedShareFetchKey = new DelayedShareFetchGroupKey(shareFetch.groupId(), + topicIdPartition.topicId(), topicIdPartition.partition()); + delayedShareFetchWatchKeys.add(delayedShareFetchKey); // We add a key corresponding to each topic partition in the request so that when the HWM is updated // for any topic partition, we have a way to perform checkAndComplete for all such requests which are // delayed because of lack of data to acquire for the topic partition. delayedShareFetchWatchKeys.add(new DelayedShareFetchPartitionKey(topicIdPartition.topicId(), topicIdPartition.partition())); - // The share partition is initialized asynchronously, so we need to wait for it to be initialized. - // But if the share partition is already initialized, then the future will be completed immediately. - // Hence, it's safe to call the maybeInitialize method and then wait for the future to be completed. - // TopicPartitionData list will be populated only if the share partition is already initialized. - sharePartition.maybeInitialize().whenComplete((result, throwable) -> { + + CompletableFuture initializationFuture = sharePartition.maybeInitialize(); + final boolean initialized = initializationFuture.isDone(); + initializationFuture.whenComplete((result, throwable) -> { if (throwable != null) { - // TODO: Complete error handling for initialization. We have to record the error - // for respective share partition as completing the full request might result in - // some acquired records to not being sent: https://issues.apache.org/jira/browse/KAFKA-17510 - maybeCompleteInitializationWithException(sharePartitionKey, shareFetchData.future(), throwable); + handleInitializationException(sharePartitionKey, shareFetch, throwable); } + // The share partition is initialized asynchronously, but if it is already initialized or + // errored then the future completes immediately. If the initialization is not completed + // immediately then the requests might be waiting in purgatory until the share partition + // is initialized. Hence, trigger the completion of all pending delayed share fetch requests + // for the share partition. + if (!initialized) + replicaManager.completeDelayedShareFetchRequest(delayedShareFetchKey); }); sharePartitions.put(topicIdPartition, sharePartition); } // If all the partitions in the request errored out, then complete the fetch request with an exception. - if (erroneous != null && erroneous.size() == shareFetchData.partitionMaxBytes().size()) { - completeShareFetchWithException(shareFetchData.future(), erroneous); + if (shareFetch.errorInAllPartitions()) { + shareFetch.maybeComplete(Collections.emptyMap()); // Do not proceed with share fetch processing as all the partitions errored out. return; } - // TODO: If there exists some erroneous partitions then they will not be part of response. - // Add the share fetch to the delayed share fetch purgatory to process the fetch request. - addDelayedShareFetch(new DelayedShareFetch(shareFetchData, replicaManager, this, sharePartitions), delayedShareFetchWatchKeys); + // The request will be added irrespective of whether the share partition is initialized or not. + // Once the share partition is initialized, the delayed share fetch will be completed.
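The initialized flag captured above addresses a timing subtlety: when maybeInitialize finishes asynchronously, the delayed fetch may already be waiting in the purgatory, so its completion has to be triggered explicitly once initialization finishes; when initialization had already completed, no extra trigger is needed. A hedged, self-contained sketch of that pattern using plain CompletableFuture (the names below are illustrative, not the SharePartitionManager API):

import java.util.concurrent.CompletableFuture;

public class AsyncInitCompletionDemo {
    // Stand-in for replicaManager.completeDelayedShareFetchRequest(key).
    static void wakeUpPendingFetches(String key) {
        System.out.println("checkAndComplete for " + key);
    }

    public static void main(String[] args) {
        CompletableFuture<Void> initialization = new CompletableFuture<>();
        // Capture whether initialization had already finished when the request was registered.
        final boolean initialized = initialization.isDone();
        initialization.whenComplete((result, throwable) -> {
            // Only poke the purgatory when initialization finished later; otherwise the
            // delayed fetch added afterwards can already make progress on its own.
            if (!initialized) {
                wakeUpPendingFetches("group-topic-0");
            }
        });
        initialization.complete(null); // simulate the share partition finishing initialization later
    }
}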
+ addDelayedShareFetch(new DelayedShareFetch(shareFetch, replicaManager, this, sharePartitions), delayedShareFetchWatchKeys); } private SharePartition getOrCreateSharePartition(SharePartitionKey sharePartitionKey) { @@ -657,28 +627,35 @@ private SharePartition getOrCreateSharePartition(SharePartitionKey sharePartitio }); } - private void maybeCompleteInitializationWithException( + private void handleInitializationException( SharePartitionKey sharePartitionKey, - CompletableFuture> future, + ShareFetch shareFetch, Throwable throwable) { if (throwable instanceof LeaderNotAvailableException) { log.debug("The share partition with key {} is not initialized yet", sharePartitionKey); - // Do not process the fetch request for this partition as the leader is not initialized yet. - // The fetch request will be retried in the next poll. - // TODO: Add the request to delayed fetch purgatory. + // Skip any handling for this error as the share partition is still loading. The request + // to fetch will be added in purgatory and will be completed once either timed out + // or the share partition initialization completes. return; } // Remove the partition from the cache as it's failed to initialize. - partitionCacheMap.remove(sharePartitionKey); - // The partition initialization failed, so complete the request with the exception. - // The server should not be in this state, so log the error on broker and surface the same - // to the client. The broker should not be in this state, investigate the root cause of the error. - log.error("Error initializing share partition with key {}", sharePartitionKey, throwable); - maybeCompleteShareFetchWithException(future, Collections.singletonList(sharePartitionKey.topicIdPartition()), throwable); + SharePartition sharePartition = partitionCacheMap.remove(sharePartitionKey); + if (sharePartition != null) { + sharePartition.markFenced(); + } + // The partition initialization failed, so add the partition to the erroneous partitions. + log.debug("Error initializing share partition with key {}", sharePartitionKey, throwable); + shareFetch.addErroneous(sharePartitionKey.topicIdPartition(), throwable); } - private void handleFencedSharePartitionException( + /** + * The method is used to handle the share partition exception. + * + * @param sharePartitionKey The share partition key. + * @param throwable The exception. 
+ */ + public void handleFencedSharePartitionException( SharePartitionKey sharePartitionKey, Throwable throwable ) { @@ -695,23 +672,6 @@ private void handleFencedSharePartitionException( } } - private void maybeCompleteShareFetchWithException(CompletableFuture> future, - Collection topicIdPartitions, Throwable throwable) { - if (!future.isDone()) { - future.complete(topicIdPartitions.stream().collect(Collectors.toMap( - tp -> tp, tp -> new PartitionData().setErrorCode(Errors.forException(throwable).code()).setErrorMessage(throwable.getMessage())))); - } - } - - private void completeShareFetchWithException(CompletableFuture> future, - Map erroneous) { - future.complete(erroneous.entrySet().stream().collect(Collectors.toMap( - Map.Entry::getKey, entry -> { - Throwable t = entry.getValue(); - return new PartitionData().setErrorCode(Errors.forException(t).code()).setErrorMessage(t.getMessage()); - }))); - } - private SharePartitionKey sharePartitionKey(String groupId, TopicIdPartition topicIdPartition) { return new SharePartitionKey(groupId, topicIdPartition); } diff --git a/core/src/test/java/kafka/server/share/DelayedShareFetchTest.java b/core/src/test/java/kafka/server/share/DelayedShareFetchTest.java index 0c7b488f1802..12b89cffd377 100644 --- a/core/src/test/java/kafka/server/share/DelayedShareFetchTest.java +++ b/core/src/test/java/kafka/server/share/DelayedShareFetchTest.java @@ -31,7 +31,7 @@ import org.apache.kafka.server.purgatory.DelayedOperationPurgatory; import org.apache.kafka.server.share.fetch.DelayedShareFetchGroupKey; import org.apache.kafka.server.share.fetch.ShareAcquiredRecords; -import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.storage.log.FetchIsolation; import org.apache.kafka.server.storage.log.FetchParams; import org.apache.kafka.server.storage.log.FetchPartitionData; @@ -113,13 +113,13 @@ public void testDelayedShareFetchTryCompleteReturnsFalseDueToNonAcquirablePartit sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); when(sp0.canAcquireRecords()).thenReturn(false); when(sp1.canAcquireRecords()).thenReturn(false); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .build()); @@ -127,6 +127,8 @@ public void testDelayedShareFetchTryCompleteReturnsFalseDueToNonAcquirablePartit assertFalse(delayedShareFetch.tryComplete()); assertFalse(delayedShareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(0)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -150,7 +152,7 @@ public void testTryCompleteWhenMinBytesNotSatisfiedOnFirstFetch() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( new FetchParams(ApiKeys.SHARE_FETCH.latestVersion(), FetchRequest.ORDINARY_CONSUMER_ID, -1, MAX_WAIT_MS, 2, 1024 * 1024, FetchIsolation.HIGH_WATERMARK, Optional.empty()), groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); @@ -172,7 +174,7 @@ 
public void testTryCompleteWhenMinBytesNotSatisfiedOnFirstFetch() { doAnswer(invocation -> buildLogReadResult(Collections.singleton(tp0))).when(replicaManager).readFromLog(any(), any(), any(ReplicaQuota.class), anyBoolean()); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .withReplicaManager(replicaManager) .build()); @@ -182,6 +184,8 @@ public void testTryCompleteWhenMinBytesNotSatisfiedOnFirstFetch() { assertFalse(delayedShareFetch.tryComplete()); assertFalse(delayedShareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -205,7 +209,7 @@ public void testTryCompleteWhenMinBytesNotSatisfiedOnSubsequentFetch() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( new FetchParams(ApiKeys.SHARE_FETCH.latestVersion(), FetchRequest.ORDINARY_CONSUMER_ID, -1, MAX_WAIT_MS, 2, 1024 * 1024, FetchIsolation.HIGH_WATERMARK, Optional.empty()), groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); @@ -223,7 +227,7 @@ public void testTryCompleteWhenMinBytesNotSatisfiedOnSubsequentFetch() { mockTopicIdPartitionFetchBytes(replicaManager, tp0, hwmOffsetMetadata); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .withReplicaManager(replicaManager) .build()); @@ -233,6 +237,8 @@ public void testTryCompleteWhenMinBytesNotSatisfiedOnSubsequentFetch() { assertFalse(delayedShareFetch.tryComplete()); assertFalse(delayedShareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -256,7 +262,7 @@ public void testDelayedShareFetchTryCompleteReturnsTrue() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); when(sp0.canAcquireRecords()).thenReturn(true); @@ -268,7 +274,7 @@ public void testDelayedShareFetchTryCompleteReturnsTrue() { when(sp0.fetchOffsetMetadata()).thenReturn(Optional.of(new LogOffsetMetadata(0, 1, 0))); mockTopicIdPartitionToReturnDataEqualToMinBytes(replicaManager, tp0, 1); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .withReplicaManager(replicaManager) .build()); @@ -278,6 +284,8 @@ public void testDelayedShareFetchTryCompleteReturnsTrue() { assertTrue(delayedShareFetch.tryComplete()); assertTrue(delayedShareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -301,13 +309,14 @@ public void testEmptyFutureReturnedByDelayedShareFetchOnComplete() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new 
ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), - new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); + CompletableFuture> future = new CompletableFuture<>(); + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + future, partitionMaxBytes, MAX_FETCH_RECORDS); when(sp0.canAcquireRecords()).thenReturn(false); when(sp1.canAcquireRecords()).thenReturn(false); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions) .build()); @@ -315,11 +324,13 @@ public void testEmptyFutureReturnedByDelayedShareFetchOnComplete() { delayedShareFetch.forceComplete(); // Since no partition could be acquired, the future should be empty and replicaManager.readFromLog should not be called. - assertEquals(0, shareFetchData.future().join().size()); + assertEquals(0, future.join().size()); Mockito.verify(replicaManager, times(0)).readFromLog( any(), any(), any(ReplicaQuota.class), anyBoolean()); assertTrue(delayedShareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(0)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -343,7 +354,7 @@ public void testReplicaManagerFetchShouldHappenOnComplete() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); when(sp0.canAcquireRecords()).thenReturn(true); @@ -352,7 +363,7 @@ public void testReplicaManagerFetchShouldHappenOnComplete() { ShareAcquiredRecords.fromAcquiredRecords(new ShareFetchResponseData.AcquiredRecords().setFirstOffset(0).setLastOffset(3).setDeliveryCount((short) 1))); doAnswer(invocation -> buildLogReadResult(Collections.singleton(tp0))).when(replicaManager).readFromLog(any(), any(), any(ReplicaQuota.class), anyBoolean()); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions) .build()); @@ -365,8 +376,10 @@ public void testReplicaManagerFetchShouldHappenOnComplete() { Mockito.verify(sp0, times(1)).nextFetchOffset(); Mockito.verify(sp1, times(0)).nextFetchOffset(); assertTrue(delayedShareFetch.isCompleted()); - assertTrue(shareFetchData.future().isDone()); + assertTrue(shareFetch.isCompleted()); Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -384,14 +397,14 @@ public void testToCompleteAnAlreadyCompletedFuture() { sharePartitions.put(tp0, sp0); CompletableFuture> future = new CompletableFuture<>(); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), future, partitionMaxBytes, MAX_FETCH_RECORDS); when(sp0.maybeAcquireFetchLock()).thenReturn(true); when(sp0.canAcquireRecords()).thenReturn(false); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + 
.withShareFetchData(shareFetch) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions) .build()); @@ -402,7 +415,9 @@ public void testToCompleteAnAlreadyCompletedFuture() { assertTrue(delayedShareFetch.isCompleted()); // Verifying that the first forceComplete calls acquirablePartitions method in DelayedShareFetch. Mockito.verify(delayedShareFetch, times(1)).acquirablePartitions(); - assertEquals(0, shareFetchData.future().join().size()); + assertEquals(0, future.join().size()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); // Force completing the share fetch request for the second time should hit the future completion check and not // proceed ahead in the function. @@ -411,6 +426,8 @@ public void testToCompleteAnAlreadyCompletedFuture() { // Verifying that the second forceComplete does not call acquirablePartitions method in DelayedShareFetch. Mockito.verify(delayedShareFetch, times(1)).acquirablePartitions(); Mockito.verify(delayedShareFetch, times(0)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -438,7 +455,7 @@ public void testForceCompleteTriggersDelayedActionsQueue() { sharePartitions1.put(tp1, sp1); sharePartitions1.put(tp2, sp2); - ShareFetchData shareFetchData1 = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch1 = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes1, MAX_FETCH_RECORDS); DelayedOperationPurgatory delayedShareFetchPurgatory = new DelayedOperationPurgatory<>( @@ -450,7 +467,7 @@ public void testForceCompleteTriggersDelayedActionsQueue() { partitionMaxBytes1.keySet().forEach(topicIdPartition -> delayedShareFetchWatchKeys.add(new DelayedShareFetchGroupKey(groupId, topicIdPartition.topicId(), topicIdPartition.partition()))); DelayedShareFetch delayedShareFetch1 = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData1) + .withShareFetchData(shareFetch1) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions1) .build(); @@ -460,12 +477,14 @@ public void testForceCompleteTriggersDelayedActionsQueue() { delayedShareFetchPurgatory.tryCompleteElseWatch(delayedShareFetch1, delayedShareFetchWatchKeys); assertEquals(2, delayedShareFetchPurgatory.watched()); - assertFalse(shareFetchData1.future().isDone()); + assertFalse(shareFetch1.isCompleted()); + assertTrue(delayedShareFetch1.lock().tryLock()); + delayedShareFetch1.lock().unlock(); Map partitionMaxBytes2 = new HashMap<>(); partitionMaxBytes2.put(tp1, PARTITION_MAX_BYTES); partitionMaxBytes2.put(tp2, PARTITION_MAX_BYTES); - ShareFetchData shareFetchData2 = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch2 = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes2, MAX_FETCH_RECORDS); doAnswer(invocation -> buildLogReadResult(Collections.singleton(tp1))).when(replicaManager).readFromLog(any(), any(), any(ReplicaQuota.class), anyBoolean()); @@ -476,7 +495,7 @@ public void testForceCompleteTriggersDelayedActionsQueue() { sharePartitions2.put(tp2, sp2); DelayedShareFetch delayedShareFetch2 = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData2) + .withShareFetchData(shareFetch2) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions2) .build()); @@ -491,13 +510,15 @@ public void 
testForceCompleteTriggersDelayedActionsQueue() { // requests, it should add a "check and complete" action for request key tp1 on the purgatory. delayedShareFetch2.forceComplete(); assertTrue(delayedShareFetch2.isCompleted()); - assertTrue(shareFetchData2.future().isDone()); + assertTrue(shareFetch2.isCompleted()); Mockito.verify(replicaManager, times(1)).readFromLog( any(), any(), any(ReplicaQuota.class), anyBoolean()); assertFalse(delayedShareFetch1.isCompleted()); Mockito.verify(replicaManager, times(1)).addToActionQueue(any()); Mockito.verify(replicaManager, times(0)).tryCompleteActions(); Mockito.verify(delayedShareFetch2, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch2.lock().tryLock()); + delayedShareFetch2.lock().unlock(); } @Test @@ -518,32 +539,32 @@ public void testCombineLogReadResponse() { sharePartitions.put(tp1, sp1); CompletableFuture> future = new CompletableFuture<>(); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( new FetchParams(ApiKeys.SHARE_FETCH.latestVersion(), FetchRequest.ORDINARY_CONSUMER_ID, -1, MAX_WAIT_MS, 1, 1024 * 1024, FetchIsolation.HIGH_WATERMARK, Optional.empty()), groupId, Uuid.randomUuid().toString(), future, partitionMaxBytes, MAX_FETCH_RECORDS); DelayedShareFetch delayedShareFetch = DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(replicaManager) .withSharePartitions(sharePartitions) .build(); - Map topicPartitionData = new HashMap<>(); + LinkedHashMap topicPartitionData = new LinkedHashMap<>(); topicPartitionData.put(tp0, mock(FetchRequest.PartitionData.class)); topicPartitionData.put(tp1, mock(FetchRequest.PartitionData.class)); // Case 1 - logReadResponse contains tp0. - Map logReadResponse = Collections.singletonMap( - tp0, mock(LogReadResult.class)); + LinkedHashMap logReadResponse = new LinkedHashMap<>(); + logReadResponse.put(tp0, mock(LogReadResult.class)); doAnswer(invocation -> buildLogReadResult(Collections.singleton(tp1))).when(replicaManager).readFromLog(any(), any(), any(ReplicaQuota.class), anyBoolean()); - Map combinedLogReadResponse = delayedShareFetch.combineLogReadResponse(topicPartitionData, logReadResponse); + LinkedHashMap combinedLogReadResponse = delayedShareFetch.combineLogReadResponse(topicPartitionData, logReadResponse); assertEquals(topicPartitionData.keySet(), combinedLogReadResponse.keySet()); assertEquals(combinedLogReadResponse.get(tp0), logReadResponse.get(tp0)); // Case 2 - logReadResponse contains tp0 and tp1. 
- logReadResponse = new HashMap<>(); + logReadResponse = new LinkedHashMap<>(); logReadResponse.put(tp0, mock(LogReadResult.class)); logReadResponse.put(tp1, mock(LogReadResult.class)); combinedLogReadResponse = delayedShareFetch.combineLogReadResponse(topicPartitionData, logReadResponse); @@ -568,7 +589,7 @@ public void testExceptionInMinBytesCalculation() { LinkedHashMap sharePartitions = new LinkedHashMap<>(); sharePartitions.put(tp0, sp0); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( new FetchParams(ApiKeys.SHARE_FETCH.latestVersion(), FetchRequest.ORDINARY_CONSUMER_ID, -1, MAX_WAIT_MS, 1, 1024 * 1024, FetchIsolation.HIGH_WATERMARK, Optional.empty()), groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, MAX_FETCH_RECORDS); @@ -583,20 +604,37 @@ public void testExceptionInMinBytesCalculation() { when(replicaManager.getPartitionOrException(tp0.topicPartition())).thenReturn(partition); when(partition.fetchOffsetSnapshot(any(), anyBoolean())).thenThrow(new RuntimeException("Exception thrown")); + SharePartitionManager sharePartitionManager = mock(SharePartitionManager.class); DelayedShareFetch delayedShareFetch = spy(DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .withReplicaManager(replicaManager) + .withSharePartitionManager(sharePartitionManager) .build()); + + // Try complete should return false as the share partition has errored out. + assertFalse(delayedShareFetch.tryComplete()); + // Fetch should remain pending and should be completed on request timeout. assertFalse(delayedShareFetch.isCompleted()); + // The request should be errored out as topic partition should get added as erroneous. + assertTrue(shareFetch.errorInAllPartitions()); - // Since minBytes calculation throws an exception and returns true, tryComplete should return true. - assertTrue(delayedShareFetch.tryComplete()); + Mockito.verify(sharePartitionManager, times(1)).handleFencedSharePartitionException(any(), any()); + Mockito.verify(replicaManager, times(1)).readFromLog( + any(), any(), any(ReplicaQuota.class), anyBoolean()); + Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + + // Force complete the request as it's still pending. Return false from the share partition lock acquire. + when(sp0.maybeAcquireFetchLock()).thenReturn(false); + assertTrue(delayedShareFetch.forceComplete()); assertTrue(delayedShareFetch.isCompleted()); - Mockito.verify(replicaManager, times(2)).readFromLog( + + // Read from log and release partition locks should not be called as the request is errored out. + Mockito.verify(replicaManager, times(1)).readFromLog( any(), any(), any(ReplicaQuota.class), anyBoolean()); - // releasePartitionLocks will be called twice, once from tryComplete and then from onComplete. 
- Mockito.verify(delayedShareFetch, times(2)).releasePartitionLocks(any()); + Mockito.verify(delayedShareFetch, times(1)).releasePartitionLocks(any()); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -615,11 +653,11 @@ public void testLocksReleasedForCompletedFetch() { doAnswer(invocation -> buildLogReadResult(Collections.singleton(tp0))).when(replicaManager).readFromLog(any(), any(), any(ReplicaQuota.class), anyBoolean()); mockTopicIdPartitionToReturnDataEqualToMinBytes(replicaManager, tp0, 1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), Map.of(tp0, PARTITION_MAX_BYTES), MAX_FETCH_RECORDS); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions1) .withReplicaManager(replicaManager) .build(); @@ -629,6 +667,8 @@ public void testLocksReleasedForCompletedFetch() { assertFalse(spy.tryComplete()); Mockito.verify(sp0, times(1)).releaseFetchLock(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -643,16 +683,18 @@ public void testLocksReleasedAcquireException() { LinkedHashMap sharePartitions = new LinkedHashMap<>(); sharePartitions.put(tp0, sp0); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), Map.of(tp0, PARTITION_MAX_BYTES), MAX_FETCH_RECORDS); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withSharePartitions(sharePartitions) .build(); assertFalse(delayedShareFetch.tryComplete()); Mockito.verify(sp0, times(1)).releaseFetchLock(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } static void mockTopicIdPartitionToReturnDataEqualToMinBytes(ReplicaManager replicaManager, TopicIdPartition topicIdPartition, int minBytes) { @@ -675,13 +717,13 @@ private void mockTopicIdPartitionFetchBytes(ReplicaManager replicaManager, Topic } static class DelayedShareFetchBuilder { - ShareFetchData shareFetchData = mock(ShareFetchData.class); + ShareFetch shareFetch = mock(ShareFetch.class); private ReplicaManager replicaManager = mock(ReplicaManager.class); - private final SharePartitionManager sharePartitionManager = mock(SharePartitionManager.class); + private SharePartitionManager sharePartitionManager = mock(SharePartitionManager.class); private LinkedHashMap sharePartitions = mock(LinkedHashMap.class); - DelayedShareFetchBuilder withShareFetchData(ShareFetchData shareFetchData) { - this.shareFetchData = shareFetchData; + DelayedShareFetchBuilder withShareFetchData(ShareFetch shareFetch) { + this.shareFetch = shareFetch; return this; } @@ -690,6 +732,11 @@ DelayedShareFetchBuilder withReplicaManager(ReplicaManager replicaManager) { return this; } + DelayedShareFetchBuilder withSharePartitionManager(SharePartitionManager sharePartitionManager) { + this.sharePartitionManager = sharePartitionManager; + return this; + } + DelayedShareFetchBuilder withSharePartitions(LinkedHashMap sharePartitions) { this.sharePartitions = sharePartitions; return this; @@ 
-701,7 +748,7 @@ public static DelayedShareFetchBuilder builder() { public DelayedShareFetch build() { return new DelayedShareFetch( - shareFetchData, + shareFetch, replicaManager, sharePartitionManager, sharePartitions); diff --git a/core/src/test/java/kafka/server/share/ShareFetchUtilsTest.java b/core/src/test/java/kafka/server/share/ShareFetchUtilsTest.java index 6ff0f90bc49d..f8a53e9aa338 100644 --- a/core/src/test/java/kafka/server/share/ShareFetchUtilsTest.java +++ b/core/src/test/java/kafka/server/share/ShareFetchUtilsTest.java @@ -31,7 +31,7 @@ import org.apache.kafka.common.record.SimpleRecord; import org.apache.kafka.common.requests.FetchRequest; import org.apache.kafka.server.share.fetch.ShareAcquiredRecords; -import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.storage.log.FetchIsolation; import org.apache.kafka.server.storage.log.FetchParams; import org.apache.kafka.server.storage.log.FetchPartitionData; @@ -101,7 +101,7 @@ public void testProcessFetchResponse() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, memberId, + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, memberId, new CompletableFuture<>(), partitionMaxBytes, 100); MemoryRecords records = MemoryRecords.withRecords(Compression.NONE, @@ -124,7 +124,7 @@ public void testProcessFetchResponse() { records1, Optional.empty(), OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), false)); Map resultData = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData, sharePartitions, mock(ReplicaManager.class)); + ShareFetchUtils.processFetchResponse(shareFetch, responseData, sharePartitions, mock(ReplicaManager.class)); assertEquals(2, resultData.size()); assertTrue(resultData.containsKey(tp0)); @@ -167,7 +167,7 @@ public void testProcessFetchResponseWithEmptyRecords() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, memberId, + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, memberId, new CompletableFuture<>(), partitionMaxBytes, 100); Map responseData = new HashMap<>(); @@ -178,7 +178,7 @@ public void testProcessFetchResponseWithEmptyRecords() { MemoryRecords.EMPTY, Optional.empty(), OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), false)); Map resultData = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData, sharePartitions, mock(ReplicaManager.class)); + ShareFetchUtils.processFetchResponse(shareFetch, responseData, sharePartitions, mock(ReplicaManager.class)); assertEquals(2, resultData.size()); assertTrue(resultData.containsKey(tp0)); @@ -209,7 +209,7 @@ public void testProcessFetchResponseWithLsoMovementForTopicPartition() { sharePartitions.put(tp0, sp0); sharePartitions.put(tp1, sp1); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, 100); ReplicaManager replicaManager = mock(ReplicaManager.class); @@ -247,7 +247,7 @@ public void testProcessFetchResponseWithLsoMovementForTopicPartition() { records1, Optional.empty(), OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), false)); Map resultData1 = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData1, 
sharePartitions, replicaManager); + ShareFetchUtils.processFetchResponse(shareFetch, responseData1, sharePartitions, replicaManager); assertEquals(2, resultData1.size()); assertTrue(resultData1.containsKey(tp0)); @@ -276,7 +276,7 @@ public void testProcessFetchResponseWithLsoMovementForTopicPartition() { MemoryRecords.EMPTY, Optional.empty(), OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), false)); Map resultData2 = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData2, sharePartitions, replicaManager); + ShareFetchUtils.processFetchResponse(shareFetch, responseData2, sharePartitions, replicaManager); assertEquals(2, resultData2.size()); assertTrue(resultData2.containsKey(tp0)); @@ -303,7 +303,7 @@ public void testProcessFetchResponseWhenNoRecordsAreAcquired() { LinkedHashMap sharePartitions = new LinkedHashMap<>(); sharePartitions.put(tp0, sp0); - ShareFetchData shareFetchData = new ShareFetchData(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), + ShareFetch shareFetch = new ShareFetch(FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), new CompletableFuture<>(), partitionMaxBytes, 100); ReplicaManager replicaManager = mock(ReplicaManager.class); @@ -327,7 +327,7 @@ tp0, new FetchPartitionData(Errors.NONE, 0L, 0L, OptionalInt.empty(), false)); Map resultData = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData, sharePartitions, replicaManager); + ShareFetchUtils.processFetchResponse(shareFetch, responseData, sharePartitions, replicaManager); assertEquals(1, resultData.size()); assertTrue(resultData.containsKey(tp0)); @@ -342,7 +342,7 @@ tp0, new FetchPartitionData(Errors.OFFSET_OUT_OF_RANGE, 0L, 0L, records, Optional.empty(), OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), false)); - resultData = ShareFetchUtils.processFetchResponse(shareFetchData, responseData, sharePartitions, replicaManager); + resultData = ShareFetchUtils.processFetchResponse(shareFetch, responseData, sharePartitions, replicaManager); assertEquals(1, resultData.size()); assertTrue(resultData.containsKey(tp0)); @@ -376,7 +376,7 @@ public void testProcessFetchResponseWithMaxFetchRecords() { Uuid memberId = Uuid.randomUuid(); // Set max fetch records to 10 - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( new FetchParams(ApiKeys.SHARE_FETCH.latestVersion(), FetchRequest.ORDINARY_CONSUMER_ID, -1, 0, 1, 1024 * 1024, FetchIsolation.HIGH_WATERMARK, Optional.empty()), groupId, memberId.toString(), new CompletableFuture<>(), partitionMaxBytes, 10); @@ -413,7 +413,7 @@ public void testProcessFetchResponseWithMaxFetchRecords() { responseData1.put(tp1, fetchPartitionData2); Map resultData1 = - ShareFetchUtils.processFetchResponse(shareFetchData, responseData1, sharePartitions, replicaManager); + ShareFetchUtils.processFetchResponse(shareFetch, responseData1, sharePartitions, replicaManager); assertEquals(2, resultData1.size()); assertTrue(resultData1.containsKey(tp0)); diff --git a/core/src/test/java/kafka/server/share/SharePartitionManagerTest.java b/core/src/test/java/kafka/server/share/SharePartitionManagerTest.java index 46abf04b0a64..06bb123f5509 100644 --- a/core/src/test/java/kafka/server/share/SharePartitionManagerTest.java +++ b/core/src/test/java/kafka/server/share/SharePartitionManagerTest.java @@ -65,7 +65,7 @@ import org.apache.kafka.server.share.fetch.DelayedShareFetchGroupKey; import org.apache.kafka.server.share.fetch.DelayedShareFetchKey; import org.apache.kafka.server.share.fetch.ShareAcquiredRecords; 
-import org.apache.kafka.server.share.fetch.ShareFetchData; +import org.apache.kafka.server.share.fetch.ShareFetch; import org.apache.kafka.server.share.persister.NoOpShareStatePersister; import org.apache.kafka.server.share.persister.Persister; import org.apache.kafka.server.share.session.ShareSession; @@ -223,7 +223,7 @@ public void testNewContextReturnsFinalContextWithRequestData() { ShareRequestMetadata reqMetadata2 = new ShareRequestMetadata(memberId, ShareRequestMetadata.FINAL_EPOCH); - // shareFetchData is not empty, but the maxBytes of topic partition is 0, which means this is added only for acknowledgements. + // shareFetch is not empty, but the maxBytes of topic partition is 0, which means this is added only for acknowledgements. // New context should be created successfully Map reqData3 = Collections.singletonMap(new TopicIdPartition(tpId1, new TopicPartition("foo", 0)), new ShareFetchRequest.SharePartitionData(tpId1, 0)); @@ -257,7 +257,7 @@ public void testNewContextReturnsFinalContextError() { ShareRequestMetadata reqMetadata2 = new ShareRequestMetadata(memberId, ShareRequestMetadata.FINAL_EPOCH); - // shareFetchData is not empty and the maxBytes of topic partition is not 0, which means this is trying to fetch on a Final request. + // shareFetch is not empty and the maxBytes of topic partition is not 0, which means this is trying to fetch on a Final request. // New context should throw an error Map reqData3 = Collections.singletonMap(new TopicIdPartition(tpId1, new TopicPartition("foo", 0)), new ShareFetchRequest.SharePartitionData(tpId1, PARTITION_MAX_BYTES)); @@ -1665,7 +1665,7 @@ public void testAcknowledgeCompletesDelayedShareFetchRequest() { partitionCacheMap.put(new SharePartitionKey(groupId, tp1), sp1); partitionCacheMap.put(new SharePartitionKey(groupId, tp2), sp2); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), @@ -1700,7 +1700,7 @@ public void testAcknowledgeCompletesDelayedShareFetchRequest() { sharePartitions.put(tp2, sp2); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(mockReplicaManager) .withSharePartitions(sharePartitions) .build(); @@ -1727,6 +1727,8 @@ public void testAcknowledgeCompletesDelayedShareFetchRequest() { Mockito.verify(sp1, times(1)).nextFetchOffset(); Mockito.verify(sp2, times(0)).nextFetchOffset(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -1765,7 +1767,7 @@ public void testAcknowledgeDoesNotCompleteDelayedShareFetchRequest() { partitionCacheMap.put(new SharePartitionKey(groupId, tp2), sp2); partitionCacheMap.put(new SharePartitionKey(groupId, tp3), sp3); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), @@ -1801,7 +1803,7 @@ public void testAcknowledgeDoesNotCompleteDelayedShareFetchRequest() { sharePartitions.put(tp3, sp3); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(mockReplicaManager) .withSharePartitions(sharePartitions) .build(); @@ -1825,6 +1827,8 @@ public void testAcknowledgeDoesNotCompleteDelayedShareFetchRequest() { Mockito.verify(sp1, times(0)).nextFetchOffset(); Mockito.verify(sp2, 
times(0)).nextFetchOffset(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -1861,7 +1865,7 @@ public void testReleaseSessionCompletesDelayedShareFetchRequest() { partitionCacheMap.put(new SharePartitionKey(groupId, tp1), sp1); partitionCacheMap.put(new SharePartitionKey(groupId, tp2), sp2); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), @@ -1897,7 +1901,7 @@ public void testReleaseSessionCompletesDelayedShareFetchRequest() { sharePartitions.put(tp2, sp2); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(mockReplicaManager) .withSharePartitions(sharePartitions) .build(); @@ -1923,6 +1927,8 @@ public void testReleaseSessionCompletesDelayedShareFetchRequest() { Mockito.verify(sp1, times(1)).nextFetchOffset(); Mockito.verify(sp2, times(0)).nextFetchOffset(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -1965,7 +1971,7 @@ public void testReleaseSessionDoesNotCompleteDelayedShareFetchRequest() { partitionCacheMap.put(new SharePartitionKey(groupId, tp2), sp2); partitionCacheMap.put(new SharePartitionKey(groupId, tp3), sp3); - ShareFetchData shareFetchData = new ShareFetchData( + ShareFetch shareFetch = new ShareFetch( FETCH_PARAMS, groupId, Uuid.randomUuid().toString(), @@ -2002,7 +2008,7 @@ public void testReleaseSessionDoesNotCompleteDelayedShareFetchRequest() { sharePartitions.put(tp3, sp3); DelayedShareFetch delayedShareFetch = DelayedShareFetchTest.DelayedShareFetchBuilder.builder() - .withShareFetchData(shareFetchData) + .withShareFetchData(shareFetch) .withReplicaManager(mockReplicaManager) .withSharePartitions(sharePartitions) .build(); @@ -2025,6 +2031,8 @@ public void testReleaseSessionDoesNotCompleteDelayedShareFetchRequest() { Mockito.verify(sp1, times(0)).nextFetchOffset(); Mockito.verify(sp2, times(0)).nextFetchOffset(); + assertTrue(delayedShareFetch.lock().tryLock()); + delayedShareFetch.lock().unlock(); } @Test @@ -2063,10 +2071,74 @@ public void testPendingInitializationShouldCompleteFetchRequest() throws Excepti // Verify that replica manager fetch is not called. Mockito.verify(mockReplicaManager, times(0)).readFromLog( any(), any(), any(ReplicaQuota.class), anyBoolean()); + assertFalse(pendingInitializationFuture.isDone()); // Complete the pending initialization future. pendingInitializationFuture.complete(null); } + @Test + public void testDelayedInitializationShouldCompleteFetchRequest() throws Exception { + String groupId = "grp"; + Uuid memberId = Uuid.randomUuid(); + Uuid fooId = Uuid.randomUuid(); + TopicIdPartition tp0 = new TopicIdPartition(fooId, new TopicPartition("foo", 0)); + Map partitionMaxBytes = Collections.singletonMap(tp0, PARTITION_MAX_BYTES); + + SharePartition sp0 = mock(SharePartition.class); + Map partitionCacheMap = new HashMap<>(); + partitionCacheMap.put(new SharePartitionKey(groupId, tp0), sp0); + + // Keep the 2 initialization futures pending and 1 completed with leader not available exception. + CompletableFuture pendingInitializationFuture1 = new CompletableFuture<>(); + CompletableFuture pendingInitializationFuture2 = new CompletableFuture<>(); + when(sp0.maybeInitialize()). 
+ thenReturn(pendingInitializationFuture1) + .thenReturn(pendingInitializationFuture2) + .thenReturn(CompletableFuture.failedFuture(new LeaderNotAvailableException("Leader not available"))); + + DelayedOperationPurgatory shareFetchPurgatorySpy = spy(new DelayedOperationPurgatory<>( + "TestShareFetch", mockTimer, mockReplicaManager.localBrokerId(), + DELAYED_SHARE_FETCH_PURGATORY_PURGE_INTERVAL, true, true)); + mockReplicaManagerDelayedShareFetch(mockReplicaManager, shareFetchPurgatorySpy); + + SharePartitionManager sharePartitionManager = SharePartitionManagerBuilder.builder() + .withPartitionCacheMap(partitionCacheMap).withReplicaManager(mockReplicaManager).withTimer(mockTimer) + .build(); + + // Send 3 requests for share fetch for same share partition. + CompletableFuture> future1 = + sharePartitionManager.fetchMessages(groupId, memberId.toString(), FETCH_PARAMS, partitionMaxBytes); + + CompletableFuture> future2 = + sharePartitionManager.fetchMessages(groupId, memberId.toString(), FETCH_PARAMS, partitionMaxBytes); + + CompletableFuture> future3 = + sharePartitionManager.fetchMessages(groupId, memberId.toString(), FETCH_PARAMS, partitionMaxBytes); + + Mockito.verify(sp0, times(3)).maybeInitialize(); + Mockito.verify(mockReplicaManager, times(3)).addDelayedShareFetchRequest(any(), any()); + Mockito.verify(shareFetchPurgatorySpy, times(3)).tryCompleteElseWatch(any(), any()); + Mockito.verify(shareFetchPurgatorySpy, times(0)).checkAndComplete(any()); + + // All 3 requests should be pending. + assertFalse(future1.isDone()); + assertFalse(future2.isDone()); + assertFalse(future3.isDone()); + + // Complete one pending initialization future. + pendingInitializationFuture1.complete(null); + Mockito.verify(mockReplicaManager, times(1)).completeDelayedShareFetchRequest(any()); + Mockito.verify(shareFetchPurgatorySpy, times(1)).checkAndComplete(any()); + + pendingInitializationFuture2.complete(null); + Mockito.verify(mockReplicaManager, times(2)).completeDelayedShareFetchRequest(any()); + Mockito.verify(shareFetchPurgatorySpy, times(2)).checkAndComplete(any()); + + // Verify that replica manager fetch is not called. + Mockito.verify(mockReplicaManager, times(0)).readFromLog( + any(), any(), any(ReplicaQuota.class), anyBoolean()); + } + @Test public void testSharePartitionInitializationExceptions() throws Exception { String groupId = "grp"; @@ -2100,6 +2172,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { // between SharePartitionManager and SharePartition to retry the request as SharePartition is not yet ready. assertFalse(future.isCompletedExceptionally()); assertTrue(future.join().isEmpty()); + Mockito.verify(sp0, times(0)).markFenced(); // Verify that the share partition is still in the cache on LeaderNotAvailableException. assertEquals(1, partitionCacheMap.size()); @@ -2111,6 +2184,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.UNKNOWN_SERVER_ERROR, "Illegal state"); + Mockito.verify(sp0, times(1)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); // The last exception removes the share partition from the cache hence re-add the share partition to cache. 
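Reviewer note on the recurring pair of assertions added throughout the delayed share fetch tests above, `assertTrue(delayedShareFetch.lock().tryLock()); delayedShareFetch.lock().unlock();` — they appear to check that completing the operation (via tryComplete, forceComplete, or the purgatory) leaves the operation's lock free rather than leaked. A minimal standalone sketch of the idiom follows; the class name, variable names, and the bare ReentrantLock are illustrative stand-ins and not part of this patch:

    import java.util.concurrent.locks.ReentrantLock;

    public class LockReleaseCheckSketch {
        public static void main(String[] args) {
            // Stand-in for delayedShareFetch.lock(): the lock guarding tryComplete/onComplete.
            ReentrantLock operationLock = new ReentrantLock();

            // ... the delayed operation completes here and is expected to leave the lock free ...

            // tryLock() fails only if some other thread still holds the lock, so this asserts
            // that completion (including any purgatory or expiration path) released it.
            if (!operationLock.tryLock()) {
                throw new AssertionError("operation lock is still held after completion");
            }
            operationLock.unlock();
        }
    }
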
@@ -2123,6 +2197,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.COORDINATOR_NOT_AVAILABLE, "Coordinator not available"); + Mockito.verify(sp0, times(2)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); // The last exception removes the share partition from the cache hence re-add the share partition to cache. @@ -2135,6 +2210,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.INVALID_REQUEST, "Invalid request"); + Mockito.verify(sp0, times(3)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); // The last exception removes the share partition from the cache hence re-add the share partition to cache. @@ -2147,6 +2223,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.FENCED_STATE_EPOCH, "Fenced state epoch"); + Mockito.verify(sp0, times(4)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); // The last exception removes the share partition from the cache hence re-add the share partition to cache. @@ -2159,6 +2236,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.NOT_LEADER_OR_FOLLOWER, "Not leader or follower"); + Mockito.verify(sp0, times(5)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); // The last exception removes the share partition from the cache hence re-add the share partition to cache. @@ -2171,6 +2249,7 @@ public void testSharePartitionInitializationExceptions() throws Exception { DELAYED_SHARE_FETCH_TIMEOUT_MS, () -> "Processing in delayed share fetch queue never ended."); validateShareFetchFutureException(future, tp0, Errors.UNKNOWN_SERVER_ERROR, "Runtime exception"); + Mockito.verify(sp0, times(6)).markFenced(); assertTrue(partitionCacheMap.isEmpty()); } @@ -2247,18 +2326,25 @@ public void testSharePartitionInitializationFailure() throws Exception { public void testSharePartitionPartialInitializationFailure() throws Exception { String groupId = "grp"; Uuid memberId1 = Uuid.randomUuid(); + // For tp0, share partition instantiation will fail. TopicIdPartition tp0 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0)); + // For tp1, share fetch should succeed. TopicIdPartition tp1 = new TopicIdPartition(memberId1, new TopicPartition("foo", 1)); - Map partitionMaxBytes = Map.of(tp0, PARTITION_MAX_BYTES, tp1, PARTITION_MAX_BYTES); - - // Mark partition1 as not the leader. - Partition partition1 = mock(Partition.class); - when(partition1.isLeader()).thenReturn(false); - + // For tp2, share partition initialization will fail. + TopicIdPartition tp2 = new TopicIdPartition(memberId1, new TopicPartition("foo", 2)); + Map partitionMaxBytes = Map.of( + tp0, PARTITION_MAX_BYTES, + tp1, PARTITION_MAX_BYTES, + tp2, PARTITION_MAX_BYTES); + + // Mark partition0 as not the leader. 
+ Partition partition0 = mock(Partition.class); + when(partition0.isLeader()).thenReturn(false); ReplicaManager replicaManager = mock(ReplicaManager.class); when(replicaManager.getPartitionOrException(any())) - .thenReturn(partition1); + .thenReturn(partition0); + // Mock share partition for tp1, so it can succeed. SharePartition sp1 = mock(SharePartition.class); Map partitionCacheMap = new HashMap<>(); partitionCacheMap.put(new SharePartitionKey(groupId, tp1), sp1); @@ -2268,6 +2354,11 @@ public void testSharePartitionPartialInitializationFailure() throws Exception { when(sp1.maybeInitialize()).thenReturn(CompletableFuture.completedFuture(null)); when(sp1.acquire(anyString(), anyInt(), any())).thenReturn(new ShareAcquiredRecords(Collections.emptyList(), 0)); + // Fail initialization for tp2. + SharePartition sp2 = mock(SharePartition.class); + partitionCacheMap.put(new SharePartitionKey(groupId, tp2), sp2); + when(sp2.maybeInitialize()).thenReturn(CompletableFuture.failedFuture(new FencedStateEpochException("Fenced state epoch"))); + DelayedOperationPurgatory delayedShareFetchPurgatory = new DelayedOperationPurgatory<>( "TestShareFetch", mockTimer, replicaManager.localBrokerId(), DELAYED_SHARE_FETCH_PURGATORY_PURGE_INTERVAL, true, true); @@ -2289,11 +2380,16 @@ public void testSharePartitionPartialInitializationFailure() throws Exception { assertFalse(future.isCompletedExceptionally()); Map partitionDataMap = future.get(); - // For now only 1 successful partition is included, this will be fixed in subsequents PRs. - assertEquals(1, partitionDataMap.size()); + assertEquals(3, partitionDataMap.size()); + assertTrue(partitionDataMap.containsKey(tp0)); + assertEquals(Errors.NOT_LEADER_OR_FOLLOWER.code(), partitionDataMap.get(tp0).errorCode()); assertTrue(partitionDataMap.containsKey(tp1)); assertEquals(Errors.NONE.code(), partitionDataMap.get(tp1).errorCode()); + assertTrue(partitionDataMap.containsKey(tp2)); + assertEquals(Errors.FENCED_STATE_EPOCH.code(), partitionDataMap.get(tp2).errorCode()); + assertEquals("Fenced state epoch", partitionDataMap.get(tp2).errorMessage()); + Mockito.verify(replicaManager, times(0)).completeDelayedShareFetchRequest(any()); Mockito.verify(replicaManager, times(1)).readFromLog( any(), any(), any(ReplicaQuota.class), anyBoolean()); } diff --git a/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala index 46b34f4efdaf..e9a0644a26c6 100644 --- a/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala @@ -33,6 +33,7 @@ import org.apache.kafka.metadata.authorizer.StandardAuthorizer import org.apache.kafka.security.authorizer.AclEntry.WILDCARD_HOST import org.apache.kafka.server.config.ServerConfigs import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.function.Executable import org.junit.jupiter.api.{BeforeEach, TestInfo} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -132,6 +133,84 @@ class GroupAuthorizerIntegrationTest extends BaseRequestTest { assertEquals(Set(topic), consumeException.unauthorizedTopics.asScala) } + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersAll")) + def testConsumeUnsubscribeWithoutGroupPermission(quorum: String, groupProtocol: String): Unit = { + 
val topic = "topic" + + createTopic(topic, listenerName = interBrokerListenerName) + + // allow topic read/write permission to poll/send record + addAndVerifyAcls( + Set(createAcl(AclOperation.WRITE, AclPermissionType.ALLOW), createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL) + ) + val producer = createProducer() + producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, "message".getBytes)).get() + producer.close() + + // allow group read permission to join group + val group = "group" + addAndVerifyAcls( + Set(createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.GROUP, group, PatternType.LITERAL) + ) + + val props = new Properties() + props.put(ConsumerConfig.GROUP_ID_CONFIG, group) + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false") + val consumer = createConsumer(configOverrides = props) + consumer.subscribe(List(topic).asJava) + TestUtils.pollUntilAtLeastNumRecords(consumer, numRecords = 1) + + removeAndVerifyAcls( + Set(createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.GROUP, group, PatternType.LITERAL) + ) + + assertDoesNotThrow(new Executable { + override def execute(): Unit = consumer.unsubscribe() + }) + } + + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersAll")) + def testConsumeCloseWithoutGroupPermission(quorum: String, groupProtocol: String): Unit = { + val topic = "topic" + createTopic(topic, listenerName = interBrokerListenerName) + + // allow topic read/write permission to poll/send record + addAndVerifyAcls( + Set(createAcl(AclOperation.WRITE, AclPermissionType.ALLOW), createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL) + ) + val producer = createProducer() + producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, "message".getBytes)).get() + + // allow group read permission to join group + val group = "group" + addAndVerifyAcls( + Set(createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.GROUP, group, PatternType.LITERAL) + ) + + val props = new Properties() + props.put(ConsumerConfig.GROUP_ID_CONFIG, group) + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false") + val consumer = createConsumer(configOverrides = props) + consumer.subscribe(List(topic).asJava) + TestUtils.pollUntilAtLeastNumRecords(consumer, numRecords = 1) + + removeAndVerifyAcls( + Set(createAcl(AclOperation.READ, AclPermissionType.ALLOW)), + new ResourcePattern(ResourceType.GROUP, group, PatternType.LITERAL) + ) + + assertDoesNotThrow(new Executable { + override def execute(): Unit = consumer.close() + }) + } + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) @MethodSource(Array("getTestQuorumAndGroupProtocolParametersAll")) def testAuthorizedProduceAndConsume(quorum: String, groupProtocol: String): Unit = { diff --git a/core/src/test/scala/integration/kafka/api/PlaintextConsumerPollTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextConsumerPollTest.scala index 9e6460df0502..9280b6af4751 100644 --- a/core/src/test/scala/integration/kafka/api/PlaintextConsumerPollTest.scala +++ b/core/src/test/scala/integration/kafka/api/PlaintextConsumerPollTest.scala @@ -16,6 +16,7 @@ import kafka.utils.{TestInfoUtils, TestUtils} import 
org.apache.kafka.clients.consumer._ import org.apache.kafka.common.{MetricName, TopicPartition} import org.apache.kafka.common.utils.Utils +import org.apache.kafka.coordinator.group.GroupCoordinatorConfig import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Timeout import org.junit.jupiter.params.ParameterizedTest @@ -23,6 +24,7 @@ import org.junit.jupiter.params.provider.MethodSource import java.time.Duration import java.util +import java.util.Properties import scala.collection.mutable import scala.jdk.CollectionConverters._ @@ -32,6 +34,12 @@ import scala.jdk.CollectionConverters._ @Timeout(600) class PlaintextConsumerPollTest extends AbstractConsumerTest { + override protected def brokerPropertyOverrides(properties: Properties): Unit = { + super.brokerPropertyOverrides(properties) + properties.setProperty(GroupCoordinatorConfig.CONSUMER_GROUP_HEARTBEAT_INTERVAL_MS_CONFIG, 500.toString) + properties.setProperty(GroupCoordinatorConfig.CONSUMER_GROUP_MIN_HEARTBEAT_INTERVAL_MS_CONFIG, 500.toString) + } + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) @MethodSource(Array("getTestQuorumAndGroupProtocolParametersAll")) def testMaxPollRecords(quorum: String, groupProtocol: String): Unit = { diff --git a/core/src/test/scala/integration/kafka/api/SaslClientsWithInvalidCredentialsTest.scala b/core/src/test/scala/integration/kafka/api/SaslClientsWithInvalidCredentialsTest.scala index 1a4451c2b0a4..af3f030648fa 100644 --- a/core/src/test/scala/integration/kafka/api/SaslClientsWithInvalidCredentialsTest.scala +++ b/core/src/test/scala/integration/kafka/api/SaslClientsWithInvalidCredentialsTest.scala @@ -23,17 +23,18 @@ import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.{KafkaException, TopicPartition} import org.apache.kafka.common.errors.SaslAuthenticationException -import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo} +import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo} import org.junit.jupiter.api.Assertions._ import kafka.utils.{TestInfoUtils, TestUtils} -import kafka.zk.ConfigEntityChangeNotificationZNode import org.apache.kafka.common.security.auth.SecurityProtocol import org.apache.kafka.coordinator.group.GroupCoordinatorConfig import org.apache.kafka.coordinator.transaction.TransactionLogConfig +import org.apache.kafka.metadata.storage.Formatter import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{MethodSource, ValueSource} import scala.jdk.javaapi.OptionConverters +import scala.util.Using class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { @@ -57,9 +58,11 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { override def configureSecurityBeforeServersStart(testInfo: TestInfo): Unit = { super.configureSecurityBeforeServersStart(testInfo) - zkClient.makeSurePersistentPathExists(ConfigEntityChangeNotificationZNode.path) - // Create broker credentials before starting brokers - createScramCredentials(zkConnect, JaasTestUtils.KAFKA_SCRAM_ADMIN, JaasTestUtils.KAFKA_SCRAM_ADMIN_PASSWORD) + } + + override def addFormatterSettings(formatter: Formatter): Unit = { + formatter.setScramArguments( + List(s"SCRAM-SHA-256=[name=${JaasTestUtils.KAFKA_SCRAM_ADMIN},password=${JaasTestUtils.KAFKA_SCRAM_ADMIN_PASSWORD}]").asJava) } override def createPrivilegedAdminClient() = { @@ -72,7 +75,11 @@ class 
SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { startSasl(jaasSections(kafkaServerSaslMechanisms, Some(kafkaClientSaslMechanism), Both, JaasTestUtils.KAFKA_SERVER_CONTEXT_NAME)) super.setUp(testInfo) - createTopic(topic, numPartitions, brokerCount) + Using(createPrivilegedAdminClient()) { superuserAdminClient => + TestUtils.createTopicWithAdmin( + superuserAdminClient, topic, brokers, controllerServers, numPartitions + ) + } } @AfterEach @@ -81,7 +88,7 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { closeSasl() } - @ParameterizedTest + @ParameterizedTest(name="{displayName}.quorum=kraft.isIdempotenceEnabled={0}") @ValueSource(booleans = Array(true, false)) def testProducerWithAuthenticationFailure(isIdempotenceEnabled: Boolean): Unit = { val prop = new Properties() @@ -101,8 +108,9 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { verifyWithRetry(sendOneRecord(producer2)) } - @Test - def testTransactionalProducerWithAuthenticationFailure(): Unit = { + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) + def testTransactionalProducerWithAuthenticationFailure(quorum: String, groupProtocol: String): Unit = { val txProducer = createTransactionalProducer() verifyAuthenticationException(txProducer.initTransactions()) @@ -111,7 +119,7 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { } @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) - @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly_ZK_implicit")) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) def testConsumerWithAuthenticationFailure(quorum: String, groupProtocol: String): Unit = { val consumer = createConsumer() consumer.subscribe(List(topic).asJava) @@ -119,7 +127,7 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { } @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) - @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly_ZK_implicit")) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) def testManualAssignmentConsumerWithAuthenticationFailure(quorum: String, groupProtocol: String): Unit = { val consumer = createConsumer() consumer.assign(List(tp).asJava) @@ -127,7 +135,7 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { } @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) - @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly_ZK_implicit")) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) def testManualAssignmentConsumerWithAutoCommitDisabledWithAuthenticationFailure(quorum: String, groupProtocol: String): Unit = { this.consumerConfig.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false.toString) val consumer = createConsumer() @@ -146,8 +154,9 @@ class SaslClientsWithInvalidCredentialsTest extends AbstractSaslTest { verifyWithRetry(assertEquals(1, consumer.poll(Duration.ofMillis(1000)).count)) } - @Test - def testKafkaAdminClientWithAuthenticationFailure(): Unit = { + @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) + 
@MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) + def testKafkaAdminClientWithAuthenticationFailure(quorum: String, groupProtocol: String): Unit = { val props = JaasTestUtils.adminClientSecurityConfigs(securityProtocol, OptionConverters.toJava(trustStoreFile), OptionConverters.toJava(clientSaslProperties)) props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers()) val adminClient = Admin.create(props) diff --git a/core/src/test/scala/integration/kafka/server/GssapiAuthenticationTest.scala b/core/src/test/scala/integration/kafka/server/GssapiAuthenticationTest.scala index c19dffd6b865..125411168d46 100644 --- a/core/src/test/scala/integration/kafka/server/GssapiAuthenticationTest.scala +++ b/core/src/test/scala/integration/kafka/server/GssapiAuthenticationTest.scala @@ -38,9 +38,9 @@ import org.apache.kafka.common.security.kerberos.KerberosLogin import org.apache.kafka.common.utils.{LogContext, MockTime} import org.apache.kafka.network.SocketServerConfigs import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo} +import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo} import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import org.junit.jupiter.params.provider.{MethodSource, ValueSource} import scala.jdk.CollectionConverters._ @@ -69,7 +69,7 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { serverConfig.put(SocketServerConfigs.FAILED_AUTHENTICATION_DELAY_MS_CONFIG, failedAuthenticationDelayMs.toString) super.setUp(testInfo) serverAddr = new InetSocketAddress("localhost", - servers.head.boundPort(ListenerName.forSecurityProtocol(SecurityProtocol.SASL_PLAINTEXT))) + brokers.head.boundPort(ListenerName.forSecurityProtocol(SecurityProtocol.SASL_PLAINTEXT))) clientConfig.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, SecurityProtocol.SASL_PLAINTEXT.name) clientConfig.put(SaslConfigs.SASL_MECHANISM, kafkaClientSaslMechanism) @@ -77,7 +77,7 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { clientConfig.put(CommonClientConfigs.CONNECTIONS_MAX_IDLE_MS_CONFIG, "5000") // create the test topic with all the brokers as replicas - createTopic(topic, 2, brokerCount) + createTopic(topic, 2, brokerCount, listenerName = listenerName, adminClientConfig = adminClientConfig) } @AfterEach @@ -92,15 +92,16 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { * Tests that Kerberos replay error `Request is a replay (34)` is not handled as an authentication exception * since replay detection used to detect DoS attacks may occasionally reject valid concurrent requests. 
*/ - @Test - def testRequestIsAReplay(): Unit = { + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testRequestIsAReplay(quorum: String): Unit = { val successfulAuthsPerThread = 10 val futures = (0 until numThreads).map(_ => executor.submit(new Runnable { override def run(): Unit = verifyRetriableFailuresDuringAuthentication(successfulAuthsPerThread) })) futures.foreach(_.get(60, TimeUnit.SECONDS)) - assertEquals(0, TestUtils.totalMetricValue(servers.head, "failed-authentication-total")) - val successfulAuths = TestUtils.totalMetricValue(servers.head, "successful-authentication-total") + assertEquals(0, TestUtils.totalMetricValue(brokers.head, "failed-authentication-total")) + val successfulAuths = TestUtils.totalMetricValue(brokers.head, "successful-authentication-total") assertTrue(successfulAuths > successfulAuthsPerThread * numThreads, "Too few authentications: " + successfulAuths) } @@ -109,8 +110,9 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { * are able to connect after the second re-login. Verifies that logout is performed only once * since duplicate logouts without successful login results in NPE from Java 9 onwards. */ - @Test - def testLoginFailure(): Unit = { + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testLoginFailure(quorum: String): Unit = { val selector = createSelectorWithRelogin() try { val login = TestableKerberosLogin.instance @@ -132,8 +134,9 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { * is performed when credentials are unavailable between logout and login, we handle it as a * transient error and not an authentication failure so that clients may retry. */ - @Test - def testReLogin(): Unit = { + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testReLogin(quorum: String): Unit = { val selector = createSelectorWithRelogin() try { val login = TestableKerberosLogin.instance @@ -163,8 +166,9 @@ class GssapiAuthenticationTest extends IntegrationTestHarness with SaslSetup { * Tests that Kerberos error `Server not found in Kerberos database (7)` is handled * as a fatal authentication failure. 
*/ - @Test - def testServerNotFoundInKerberosDatabase(): Unit = { + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testServerNotFoundInKerberosDatabase(quorum: String): Unit = { val jaasConfig = clientConfig.getProperty(SaslConfigs.SASL_JAAS_CONFIG) val invalidServiceConfig = jaasConfig.replace("serviceName=\"kafka\"", "serviceName=\"invalid-service\"") clientConfig.put(SaslConfigs.SASL_JAAS_CONFIG, invalidServiceConfig) diff --git a/core/src/test/scala/integration/kafka/server/MultipleListenersWithSameSecurityProtocolBaseTest.scala b/core/src/test/scala/integration/kafka/server/MultipleListenersWithSameSecurityProtocolBaseTest.scala index 16d1454604ec..ec44af7b2364 100644 --- a/core/src/test/scala/integration/kafka/server/MultipleListenersWithSameSecurityProtocolBaseTest.scala +++ b/core/src/test/scala/integration/kafka/server/MultipleListenersWithSameSecurityProtocolBaseTest.scala @@ -25,13 +25,14 @@ import kafka.security.JaasTestUtils import kafka.security.JaasTestUtils.JaasSection import kafka.utils.{TestInfoUtils, TestUtils} import kafka.utils.Implicits._ +import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, NewTopic} import org.apache.kafka.clients.consumer.Consumer import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.config.internals.BrokerSecurityConfigs import org.apache.kafka.common.config.{SaslConfigs, SslConfigs} import org.apache.kafka.common.internals.Topic import org.apache.kafka.common.network.{ConnectionMode, ListenerName} -import org.apache.kafka.server.config.{ReplicationConfigs, ZkConfigs} +import org.apache.kafka.server.config.{KRaftConfigs, ReplicationConfigs} import org.apache.kafka.coordinator.group.GroupCoordinatorConfig import org.apache.kafka.network.SocketServerConfigs import org.junit.jupiter.api.Assertions.assertEquals @@ -57,7 +58,8 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT import MultipleListenersWithSameSecurityProtocolBaseTest._ private val trustStoreFile = TestUtils.tempFile("truststore", ".jks") - private val servers = new ArrayBuffer[KafkaServer] + private val servers = new ArrayBuffer[KafkaBroker] + private var admin: Admin = null private val producers = mutable.Map[ClientMetadata, KafkaProducer[Array[Byte], Array[Byte]]]() private val consumers = mutable.Map[ClientMetadata, Consumer[Array[Byte], Array[Byte]]]() @@ -78,14 +80,15 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT (0 until numServers).foreach { brokerId => - val props = TestUtils.createBrokerConfig(brokerId, zkConnect, trustStoreFile = Some(trustStoreFile)) + val props = TestUtils.createBrokerConfig(brokerId, null, trustStoreFile = Some(trustStoreFile)) // Ensure that we can support multiple listeners per security protocol and multiple security protocols props.put(SocketServerConfigs.LISTENERS_CONFIG, s"$SecureInternal://localhost:0, $Internal://localhost:0, " + s"$SecureExternal://localhost:0, $External://localhost:0") + props.put(SocketServerConfigs.ADVERTISED_LISTENERS_CONFIG, props.get(SocketServerConfigs.LISTENERS_CONFIG)) props.put(SocketServerConfigs.LISTENER_SECURITY_PROTOCOL_MAP_CONFIG, s"$Internal:PLAINTEXT, $SecureInternal:SASL_SSL," + - s"$External:PLAINTEXT, $SecureExternal:SASL_SSL") + s"$External:PLAINTEXT, $SecureExternal:SASL_SSL, CONTROLLER:PLAINTEXT") + props.put(KRaftConfigs.CONTROLLER_LISTENER_NAMES_CONFIG, "CONTROLLER") props.put(ReplicationConfigs.INTER_BROKER_LISTENER_NAME_CONFIG, Internal) - 
props.put(ZkConfigs.ZK_ENABLE_SECURE_ACLS_CONFIG, "true") props.put(BrokerSecurityConfigs.SASL_MECHANISM_INTER_BROKER_PROTOCOL_CONFIG, kafkaClientSaslMechanism) props.put(s"${new ListenerName(SecureInternal).configPrefix}${BrokerSecurityConfigs.SASL_ENABLED_MECHANISMS_CONFIG}", kafkaServerSaslMechanisms(SecureInternal).mkString(",")) @@ -103,7 +106,7 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT } props.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, "invalid/file/path") - servers += TestUtils.createServer(KafkaConfig.fromProps(props)) + servers += createBroker(KafkaConfig.fromProps(props)) } servers.map(_.config).foreach { config => @@ -113,10 +116,20 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT s"Unexpected ${ReplicationConfigs.INTER_BROKER_LISTENER_NAME_CONFIG} for broker ${config.brokerId}") } - TestUtils.createTopic(zkClient, Topic.GROUP_METADATA_TOPIC_NAME, GroupCoordinatorConfig.OFFSETS_TOPIC_PARTITIONS_DEFAULT, - replicationFactor = 2, servers, servers.head.groupCoordinator.groupMetadataTopicConfigs) - - createScramCredentials(zkConnect, JaasTestUtils.KAFKA_SCRAM_USER, JaasTestUtils.KAFKA_SCRAM_PASSWORD) + val adminClientConfig = new java.util.HashMap[String, Object]() + adminClientConfig.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, + TestUtils.bootstrapServers(servers, new ListenerName(Internal))) + admin = Admin.create(adminClientConfig) + val newTopic = new NewTopic(Topic.GROUP_METADATA_TOPIC_NAME, + GroupCoordinatorConfig.OFFSETS_TOPIC_PARTITIONS_DEFAULT, 2.toShort) + val newTopicConfigs = new java.util.HashMap[String, String]() + servers.head.groupCoordinator.groupMetadataTopicConfigs.entrySet(). + forEach(e => newTopicConfigs.put(e.getKey.toString, e.getValue.toString)) + newTopic.configs(newTopicConfigs) + admin.createTopics(java.util.Arrays.asList(newTopic)).all().get(5, TimeUnit.MINUTES) + + createScramCredentials(admin, JaasTestUtils.KAFKA_SCRAM_USER, JaasTestUtils.KAFKA_SCRAM_PASSWORD) + TestUtils.ensureConsistentKRaftMetadata(servers, controllerServer) servers.head.config.listeners.foreach { endPoint => val listenerName = endPoint.listenerName @@ -130,7 +143,7 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT def addProducerConsumer(listenerName: ListenerName, mechanism: String, saslProps: Option[Properties]): Unit = { val topic = s"${listenerName.value}${producers.size}" - TestUtils.createTopic(zkClient, topic, 2, 2, servers) + admin.createTopics(java.util.Arrays.asList(new NewTopic(topic, 2, 2.toShort))).all().get(5, TimeUnit.MINUTES) val clientMetadata = ClientMetadata(listenerName, mechanism, topic) producers(clientMetadata) = TestUtils.createProducer(bootstrapServers, acks = -1, @@ -153,6 +166,8 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT @AfterEach override def tearDown(): Unit = { + Option(admin).foreach(_.close()) + admin = null producers.values.foreach(_.close()) consumers.values.foreach(_.close()) TestUtils.shutdownServers(servers) @@ -165,7 +180,7 @@ abstract class MultipleListenersWithSameSecurityProtocolBaseTest extends QuorumT * with acks=-1 to ensure that replication is also working. 
*/ @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumAndGroupProtocolNames) - @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly_ZK_implicit")) + @MethodSource(Array("getTestQuorumAndGroupProtocolParametersClassicGroupProtocolOnly")) def testProduceConsume(quorum: String, groupProtocol: String): Unit = { producers.foreach { case (clientMetadata, producer) => val producerRecords = (1 to 10).map(i => new ProducerRecord(clientMetadata.topic, s"key$i".getBytes, diff --git a/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala index cb781589d35c..4b9526744294 100644 --- a/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala @@ -20,6 +20,7 @@ package kafka.server import java.nio.ByteBuffer import java.util.{Collections, Properties} import kafka.utils.TestUtils +import org.apache.kafka.clients.admin.{Admin, TopicDescription} import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.compress.Compression import org.apache.kafka.common.config.TopicConfig @@ -35,6 +36,7 @@ import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{Arguments, MethodSource} import org.junit.jupiter.params.provider.ValueSource +import java.util.concurrent.TimeUnit import scala.annotation.nowarn import scala.jdk.CollectionConverters._ @@ -84,14 +86,41 @@ class ProduceRequestTest extends BaseRequestTest { new SimpleRecord(System.currentTimeMillis(), "key2".getBytes, "value2".getBytes)), 1) } - @ParameterizedTest + private def getPartitionToLeader( + admin: Admin, + topic: String + ): Map[Int, Int] = { + var topicDescription: TopicDescription = null + TestUtils.waitUntilTrue(() => { + val topicMap = admin. + describeTopics(java.util.Arrays.asList(topic)). 
+ allTopicNames().get(10, TimeUnit.MINUTES) + topicDescription = topicMap.get(topic) + topicDescription != null + }, "Timed out waiting to describe topic " + topic) + topicDescription.partitions().asScala.map(p => { + p.partition() -> p.leader().id() + }).toMap + } + + @ParameterizedTest(name = "quorum=kraft") @MethodSource(Array("timestampConfigProvider")) def testProduceWithInvalidTimestamp(messageTimeStampConfig: String, recordTimestamp: Long): Unit = { val topic = "topic" val partition = 0 val topicConfig = new Properties topicConfig.setProperty(messageTimeStampConfig, "1000") - val partitionToLeader = TestUtils.createTopic(zkClient, topic, 1, 1, servers, topicConfig) + val admin = createAdminClient() + TestUtils.createTopicWithAdmin( + admin = admin, + topic = topic, + brokers = brokers, + controllers = controllerServers, + numPartitions = 1, + replicationFactor = 1, + topicConfig = topicConfig + ) + val partitionToLeader = getPartitionToLeader(admin, topic) val leader = partitionToLeader(partition) def createRecords(magicValue: Byte, timestamp: Long, codec: Compression): MemoryRecords = { @@ -138,7 +167,14 @@ class ProduceRequestTest extends BaseRequestTest { val partition = 0 // Create a single-partition topic and find a broker which is not the leader - val partitionToLeader = createTopic(topic) + val admin = createAdminClient() + TestUtils.createTopicWithAdmin( + admin = admin, + topic = topic, + brokers = brokers, + controllers = controllerServers + ) + val partitionToLeader = getPartitionToLeader(admin, topic) val leader = partitionToLeader(partition) val nonReplicaOpt = brokers.find(_.config.brokerId != leader) assertTrue(nonReplicaOpt.isDefined) diff --git a/core/src/test/scala/unit/kafka/server/ShareFetchAcknowledgeRequestTest.scala b/core/src/test/scala/unit/kafka/server/ShareFetchAcknowledgeRequestTest.scala index 8097021e4cb5..d2679cd62eae 100644 --- a/core/src/test/scala/unit/kafka/server/ShareFetchAcknowledgeRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/ShareFetchAcknowledgeRequestTest.scala @@ -16,6 +16,7 @@ */ package kafka.server +import kafka.utils.TestUtils import org.apache.kafka.common.test.api.{ClusterConfigProperty, ClusterInstance, ClusterTest, ClusterTestDefaults, ClusterTestExtensions, ClusterTests, Type} import org.apache.kafka.common.message.ShareFetchResponseData.AcquiredRecords import org.apache.kafka.common.message.{ShareAcknowledgeRequestData, ShareAcknowledgeResponseData, ShareFetchRequestData, ShareFetchResponseData} @@ -253,13 +254,26 @@ class ShareFetchAcknowledgeRequestTest(cluster: ClusterInstance) extends GroupCo val metadata = new ShareRequestMetadata(memberId, ShareRequestMetadata.nextEpoch(ShareRequestMetadata.INITIAL_EPOCH)) val acknowledgementsMap: Map[TopicIdPartition, util.List[ShareFetchRequestData.AcknowledgementBatch]] = Map.empty val shareFetchRequest = createShareFetchRequest(groupId, metadata, MAX_PARTITION_BYTES, send, Seq.empty, acknowledgementsMap) - val shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) - val shareFetchResponseData = shareFetchResponse.data() - assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) - assertEquals(1, shareFetchResponseData.responses().size()) - assertEquals(topicId, shareFetchResponseData.responses().get(0).topicId()) - assertEquals(3, shareFetchResponseData.responses().get(0).partitions().size()) + // For the multi partition fetch request, the response may not be available in the first attempt + // as the share partitions might not be 
initialized yet. So, we retry until we get the response. + var responses = Seq[ShareFetchResponseData.PartitionData]() + TestUtils.waitUntilTrue(() => { + val shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) + val shareFetchResponseData = shareFetchResponse.data() + assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) + assertEquals(1, shareFetchResponseData.responses().size()) + val partitionsCount = shareFetchResponseData.responses().get(0).partitions().size() + if (partitionsCount > 0) { + assertEquals(topicId, shareFetchResponseData.responses().get(0).topicId()) + shareFetchResponseData.responses().get(0).partitions().foreach(partitionData => { + if (!partitionData.acquiredRecords().isEmpty) { + responses = responses :+ partitionData + } + }) + } + responses.size == 3 + }, "Share fetch request failed", 5000) val expectedPartitionData1 = new ShareFetchResponseData.PartitionData() .setPartitionIndex(0) @@ -279,7 +293,7 @@ class ShareFetchAcknowledgeRequestTest(cluster: ClusterInstance) extends GroupCo .setAcknowledgeErrorCode(Errors.NONE.code()) .setAcquiredRecords(expectedAcquiredRecords(Collections.singletonList(0), Collections.singletonList(9), Collections.singletonList(1))) - shareFetchResponseData.responses().get(0).partitions().foreach(partitionData => { + responses.foreach(partitionData => { partitionData.partitionIndex() match { case 0 => compareFetchResponsePartitions(expectedPartitionData1, partitionData) case 1 => compareFetchResponsePartitions(expectedPartitionData2, partitionData) @@ -2230,13 +2244,26 @@ class ShareFetchAcknowledgeRequestTest(cluster: ClusterInstance) extends GroupCo var metadata = new ShareRequestMetadata(memberId, shareSessionEpoch) val acknowledgementsMap: Map[TopicIdPartition, util.List[ShareFetchRequestData.AcknowledgementBatch]] = Map.empty var shareFetchRequest = createShareFetchRequest(groupId, metadata, MAX_PARTITION_BYTES, send, Seq.empty, acknowledgementsMap) - var shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) - var shareFetchResponseData = shareFetchResponse.data() - assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) - assertEquals(1, shareFetchResponseData.responses().size()) - assertEquals(topicId, shareFetchResponseData.responses().get(0).topicId()) - assertEquals(2, shareFetchResponseData.responses().get(0).partitions().size()) + // For the multi partition fetch request, the response may not be available in the first attempt + // as the share partitions might not be initialized yet. So, we retry until we get the response. 
+ var responses = Seq[ShareFetchResponseData.PartitionData]() + TestUtils.waitUntilTrue(() => { + val shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) + val shareFetchResponseData = shareFetchResponse.data() + assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) + assertEquals(1, shareFetchResponseData.responses().size()) + val partitionsCount = shareFetchResponseData.responses().get(0).partitions().size() + if (partitionsCount > 0) { + assertEquals(topicId, shareFetchResponseData.responses().get(0).topicId()) + shareFetchResponseData.responses().get(0).partitions().foreach(partitionData => { + if (!partitionData.acquiredRecords().isEmpty) { + responses = responses :+ partitionData + } + }) + } + responses.size == 2 + }, "Share fetch request failed", 5000) // Producing 10 more records to the topic partitions created above produceData(topicIdPartition1, 10) @@ -2247,9 +2274,9 @@ class ShareFetchAcknowledgeRequestTest(cluster: ClusterInstance) extends GroupCo metadata = new ShareRequestMetadata(memberId, shareSessionEpoch) val forget: Seq[TopicIdPartition] = Seq(topicIdPartition1) shareFetchRequest = createShareFetchRequest(groupId, metadata, MAX_PARTITION_BYTES, Seq.empty, forget, acknowledgementsMap) - shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) + val shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) - shareFetchResponseData = shareFetchResponse.data() + val shareFetchResponseData = shareFetchResponse.data() assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) assertEquals(1, shareFetchResponseData.responses().size()) assertEquals(topicId, shareFetchResponseData.responses().get(0).topicId()) @@ -2265,10 +2292,25 @@ class ShareFetchAcknowledgeRequestTest(cluster: ClusterInstance) extends GroupCo compareFetchResponsePartitions(expectedPartitionData, partitionData) } + // For initial fetch request, the response may not be available in the first attempt when the share + // partition is not initialized yet. Hence, wait for response from all partitions before proceeding. 
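The comments in these hunks capture why the assertions moved inside a polling loop: share partitions may not be initialized when the first ShareFetch request arrives, so the test retries until acquired records have been returned for every partition. A minimal, hedged sketch of the same wait-until-condition idea; the helper below is illustrative and is not the TestUtils implementation:

import java.util.function.BooleanSupplier;

public final class WaitUntil {
    private WaitUntil() { }

    // Re-evaluates the condition until it returns true or the timeout elapses,
    // mirroring the waitUntilTrue-style polling used by the share fetch tests.
    public static void waitUntilTrue(BooleanSupplier condition, String message, long timeoutMs)
            throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new AssertionError(message);
            }
            Thread.sleep(100); // back off briefly between attempts
        }
    }
}

A caller wraps the fetch-and-collect logic in the supplier and only asserts on the accumulated partition data once the condition holds, which is exactly what the retried share fetch requests above do.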
private def sendFirstShareFetchRequest(memberId: Uuid, groupId: String, topicIdPartitions: Seq[TopicIdPartition]): Unit = { - val metadata: ShareRequestMetadata = new ShareRequestMetadata(memberId, ShareRequestMetadata.INITIAL_EPOCH) - val shareFetchRequest = createShareFetchRequest(groupId, metadata, MAX_PARTITION_BYTES, topicIdPartitions, Seq.empty, Map.empty) - connectAndReceive[ShareFetchResponse](shareFetchRequest) + val partitions: util.Set[Integer] = new util.HashSet() + TestUtils.waitUntilTrue(() => { + val metadata = new ShareRequestMetadata(memberId, ShareRequestMetadata.INITIAL_EPOCH) + val shareFetchRequest = createShareFetchRequest(groupId, metadata, MAX_PARTITION_BYTES, topicIdPartitions, Seq.empty, Map.empty) + val shareFetchResponse = connectAndReceive[ShareFetchResponse](shareFetchRequest) + val shareFetchResponseData = shareFetchResponse.data() + + assertEquals(Errors.NONE.code, shareFetchResponseData.errorCode) + shareFetchResponseData.responses().foreach(response => { + if (!response.partitions().isEmpty) { + response.partitions().forEach(partitionData => partitions.add(partitionData.partitionIndex)) + } + }) + + partitions.size() == topicIdPartitions.size + }, "Share fetch request failed", 5000) } private def expectedAcquiredRecords(firstOffsets: util.List[Long], lastOffsets: util.List[Long], deliveryCounts: util.List[Int]): util.List[AcquiredRecords] = { diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index f02f2d5b5ea2..c4c386b004da 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -108,6 +108,7 @@ versions += [ kafka_36: "3.6.2", kafka_37: "3.7.1", kafka_38: "3.8.1", + kafka_39: "3.9.0", // When updating lz4 make sure the compression levels in org.apache.kafka.common.record.CompressionType are still valid lz4: "1.8.0", mavenArtifact: "3.9.6", @@ -208,6 +209,7 @@ libs += [ kafkaStreams_36: "org.apache.kafka:kafka-streams:$versions.kafka_36", kafkaStreams_37: "org.apache.kafka:kafka-streams:$versions.kafka_37", kafkaStreams_38: "org.apache.kafka:kafka-streams:$versions.kafka_38", + kafkaStreams_39: "org.apache.kafka:kafka-streams:$versions.kafka_39", lz4: "org.lz4:lz4-java:$versions.lz4", metrics: "com.yammer.metrics:metrics-core:$versions.metrics", dropwizardMetrics: "io.dropwizard.metrics:metrics-core:$versions.dropwizardMetrics", diff --git a/raft/src/main/java/org/apache/kafka/raft/KafkaNetworkChannel.java b/raft/src/main/java/org/apache/kafka/raft/KafkaNetworkChannel.java index 68224a8c2410..688a55abfd74 100644 --- a/raft/src/main/java/org/apache/kafka/raft/KafkaNetworkChannel.java +++ b/raft/src/main/java/org/apache/kafka/raft/KafkaNetworkChannel.java @@ -24,6 +24,7 @@ import org.apache.kafka.common.message.EndQuorumEpochRequestData; import org.apache.kafka.common.message.FetchRequestData; import org.apache.kafka.common.message.FetchSnapshotRequestData; +import org.apache.kafka.common.message.UpdateRaftVoterRequestData; import org.apache.kafka.common.message.VoteRequestData; import org.apache.kafka.common.network.ListenerName; import org.apache.kafka.common.protocol.ApiKeys; @@ -35,6 +36,7 @@ import org.apache.kafka.common.requests.EndQuorumEpochRequest; import org.apache.kafka.common.requests.FetchRequest; import org.apache.kafka.common.requests.FetchSnapshotRequest; +import org.apache.kafka.common.requests.UpdateRaftVoterRequest; import org.apache.kafka.common.requests.VoteRequest; import org.apache.kafka.common.utils.Time; import org.apache.kafka.server.util.InterBrokerSendThread; @@ -187,6 +189,8 @@ static 
AbstractRequest.Builder buildRequest(ApiMessag return new FetchRequest.SimpleBuilder((FetchRequestData) requestData); if (requestData instanceof FetchSnapshotRequestData) return new FetchSnapshotRequest.Builder((FetchSnapshotRequestData) requestData); + if (requestData instanceof UpdateRaftVoterRequestData) + return new UpdateRaftVoterRequest.Builder((UpdateRaftVoterRequestData) requestData); if (requestData instanceof ApiVersionsRequestData) return new ApiVersionsRequest.Builder((ApiVersionsRequestData) requestData, ApiKeys.API_VERSIONS.oldestVersion(), diff --git a/raft/src/test/java/org/apache/kafka/raft/KafkaNetworkChannelTest.java b/raft/src/test/java/org/apache/kafka/raft/KafkaNetworkChannelTest.java index 96a5df1845fc..af44137d061a 100644 --- a/raft/src/test/java/org/apache/kafka/raft/KafkaNetworkChannelTest.java +++ b/raft/src/test/java/org/apache/kafka/raft/KafkaNetworkChannelTest.java @@ -21,12 +21,14 @@ import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.feature.SupportedVersionRange; import org.apache.kafka.common.message.ApiVersionsResponseData; import org.apache.kafka.common.message.BeginQuorumEpochResponseData; import org.apache.kafka.common.message.EndQuorumEpochResponseData; import org.apache.kafka.common.message.FetchRequestData; import org.apache.kafka.common.message.FetchResponseData; import org.apache.kafka.common.message.FetchSnapshotResponseData; +import org.apache.kafka.common.message.UpdateRaftVoterResponseData; import org.apache.kafka.common.message.VoteResponseData; import org.apache.kafka.common.network.ListenerName; import org.apache.kafka.common.protocol.ApiKeys; @@ -42,6 +44,7 @@ import org.apache.kafka.common.requests.FetchRequest; import org.apache.kafka.common.requests.FetchResponse; import org.apache.kafka.common.requests.FetchSnapshotResponse; +import org.apache.kafka.common.requests.UpdateRaftVoterResponse; import org.apache.kafka.common.requests.VoteRequest; import org.apache.kafka.common.requests.VoteResponse; import org.apache.kafka.common.utils.MockTime; @@ -85,7 +88,8 @@ public void update(Time time, MockClient.MetadataUpdate update) { } ApiKeys.BEGIN_QUORUM_EPOCH, ApiKeys.END_QUORUM_EPOCH, ApiKeys.FETCH, - ApiKeys.FETCH_SNAPSHOT + ApiKeys.FETCH_SNAPSHOT, + ApiKeys.UPDATE_RAFT_VOTER ); private final int requestTimeoutMs = 30000; @@ -304,6 +308,15 @@ private ApiMessage buildTestRequest(ApiKeys key) { 10 ); + case UPDATE_RAFT_VOTER: + return RaftUtil.updateVoterRequest( + clusterId, + ReplicaKey.of(1, ReplicaKey.NO_DIRECTORY_ID), + 5, + new SupportedVersionRange((short) 1, (short) 1), + Endpoints.empty() + ); + default: throw new AssertionError("Unexpected api " + key); } @@ -337,6 +350,8 @@ private ApiMessage buildTestErrorResponse(ApiKeys key, Errors error) { return new FetchResponseData().setErrorCode(error.code()); case FETCH_SNAPSHOT: return new FetchSnapshotResponseData().setErrorCode(error.code()); + case UPDATE_RAFT_VOTER: + return new UpdateRaftVoterResponseData().setErrorCode(error.code()); default: throw new AssertionError("Unexpected api " + key); } @@ -354,6 +369,8 @@ private Errors extractError(ApiMessage response) { code = ((VoteResponseData) response).errorCode(); } else if (response instanceof FetchSnapshotResponseData) { code = ((FetchSnapshotResponseData) response).errorCode(); + } else if (response instanceof UpdateRaftVoterResponseData) { + code = ((UpdateRaftVoterResponseData) response).errorCode(); } else { throw new 
IllegalArgumentException("Unexpected type for responseData: " + response); } @@ -372,6 +389,8 @@ private AbstractResponse buildResponse(ApiMessage responseData) { return new FetchResponse((FetchResponseData) responseData); } else if (responseData instanceof FetchSnapshotResponseData) { return new FetchSnapshotResponse((FetchSnapshotResponseData) responseData); + } else if (responseData instanceof UpdateRaftVoterResponseData) { + return new UpdateRaftVoterResponse((UpdateRaftVoterResponseData) responseData); } else { throw new IllegalArgumentException("Unexpected type for responseData: " + responseData); } diff --git a/server-common/src/main/java/org/apache/kafka/server/purgatory/DelayedOperation.java b/server-common/src/main/java/org/apache/kafka/server/purgatory/DelayedOperation.java index 0ad638240c89..f3c818cb9c6c 100644 --- a/server-common/src/main/java/org/apache/kafka/server/purgatory/DelayedOperation.java +++ b/server-common/src/main/java/org/apache/kafka/server/purgatory/DelayedOperation.java @@ -42,8 +42,8 @@ public abstract class DelayedOperation extends TimerTask { private final AtomicBoolean completed = new AtomicBoolean(false); - // Visible for testing - final Lock lock; + + protected final Lock lock; public DelayedOperation(long delayMs, Optional lockOpt) { this(delayMs, lockOpt.orElse(new ReentrantLock())); diff --git a/settings.gradle b/settings.gradle index 22db09725420..9d08ac68ca26 100644 --- a/settings.gradle +++ b/settings.gradle @@ -112,6 +112,7 @@ include 'clients', 'streams:upgrade-system-tests-36', 'streams:upgrade-system-tests-37', 'streams:upgrade-system-tests-38', + 'streams:upgrade-system-tests-39', 'tools', 'tools:tools-api', 'transaction-coordinator', diff --git a/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetch.java b/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetch.java new file mode 100644 index 000000000000..7c60501ffd67 --- /dev/null +++ b/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetch.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.server.share.fetch; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.message.ShareFetchResponseData.PartitionData; +import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.server.storage.log.FetchParams; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; + +/** + * The ShareFetch class is used to store the fetch parameters for a share fetch request. + */ +public class ShareFetch { + + /** + * The future that will be completed when the fetch is done. 
+ */ + private final CompletableFuture> future; + + /** + * The fetch parameters for the fetch request. + */ + private final FetchParams fetchParams; + /** + * The group id of the share group that is fetching the records. + */ + private final String groupId; + /** + * The member id of the share group that is fetching the records. + */ + private final String memberId; + /** + * The maximum number of bytes that can be fetched for each partition. + */ + private final Map partitionMaxBytes; + /** + * The maximum number of records that can be fetched for the request. + */ + private final int maxFetchRecords; + /** + * The partitions that had an error during the fetch. + */ + private Map erroneous; + + public ShareFetch( + FetchParams fetchParams, + String groupId, + String memberId, + CompletableFuture> future, + Map partitionMaxBytes, + int maxFetchRecords + ) { + this.fetchParams = fetchParams; + this.groupId = groupId; + this.memberId = memberId; + this.future = future; + this.partitionMaxBytes = partitionMaxBytes; + this.maxFetchRecords = maxFetchRecords; + } + + public String groupId() { + return groupId; + } + + public String memberId() { + return memberId; + } + + public Map partitionMaxBytes() { + return partitionMaxBytes; + } + + public FetchParams fetchParams() { + return fetchParams; + } + + public int maxFetchRecords() { + return maxFetchRecords; + } + + /** + * Add an erroneous partition to the share fetch request. If the erroneous map is null, it will + * be created. + *

+ * The method is synchronized to avoid concurrent modification of the erroneous map, as for + * some partitions the pending initialization can be on some threads and for other partitions + * share fetch request can be processed in purgatory. + * + * @param topicIdPartition The partition that had an error. + * @param throwable The error that occurred. + */ + public synchronized void addErroneous(TopicIdPartition topicIdPartition, Throwable throwable) { + if (erroneous == null) { + erroneous = new HashMap<>(); + } + erroneous.put(topicIdPartition, throwable); + } + + /** + * Check if the share fetch request is completed. + * @return true if the request is completed, false otherwise. + */ + public boolean isCompleted() { + return future.isDone(); + } + + /** + * Check if all the partitions in the request have errored. + * @return true if all the partitions in the request have errored, false otherwise. + */ + public synchronized boolean errorInAllPartitions() { + return erroneous != null && erroneous.size() == partitionMaxBytes().size(); + } + + /** + * May be complete the share fetch request with the given partition data. If the request is already completed, + * this method does nothing. If there are any erroneous partitions, they will be added to the response. + * + * @param partitionData The partition data to complete the fetch with. + */ + public void maybeComplete(Map partitionData) { + if (isCompleted()) { + return; + } + + Map response = new HashMap<>(partitionData); + // Add any erroneous partitions to the response. + addErroneousToResponse(response); + future.complete(response); + } + + /** + * Maybe complete the share fetch request with the given exception for the topicIdPartitions. + * If the request is already completed, this method does nothing. If there are any erroneous partitions, + * they will be added to the response. + * + * @param topicIdPartitions The topic id partitions which errored out. + * @param throwable The exception to complete the fetch with. + */ + public void maybeCompleteWithException(Collection topicIdPartitions, Throwable throwable) { + if (isCompleted()) { + return; + } + Map response = topicIdPartitions.stream().collect( + Collectors.toMap(tp -> tp, tp -> new PartitionData() + .setErrorCode(Errors.forException(throwable).code()) + .setErrorMessage(throwable.getMessage()))); + // Add any erroneous partitions to the response. + addErroneousToResponse(response); + future.complete(response); + } + + /** + * Filter out the erroneous partitions from the given set of topicIdPartitions. The order of + * partitions is important hence the method expects an ordered set as input and returns the ordered + * set as well. + * + * @param topicIdPartitions The topic id partitions to filter. + * @return The topic id partitions without the erroneous partitions. 
+ */ + public synchronized Set filterErroneousTopicPartitions(Set topicIdPartitions) { + if (erroneous != null) { + Set retain = new LinkedHashSet<>(topicIdPartitions); + retain.removeAll(erroneous.keySet()); + return retain; + } + return topicIdPartitions; + } + + private synchronized void addErroneousToResponse(Map response) { + if (erroneous != null) { + erroneous.forEach((topicIdPartition, throwable) -> { + response.put(topicIdPartition, new PartitionData() + .setErrorCode(Errors.forException(throwable).code()) + .setErrorMessage(throwable.getMessage())); + }); + } + } +} diff --git a/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetchData.java b/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetchData.java deleted file mode 100644 index c32b32800177..000000000000 --- a/share/src/main/java/org/apache/kafka/server/share/fetch/ShareFetchData.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kafka.server.share.fetch; - -import org.apache.kafka.common.TopicIdPartition; -import org.apache.kafka.common.message.ShareFetchResponseData.PartitionData; -import org.apache.kafka.server.storage.log.FetchParams; - -import java.util.Map; -import java.util.concurrent.CompletableFuture; - -/** - * The ShareFetchData class is used to store the fetch parameters for a share fetch request. 
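Taken together, the new ShareFetch class above (which replaces the ShareFetchData holder removed below) supports a flow where per-partition failures are recorded as they surface and folded into the single completion of the future. A hedged usage sketch; the topic name, group/member ids, byte limits, and the null FetchParams are placeholders for values the request layer normally provides:

import org.apache.kafka.common.TopicIdPartition;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.Uuid;
import org.apache.kafka.common.message.ShareFetchResponseData.PartitionData;
import org.apache.kafka.server.share.fetch.ShareFetch;
import org.apache.kafka.server.storage.log.FetchParams;

import java.util.Map;
import java.util.concurrent.CompletableFuture;

public class ShareFetchUsageSketch {
    public static void main(String[] args) {
        TopicIdPartition tp0 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0));
        TopicIdPartition tp1 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 1));

        CompletableFuture<Map<TopicIdPartition, PartitionData>> future = new CompletableFuture<>();
        // FetchParams is built by the request layer; it is only stored by ShareFetch here.
        FetchParams params = null;
        ShareFetch shareFetch = new ShareFetch(params, "groupId", "memberId", future,
            Map.of(tp0, 10, tp1, 10), 100);

        // A failed initialization is recorded per partition...
        shareFetch.addErroneous(tp0, new RuntimeException("initialization failed"));

        // ...and excluded from further fetching via filterErroneousTopicPartitions,
        // while still ending up in the final response.
        shareFetch.maybeComplete(Map.of(tp1, new PartitionData().setPartitionIndex(1)));

        // The future now holds tp1's data plus an error entry for tp0.
        System.out.println(future.join().keySet());
    }
}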
- */ -public class ShareFetchData { - - private final FetchParams fetchParams; - private final String groupId; - private final String memberId; - private final CompletableFuture> future; - private final Map partitionMaxBytes; - private final int maxFetchRecords; - - public ShareFetchData( - FetchParams fetchParams, - String groupId, - String memberId, - CompletableFuture> future, - Map partitionMaxBytes, - int maxFetchRecords - ) { - this.fetchParams = fetchParams; - this.groupId = groupId; - this.memberId = memberId; - this.future = future; - this.partitionMaxBytes = partitionMaxBytes; - this.maxFetchRecords = maxFetchRecords; - } - - public String groupId() { - return groupId; - } - - public String memberId() { - return memberId; - } - - public CompletableFuture> future() { - return future; - } - - public Map partitionMaxBytes() { - return partitionMaxBytes; - } - - public FetchParams fetchParams() { - return fetchParams; - } - - public int maxFetchRecords() { - return maxFetchRecords; - } -} diff --git a/share/src/test/java/org/apache/kafka/server/share/fetch/ShareFetchTest.java b/share/src/test/java/org/apache/kafka/server/share/fetch/ShareFetchTest.java new file mode 100644 index 000000000000..bf5de1ae0de4 --- /dev/null +++ b/share/src/test/java/org/apache/kafka/server/share/fetch/ShareFetchTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.server.share.fetch; + +import org.apache.kafka.common.TopicIdPartition; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.Uuid; +import org.apache.kafka.server.storage.log.FetchParams; + +import org.junit.jupiter.api.Test; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +public class ShareFetchTest { + + private static final String GROUP_ID = "groupId"; + private static final String MEMBER_ID = "memberId"; + + @Test + public void testErrorInAllPartitions() { + TopicIdPartition topicIdPartition = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0)); + ShareFetch shareFetch = new ShareFetch(mock(FetchParams.class), GROUP_ID, MEMBER_ID, new CompletableFuture<>(), + Map.of(topicIdPartition, 10), 100); + assertFalse(shareFetch.errorInAllPartitions()); + + shareFetch.addErroneous(topicIdPartition, new RuntimeException()); + assertTrue(shareFetch.errorInAllPartitions()); + } + + @Test + public void testErrorInAllPartitionsWithMultipleTopicIdPartitions() { + TopicIdPartition topicIdPartition0 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0)); + TopicIdPartition topicIdPartition1 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 1)); + ShareFetch shareFetch = new ShareFetch(mock(FetchParams.class), GROUP_ID, MEMBER_ID, new CompletableFuture<>(), + Map.of(topicIdPartition0, 10, topicIdPartition1, 10), 100); + assertFalse(shareFetch.errorInAllPartitions()); + + shareFetch.addErroneous(topicIdPartition0, new RuntimeException()); + assertFalse(shareFetch.errorInAllPartitions()); + + shareFetch.addErroneous(topicIdPartition1, new RuntimeException()); + assertTrue(shareFetch.errorInAllPartitions()); + } + + @Test + public void testFilterErroneousTopicPartitions() { + TopicIdPartition topicIdPartition0 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0)); + TopicIdPartition topicIdPartition1 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 1)); + ShareFetch shareFetch = new ShareFetch(mock(FetchParams.class), GROUP_ID, MEMBER_ID, new CompletableFuture<>(), + Map.of(topicIdPartition0, 10, topicIdPartition1, 10), 100); + Set result = shareFetch.filterErroneousTopicPartitions(Set.of(topicIdPartition0, topicIdPartition1)); + // No erroneous partitions, hence all partitions should be returned. + assertEquals(2, result.size()); + assertTrue(result.contains(topicIdPartition0)); + assertTrue(result.contains(topicIdPartition1)); + + // Add an erroneous partition and verify that it is filtered out. + shareFetch.addErroneous(topicIdPartition0, new RuntimeException()); + result = shareFetch.filterErroneousTopicPartitions(Set.of(topicIdPartition0, topicIdPartition1)); + assertEquals(1, result.size()); + assertTrue(result.contains(topicIdPartition1)); + + // Add another erroneous partition and verify that it is filtered out. 
+ shareFetch.addErroneous(topicIdPartition1, new RuntimeException()); + result = shareFetch.filterErroneousTopicPartitions(Set.of(topicIdPartition0, topicIdPartition1)); + assertTrue(result.isEmpty()); + } + +} diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java index 29b92408ef50..5b0b68f7854b 100644 --- a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java +++ b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java @@ -423,6 +423,12 @@ public class StreamsConfig extends AbstractConfig { @SuppressWarnings("WeakerAccess") public static final String UPGRADE_FROM_38 = UpgradeFromValues.UPGRADE_FROM_38.toString(); + /** + * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 3.9.x}. + */ + @SuppressWarnings("WeakerAccess") + public static final String UPGRADE_FROM_39 = UpgradeFromValues.UPGRADE_FROM_39.toString(); + /** * Config value for parameter {@link #PROCESSING_GUARANTEE_CONFIG "processing.guarantee"} for at-least-once processing guarantees. @@ -789,7 +795,7 @@ public class StreamsConfig extends AbstractConfig { UPGRADE_FROM_28 + "\", \"" + UPGRADE_FROM_30 + "\", \"" + UPGRADE_FROM_31 + "\", \"" + UPGRADE_FROM_32 + "\", \"" + UPGRADE_FROM_33 + "\", \"" + UPGRADE_FROM_34 + "\", \"" + UPGRADE_FROM_35 + "\", \"" + UPGRADE_FROM_36 + "\", \"" + UPGRADE_FROM_37 + "\", \"" + - UPGRADE_FROM_38 + "(for upgrading from the corresponding old version)."; + UPGRADE_FROM_38 + "\", \"" + UPGRADE_FROM_39 + "\", \"" + "(for upgrading from the corresponding old version)."; /** {@code topology.optimization} */ public static final String TOPOLOGY_OPTIMIZATION_CONFIG = "topology.optimization"; diff --git a/streams/src/main/java/org/apache/kafka/streams/internals/UpgradeFromValues.java b/streams/src/main/java/org/apache/kafka/streams/internals/UpgradeFromValues.java index 66e079eecacb..617726cdf646 100644 --- a/streams/src/main/java/org/apache/kafka/streams/internals/UpgradeFromValues.java +++ b/streams/src/main/java/org/apache/kafka/streams/internals/UpgradeFromValues.java @@ -40,7 +40,8 @@ public enum UpgradeFromValues { UPGRADE_FROM_35("3.5"), UPGRADE_FROM_36("3.6"), UPGRADE_FROM_37("3.7"), - UPGRADE_FROM_38("3.8"); + UPGRADE_FROM_38("3.8"), + UPGRADE_FROM_39("3.9"); private final String value; diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java index ad53634386ee..96d9074eff31 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java @@ -319,7 +319,10 @@ private void reprocessState(final List topicPartitions, record.headers())); restoreCount++; } - } catch (final RuntimeException deserializationException) { + } catch (final Exception deserializationException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages handleDeserializationFailure( deserializationExceptionHandler, globalProcessorContext, diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java 
b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java index 9e2a79f0f259..2bb58eb6b82b 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java @@ -203,7 +203,10 @@ public void process(final Record record) { } catch (final FailedProcessingException | TaskCorruptedException | TaskMigratedException e) { // Rethrow exceptions that should not be handled here throw e; - } catch (final RuntimeException processingException) { + } catch (final Exception processingException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages final ErrorHandlerContext errorHandlerContext = new DefaultErrorHandlerContext( null, // only required to pass for DeserializationExceptionHandler internalProcessorContext.topic(), @@ -220,7 +223,10 @@ public void process(final Record record) { processingExceptionHandler.handle(errorHandlerContext, record, processingException), "Invalid ProductionExceptionHandler response." ); - } catch (final RuntimeException fatalUserException) { + } catch (final Exception fatalUserException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages log.error( "Processing error callback failed after processing error for record: {}", errorHandlerContext, diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java index 8dedeb7dad41..81db581d04f7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java @@ -210,7 +210,10 @@ public void send(final String topic, key, keySerializer, exception); - } catch (final RuntimeException serializationException) { + } catch (final Exception serializationException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages handleException( ProductionExceptionHandler.SerializationExceptionOrigin.KEY, topic, @@ -221,7 +224,8 @@ public void send(final String topic, timestamp, processorNodeId, context, - serializationException); + serializationException + ); return; } @@ -234,7 +238,10 @@ public void send(final String topic, value, valueSerializer, exception); - } catch (final RuntimeException serializationException) { + } catch (final Exception serializationException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages handleException( ProductionExceptionHandler.SerializationExceptionOrigin.VALUE, topic, @@ -312,7 +319,7 @@ private void handleException(final ProductionExceptionHandler.Serializati final Long timestamp, final String processorNodeId, final InternalProcessorContext context, - final RuntimeException serializationException) { + final Exception 
serializationException) { log.debug(String.format("Error serializing record for topic %s", topic), serializationException); final ProducerRecord record = new ProducerRecord<>(topic, partition, timestamp, key, value, headers); @@ -328,7 +335,10 @@ private void handleException(final ProductionExceptionHandler.Serializati ), "Invalid ProductionExceptionHandler response." ); - } catch (final RuntimeException fatalUserException) { + } catch (final Exception fatalUserException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages log.error( String.format( "Production error callback failed after serialization error for record %s: %s", @@ -442,7 +452,10 @@ private void recordSendError(final String topic, ), "Invalid ProductionExceptionHandler response." ); - } catch (final RuntimeException fatalUserException) { + } catch (final Exception fatalUserException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages log.error( "Production error callback failed after production error for record {}", serializedRecord, diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordDeserializer.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordDeserializer.java index 5ddafe654e98..6f9fe989552f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordDeserializer.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordDeserializer.java @@ -70,7 +70,10 @@ ConsumerRecord deserialize(final ProcessorContext processo rawRecord.headers(), rawRecord.leaderEpoch() ); - } catch (final RuntimeException deserializationException) { + } catch (final Exception deserializationException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages handleDeserializationFailure(deserializationExceptionHandler, processorContext, deserializationException, rawRecord, log, droppedRecordsSensor, sourceNode().name()); return null; // 'handleDeserializationFailure' would either throw or swallow -- if we swallow we need to skip the record by returning 'null' } @@ -78,7 +81,7 @@ ConsumerRecord deserialize(final ProcessorContext processo public static void handleDeserializationFailure(final DeserializationExceptionHandler deserializationExceptionHandler, final ProcessorContext processorContext, - final RuntimeException deserializationException, + final Exception deserializationException, final ConsumerRecord rawRecord, final Logger log, final Sensor droppedRecordsSensor, @@ -100,7 +103,10 @@ public static void handleDeserializationFailure(final DeserializationExceptionHa deserializationExceptionHandler.handle(errorHandlerContext, rawRecord, deserializationException), "Invalid DeserializationExceptionHandler response." 
); - } catch (final RuntimeException fatalUserException) { + } catch (final Exception fatalUserException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages log.error( "Deserialization error callback failed after deserialization error for record {}", rawRecord, diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index 92772c686af8..1fdd298088bb 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -52,8 +52,6 @@ import org.apache.kafka.streams.state.internals.ThreadCache; import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -883,18 +881,6 @@ public void recordProcessTimeRatioAndBufferSize(final long allTaskProcessMs, fin processTimeMs = 0L; } - private String getStacktraceString(final Throwable e) { - String stacktrace = null; - try (final StringWriter stringWriter = new StringWriter(); - final PrintWriter printWriter = new PrintWriter(stringWriter)) { - e.printStackTrace(printWriter); - stacktrace = stringWriter.toString(); - } catch (final IOException ioe) { - log.error("Encountered error extracting stacktrace from this exception", ioe); - } - return stacktrace; - } - /** * @throws IllegalStateException if the current node is not null * @throws TaskMigratedException if the task producer got fenced (EOS only) @@ -937,7 +923,10 @@ record = null; throw createStreamsException(node.name(), e.getCause()); } catch (final TaskCorruptedException | TaskMigratedException e) { throw e; - } catch (final RuntimeException processingException) { + } catch (final Exception processingException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages final ErrorHandlerContext errorHandlerContext = new DefaultErrorHandlerContext( null, recordContext.topic(), @@ -955,7 +944,10 @@ record = null; processingExceptionHandler.handle(errorHandlerContext, null, processingException), "Invalid ProcessingExceptionHandler response." 
); - } catch (final RuntimeException fatalUserException) { + } catch (final Exception fatalUserException) { + // while Java distinguishes checked vs unchecked exceptions, other languages + // like Scala or Kotlin do not, and thus we need to catch `Exception` + // (instead of `RuntimeException`) to work well with those languages log.error( "Processing error callback failed after processing error for record: {}", errorHandlerContext, diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java index ad6aca2bac8f..bc2324044da0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java @@ -130,6 +130,7 @@ public RebalanceProtocol rebalanceProtocol() { case UPGRADE_FROM_36: case UPGRADE_FROM_37: case UPGRADE_FROM_38: + case UPGRADE_FROM_39: // we need to add new version when new "upgrade.from" values become available // This config is for explicitly sending FK response to a requested partition @@ -192,6 +193,7 @@ public int configuredMetadataVersion(final int priorVersion) { case UPGRADE_FROM_36: case UPGRADE_FROM_37: case UPGRADE_FROM_38: + case UPGRADE_FROM_39: // we need to add new version when new "upgrade.from" values become available // This config is for explicitly sending FK response to a requested partition diff --git a/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java new file mode 100644 index 000000000000..dc0ad4d5601c --- /dev/null +++ b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
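The StreamsConfig, UpgradeFromValues, and AssignorConfiguration hunks above register "3.9" as a valid upgrade.from value. A brief, hedged sketch of how an application on a newer release would set it for the first rolling bounce of an upgrade; the application id and bootstrap address are placeholders:

import org.apache.kafka.streams.StreamsConfig;

import java.util.Properties;

public class UpgradeFromConfigSketch {
    public static Properties streamsProps() {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-streams-app");    // placeholder
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        // First rolling bounce: tell upgraded instances they are joining a group
        // that still contains 3.9 instances.
        props.put(StreamsConfig.UPGRADE_FROM_CONFIG, StreamsConfig.UPGRADE_FROM_39);
        return props;
    }
}

As with the existing values, upgrade.from is removed again in a second rolling bounce once every instance runs the new version.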
+ */ +package org.apache.kafka.streams.tests; + +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.common.utils.KafkaThread; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsBuilder; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; +import org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler; +import org.apache.kafka.streams.kstream.Consumed; +import org.apache.kafka.streams.kstream.Grouped; +import org.apache.kafka.streams.kstream.KGroupedStream; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.Materialized; +import org.apache.kafka.streams.kstream.Produced; +import org.apache.kafka.streams.kstream.Suppressed.BufferConfig; +import org.apache.kafka.streams.kstream.TimeWindows; +import org.apache.kafka.streams.kstream.Windowed; +import org.apache.kafka.streams.state.Stores; +import org.apache.kafka.streams.state.WindowStore; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.time.Duration; +import java.time.Instant; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static org.apache.kafka.streams.kstream.Suppressed.untilWindowCloses; + +public class SmokeTestClient extends SmokeTestUtil { + + private final String name; + + private KafkaStreams streams; + private boolean uncaughtException = false; + private boolean started; + private volatile boolean closed; + + private static void addShutdownHook(final String name, final Runnable runnable) { + if (name != null) { + Runtime.getRuntime().addShutdownHook(KafkaThread.nonDaemon(name, runnable)); + } else { + Runtime.getRuntime().addShutdownHook(new Thread(runnable)); + } + } + + private static File tempDirectory() { + final String prefix = "kafka-"; + final File file; + try { + file = Files.createTempDirectory(prefix).toFile(); + } catch (final IOException ex) { + throw new RuntimeException("Failed to create a temp dir", ex); + } + file.deleteOnExit(); + + addShutdownHook("delete-temp-file-shutdown-hook", () -> { + try { + Utils.delete(file); + } catch (final IOException e) { + System.out.println("Error deleting " + file.getAbsolutePath()); + e.printStackTrace(System.out); + } + }); + + return file; + } + + public SmokeTestClient(final String name) { + this.name = name; + } + + public boolean started() { + return started; + } + + public boolean closed() { + return closed; + } + + public void start(final Properties streamsProperties) { + final Topology build = getTopology(); + streams = new KafkaStreams(build, getStreamsConfig(streamsProperties)); + + final CountDownLatch countDownLatch = new CountDownLatch(1); + streams.setStateListener((newState, oldState) -> { + System.out.printf("%s %s: %s -> %s%n", name, Instant.now(), oldState, newState); + if (oldState == KafkaStreams.State.REBALANCING && newState == KafkaStreams.State.RUNNING) { + started = true; + countDownLatch.countDown(); + } + + if (newState == KafkaStreams.State.NOT_RUNNING) { + closed = true; + } + }); + + streams.setUncaughtExceptionHandler(e -> { + System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION"); + System.out.println(name + ": FATAL: An unexpected exception is encountered: " + e); + e.printStackTrace(System.out); + 
uncaughtException = true; + return StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse.SHUTDOWN_CLIENT; + }); + + addShutdownHook("streams-shutdown-hook", this::close); + + streams.start(); + try { + if (!countDownLatch.await(1, TimeUnit.MINUTES)) { + System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: Didn't start in one minute"); + } + } catch (final InterruptedException e) { + System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: " + e); + e.printStackTrace(System.out); + } + System.out.println(name + ": SMOKE-TEST-CLIENT-STARTED"); + System.out.println(name + " started at " + Instant.now()); + } + + public void closeAsync() { + streams.close(Duration.ZERO); + } + + public void close() { + final boolean closed = streams.close(Duration.ofMinutes(1)); + + if (closed && !uncaughtException) { + System.out.println(name + ": SMOKE-TEST-CLIENT-CLOSED"); + } else if (closed) { + System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION"); + } else { + System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: Didn't close"); + } + } + + private Properties getStreamsConfig(final Properties props) { + final Properties fullProps = new Properties(props); + fullProps.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest"); + fullProps.put(StreamsConfig.CLIENT_ID_CONFIG, "SmokeTest-" + name); + fullProps.put(StreamsConfig.STATE_DIR_CONFIG, tempDirectory().getAbsolutePath()); + fullProps.putAll(props); + return fullProps; + } + + public Topology getTopology() { + final StreamsBuilder builder = new StreamsBuilder(); + final Consumed stringIntConsumed = Consumed.with(stringSerde, intSerde); + final KStream source = builder.stream("data", stringIntConsumed); + source.filterNot((k, v) -> k.equals("flush")) + .to("echo", Produced.with(stringSerde, intSerde)); + final KStream data = source.filter((key, value) -> value == null || value != END); + data.process(SmokeTestUtil.printProcessorSupplier("data", name)); + + // min + final KGroupedStream groupedData = data.groupByKey(Grouped.with(stringSerde, intSerde)); + + final KTable, Integer> minAggregation = groupedData + .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1))) + .aggregate( + () -> Integer.MAX_VALUE, + (aggKey, value, aggregate) -> (value < aggregate) ? value : aggregate, + Materialized + .>as("uwin-min") + .withValueSerde(intSerde) + .withRetention(Duration.ofHours(25)) + ); + + streamify(minAggregation, "min-raw"); + + streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed"); + + minAggregation + .toStream(new Unwindow<>()) + .filterNot((k, v) -> k.equals("flush")) + .to("min", Produced.with(stringSerde, intSerde)); + + final KTable, Integer> smallWindowSum = groupedData + .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30)).advanceBy(Duration.ofSeconds(1))) + .reduce(Integer::sum); + + streamify(smallWindowSum, "sws-raw"); + streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed"); + + final KTable minTable = builder.table( + "min", + Consumed.with(stringSerde, intSerde), + Materialized.as("minStoreName")); + + minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name)); + + // max + groupedData + .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2))) + .aggregate( + () -> Integer.MIN_VALUE, + (aggKey, value, aggregate) -> (value > aggregate) ? 
value : aggregate, + Materialized.>as("uwin-max").withValueSerde(intSerde)) + .toStream(new Unwindow<>()) + .filterNot((k, v) -> k.equals("flush")) + .to("max", Produced.with(stringSerde, intSerde)); + + final KTable maxTable = builder.table( + "max", + Consumed.with(stringSerde, intSerde), + Materialized.as("maxStoreName")); + maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name)); + + // sum + groupedData + .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2))) + .aggregate( + () -> 0L, + (aggKey, value, aggregate) -> (long) value + aggregate, + Materialized.>as("win-sum").withValueSerde(longSerde)) + .toStream(new Unwindow<>()) + .filterNot((k, v) -> k.equals("flush")) + .to("sum", Produced.with(stringSerde, longSerde)); + + final Consumed stringLongConsumed = Consumed.with(stringSerde, longSerde); + final KTable sumTable = builder.table("sum", stringLongConsumed); + sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name)); + + // cnt + groupedData + .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2))) + .count(Materialized.as("uwin-cnt")) + .toStream(new Unwindow<>()) + .filterNot((k, v) -> k.equals("flush")) + .to("cnt", Produced.with(stringSerde, longSerde)); + + final KTable cntTable = builder.table( + "cnt", + Consumed.with(stringSerde, longSerde), + Materialized.as("cntStoreName")); + cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name)); + + // dif + maxTable + .join( + minTable, + (value1, value2) -> value1 - value2) + .toStream() + .filterNot((k, v) -> k.equals("flush")) + .to("dif", Produced.with(stringSerde, intSerde)); + + // avg + sumTable + .join( + cntTable, + (value1, value2) -> (double) value1 / (double) value2) + .toStream() + .filterNot((k, v) -> k.equals("flush")) + .to("avg", Produced.with(stringSerde, doubleSerde)); + + // test repartition + final Agg agg = new Agg(); + cntTable.groupBy(agg.selector(), Grouped.with(stringSerde, longSerde)) + .aggregate(agg.init(), agg.adder(), agg.remover(), + Materialized.as(Stores.inMemoryKeyValueStore("cntByCnt")) + .withKeySerde(Serdes.String()) + .withValueSerde(Serdes.Long())) + .toStream() + .to("tagg", Produced.with(stringSerde, longSerde)); + + return builder.build(); + } + + private static void streamify(final KTable, Integer> windowedTable, final String topic) { + windowedTable + .toStream() + .filterNot((k, v) -> k.key().equals("flush")) + .map((key, value) -> new KeyValue<>(key.toString(), value)) + .to(topic, Produced.with(stringSerde, intSerde)); + } +} diff --git a/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java new file mode 100644 index 000000000000..8ab48f7cf5f6 --- /dev/null +++ b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.tests; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.TimeoutException; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.common.serialization.Deserializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.utils.Utils; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.Collections.emptyMap; +import static org.apache.kafka.common.utils.Utils.mkEntry; + +public class SmokeTestDriver extends SmokeTestUtil { + private static final String[] NUMERIC_VALUE_TOPICS = { + "data", + "echo", + "max", + "min", "min-suppressed", "min-raw", + "dif", + "sum", + "sws-raw", "sws-suppressed", + "cnt", + "avg", + "tagg" + }; + private static final String[] STRING_VALUE_TOPICS = { + "fk" + }; + + private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length]; + static { + System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length); + System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length); + } + + private static final int MAX_RECORD_EMPTY_RETRIES = 30; + + private static class ValueList { + public final String key; + private final int[] values; + private int index; + + ValueList(final int min, final int max) { + key = min + "-" + max; + + values = new int[max - min + 1]; + for (int i = 0; i < values.length; i++) { + values[i] = min + i; + } + // We want to randomize the order of data to test not completely predictable processing order + // However, values are also use as a timestamp of the record. (TODO: separate data and timestamp) + // We keep some correlation of time and order. 
Thus, the shuffling is done with a sliding window + shuffle(values, 10); + + index = 0; + } + + int next() { + return (index < values.length) ? values[index++] : -1; + } + } + + public static String[] topics() { + return Arrays.copyOf(TOPICS, TOPICS.length); + } + + static void generatePerpetually(final String kafka, + final int numKeys, + final int maxRecordsPerKey) { + final Properties producerProps = generatorProperties(kafka); + + int numRecordsProduced = 0; + + final ValueList[] data = new ValueList[numKeys]; + for (int i = 0; i < numKeys; i++) { + data[i] = new ValueList(i, i + maxRecordsPerKey - 1); + } + + final Random rand = new Random(); + + try (final KafkaProducer producer = new KafkaProducer<>(producerProps)) { + while (true) { + final int index = rand.nextInt(numKeys); + final String key = data[index].key; + final int value = data[index].next(); + + final ProducerRecord record = + new ProducerRecord<>( + "data", + stringSerde.serializer().serialize("", key), + intSerde.serializer().serialize("", value) + ); + producer.send(record); + + final ProducerRecord fkRecord = + new ProducerRecord<>( + "fk", + intSerde.serializer().serialize("", value), + stringSerde.serializer().serialize("", key) + ); + producer.send(fkRecord); + + numRecordsProduced++; + if (numRecordsProduced % 100 == 0) { + System.out.println(Instant.now() + " " + numRecordsProduced + " records produced"); + } + Utils.sleep(2); + } + } + } + + public static Map> generate(final String kafka, + final int numKeys, + final int maxRecordsPerKey, + final Duration timeToSpend) { + final Properties producerProps = generatorProperties(kafka); + + int numRecordsProduced = 0; + + final Map> allData = new HashMap<>(); + final ValueList[] data = new ValueList[numKeys]; + for (int i = 0; i < numKeys; i++) { + data[i] = new ValueList(i, i + maxRecordsPerKey - 1); + allData.put(data[i].key, new HashSet<>()); + } + final Random rand = new Random(); + + int remaining = data.length; + + final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey; + + final List> dataNeedRetry = new ArrayList<>(); + final List> fkNeedRetry = new ArrayList<>(); + + try (final KafkaProducer producer = new KafkaProducer<>(producerProps)) { + while (remaining > 0) { + final int index = rand.nextInt(remaining); + final String key = data[index].key; + final int value = data[index].next(); + + if (value < 0) { + remaining--; + data[index] = data[remaining]; + } else { + final ProducerRecord record = + new ProducerRecord<>( + "data", + stringSerde.serializer().serialize("", key), + intSerde.serializer().serialize("", value) + ); + + producer.send(record, new TestCallback(record, dataNeedRetry)); + + final ProducerRecord fkRecord = + new ProducerRecord<>( + "fk", + intSerde.serializer().serialize("", value), + stringSerde.serializer().serialize("", key) + ); + + producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry)); + + numRecordsProduced++; + allData.get(key).add(value); + if (numRecordsProduced % 100 == 0) { + System.out.println(Instant.now() + " " + numRecordsProduced + " records produced"); + } + Utils.sleep(Math.max(recordPauseTime, 2)); + } + } + producer.flush(); + + retry(producer, dataNeedRetry, stringSerde); + retry(producer, fkNeedRetry, intSerde); + + flush(producer, + "data", + stringSerde.serializer().serialize("", "flush"), + intSerde.serializer().serialize("", 0) + ); + flush(producer, + "fk", + intSerde.serializer().serialize("", 0), + stringSerde.serializer().serialize("", "flush") + ); + } + return 
Collections.unmodifiableMap(allData); + } + + private static void retry(final KafkaProducer producer, + List> needRetry, + final Serde keySerde) { + int remainingRetries = 5; + while (!needRetry.isEmpty()) { + final List> needRetry2 = new ArrayList<>(); + for (final ProducerRecord record : needRetry) { + System.out.println( + "retry producing " + keySerde.deserializer().deserialize("", record.key())); + producer.send(record, new TestCallback(record, needRetry2)); + } + producer.flush(); + needRetry = needRetry2; + if (--remainingRetries == 0 && !needRetry.isEmpty()) { + System.err.println("Failed to produce all records after multiple retries"); + Exit.exit(1); + } + } + } + + private static void flush(final KafkaProducer producer, + final String topic, + final byte[] keyBytes, + final byte[] valBytes) { + // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out + // all suppressed records. + final List partitions = producer.partitionsFor(topic); + for (final PartitionInfo partition : partitions) { + producer.send(new ProducerRecord<>( + partition.topic(), + partition.partition(), + System.currentTimeMillis() + Duration.ofDays(2).toMillis(), + keyBytes, + valBytes + )); + } + } + + private static Properties generatorProperties(final String kafka) { + final Properties producerProps = new Properties(); + producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, "SmokeTest"); + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + producerProps.put(ProducerConfig.ACKS_CONFIG, "all"); + return producerProps; + } + + private static class TestCallback implements Callback { + private final ProducerRecord originalRecord; + private final List> needRetry; + + TestCallback(final ProducerRecord originalRecord, + final List> needRetry) { + this.originalRecord = originalRecord; + this.needRetry = needRetry; + } + + @Override + public void onCompletion(final RecordMetadata metadata, final Exception exception) { + if (exception != null) { + if (exception instanceof TimeoutException) { + needRetry.add(originalRecord); + } else { + exception.printStackTrace(); + Exit.exit(1); + } + } + } + } + + private static void shuffle(final int[] data, @SuppressWarnings("SameParameterValue") final int windowSize) { + final Random rand = new Random(); + for (int i = 0; i < data.length; i++) { + // we shuffle data within windowSize + final int j = rand.nextInt(Math.min(data.length - i, windowSize)) + i; + + // swap + final int tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + } + + public static class NumberDeserializer implements Deserializer { + @Override + public Number deserialize(final String topic, final byte[] data) { + final Number value; + switch (topic) { + case "data": + case "echo": + case "min": + case "min-raw": + case "min-suppressed": + case "sws-raw": + case "sws-suppressed": + case "max": + case "dif": + value = intSerde.deserializer().deserialize(topic, data); + break; + case "sum": + case "cnt": + case "tagg": + value = longSerde.deserializer().deserialize(topic, data); + break; + case "avg": + value = doubleSerde.deserializer().deserialize(topic, data); + break; + default: + throw new RuntimeException("unknown topic: " + topic); + } + return value; + } + } + + public static VerificationResult verify(final String kafka, + final Map> inputs, + final int 
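The producer side treats only TimeoutException as retriable: TestCallback stashes timed-out records, and retry() resends them, flushing between passes and giving up after a bounded number of attempts. A condensed sketch of that send/flush/retry loop, assuming byte[] keys and values as in the driver:

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.errors.TimeoutException;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class RetrySendSketch {
    public static void sendAll(final KafkaProducer<byte[], byte[]> producer,
                               final List<ProducerRecord<byte[], byte[]>> records,
                               final int maxAttempts) {
        List<ProducerRecord<byte[], byte[]>> pending = records;
        for (int attempt = 1; attempt <= maxAttempts && !pending.isEmpty(); attempt++) {
            // callbacks run on the producer's I/O thread, so collect failures in a synchronized list
            final List<ProducerRecord<byte[], byte[]>> failed =
                Collections.synchronizedList(new ArrayList<>());
            for (final ProducerRecord<byte[], byte[]> record : pending) {
                producer.send(record, (metadata, exception) -> {
                    if (exception instanceof TimeoutException) {
                        failed.add(record);           // retriable: try again on the next pass
                    } else if (exception != null) {
                        exception.printStackTrace();  // anything else is fatal in the driver
                    }
                });
            }
            producer.flush();  // block until the callbacks above have fired
            pending = failed;
        }
        if (!pending.isEmpty()) {
            throw new IllegalStateException(pending.size() + " records unsent after " + maxAttempts + " attempts");
        }
    }
}
```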
maxRecordsPerKey) { + final Properties props = new Properties(); + props.put(ConsumerConfig.CLIENT_ID_CONFIG, "verifier"); + props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, NumberDeserializer.class); + props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"); + + final KafkaConsumer consumer = new KafkaConsumer<>(props); + final List partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS); + consumer.assign(partitions); + consumer.seekToBeginning(partitions); + + final int recordsGenerated = inputs.size() * maxRecordsPerKey; + int recordsProcessed = 0; + final Map processed = + Stream.of(NUMERIC_VALUE_TOPICS) + .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0))); + + final Map>>> events = new HashMap<>(); + + VerificationResult verificationResult = new VerificationResult(false, "no results yet"); + int retry = 0; + final long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < TimeUnit.MINUTES.toMillis(6)) { + final ConsumerRecords records = consumer.poll(Duration.ofSeconds(5)); + if (records.isEmpty() && recordsProcessed >= recordsGenerated) { + verificationResult = verifyAll(inputs, events, false); + if (verificationResult.passed()) { + break; + } else if (retry++ > MAX_RECORD_EMPTY_RETRIES) { + System.out.println(Instant.now() + " Didn't get any more results, verification hasn't passed, and out of retries."); + break; + } else { + System.out.println(Instant.now() + " Didn't get any more results, but verification hasn't passed (yet). Retrying..." + retry); + } + } else { + System.out.println(Instant.now() + " Get some more results from " + records.partitions() + ", resetting retry."); + + retry = 0; + for (final ConsumerRecord record : records) { + final String key = record.key(); + + final String topic = record.topic(); + processed.get(topic).incrementAndGet(); + + if (topic.equals("echo")) { + recordsProcessed++; + if (recordsProcessed % 100 == 0) { + System.out.println("Echo records processed = " + recordsProcessed); + } + } + + events.computeIfAbsent(topic, t -> new HashMap<>()) + .computeIfAbsent(key, k -> new LinkedList<>()) + .add(record); + } + + System.out.println(processed); + } + } + consumer.close(); + final long finished = System.currentTimeMillis() - start; + System.out.println("Verification time=" + finished); + System.out.println("-------------------"); + System.out.println("Result Verification"); + System.out.println("-------------------"); + System.out.println("recordGenerated=" + recordsGenerated); + System.out.println("recordProcessed=" + recordsProcessed); + + if (recordsProcessed > recordsGenerated) { + System.out.println("PROCESSED-MORE-THAN-GENERATED"); + } else if (recordsProcessed < recordsGenerated) { + System.out.println("PROCESSED-LESS-THAN-GENERATED"); + } + + boolean success; + + final Map> received = + events.get("echo") + .entrySet() + .stream() + .map(entry -> mkEntry( + entry.getKey(), + entry.getValue().stream().map(ConsumerRecord::value).collect(Collectors.toSet())) + ) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + success = inputs.equals(received); + + if (success) { + System.out.println("ALL-RECORDS-DELIVERED"); + } else { + int missedCount = 0; + for (final Map.Entry> entry : inputs.entrySet()) { + missedCount += received.get(entry.getKey()).size(); + } + System.out.println("missedRecords=" + missedCount); + } + 
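For verification the driver never joins a consumer group: it assigns every partition of the numeric output topics explicitly, seeks to the beginning, and keeps polling until results stop arriving, with read_committed isolation so exactly-once runs do not observe aborted records. A trimmed sketch of that consumption pattern (the bootstrap address and topic here are placeholders):

```java
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class VerifierConsumerSketch {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        // skip records from aborted transactions when the app under test runs with EOS
        props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // manual assignment: no group membership, no rebalances, full control of position
            final List<TopicPartition> partitions = new ArrayList<>();
            for (final PartitionInfo info : consumer.partitionsFor("echo")) {    // example topic
                partitions.add(new TopicPartition(info.topic(), info.partition()));
            }
            consumer.assign(partitions);
            consumer.seekToBeginning(partitions);

            int emptyPolls = 0;
            while (emptyPolls < 3) {   // a few empty polls in a row means we are caught up
                final ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                emptyPolls = records.isEmpty() ? emptyPolls + 1 : 0;
                records.forEach(r -> System.out.println(r.key() + " -> " + r.value()));
            }
        }
    }
}
```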
+ // give it one more try if it's not already passing. + if (!verificationResult.passed()) { + verificationResult = verifyAll(inputs, events, true); + } + success &= verificationResult.passed(); + + System.out.println(verificationResult.result()); + + System.out.println(success ? "SUCCESS" : "FAILURE"); + return verificationResult; + } + + public static class VerificationResult { + private final boolean passed; + private final String result; + + VerificationResult(final boolean passed, final String result) { + this.passed = passed; + this.result = result; + } + + public boolean passed() { + return passed; + } + + public String result() { + return result; + } + } + + private static VerificationResult verifyAll(final Map> inputs, + final Map>>> events, + final boolean printResults) { + final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + boolean pass; + try (final PrintStream resultStream = new PrintStream(byteArrayOutputStream)) { + pass = verifyTAgg(resultStream, inputs, events.get("tagg"), printResults); + pass &= verifySuppressed(resultStream, "min-suppressed", events, printResults); + pass &= verify(resultStream, "min-suppressed", inputs, events, windowedKey -> { + final String unwindowedKey = windowedKey.substring(1, windowedKey.length() - 1).replaceAll("@.*", ""); + return getMin(unwindowedKey); + }, printResults); + pass &= verifySuppressed(resultStream, "sws-suppressed", events, printResults); + pass &= verify(resultStream, "min", inputs, events, SmokeTestDriver::getMin, printResults); + pass &= verify(resultStream, "max", inputs, events, SmokeTestDriver::getMax, printResults); + pass &= verify(resultStream, "dif", inputs, events, key -> getMax(key).intValue() - getMin(key).intValue(), printResults); + pass &= verify(resultStream, "sum", inputs, events, SmokeTestDriver::getSum, printResults); + pass &= verify(resultStream, "cnt", inputs, events, key1 -> getMax(key1).intValue() - getMin(key1).intValue() + 1L, printResults); + pass &= verify(resultStream, "avg", inputs, events, SmokeTestDriver::getAvg, printResults); + } + return new VerificationResult(pass, new String(byteArrayOutputStream.toByteArray(), StandardCharsets.UTF_8)); + } + + private static boolean verify(final PrintStream resultStream, + final String topic, + final Map> inputData, + final Map>>> events, + final Function keyToExpectation, + final boolean printResults) { + final Map>> observedInputEvents = events.get("data"); + final Map>> outputEvents = events.getOrDefault(topic, emptyMap()); + if (outputEvents.isEmpty()) { + resultStream.println(topic + " is empty"); + return false; + } else { + resultStream.printf("verifying %s with %d keys%n", topic, outputEvents.size()); + + if (outputEvents.size() != inputData.size()) { + resultStream.printf("fail: resultCount=%d expectedCount=%s%n\tresult=%s%n\texpected=%s%n", + outputEvents.size(), inputData.size(), outputEvents.keySet(), inputData.keySet()); + return false; + } + for (final Map.Entry>> entry : outputEvents.entrySet()) { + final String key = entry.getKey(); + final Number expected = keyToExpectation.apply(key); + final Number actual = entry.getValue().getLast().value(); + if (!expected.equals(actual)) { + resultStream.printf("%s fail: key=%s actual=%s expected=%s%n", topic, key, actual, expected); + + if (printResults) { + resultStream.printf("\t inputEvents=%n%s%n\t" + + "echoEvents=%n%s%n\tmaxEvents=%n%s%n\tminEvents=%n%s%n\tdifEvents=%n%s%n\tcntEvents=%n%s%n\ttaggEvents=%n%s%n", + indent("\t\t", observedInputEvents.get(key)), + 
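verifySuppressed() insists on exactly one record per key in the *-suppressed topics. That only holds if the application emits a single, final result per window, which is the behaviour of suppress(untilWindowCloses(...)); the client topology is not part of this file, so the sketch below is an assumption based on the -raw/-suppressed topic pairs. It also shows why the driver's trailing "flush" records are stamped roughly two days in the future: they push stream time past the window end so the suppressed result is released before verification.

```java
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Suppressed;
import org.apache.kafka.streams.kstream.Suppressed.BufferConfig;
import org.apache.kafka.streams.kstream.TimeWindows;

import java.time.Duration;

public class SuppressionSketch {
    public static void build(final StreamsBuilder builder) {
        builder.stream("data", Consumed.with(Serdes.String(), Serdes.Integer()))
               .groupByKey(Grouped.with(Serdes.String(), Serdes.Integer()))
               .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(1)))
               .count(Materialized.as("win-count"))
               // nothing is emitted until stream time passes the window end, which is why the
               // driver appends "flush" records stamped ~2 days ahead before it starts verifying
               .suppress(Suppressed.untilWindowCloses(BufferConfig.unbounded()))
               .toStream((windowedKey, value) -> windowedKey.key())
               .to("count-suppressed", Produced.with(Serdes.String(), Serdes.Long()));
    }
}
```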
indent("\t\t", events.getOrDefault("echo", emptyMap()).getOrDefault(key, new LinkedList<>())), + indent("\t\t", events.getOrDefault("max", emptyMap()).getOrDefault(key, new LinkedList<>())), + indent("\t\t", events.getOrDefault("min", emptyMap()).getOrDefault(key, new LinkedList<>())), + indent("\t\t", events.getOrDefault("dif", emptyMap()).getOrDefault(key, new LinkedList<>())), + indent("\t\t", events.getOrDefault("cnt", emptyMap()).getOrDefault(key, new LinkedList<>())), + indent("\t\t", events.getOrDefault("tagg", emptyMap()).getOrDefault(key, new LinkedList<>()))); + + if (!Set.of("echo", "max", "min", "dif", "cnt", "tagg").contains(topic)) + resultStream.printf("%sEvents=%n%s%n", topic, indent("\t\t", entry.getValue())); + } + + return false; + } + } + return true; + } + } + + + private static boolean verifySuppressed(final PrintStream resultStream, + @SuppressWarnings("SameParameterValue") final String topic, + final Map>>> events, + final boolean printResults) { + resultStream.println("verifying suppressed " + topic); + final Map>> topicEvents = events.getOrDefault(topic, emptyMap()); + for (final Map.Entry>> entry : topicEvents.entrySet()) { + if (entry.getValue().size() != 1) { + final String unsuppressedTopic = topic.replace("-suppressed", "-raw"); + final String key = entry.getKey(); + final String unwindowedKey = key.substring(1, key.length() - 1).replaceAll("@.*", ""); + resultStream.printf("fail: key=%s%n\tnon-unique result:%n%s%n", + key, + indent("\t\t", entry.getValue())); + + if (printResults) + resultStream.printf("\tresultEvents:%n%s%n\tinputEvents:%n%s%n", + indent("\t\t", events.get(unsuppressedTopic).get(key)), + indent("\t\t", events.get("data").get(unwindowedKey))); + + return false; + } + } + return true; + } + + private static String indent(@SuppressWarnings("SameParameterValue") final String prefix, + final Iterable> list) { + final StringBuilder stringBuilder = new StringBuilder(); + for (final ConsumerRecord record : list) { + stringBuilder.append(prefix).append(record).append('\n'); + } + return stringBuilder.toString(); + } + + private static Long getSum(final String key) { + final int min = getMin(key).intValue(); + final int max = getMax(key).intValue(); + return ((long) min + max) * (max - min + 1L) / 2L; + } + + private static Double getAvg(final String key) { + final int min = getMin(key).intValue(); + final int max = getMax(key).intValue(); + return ((long) min + max) / 2.0; + } + + + private static boolean verifyTAgg(final PrintStream resultStream, + final Map> allData, + final Map>> taggEvents, + final boolean printResults) { + if (taggEvents == null) { + resultStream.println("tagg is missing"); + return false; + } else if (taggEvents.isEmpty()) { + resultStream.println("tagg is empty"); + return false; + } else { + resultStream.println("verifying tagg"); + + // generate expected answer + final Map expected = new HashMap<>(); + for (final String key : allData.keySet()) { + final int min = getMin(key).intValue(); + final int max = getMax(key).intValue(); + final String cnt = Long.toString(max - min + 1L); + + expected.put(cnt, expected.getOrDefault(cnt, 0L) + 1); + } + + // check the result + for (final Map.Entry>> entry : taggEvents.entrySet()) { + final String key = entry.getKey(); + Long expectedCount = expected.remove(key); + if (expectedCount == null) { + expectedCount = 0L; + } + + if (entry.getValue().getLast().value().longValue() != expectedCount) { + resultStream.println("fail: key=" + key + " tagg=" + entry.getValue() + " expected=" + 
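getSum() and getAvg() can compute the expected aggregates from the key alone because a key of the form "min-max" is generated with every integer in [min, max] exactly once, so the arithmetic-series formulas apply. A worked example for a hypothetical key "5-9":

```java
public class ExpectedValuesSketch {
    // For key "min-max" the generator produces each integer from min to max exactly once,
    // so the expected aggregates follow directly from the arithmetic-series formulas.
    public static void main(final String[] args) {
        final int min = 5;
        final int max = 9;                        // hypothetical key "5-9"
        final long cnt = max - min + 1L;          // 5 values: 5, 6, 7, 8, 9
        final long sum = (min + max) * cnt / 2;   // (5 + 9) * 5 / 2 = 35
        final double avg = (min + max) / 2.0;     // 7.0
        final int dif = max - min;                // 4
        System.out.printf("cnt=%d sum=%d avg=%.1f dif=%d%n", cnt, sum, avg, dif);
    }
}
```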
expectedCount); + + if (printResults) + resultStream.println("\t taggEvents: " + entry.getValue()); + return false; + } + } + + } + return true; + } + + private static Number getMin(final String key) { + return Integer.parseInt(key.split("-")[0]); + } + + private static Number getMax(final String key) { + return Integer.parseInt(key.split("-")[1]); + } + + private static List getAllPartitions(final KafkaConsumer consumer, final String... topics) { + final List partitions = new ArrayList<>(); + + for (final String topic : topics) { + for (final PartitionInfo info : consumer.partitionsFor(topic)) { + partitions.add(new TopicPartition(info.topic(), info.partition())); + } + } + return partitions; + } + +} diff --git a/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java new file mode 100644 index 000000000000..e4a711313aaf --- /dev/null +++ b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.tests; + +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.kstream.Aggregator; +import org.apache.kafka.streams.kstream.Initializer; +import org.apache.kafka.streams.kstream.KeyValueMapper; +import org.apache.kafka.streams.kstream.Windowed; +import org.apache.kafka.streams.processor.api.ContextualProcessor; +import org.apache.kafka.streams.processor.api.ProcessorContext; +import org.apache.kafka.streams.processor.api.ProcessorSupplier; +import org.apache.kafka.streams.processor.api.Record; + +import java.time.Instant; + +public class SmokeTestUtil { + + static final int END = Integer.MAX_VALUE; + + static ProcessorSupplier printProcessorSupplier(final String topic) { + return printProcessorSupplier(topic, ""); + } + + static ProcessorSupplier printProcessorSupplier(final String topic, final String name) { + return () -> new ContextualProcessor() { + private int numRecordsProcessed = 0; + private long smallestOffset = Long.MAX_VALUE; + private long largestOffset = Long.MIN_VALUE; + + @Override + public void init(final ProcessorContext context) { + super.init(context); + System.out.println("[3.9] initializing processor: topic=" + topic + " taskId=" + context.taskId()); + System.out.flush(); + numRecordsProcessed = 0; + smallestOffset = Long.MAX_VALUE; + largestOffset = Long.MIN_VALUE; + } + + @Override + public void process(final Record record) { + numRecordsProcessed++; + if (numRecordsProcessed % 100 == 0) { + System.out.printf("%s: %s%n", name, Instant.now()); + System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic); + } + + context().recordMetadata().ifPresent(recordMetadata -> { + if (smallestOffset > recordMetadata.offset()) { + smallestOffset = recordMetadata.offset(); + } + if (largestOffset < recordMetadata.offset()) { + largestOffset = recordMetadata.offset(); + } + }); + } + + @Override + public void close() { + System.out.printf("Close processor for task %s%n", context().taskId()); + System.out.println("processed " + numRecordsProcessed + " records"); + final long processed; + if (largestOffset >= smallestOffset) { + processed = 1L + largestOffset - smallestOffset; + } else { + processed = 0L; + } + System.out.println("offset " + smallestOffset + " to " + largestOffset + " -> processed " + processed); + System.out.flush(); + } + }; + } + + public static final class Unwindow implements KeyValueMapper, V, K> { + @Override + public K apply(final Windowed winKey, final V value) { + return winKey.key(); + } + } + + public static class Agg { + + KeyValueMapper> selector() { + return (key, value) -> new KeyValue<>(value == null ? 
null : Long.toString(value), 1L); + } + + public Initializer init() { + return () -> 0L; + } + + Aggregator adder() { + return (aggKey, value, aggregate) -> aggregate + value; + } + + Aggregator remover() { + return (aggKey, value, aggregate) -> aggregate - value; + } + } + + public static Serde stringSerde = Serdes.String(); + + public static Serde intSerde = Serdes.Integer(); + + static Serde longSerde = Serdes.Long(); + + static Serde doubleSerde = Serdes.Double(); + + public static void sleep(final long duration) { + try { + Thread.sleep(duration); + } catch (final Exception ignore) { } + } + +} diff --git a/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java new file mode 100644 index 000000000000..5803b2fbd021 --- /dev/null +++ b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
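Agg's selector/init/adder/remover are the four pieces that KTable#groupBy(...).aggregate(...) needs: because the upstream is a changelog, an updated row must first be subtracted from its old group (the remover) and then added to its new group (the adder). The sketch below is the same count-by-count shape with the generic parameters written out; the store name is a placeholder.

```java
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class CountByCountSketch {
    public static void build(final StreamsBuilder builder) {
        final KTable<String, Long> counts =
            builder.table("cnt", Consumed.with(Serdes.String(), Serdes.Long()));

        // Re-key each row by its count value, then aggregate with an adder *and* a remover:
        // on an update, the old value is removed from its old group and the new value is
        // added to its new group, so the derived table stays consistent.
        counts
            .groupBy((key, value) -> new KeyValue<>(Long.toString(value), 1L),
                     Grouped.with(Serdes.String(), Serdes.Long()))
            .aggregate(
                () -> 0L,
                (aggKey, value, aggregate) -> aggregate + value,   // adder
                (aggKey, value, aggregate) -> aggregate - value,   // remover (subtractor)
                Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("count-by-count")
                    .withKeySerde(Serdes.String())
                    .withValueSerde(Serdes.Long()));
    }
}
```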
+ */ +package org.apache.kafka.streams.tests; + +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.StreamsConfig; + +import java.io.IOException; +import java.time.Duration; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; + +import static org.apache.kafka.streams.tests.SmokeTestDriver.generate; +import static org.apache.kafka.streams.tests.SmokeTestDriver.generatePerpetually; + +public class StreamsSmokeTest { + + /** + * args ::= kafka propFileName command disableAutoTerminate + * command := "run" | "process" + * + * @param args + */ + public static void main(final String[] args) throws IOException { + if (args.length < 2) { + System.err.println("StreamsSmokeTest are expecting two parameters: propFile, command; but only see " + args.length + " parameter"); + Exit.exit(1); + } + + final String propFileName = args[0]; + final String command = args[1]; + final boolean disableAutoTerminate = args.length > 2; + + final Properties streamsProperties = Utils.loadProps(propFileName); + final String kafka = streamsProperties.getProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG); + final String processingGuarantee = streamsProperties.getProperty(StreamsConfig.PROCESSING_GUARANTEE_CONFIG); + + if (kafka == null) { + System.err.println("No bootstrap kafka servers specified in " + StreamsConfig.BOOTSTRAP_SERVERS_CONFIG); + Exit.exit(1); + } + + if ("process".equals(command)) { + if (!StreamsConfig.AT_LEAST_ONCE.equals(processingGuarantee) && + !StreamsConfig.EXACTLY_ONCE_V2.equals(processingGuarantee)) { + + System.err.println("processingGuarantee must be either " + StreamsConfig.AT_LEAST_ONCE + " or " + + StreamsConfig.EXACTLY_ONCE_V2); + + Exit.exit(1); + } + } + + System.out.println("StreamsTest instance started (StreamsSmokeTest)"); + System.out.println("command=" + command); + System.out.println("props=" + streamsProperties); + System.out.println("disableAutoTerminate=" + disableAutoTerminate); + + switch (command) { + case "run": + // this starts the driver (data generation and result verification) + final int numKeys = 10; + final int maxRecordsPerKey = 500; + if (disableAutoTerminate) { + generatePerpetually(kafka, numKeys, maxRecordsPerKey); + } else { + // slow down data production to span 30 seconds so that system tests have time to + // do their bounces, etc. + final Map> allData = + generate(kafka, numKeys, maxRecordsPerKey, Duration.ofSeconds(30)); + SmokeTestDriver.verify(kafka, allData, maxRecordsPerKey); + } + break; + case "process": + // this starts the stream processing app + new SmokeTestClient(UUID.randomUUID().toString()).start(streamsProperties); + break; + default: + System.out.println("unknown command: " + command); + } + } + +} diff --git a/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java new file mode 100644 index 000000000000..462f8358774f --- /dev/null +++ b/streams/upgrade-system-tests-39/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
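StreamsSmokeTest's "process" command exits unless processing.guarantee in the supplied property file is at_least_once or exactly_once_v2. A minimal sketch of the configuration the prop file is expected to carry (all values are placeholders):

```java
import org.apache.kafka.streams.StreamsConfig;

import java.util.Properties;

public class SmokeTestPropsSketch {
    public static Properties minimalProps() {
        final Properties props = new Properties();
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");           // placeholder
        // must be AT_LEAST_ONCE or EXACTLY_ONCE_V2, otherwise the "process" command exits
        props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE_V2);
        return props;
    }
}
```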
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.tests; + +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.StreamsBuilder; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.Consumed; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.Produced; +import org.apache.kafka.streams.processor.api.ContextualProcessor; +import org.apache.kafka.streams.processor.api.ProcessorContext; +import org.apache.kafka.streams.processor.api.ProcessorSupplier; +import org.apache.kafka.streams.processor.api.Record; + +import java.util.Properties; + +import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde; +import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde; + + +public class StreamsUpgradeTest { + + @SuppressWarnings("unchecked") + public static void main(final String[] args) throws Exception { + if (args.length < 1) { + System.err.println("StreamsUpgradeTest requires one argument (properties-file) but provided none"); + } + final String propFileName = args[0]; + + final Properties streamsProperties = Utils.loadProps(propFileName); + + System.out.println("StreamsTest instance started (StreamsUpgradeTest v3.7)"); + System.out.println("props=" + streamsProperties); + + final StreamsBuilder builder = new StreamsBuilder(); + final KTable dataTable = builder.table( + "data", Consumed.with(stringSerde, intSerde)); + final KStream dataStream = dataTable.toStream(); + dataStream.process(printProcessorSupplier("data")); + dataStream.to("echo"); + + final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty( + "test.run_fk_join", + "false")); + if (runFkJoin) { + try { + final KTable fkTable = builder.table( + "fk", Consumed.with(intSerde, stringSerde)); + buildFKTable(dataStream, fkTable); + } catch (final Exception e) { + System.err.println("Caught " + e.getMessage()); + } + } + + final Properties config = new Properties(); + config.setProperty( + StreamsConfig.APPLICATION_ID_CONFIG, + "StreamsUpgradeTest"); + config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); + config.putAll(streamsProperties); + + final KafkaStreams streams = new KafkaStreams(builder.build(), config); + streams.start(); + + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + streams.close(); + System.out.println("UPGRADE-TEST-CLIENT-CLOSED"); + System.out.flush(); + })); + } + + private static void buildFKTable(final KStream primaryTable, + final KTable otherTable) { + final KStream kStream = primaryTable.toTable() + .join(otherTable, v -> v, (k0, v0) -> v0) + .toStream(); + kStream.process(printProcessorSupplier("fk")); + kStream.to("fk-result", Produced.with(stringSerde, stringSerde)); + } + + private static ProcessorSupplier printProcessorSupplier(final String topic) { + return () -> new ContextualProcessor() { + 
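buildFKTable() converts the data stream back into a table and joins it against the "fk" table, using the record value itself as the foreign key; the joiner keeps the right-hand value, so "fk-result" ends up keyed by the data key with the fk table's String value. A minimal sketch of that foreign-key join shape (the topic names follow the test):

```java
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

public class FkJoinSketch {
    public static void build(final StreamsBuilder builder) {
        final KTable<String, Integer> data =
            builder.table("data", Consumed.with(Serdes.String(), Serdes.Integer()));
        final KTable<Integer, String> fk =
            builder.table("fk", Consumed.with(Serdes.Integer(), Serdes.String()));

        // Foreign-key join: the second argument extracts the fk table's key from the data value,
        // the third argument combines the two values once the join fires (keep the fk value here).
        data.join(fk, value -> value, (dataValue, fkValue) -> fkValue)
            .toStream()
            .to("fk-result", Produced.with(Serdes.String(), Serdes.String()));
    }
}
```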
private int numRecordsProcessed = 0; + + @Override + public void init(final ProcessorContext context) { + System.out.println("[3.9] initializing processor: topic=" + topic + "taskId=" + context.taskId()); + numRecordsProcessed = 0; + } + + @Override + public void process(final Record record) { + numRecordsProcessed++; + if (numRecordsProcessed % 100 == 0) { + System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic); + } + } + + @Override + public void close() {} + }; + } +} diff --git a/test-common/test-common-api/src/main/java/org/apache/kafka/common/test/api/README.md b/test-common/test-common-api/src/main/java/org/apache/kafka/common/test/api/README.md index 0821d3f1f4b2..f2d5b40e9fd8 100644 --- a/test-common/test-common-api/src/main/java/org/apache/kafka/common/test/api/README.md +++ b/test-common/test-common-api/src/main/java/org/apache/kafka/common/test/api/README.md @@ -14,9 +14,12 @@ This annotation has fields for a set of cluster types and number of brokers, as Arbitrary server properties can also be provided in the annotation: ```java -@ClusterTest(types = {Type.KRAFT}, securityProtocol = "PLAINTEXT", properties = { - @ClusterProperty(key = "inter.broker.protocol.version", value = "2.7-IV2"), - @ClusterProperty(key = "socket.send.buffer.bytes", value = "10240"), +@ClusterTest( + types = {Type.KRAFT}, + brokerSecurityProtocol = SecurityProtocol.PLAINTEXT, + properties = { + @ClusterProperty(key = "inter.broker.protocol.version", value = "2.7-IV2"), + @ClusterProperty(key = "socket.send.buffer.bytes", value = "10240"), }) void testSomething() { ... } ``` @@ -25,8 +28,8 @@ Multiple `@ClusterTest` annotations can be given to generate more than one test ```scala @ClusterTests(Array( - @ClusterTest(securityProtocol = "PLAINTEXT"), - @ClusterTest(securityProtocol = "SASL_PLAINTEXT") + @ClusterTest(brokerSecurityProtocol = SecurityProtocol.PLAINTEXT), + @ClusterTest(securityProtocol = SecurityProtocol.SASL_PLAINTEXT) )) def testSomething(): Unit = { ... } ``` @@ -45,18 +48,18 @@ produce any number of test configurations using a fluent builder style API. import java.util.Arrays; @ClusterTemplate("generateConfigs") -void testSomething() { ...} +void testSomething() { ... 
} static List generateConfigs() { ClusterConfig config1 = ClusterConfig.defaultClusterBuilder() .name("Generated Test 1") .serverProperties(props1) - .ibp("2.7-IV1") + .setMetadataVersion(MetadataVersion.IBP_2_7_IV1) .build(); ClusterConfig config2 = ClusterConfig.defaultClusterBuilder() .name("Generated Test 2") .serverProperties(props2) - .ibp("2.7-IV2") + .setMetadataVersion(MetadataVersion.IBP_2_7_IV2) .build(); ClusterConfig config3 = ClusterConfig.defaultClusterBuilder() .name("Generated Test 3") diff --git a/tests/docker/Dockerfile b/tests/docker/Dockerfile index e9ecf99e1a69..d3aae062926a 100644 --- a/tests/docker/Dockerfile +++ b/tests/docker/Dockerfile @@ -65,7 +65,8 @@ LABEL ducker.creator=$ducker_creator RUN apt update && apt install -y sudo git netcat iptables rsync unzip wget curl jq coreutils openssh-server net-tools vim python3-pip python3-dev libffi-dev libssl-dev cmake pkg-config libfuse-dev iperf traceroute iproute2 iputils-ping && apt-get -y clean RUN python3 -m pip install -U pip==21.1.1; # NOTE: ducktape 0.12.0 supports py 3.9, 3.10, 3.11 and 3.12 -RUN pip3 install --upgrade cffi virtualenv pyasn1 boto3 pycrypto pywinrm ipaddress enum34 debugpy psutil && pip3 install --upgrade "ducktape==0.12.0" +COPY requirements.txt requirements.txt +RUN pip3 install --upgrade -r requirements.txt COPY --from=build-native-image /build/kafka-binary/ /opt/kafka-binary/ # Set up ssh @@ -94,6 +95,7 @@ RUN mkdir -p "/opt/kafka-3.5.2" && chmod a+rw /opt/kafka-3.5.2 && curl -s "$KAFK RUN mkdir -p "/opt/kafka-3.6.2" && chmod a+rw /opt/kafka-3.6.2 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.6.2.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.6.2" RUN mkdir -p "/opt/kafka-3.7.1" && chmod a+rw /opt/kafka-3.7.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.7.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.7.1" RUN mkdir -p "/opt/kafka-3.8.1" && chmod a+rw /opt/kafka-3.8.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.8.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.8.1" +RUN mkdir -p "/opt/kafka-3.9.0" && chmod a+rw /opt/kafka-3.9.0 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.9.0.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.9.0" # Streams test dependencies @@ -114,6 +116,7 @@ RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.5.2-test.jar" -o /opt/kafka-3.5.2/lib RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.6.2-test.jar" -o /opt/kafka-3.6.2/libs/kafka-streams-3.6.2-test.jar RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.7.1-test.jar" -o /opt/kafka-3.7.1/libs/kafka-streams-3.7.1-test.jar RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.8.1-test.jar" -o /opt/kafka-3.8.1/libs/kafka-streams-3.8.1-test.jar +RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.9.0-test.jar" -o /opt/kafka-3.9.0/libs/kafka-streams-3.9.0-test.jar # To ensure the Kafka cluster starts successfully under JDK 17, we need to update the Zookeeper # client from version 3.4.x to 3.5.7 in Kafka versions 2.1.1, 2.2.2, and 2.3.1, as the older Zookeeper diff --git a/tests/docker/requirements.txt b/tests/docker/requirements.txt new file mode 100644 index 000000000000..91eaae441b46 --- /dev/null +++ b/tests/docker/requirements.txt @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cffi +virtualenv +pyasn1 +boto3 +pycrypto +pywinrm +ipaddress +debugpy +psutil +ducktape==0.12.0 \ No newline at end of file diff --git a/tests/kafkatest/services/console_consumer.py b/tests/kafkatest/services/console_consumer.py index cc8a0d319970..3e65efc3483a 100644 --- a/tests/kafkatest/services/console_consumer.py +++ b/tests/kafkatest/services/console_consumer.py @@ -59,7 +59,7 @@ class ConsoleConsumer(KafkaPathResolverMixin, JmxMixin, BackgroundThreadService) "collect_default": False} } - def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-group", new_consumer=True, + def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-group", message_validator=None, from_beginning=True, consumer_timeout_ms=None, version=DEV_BRANCH, client_id="console-consumer", print_key=False, jmx_object_names=None, jmx_attributes=None, enable_systest_events=False, stop_timeout_sec=35, print_timestamp=False, print_partition=False, @@ -72,7 +72,6 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro num_nodes: number of nodes to use (this should be 1) kafka: kafka service topic: consume from this topic - new_consumer: use new Kafka consumer if True message_validator: function which returns message or None from_beginning: consume from beginning if True, else from the end consumer_timeout_ms: corresponds to consumer.timeout.ms. 
consumer process ends if time between @@ -96,7 +95,6 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro root=ConsoleConsumer.PERSISTENT_ROOT) BackgroundThreadService.__init__(self, context, num_nodes) self.kafka = kafka - self.new_consumer = new_consumer self.group_id = group_id self.args = { 'topic': topic, @@ -145,8 +143,6 @@ def start_cmd(self, node): """Return the start command appropriate for the given node.""" args = self.args.copy() args['broker_list'] = self.kafka.bootstrap_servers(self.security_config.security_protocol) - if not self.new_consumer: - args['zk_connect'] = self.kafka.zk_connect_setting() args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE args['stderr'] = ConsoleConsumer.STDERR_CAPTURE args['log_dir'] = ConsoleConsumer.LOG_DIR diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py index 1d9444da204c..07f5d44ef2ce 100644 --- a/tests/kafkatest/services/kafka/kafka.py +++ b/tests/kafkatest/services/kafka/kafka.py @@ -881,11 +881,11 @@ def start_node(self, node, timeout_sec=60, **kwargs): kafka_storage_script = self.path.script("kafka-storage.sh", node) cmd = "%s format --ignore-formatted --config %s --cluster-id %s" % (kafka_storage_script, KafkaService.CONFIG_FILE, config_property.CLUSTER_ID) if self.dynamicRaftQuorum: - cmd += " --feature kraft.version=1" if self.node_quorum_info.has_controller_role: if self.standalone_controller_bootstrapped: cmd += " --no-initial-controllers" else: + cmd += " --feature kraft.version=1" cmd += " --standalone" self.standalone_controller_bootstrapped = True self.logger.info("Running log directory format command...\n%s" % cmd) diff --git a/tests/kafkatest/services/zookeeper.py b/tests/kafkatest/services/zookeeper.py index 49eaf741bb31..3215ce783233 100644 --- a/tests/kafkatest/services/zookeeper.py +++ b/tests/kafkatest/services/zookeeper.py @@ -23,7 +23,7 @@ from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin from kafkatest.services.security.security_config import SecurityConfig -from kafkatest.version import LATEST_3_8 +from kafkatest.version import LATEST_3_9 class ZookeeperService(KafkaPathResolverMixin, Service): @@ -43,9 +43,9 @@ class ZookeeperService(KafkaPathResolverMixin, Service): "collect_default": True} } - # After 4.0, zookeeper service is removed from source code. Using LATEST_3_8 for compatibility test cases. + # After 4.0, zookeeper service is removed from source code. Using LATEST_3_9 for compatibility test cases. 
def __init__(self, context, num_nodes, zk_sasl = False, zk_client_port = True, zk_client_secure_port = False, - zk_tls_encrypt_only = False, version=LATEST_3_8): + zk_tls_encrypt_only = False, version=LATEST_3_9): """ :type context """ @@ -187,7 +187,7 @@ def query(self, path, chroot=None): chroot_path = ('' if chroot is None else chroot) + path - kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_8) + kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_9) cmd = "%s %s -server %s %s get %s" % \ (kafka_run_class, self.java_cli_class_name(), self.connect_setting(force_tls=self.zk_client_secure_port), self.zkTlsConfigFileOption(True), @@ -212,7 +212,7 @@ def get_children(self, path, chroot=None): chroot_path = ('' if chroot is None else chroot) + path - kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_8) + kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_9) cmd = "%s %s -server %s %s ls %s" % \ (kafka_run_class, self.java_cli_class_name(), self.connect_setting(force_tls=self.zk_client_secure_port), self.zkTlsConfigFileOption(True), @@ -240,7 +240,7 @@ def delete(self, path, recursive, chroot=None): chroot_path = ('' if chroot is None else chroot) + path - kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_8) + kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_9) if recursive: op = "deleteall" else: @@ -262,7 +262,7 @@ def create(self, path, chroot=None, value=""): chroot_path = ('' if chroot is None else chroot) + path - kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_8) + kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_9) cmd = "%s %s -server %s %s create %s '%s'" % \ (kafka_run_class, self.java_cli_class_name(), self.connect_setting(force_tls=self.zk_client_secure_port), self.zkTlsConfigFileOption(True), @@ -276,7 +276,7 @@ def describeUsers(self): Describe the default user using the ConfigCommand CLI """ - kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_8) + kafka_run_class = self.path.script("kafka-run-class.sh", LATEST_3_9) cmd = "%s kafka.admin.ConfigCommand --zookeeper %s %s --describe --entity-type users --entity-default" % \ (kafka_run_class, self.connect_setting(force_tls=self.zk_client_secure_port), self.zkTlsConfigFileOption()) diff --git a/tests/kafkatest/tests/client/client_compatibility_features_test.py b/tests/kafkatest/tests/client/client_compatibility_features_test.py index d0bcd80a7911..1e0c37825d12 100644 --- a/tests/kafkatest/tests/client/client_compatibility_features_test.py +++ b/tests/kafkatest/tests/client/client_compatibility_features_test.py @@ -28,7 +28,7 @@ from ducktape.tests.test import Test from kafkatest.version import DEV_BRANCH, \ LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \ - LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, KafkaVersion + LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, KafkaVersion def get_broker_features(broker_version): features = {} @@ -124,6 +124,7 @@ def invoke_compatibility_program(self, features): @parametrize(broker_version=str(LATEST_3_6)) @parametrize(broker_version=str(LATEST_3_7)) @parametrize(broker_version=str(LATEST_3_8)) + @parametrize(broker_version=str(LATEST_3_9)) def run_compatibility_test(self, broker_version, metadata_quorum=quorum.zk): if self.zk: self.zk.start() diff --git 
a/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py b/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py index 74bd5563200b..1209deed64f7 100644 --- a/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py +++ b/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py @@ -25,7 +25,7 @@ from kafkatest.utils import is_int_with_prefix from kafkatest.version import DEV_BRANCH, \ LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \ - LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, KafkaVersion + LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, KafkaVersion class ClientCompatibilityProduceConsumeTest(ProduceConsumeValidateTest): """ @@ -75,6 +75,7 @@ def min_cluster_size(self): @parametrize(broker_version=str(LATEST_3_6)) @parametrize(broker_version=str(LATEST_3_7)) @parametrize(broker_version=str(LATEST_3_8)) + @parametrize(broker_version=str(LATEST_3_9)) def test_produce_consume(self, broker_version, metadata_quorum=quorum.zk): print("running producer_consumer_compat with broker_version = %s" % broker_version, flush=True) self.kafka.set_version(KafkaVersion(broker_version)) diff --git a/tests/kafkatest/tests/client/consumer_protocol_migration_test.py b/tests/kafkatest/tests/client/consumer_protocol_migration_test.py index 41b21ee26558..07f501fe0c69 100644 --- a/tests/kafkatest/tests/client/consumer_protocol_migration_test.py +++ b/tests/kafkatest/tests/client/consumer_protocol_migration_test.py @@ -20,7 +20,7 @@ from kafkatest.tests.verifiable_consumer_test import VerifiableConsumerTest from kafkatest.services.kafka import TopicPartition, quorum, consumer_group from kafkatest.version import LATEST_2_1, LATEST_2_3, LATEST_2_4, LATEST_2_5, \ - LATEST_3_2, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, KafkaVersion + LATEST_3_2, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion class ConsumerProtocolMigrationTest(VerifiableConsumerTest): """ @@ -42,7 +42,7 @@ class ConsumerProtocolMigrationTest(VerifiableConsumerTest): COOPERATIVE_STICKEY = "org.apache.kafka.clients.consumer.CooperativeStickyAssignor" all_consumer_versions = [LATEST_2_1, LATEST_2_3, LATEST_2_4, LATEST_2_5, \ - LATEST_3_2, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH] + LATEST_3_2, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH] consumer_versions_supporting_range_assignnor = [str(v) for v in all_consumer_versions] consumer_versions_supporting_static_membership = [str(v) for v in all_consumer_versions if v >= LATEST_2_3] consumer_versions_supporting_cooperative_sticky_assignor = [str(v) for v in all_consumer_versions if v >= LATEST_2_4] diff --git a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py index c7f600a0f3e6..ec4a878f0325 100644 --- a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py +++ b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py @@ -23,7 +23,7 @@ from kafkatest.utils import is_int from kafkatest.version import LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, \ LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, \ - LATEST_3_7, LATEST_3_8, 
DEV_BRANCH, KafkaVersion + LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion # Compatibility tests for moving to a new broker (e.g., 0.10.x) and using a mix of old and new clients (e.g., 0.9.x) class ClientCompatibilityTestNewBroker(ProduceConsumeValidateTest): @@ -64,10 +64,9 @@ def setUp(self): @matrix(producer_version=[str(LATEST_3_6)], consumer_version=[str(LATEST_3_6)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade) @matrix(producer_version=[str(LATEST_3_7)], consumer_version=[str(LATEST_3_7)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade) @matrix(producer_version=[str(LATEST_3_8)], consumer_version=[str(LATEST_3_8)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade) + @matrix(producer_version=[str(LATEST_3_9)], consumer_version=[str(LATEST_3_9)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade) @matrix(producer_version=[str(LATEST_2_1)], consumer_version=[str(LATEST_2_1)], compression_types=[["zstd"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade) - def test_compatibility(self, producer_version, consumer_version, compression_types, new_consumer=True, timestamp_type=None, metadata_quorum=quorum.zk): - if not new_consumer and metadata_quorum != quorum.zk: - raise Exception("ZooKeeper-based consumers are not supported when using a KRaft metadata quorum") + def test_compatibility(self, producer_version, consumer_version, compression_types, timestamp_type=None, metadata_quorum=quorum.zk): self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=DEV_BRANCH, topics={self.topic: { "partitions": 3, "replication-factor": 3, @@ -85,7 +84,7 @@ def test_compatibility(self, producer_version, consumer_version, compression_typ version=KafkaVersion(producer_version)) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, - self.topic, consumer_timeout_ms=30000, new_consumer=new_consumer, + self.topic, consumer_timeout_ms=30000, message_validator=is_int, version=KafkaVersion(consumer_version)) self.run_produce_consume_validate(lambda: wait_until( diff --git a/tests/kafkatest/tests/core/downgrade_test.py b/tests/kafkatest/tests/core/downgrade_test.py index e35f6f751542..44134b05a8f0 100644 --- a/tests/kafkatest/tests/core/downgrade_test.py +++ b/tests/kafkatest/tests/core/downgrade_test.py @@ -21,7 +21,7 @@ from kafkatest.tests.end_to_end import EndToEndTest from kafkatest.version import LATEST_2_4, LATEST_2_5, \ LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, \ - LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, KafkaVersion + LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion class TestDowngrade(EndToEndTest): PARTITIONS = 3 @@ -81,8 +81,12 @@ def wait_until_rejoin(self): timeout_sec=60, backoff_sec=1, err_msg="Replicas did not rejoin the ISR in a reasonable amount of time") @cluster(num_nodes=7) + @parametrize(version=str(LATEST_3_9), compression_types=["snappy"]) + @parametrize(version=str(LATEST_3_9), compression_types=["zstd"], security_protocol="SASL_SSL") + @matrix(version=[str(LATEST_3_9)], compression_types=[["none"]], static_membership=[False, True]) @parametrize(version=str(LATEST_3_8), compression_types=["snappy"]) @parametrize(version=str(LATEST_3_8), compression_types=["zstd"], 
security_protocol="SASL_SSL") + @matrix(version=[str(LATEST_3_8)], compression_types=[["none"]], static_membership=[False, True]) @parametrize(version=str(LATEST_3_7), compression_types=["snappy"]) @parametrize(version=str(LATEST_3_7), compression_types=["zstd"], security_protocol="SASL_SSL") @matrix(version=[str(LATEST_3_7)], compression_types=[["none"]], static_membership=[False, True]) diff --git a/tests/kafkatest/tests/core/kraft_upgrade_test.py b/tests/kafkatest/tests/core/kraft_upgrade_test.py index 6975069b73e5..dc0dc261d1c6 100644 --- a/tests/kafkatest/tests/core/kraft_upgrade_test.py +++ b/tests/kafkatest/tests/core/kraft_upgrade_test.py @@ -23,7 +23,7 @@ from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest from kafkatest.utils import is_int from kafkatest.version import LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, \ - LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION + LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION # # Test upgrading between different KRaft versions. @@ -100,7 +100,7 @@ def run_upgrade(self, from_kafka_version): compression_types=["none"], version=KafkaVersion(from_kafka_version)) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, - self.topic, new_consumer=True, consumer_timeout_ms=30000, + self.topic, consumer_timeout_ms=30000, message_validator=is_int, version=KafkaVersion(from_kafka_version)) self.run_produce_consume_validate(core_test_action=lambda: self.perform_version_change(from_kafka_version)) cluster_id = self.kafka.cluster_id() @@ -109,13 +109,13 @@ def run_upgrade(self, from_kafka_version): assert self.kafka.check_protocol_errors(self) @cluster(num_nodes=5) - @matrix(from_kafka_version=[str(LATEST_3_1), str(LATEST_3_2), str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8), str(DEV_BRANCH)], + @matrix(from_kafka_version=[str(LATEST_3_1), str(LATEST_3_2), str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8), str(LATEST_3_9), str(DEV_BRANCH)], metadata_quorum=[combined_kraft]) def test_combined_mode_upgrade(self, from_kafka_version, metadata_quorum, use_new_coordinator=False): self.run_upgrade(from_kafka_version) @cluster(num_nodes=8) - @matrix(from_kafka_version=[str(LATEST_3_1), str(LATEST_3_2), str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8), str(DEV_BRANCH)], + @matrix(from_kafka_version=[str(LATEST_3_1), str(LATEST_3_2), str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8), str(LATEST_3_9), str(DEV_BRANCH)], metadata_quorum=[isolated_kraft]) def test_isolated_mode_upgrade(self, from_kafka_version, metadata_quorum, use_new_coordinator=False): self.run_upgrade(from_kafka_version) diff --git a/tests/kafkatest/tests/core/quorum_reconfiguration_test.py b/tests/kafkatest/tests/core/quorum_reconfiguration_test.py index 80cf535021c5..10aa1d6a792f 100644 --- a/tests/kafkatest/tests/core/quorum_reconfiguration_test.py +++ b/tests/kafkatest/tests/core/quorum_reconfiguration_test.py @@ -120,7 +120,7 @@ def test_combined_mode_reconfig(self, metadata_quorum): message_validator=is_int, compression_types=["none"], version=DEV_BRANCH, offline_nodes=[inactive_controller]) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, - self.topic, new_consumer=True, 
consumer_timeout_ms=30000, + self.topic, consumer_timeout_ms=30000, message_validator=is_int, version=DEV_BRANCH) # Perform reconfigurations self.run_produce_consume_validate( @@ -161,7 +161,7 @@ def test_isolated_mode_reconfig(self, metadata_quorum): message_validator=is_int, compression_types=["none"], version=DEV_BRANCH) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, - self.topic, new_consumer=True, consumer_timeout_ms=30000, + self.topic, consumer_timeout_ms=30000, message_validator=is_int, version=DEV_BRANCH) # Perform reconfigurations self.run_produce_consume_validate( diff --git a/tests/kafkatest/tests/core/transactions_mixed_versions_test.py b/tests/kafkatest/tests/core/transactions_mixed_versions_test.py index 803cd82a9d8d..3347e114a770 100644 --- a/tests/kafkatest/tests/core/transactions_mixed_versions_test.py +++ b/tests/kafkatest/tests/core/transactions_mixed_versions_test.py @@ -21,7 +21,7 @@ from kafkatest.utils import is_int from kafkatest.utils.transactions_utils import create_and_start_copiers from kafkatest.version import LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, \ - LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION + LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION from ducktape.tests.test import Test from ducktape.mark import matrix @@ -178,7 +178,7 @@ def setup_topics(self): @cluster(num_nodes=8) @matrix( - old_kafka_version=[str(LATEST_3_8), str(LATEST_3_7), str(LATEST_3_6), str(LATEST_3_5), str(LATEST_3_4), str(LATEST_3_3), str(LATEST_3_2), str(LATEST_3_1)], + old_kafka_version=[str(LATEST_3_9), str(LATEST_3_8), str(LATEST_3_7), str(LATEST_3_6), str(LATEST_3_5), str(LATEST_3_4), str(LATEST_3_3), str(LATEST_3_2), str(LATEST_3_1)], metadata_quorum=[isolated_kraft], use_new_coordinator=[False], group_protocol=[None] diff --git a/tests/kafkatest/tests/core/transactions_upgrade_test.py b/tests/kafkatest/tests/core/transactions_upgrade_test.py index 33355fef64fb..5c72a6fa806c 100644 --- a/tests/kafkatest/tests/core/transactions_upgrade_test.py +++ b/tests/kafkatest/tests/core/transactions_upgrade_test.py @@ -21,7 +21,7 @@ from kafkatest.utils import is_int from kafkatest.utils.transactions_utils import create_and_start_copiers from kafkatest.version import LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, \ - LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION + LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion, LATEST_STABLE_METADATA_VERSION from ducktape.tests.test import Test from ducktape.mark import matrix @@ -199,7 +199,7 @@ def setup_topics(self): @cluster(num_nodes=10) @matrix( - from_kafka_version=[str(LATEST_3_8), str(LATEST_3_7), str(LATEST_3_6), str(LATEST_3_5), str(LATEST_3_4), str(LATEST_3_3), str(LATEST_3_2), str(LATEST_3_1)], + from_kafka_version=[str(LATEST_3_9), str(LATEST_3_8), str(LATEST_3_7), str(LATEST_3_6), str(LATEST_3_5), str(LATEST_3_4), str(LATEST_3_3), str(LATEST_3_2), str(LATEST_3_1)], metadata_quorum=[isolated_kraft], use_new_coordinator=[False], group_protocol=[None] diff --git a/tests/kafkatest/tests/streams/streams_application_upgrade_test.py b/tests/kafkatest/tests/streams/streams_application_upgrade_test.py index 0ad84edd5098..70d30b35012b 100644 --- a/tests/kafkatest/tests/streams/streams_application_upgrade_test.py +++ b/tests/kafkatest/tests/streams/streams_application_upgrade_test.py @@ -21,14 +21,14 @@ from 
 from kafkatest.services.kafka import KafkaService, quorum
 from kafkatest.services.streams import StreamsSmokeTestDriverService, StreamsSmokeTestJobRunnerService
 from kafkatest.version import LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \
-    LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_VERSION, KafkaVersion
+    LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, DEV_VERSION, KafkaVersion
 
 smoke_test_versions = [str(LATEST_2_2), str(LATEST_2_3), str(LATEST_2_4), str(LATEST_2_5), str(LATEST_2_6),
                        str(LATEST_2_7), str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1), str(LATEST_3_2),
                        str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7),
-                       str(LATEST_3_8)]
+                       str(LATEST_3_8), str(LATEST_3_9)]
 
 class StreamsUpgradeTest(Test):
     """
diff --git a/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py b/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
index 953ce2263d44..050c8148061c 100644
--- a/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
+++ b/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
@@ -21,7 +21,7 @@
 from kafkatest.services.kafka import KafkaService, quorum
 from kafkatest.services.streams import StreamsBrokerCompatibilityService
 from kafkatest.services.verifiable_consumer import VerifiableConsumer
-from kafkatest.version import LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, KafkaVersion
+from kafkatest.version import LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, KafkaVersion
 
 
 class StreamsBrokerCompatibility(Test):
@@ -56,7 +56,7 @@ def __init__(self, test_context):
     @cluster(num_nodes=4)
     @matrix(broker_version=[str(LATEST_3_0),str(LATEST_3_1),str(LATEST_3_2),str(LATEST_3_3),
                             str(LATEST_3_4),str(LATEST_3_5),str(LATEST_3_6),str(LATEST_3_7),
-                            str(LATEST_3_8)],
+                            str(LATEST_3_8),str(LATEST_3_9)],
             metadata_quorum=[quorum.combined_kraft]
     )
     def test_compatible_brokers_eos_disabled(self, broker_version, metadata_quorum):
@@ -78,7 +78,7 @@ def test_compatible_brokers_eos_disabled(self, broker_version, metadata_quorum):
     @cluster(num_nodes=4)
     @matrix(broker_version=[str(LATEST_3_0),str(LATEST_3_1),str(LATEST_3_2),str(LATEST_3_3),
                             str(LATEST_3_4),str(LATEST_3_5),str(LATEST_3_6),str(LATEST_3_7),
-                            str(LATEST_3_8)],
+                            str(LATEST_3_8),str(LATEST_3_9)],
             metadata_quorum=[quorum.combined_kraft])
     def test_compatible_brokers_eos_v2_enabled(self, broker_version, metadata_quorum):
         self.kafka.set_version(KafkaVersion(broker_version))
diff --git a/tests/kafkatest/tests/streams/streams_upgrade_test.py b/tests/kafkatest/tests/streams/streams_upgrade_test.py
index 759e4156920c..fe6ad8ae16c6 100644
--- a/tests/kafkatest/tests/streams/streams_upgrade_test.py
+++ b/tests/kafkatest/tests/streams/streams_upgrade_test.py
@@ -23,14 +23,14 @@
     StreamsUpgradeTestJobRunnerService
 from kafkatest.tests.streams.utils import extract_generation_from_logs, extract_generation_id
 from kafkatest.version import LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \
-    LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, DEV_BRANCH, DEV_VERSION, \
-    KafkaVersion
+    LATEST_3_0, LATEST_3_1, LATEST_3_2, LATEST_3_3, LATEST_3_4, LATEST_3_5, LATEST_3_6, LATEST_3_7, LATEST_3_8, LATEST_3_9, \
+    DEV_BRANCH, DEV_VERSION, KafkaVersion
 
 # broker 0.10.0 is not compatible with newer Kafka Streams versions
 # broker 0.10.1 and 0.10.2 do not support headers, as required by suppress() (since v2.2.1)
 broker_upgrade_versions = [str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1), str(LATEST_3_2),
                            str(LATEST_3_3), str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6),
-                           str(LATEST_3_7), str(LATEST_3_8), str(DEV_BRANCH)]
+                           str(LATEST_3_7), str(LATEST_3_8), str(LATEST_3_9), str(DEV_BRANCH)]
 
 metadata_2_versions = [str(LATEST_2_4), str(LATEST_2_5), str(LATEST_2_6), str(LATEST_2_7), str(LATEST_2_8),
                        str(LATEST_3_0), str(LATEST_3_1), str(LATEST_3_2), str(LATEST_3_3)]
@@ -38,7 +38,7 @@
 # -> https://issues.apache.org/jira/browse/KAFKA-14646
 # thus, we cannot test two bounce rolling upgrade because we know it's broken
 # instead we add version 2.4...3.3 to the `metadata_2_versions` upgrade list
-fk_join_versions = [str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8)]
+fk_join_versions = [str(LATEST_3_4), str(LATEST_3_5), str(LATEST_3_6), str(LATEST_3_7), str(LATEST_3_8), str(LATEST_3_9)]
 
 """
diff --git a/tests/kafkatest/version.py b/tests/kafkatest/version.py
index 00bcf536f84c..7920e373db3e 100644
--- a/tests/kafkatest/version.py
+++ b/tests/kafkatest/version.py
@@ -209,6 +209,10 @@ def get_version(node=None):
 V_3_8_1 = KafkaVersion("3.8.1")
 LATEST_3_8 = V_3_8_1
 
+# 3.9.x version
+V_3_9_0 = KafkaVersion("3.9.0")
+LATEST_3_9 = V_3_9_0
+
 # 4.0.x version
 V_4_0_0 = KafkaVersion("4.0.0")
 LATEST_4_0 = V_4_0_0
diff --git a/vagrant/base.sh b/vagrant/base.sh
index d57a2d223a4d..7290ae497671 100755
--- a/vagrant/base.sh
+++ b/vagrant/base.sh
@@ -148,6 +148,8 @@
 get_kafka 3.7.1 2.12
 chmod a+rw /opt/kafka-3.7.1
 get_kafka 3.8.1 2.12
 chmod a+rw /opt/kafka-3.8.1
+get_kafka 3.9.0 2.12
+chmod a+rw /opt/kafka-3.9.0
 
 # For EC2 nodes, we want to use /mnt, which should have the local disk. On local
 # VMs, we can just create it if it doesn't exist and use it like we'd use
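
Taken together, the patch follows the usual pattern for registering a new release in the system tests: define the version constant in tests/kafkatest/version.py, add str(LATEST_3_9) to each test's version matrix, and fetch the 3.9.0 binaries in vagrant/base.sh. The sketch below is illustrative only and not part of the patch; the test class name, its base class, and the test body are assumptions, while the decorators, imports, and version constants mirror the files changed above.

# Illustrative sketch -- shows how a newly added version constant is typically
# exercised by a ducktape test matrix; not code from this patch.
from ducktape.mark import matrix
from ducktape.mark.resource import cluster

from kafkatest.tests.kafka_test import KafkaTest   # assumed base class, for brevity
from kafkatest.version import LATEST_3_8, LATEST_3_9, DEV_BRANCH, KafkaVersion


class ExampleUpgradeTest(KafkaTest):
    """Hypothetical test parameterized over the old broker version."""

    @cluster(num_nodes=5)
    @matrix(from_kafka_version=[str(LATEST_3_8), str(LATEST_3_9), str(DEV_BRANCH)])
    def test_upgrade(self, from_kafka_version):
        # ducktape runs this method once per matrix entry; the version string is
        # converted back into a KafkaVersion before the brokers are configured.
        self.kafka.set_version(KafkaVersion(from_kafka_version))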