From 4c3accb86357963f24a8de0360c46ab71316d2e0 Mon Sep 17 00:00:00 2001 From: Marcos Schroh <2828842+marcosschroh@users.noreply.github.com> Date: Thu, 14 Dec 2023 08:56:38 +0100 Subject: [PATCH] feat: getmany added to Stream. Closes #128 (#147) --- docs/stream.md | 12 ++++++ .../recommended_worker_app/streams.py | 2 +- kstreams/streams.py | 43 +++++++++++++++++++ kstreams/test_utils/test_clients.py | 16 +++++++ tests/test_client.py | 26 +++++++++++ tests/test_stream_engine.py | 28 ++++++++++++ 6 files changed, 126 insertions(+), 1 deletion(-) diff --git a/docs/stream.md b/docs/stream.md index b11f3eb..6302bb0 100644 --- a/docs/stream.md +++ b/docs/stream.md @@ -9,6 +9,8 @@ Consuming can be done using `kstreams.Stream`. You only need to decorate a `coro show_root_heading: true docstring_section_style: table show_signature_annotations: false + members: + - ## Dependency Injection and typing @@ -256,6 +258,16 @@ async with stream as stream_flow: # Use the context manager If for some reason you interrupt the "async for in" in the async generator, the Stream will stopped consuming events meaning that the lag will increase. +## Get many + +::: kstreams.streams.Stream.getmany + options: + docstring_section_style: table + show_signature_annotations: false + +!!! warning + This approach does not work with `Dependency Injection`. + ## Rebalance Listener For some cases you will need a `RebalanceListener` so when partitions are `assigned` or `revoked` to the stream different accions can be performed. 
diff --git a/examples/recommended-worker-app/recommended_worker_app/streams.py b/examples/recommended-worker-app/recommended_worker_app/streams.py index 164da8d..53b7f97 100644 --- a/examples/recommended-worker-app/recommended_worker_app/streams.py +++ b/examples/recommended-worker-app/recommended_worker_app/streams.py @@ -1,4 +1,4 @@ -from kstreams import stream, ConsumerRecord +from kstreams import ConsumerRecord, stream @stream("local--hello-world", group_id="example-group") diff --git a/kstreams/streams.py b/kstreams/streams.py index 7e18513..fe765b4 100644 --- a/kstreams/streams.py +++ b/kstreams/streams.py @@ -160,6 +160,49 @@ async def getone(self) -> ConsumerRecord: return consumer_record + async def getmany( + self, + partitions: Optional[List[TopicPartition]] = None, + timeout_ms: int = 0, + max_records: Optional[int] = None, + ) -> Dict[TopicPartition, List[ConsumerRecord]]: + """ + Get a batch of events from the assigned TopicPartition. + + Prefetched events are returned in batches by topic-partition. + If messages are not available in the prefetched buffer this method waits + `timeout_ms` milliseconds. + + Attributes: + partitions List[TopicPartition] | None: The partitions that need + fetching messages. If no partition is specified then all + subscribed partitions will be used + timeout_ms int: milliseconds spent waiting if + data is not available in the buffer. If 0, returns immediately + with any records that are available currently in the buffer, + else returns empty. Must not be negative. + max_records int | None: The amount of records to fetch. + if `timeout_ms` was defined and reached and the fetched records + have not reached `max_records` then returns immediately + with any records that are available currently in the buffer + + Returns: + TopicPartition to list of records + + !!! Example + ```python + @stream_engine.stream(topic, ...) 
+ async def stream(stream: Stream): + while True: + data = await stream.getmany(max_records=5) + print(data) + ``` + """ + partitions = partitions or [] + return await self.consumer.getmany( # type: ignore + *partitions, timeout_ms=timeout_ms, max_records=max_records + ) + async def start(self) -> Optional[AsyncGenerator]: if self.running: return None diff --git a/kstreams/test_utils/test_clients.py b/kstreams/test_utils/test_clients.py index 88962ef..d989cfa 100644 --- a/kstreams/test_utils/test_clients.py +++ b/kstreams/test_utils/test_clients.py @@ -190,6 +190,22 @@ async def getone( return None + async def getmany( + self, + *partitions: List[TopicPartition], + timeout_ms: int = 0, + max_records: int = 1, + ) -> Dict[TopicPartition, List[ConsumerRecord]]: + """ + Basic getmany implementation. + `partitions` and `timeout_ms` could be added to the logic + but it seems unnecessary for now; if end users request them we + can add it + """ + return { + self._assignment[0]: [await self.getone() for _ in range(0, max_records)] + } + def seek(self, *, partition: TopicPartition, offset: int) -> None: # This method intends to have the same signature as aiokafka but with kwargs # rather than positional arguments diff --git a/tests/test_client.py b/tests/test_client.py index 1e657f9..47f7e51 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -93,6 +93,32 @@ async def consume(stream): save_to_db.assert_called_once_with(event) +@pytest.mark.asyncio +async def test_stream_consume_many(stream_engine: StreamEngine): + event = b'{"message": "Hello world!"}' + max_records = 2 + save_to_db = Mock() + + @stream_engine.stream(topic) + async def stream(stream: Stream): + while True: + data = await stream.getmany(max_records=max_records) + save_to_db( + [ + cr.value + for consumer_records_list in data.values() + for cr in consumer_records_list + ] + ) + + client = TestStreamClient(stream_engine) + async with client: + await client.send(topic, value=event, key="1") + await 
client.send(topic, value=event, key="1") + + save_to_db.assert_called_once_with([event for _ in range(0, max_records)]) + + @pytest.mark.asyncio async def test_stream_consume_events_as_generator(stream_engine: StreamEngine): topic = "local--hello-kpn" diff --git a/tests/test_stream_engine.py b/tests/test_stream_engine.py index 2a8477e..51f9177 100644 --- a/tests/test_stream_engine.py +++ b/tests/test_stream_engine.py @@ -345,6 +345,34 @@ async def getone(_): assert not stream.running +@pytest.mark.asyncio +async def test_stream_getmany( + stream_engine: StreamEngine, consumer_record_factory: Callable[..., ConsumerRecord] +): + topic_partition_crs = { + TopicPartition(topic="local--hello-kpn", partition=0): [ + consumer_record_factory(offset=1), + consumer_record_factory(offset=2), + consumer_record_factory(offset=3), + ] + } + + save_to_db = mock.Mock() + + @stream_engine.stream("local--hello-kpn") + async def stream(stream: Stream): + data = await stream.getmany(max_records=3) + save_to_db(data) + + async def getmany(*args, **kwargs): + return topic_partition_crs + + with mock.patch.multiple(Consumer, start=mock.DEFAULT, getmany=getmany): + await stream_engine.start_streams() + await asyncio.sleep(0.1) + save_to_db.assert_called_once_with(topic_partition_crs) + + @pytest.mark.asyncio async def test_stream_decorator(stream_engine: StreamEngine): topic = "local--hello-kpn"