-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add User Defined Source support (#114)
Signed-off-by: Sidhant Kohli <[email protected]>
- Loading branch information
Showing
43 changed files
with
2,000 additions
and
426 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
####################################################################################################
# builder: install needed dependencies
####################################################################################################

FROM python:3.10-slim-bullseye AS builder

ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=on \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    POETRY_VERSION=1.2.2 \
    POETRY_HOME="/opt/poetry" \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_NO_INTERACTION=1 \
    PYSETUP_PATH="/opt/pysetup" \
    VENV_PATH="/opt/pysetup/.venv"

ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"

# Make a failing left-hand side of a pipe (e.g. curl in `curl | python3`) fail the build
# instead of being masked by the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        # deps for building python deps
        build-essential \
        curl \
        git \
        wget \
    && apt-get clean && rm -rf /var/lib/apt/lists/* \
    # install dumb-init; pick the release binary matching the build architecture
    # (x86_64 / aarch64) so multi-platform builds do not ship an amd64-only init
    && wget -O /dumb-init "https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_$(uname -m)" \
    && chmod +x /dumb-init \
    # install poetry (the installer honours POETRY_VERSION and POETRY_HOME set above);
    # -f makes curl exit non-zero on HTTP errors instead of piping an error page to python
    && curl -fsSL https://install.python-poetry.org | python3 -

####################################################################################################
# udf: used for running the udf vertices
####################################################################################################
FROM builder AS udf

# Install dependencies from the manifest first so this layer stays cached
# until pyproject.toml changes.
WORKDIR $PYSETUP_PATH
COPY pyproject.toml ./
RUN poetry install --no-cache --no-root && \
    rm -rf ~/.cache/pypoetry/

# Plain local copy: COPY is sufficient, ADD's archive/URL handling is not needed.
COPY . /app
WORKDIR /app

RUN chmod +x entry.sh

# dumb-init runs as PID 1 and launches the entry script.
ENTRYPOINT ["/dumb-init", "--"]
CMD ["/app/entry.sh"]

EXPOSE 5000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Image coordinates. Defaults reproduce the published example image; override on the
# command line when needed, e.g. `make image TAG=v0.5.4`.
IMAGE ?= quay.io/numaio/numaflow-python/simple-source
TAG ?= v0.5.3

.PHONY: image
image:
	docker build -t "$(IMAGE):$(TAG)" .
# The GitHub CI runner uses platform linux/amd64. If your local environment doesn't, the image built by the command above
# might not work under the CI E2E test environment.
# To build an image that supports multiple platforms (linux/amd64,linux/arm64) and push it to quay.io, use the following command:
# docker buildx build -t "quay.io/numaio/numaflow-python/simple-source:v0.5.3" --platform linux/amd64,linux/arm64 . --push
# If the command fails, refer to https://billglover.me/notes/build-multi-arch-docker-images/ to fix it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Example Python User Defined Source | ||
A simple example of a user-defined source. The source maintains an array of messages and implements the Read, | ||
Ack, and Pending methods. | ||
The Read(x) method returns the next x number of messages in the array. | ||
The Ack() method acknowledges the last batch of messages returned by Read(). | ||
The Pending() method returns 0 to indicate that the simple source always has 0 pending messages. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/sh
# Container entry script: starts the example user-defined source.
set -eux

# exec replaces this shell with the Python process, so no extra shell sits between
# the container's init process and the server, and Python's exit status is reported directly.
exec python example.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
from collections.abc import Iterable | ||
from datetime import datetime | ||
|
||
from pynumaflow.sourcer import ( | ||
ReadRequest, | ||
Message, | ||
Sourcer, | ||
AckRequest, | ||
PendingResponse, | ||
Offset, | ||
) | ||
|
||
|
||
class SimpleSource:
    """
    SimpleSource is a class for User Defined Source implementation.

    It emits an ever-increasing integer sequence: every message's payload and offset
    are the string form of a running index. A new batch is only handed out once all
    offsets of the previous batch have been acknowledged.
    """

    def __init__(self):
        """
        to_ack_set: Set of offsets (as str) that were emitted and are yet to be acknowledged
        read_idx  : the offset idx of the next message to be emitted
        """
        self.to_ack_set = set()
        self.read_idx = 0

    def read_handler(self, datum: ReadRequest) -> Iterable[Message]:
        """
        Generate up to ``datum.num_records`` messages for one read request.

        Nothing is produced while offsets from an earlier read are still waiting
        to be acknowledged. For every produced message the offset is recorded in
        ``to_ack_set`` and ``read_idx`` is advanced.
        """
        # A previous batch is still in flight: emit nothing until it has been acked.
        if self.to_ack_set:
            return

        for _ in range(datum.num_records):
            current = str(self.read_idx)
            # Do the book-keeping before yielding: code placed after a ``yield`` only
            # runs when the consumer asks for the next item, so a consumer that stops
            # early would otherwise leave the emitted offset untracked and the index
            # un-advanced.
            self.to_ack_set.add(current)
            self.read_idx += 1
            yield Message(
                payload=current.encode(),
                offset=Offset(offset=current.encode(), partition_id="0"),
                event_time=datetime.now(),
            )

    def ack_handler(self, ack_request: AckRequest):
        """
        Acknowledge the offsets carried by ``ack_request`` by removing them from
        ``to_ack_set``.

        Offsets that are not (or no longer) tracked are ignored, so a repeated
        acknowledgement does not raise.
        """
        for offset in ack_request.offset:
            # discard() instead of remove(): an unknown offset must not raise KeyError.
            self.to_ack_set.discard(str(offset.offset, "utf-8"))

    def pending_handler(self) -> PendingResponse:
        """
        The simple source always returns zero to indicate there is no pending record.
        """
        return PendingResponse(count=0)
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build the example source, hand its three handlers
    # to the Sourcer server and start it.
    source = SimpleSource()
    handlers = {
        "read_handler": source.read_handler,
        "ack_handler": source.ack_handler,
        "pending_handler": source.pending_handler,
    }
    server = Sourcer(**handlers)
    server.start()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Pipeline wiring the example user-defined source ("in") to a log sink ("out").
apiVersion: numaflow.numaproj.io/v1alpha1
kind: Pipeline
metadata:
  name: simple-source
spec:
  vertices:
    # Source vertex backed by the user-defined source container.
    - name: in
      source:
        udsource:
          container:
            # A simple user-defined source for e2e testing
            image: quay.io/numaio/numaflow-python/simple-source:v0.5.3
            imagePullPolicy: Always
      limits:
        readBatchSize: 2
    # Sink vertex using the log sink.
    - name: out
      sink:
        log: {}
  edges:
    - from: in
      to: out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Poetry project definition for the simple-source example.
[tool.poetry]
name = "simple-source"
version = "0.2.4"
description = ""
authors = ["Numaflow developers"]

# Runtime requirements, both expressed as tilde (~) version constraints.
[tool.poetry.dependencies]
python = "~3.10"
pynumaflow = "~0.5.3"

# No development-only dependencies are declared.
[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.