# docker-compose.yaml
# Local streaming stack: ZooKeeper, a single Kafka broker, a Debezium
# (Kafka Connect) worker, a locally built connector helper and a Jupyter
# notebook container.
#
# NOTE: the top-level `version` key is obsolete in the Compose Specification
# (recent Docker Compose releases ignore it and print a warning). It is kept
# so that older docker-compose binaries still accept the file.
version: '3'

services:
  zookeeper:
    image: confluentinc/cp-zookeeper:7.6.0
    container_name: zookeeper
    environment:
      # Environment values are quoted so they are always passed as strings.
      ZOOKEEPER_CLIENT_PORT: "2181"
      ZOOKEEPER_TICK_TIME: "2000"

  kafka:
    image: confluentinc/cp-enterprise-kafka:7.6.0
    container_name: kafka
    ports:
      # Published for clients on the host; matches the PLAINTEXT listener
      # advertised below as localhost:9092.
      - "9092:9092"
    depends_on:
      - zookeeper
    environment:
      KAFKA_BROKER_ID: "1"
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      # Two listeners, both unencrypted: PLAINTEXT is advertised to the host,
      # PLAINTEXT_INTERNAL (kafka:29092) is what the other services in this
      # file connect to.
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT, PLAINTEXT_INTERNAL:PLAINTEXT"
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://localhost:9092, PLAINTEXT_INTERNAL://kafka:29092"
      # Only one broker is defined here, so internal topics use factor/ISR 1.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "1"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "1"
    restart: always

  debezium:
    image: debezium/connect:2.5
    environment:
      BOOTSTRAP_SERVERS: "kafka:29092"
      GROUP_ID: "1"
      CONFIG_STORAGE_TOPIC: connect_configs
      STATUS_STORAGE_TOPIC: connect_statuses
      OFFSET_STORAGE_TOPIC: connect_offsets
      KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      # CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
      # CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
      # ENABLE_DEBEZIUM_SCRIPTING: 'true'
    depends_on:
      - kafka
    ports:
      - "8083:8083"
    healthcheck:
      # Probes the Connect REST root. debezium-connector waits on
      # `service_healthy`, and Compose aborts that wait if this container is
      # marked unhealthy, so the window must cover the worker's whole boot.
      # NOTE(review): 30s grace + 12 x 5s is a conservative guess; tune it to
      # the startup time actually observed on your hardware.
      test: ["CMD", "curl", "-f", "http://localhost:8083/"]
      interval: 5s
      timeout: 3s
      retries: 12
      start_period: 30s

  debezium-connector:
    build:
      context: ./src/debezium_connector
      dockerfile: Dockerfile
    # Additional variables are loaded from .env (not shown in this file).
    env_file: .env
    depends_on:
      # Start only once the Connect REST endpoint answers its healthcheck.
      debezium:
        condition: service_healthy
    environment:
      DEBEZIUM_SERVER: "http://debezium:8083/connectors/"
      KAFKA_SERVER: "kafka:29092"
      TOPIC_NAME: "rds_first_topic"
      # comma-separated list of tables to capture data from
      TABLE_LIST: "test_table"

  jupyter:
    build:
      context: ./src/jupyter_experiment
      dockerfile: Dockerfile
    container_name: experiment_notebook
    ports:
      - "8888:8888"
    volumes:
      - ./notebooks:/app/notebooks
      - ./nba_dataset:/app/notebooks/data/nba_dataset
    environment:
      KAFKA_BOOTSTRAP_SERVERS: "kafka:29092"
      INPUT_TOPIC: "rds_first_topic"
      OUTPUT_TOPIC: "output_topic_sink"
    depends_on:
      - kafka
    # command: bash -c "pip install pyspark==3.5.1 confluent-kafka==2.3.0 && start-notebook.sh --NotebookApp.token='' --NotebookApp.password=''"

  # Disabled services, kept for reference. Uncomment a whole stanza to enable.
  # pyspark_consumer:
  #   build:
  #     context: ./src/pyspark_consumer
  #     dockerfile: Dockerfile
  #   container_name: pyspark_consumer
  #   depends_on:
  #     - kafka
  #   environment:
  #     KAFKA_BOOTSTRAP_SERVERS: "kafka:29092"
  #     INPUT_TOPIC: "rds_first_topic"
  #     OUTPUT_TOPIC: "output_topic_sink"

  # schema-registry:
  #   image: confluentinc/cp-schema-registry:7.6.0
  #   environment:
  #     - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
  #     - SCHEMA_REGISTRY_HOST_NAME=schema-registry
  #     - SCHEMA_REGISTRY_LISTENERS=http://schema-registry:8081,http://localhost:8081
  #   ports:
  #     - 8081:8081
  #   depends_on:
  #     - zookeeper
  #     - kafka