Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement all infra services #27

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
config/hive/plugin/*.jar
43 changes: 43 additions & 0 deletions config/hive/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version="1.0"?>
<!--
  Hive configuration mounted into the hive-metastore container at
  /opt/hive/conf/hive-site.xml. It declares the metastore Thrift endpoint,
  the S3A settings used to reach the MinIO object store, and two
  dynamic-partition execution settings.
-->
<configuration>

  <!-- Metastore: where clients find the remote metastore service. -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://hive-metastore:9083</value>
    <description>Thrift URI for the remote metastore. Use hostname or IP.</description>
  </property>

  <!--
    S3A / MinIO: credentials and endpoint. The key pair is the same
    minio/password pair configured as the MinIO root user in
    docker-compose.yaml; these are plaintext development credentials.
  -->
  <property>
    <name>fs.s3a.access.key</name>
    <value>minio</value>
    <description>S3/MinIO access key</description>
  </property>
  <property>
    <name>fs.s3a.secret.key</name>
    <value>password</value>
    <description>S3/MinIO secret key</description>
  </property>
  <property>
    <name>fs.s3a.endpoint</name>
    <value>http://minio:9000</value>
    <description>MinIO S3 endpoint</description>
  </property>
  <property>
    <name>fs.s3a.path.style.access</name>
    <value>true</value>
    <description>Enable path-style access (needed for MinIO)</description>
  </property>

  <!-- Execution: dynamic partitioning is enabled and set to nonstrict mode. -->
  <property>
    <name>hive.exec.dynamic.partition</name>
    <value>true</value>
    <description>Enable dynamic partitioning</description>
  </property>
  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
    <description>Allow nonstrict dynamic partitioning</description>
  </property>

</configuration>
8 changes: 8 additions & 0 deletions config/trino/catalog/hive.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Trino catalog backed by the Hive connector.
connector.name=hive

# Remote Hive metastore reached over Thrift.
hive.metastore.uri=thrift://hive-metastore:9083

# Native S3 file system pointed at the MinIO endpoint (path-style URLs).
fs.native-s3.enabled=true
s3.endpoint=http://minio:9000
s3.path-style-access=true
s3.region=us-east-1

# Plaintext development credentials; same pair as the MinIO root user
# in docker-compose.yaml.
s3.aws-access-key=minio
s3.aws-secret-key=password
231 changes: 193 additions & 38 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,66 +1,221 @@
version: "3.8"

services:
kafka:
postgres:
  # PostgreSQL 17 with user, password and database all set to "postgres";
  # published on host port 5432.
  image: postgres:17
  environment:
    - POSTGRES_USER=postgres
    - POSTGRES_PASSWORD=postgres
    - POSTGRES_DB=postgres
  ports:
    - "5432:5432"

# Kafka node 1. KAFKA_PROCESS_ROLES makes it both a broker and a controller,
# and it is listed as voter 1 in KAFKA_CONTROLLER_QUORUM_VOTERS.
kafka-1:
image: apache/kafka:latest
container_name: kafka-1
restart: unless-stopped
ports:
# NOTE(review): the next three lines ("9092:9092" and the kafka_data volume)
# look like pre-change lines kept by the diff view. As written, the
# 19092:19092 entry would fall under `volumes:` - confirm against the real file.
- "9092:9092"
volumes:
- kafka_data:/var/lib/kafka/data
- 19092:19092
environment:
# Shared setup
KAFKA_NODE_ID: 1
KAFKA_PROCESS_ROLES: broker,controller
# Three listeners: PLAINTEXT on 9092, PLAINTEXT_HOST on 19092, CONTROLLER on 9093.
KAFKA_LISTENERS: PLAINTEXT://:9092,PLAINTEXT_HOST://:19092,CONTROLLER://:9093
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
# Both nodes are declared as controller quorum voters on port 9093.
KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-1:9093,2@kafka-2:9093
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
# Broker setup
# PLAINTEXT is advertised as kafka-1:9092; PLAINTEXT_HOST is advertised as
# localhost:19092, the same port number published under `ports`.
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092,PLAINTEXT_HOST://localhost:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
# Health probe: list topics via localhost:9092 every 10s (5s timeout, 5 retries).
healthcheck:
test:
[
'CMD',
'/opt/kafka/bin/kafka-topics.sh',
'--bootstrap-server',
'localhost:9092',
'--list',
]
interval: 10s
timeout: 5s
retries: 5

grafana:
image: grafana/grafana:latest
# Kafka node 2. Same combined broker/controller setup as kafka-1, with node
# ID 2 and host listener port 29092.
kafka-2:
image: apache/kafka:latest
container_name: kafka-2
restart: unless-stopped
ports:
# NOTE(review): "3000:3000" and the two GF_* entries below look like lines of
# the previous grafana service interleaved by the diff view - confirm they are
# not part of kafka-2 in the real file.
- "3000:3000"
- 29092:29092
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
# Shared setup
KAFKA_NODE_ID: 2
KAFKA_PROCESS_ROLES: broker,controller
# Three listeners: PLAINTEXT on 9092, PLAINTEXT_HOST on 29092, CONTROLLER on 9093.
KAFKA_LISTENERS: PLAINTEXT://:9092,PLAINTEXT_HOST://:29092,CONTROLLER://:9093
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
# Both nodes are declared as controller quorum voters on port 9093.
KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-1:9093,2@kafka-2:9093
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
# Broker setup
# PLAINTEXT is advertised as kafka-2:9092; PLAINTEXT_HOST is advertised as
# localhost:29092, the same port number published under `ports`.
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
# Health probe: list topics via localhost:9092 every 10s (5s timeout, 5 retries).
healthcheck:
test:
[
'CMD',
'/opt/kafka/bin/kafka-topics.sh',
'--bootstrap-server',
'localhost:9092',
'--list',
]
interval: 10s
timeout: 5s
retries: 5

init-topics:
  # One-shot job that creates the application topics once both brokers report
  # healthy. Each topic gets 1 partition and replication factor 1, and
  # --if-not-exists makes re-runs harmless.
  image: apache/kafka:latest
  depends_on:
    kafka-1:
      condition: service_healthy
    kafka-2:
      condition: service_healthy
  environment:
    # Single source of truth for the broker address used by the script below.
    BOOTSTRAP_SERVERS: kafka-1:9092
  # List form avoids Compose's shell-style splitting of a single string.
  # `$$` is Compose's escape for a literal `$`, so the variables are expanded
  # by bash inside the container rather than by Compose on the host.
  entrypoint:
    - bash
    - '-c'
    - |
      # Abort on the first failing command so a failed topic creation is
      # reported through the exit code instead of being masked by the final echo.
      set -eu
      echo "Waiting for Kafka to be ready..."
      # Up to 30 attempts, 2 seconds apart.
      for i in {1..30}; do
        /opt/kafka/bin/kafka-topics.sh --bootstrap-server "$$BOOTSTRAP_SERVERS" --list && break || sleep 2
      done
      echo "Creating topics..."
      for topic in position_topic time_registration_topic report_topic entity_topic; do
        /opt/kafka/bin/kafka-topics.sh --create --if-not-exists \
          --bootstrap-server "$$BOOTSTRAP_SERVERS" \
          --topic "$$topic" --partitions 1 --replication-factor 1
      done
      echo "Kafka initialization complete."

schema-registry:
  # Confluent Schema Registry listening on 8085 (published on the same host
  # port), using kafka-1:9092 as its Kafka store bootstrap server. Starts only
  # after both brokers are healthy. The image tag comes from CONFLUENT_VERSION
  # and falls back to "latest".
  image: confluentinc/cp-schema-registry:${CONFLUENT_VERSION:-latest}
  restart: unless-stopped
  depends_on:
    kafka-1:
      condition: service_healthy
    kafka-2:
      condition: service_healthy
  environment:
    - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=PLAINTEXT://kafka-1:9092
    - SCHEMA_REGISTRY_HOST_NAME=schema-registry
    - SCHEMA_REGISTRY_LISTENERS=http://0.0.0.0:8085
    - SCHEMA_REGISTRY_LOG4J_ROOT_LOGLEVEL=INFO
  ports:
    - "8085:8085"

akhq:
  # AKHQ web UI for the Kafka cluster, published on host port 8080. It is
  # configured through AKHQ_CONFIGURATION to talk to kafka-1:9092 and to the
  # schema registry at http://schema-registry:8085.
  image: tchiotludo/akhq
  restart: unless-stopped
  # The legacy `links` entries were replaced by `depends_on`: service names
  # already resolve on the Compose network, and this also makes the schema
  # registry (used in the configuration below) an explicit start-up dependency.
  # schema-registry defines no healthcheck, hence `service_started`.
  depends_on:
    kafka-1:
      condition: service_healthy
    kafka-2:
      condition: service_healthy
    schema-registry:
      condition: service_started
  # NOTE(review): these bind mounts reference host paths under /opt/tibco and
  # feed the CLASSPATH below - confirm they exist on the target hosts or drop
  # them together with CLASSPATH.
  volumes:
    - /opt/tibco/akd/repo/1.2/lib/tibftl-kafka-avro-1.2.0-thin.jar:/app/tibftl-kafka-avro-1.2.0-thin.jar
    - /opt/tibco/akd/repo/1.2/lib/deps:/app/deps
  environment:
    AKHQ_CONFIGURATION: |
      akhq:
        connections:
          docker-kafka-server:
            properties:
              bootstrap.servers: "kafka-1:9092"
            schema-registry:
              url: "http://schema-registry:8085"
    CLASSPATH: '/app/tibftl-kafka-avro-1.2.0-thin.jar:/app/deps/*'
  ports:
    - '8080:8080'

# MinIO object store: S3 API on 9000, console on 9001 (--console-address),
# root user minio / password.
# NOTE(review): several settings below appear twice (image, ports, region
# variable, command, healthcheck arguments). They look like old/new pairs
# interleaved by the diff view - confirm which of each pair is in the real file.
minio:
image: "minio/minio:latest"
image: 'minio/minio:latest'
ports:
- "9000:9000"
- "9001:9001"
- '9000:9000'
- '9001:9001'
environment:
- MINIO_ROOT_USER=minio
- MINIO_ROOT_PASSWORD=password
- MINIO_REGION_NAME=us-east-1
- MINIO_REGION=us-east-1
# NOTE(review): the volume is mounted at /data/minio while the second command
# serves /data. If both are in the final file, data written outside
# /data/minio would not land on the named volume - confirm.
volumes:
- "minio:/data/minio"
command: 'minio server /data/minio --console-address ":9001"'

command: 'minio server /data --console-address ":9001"'
# Health probe: curl -f against the /minio/health/live endpoint on port 9000,
# 3 retries, 5s timeout.
healthcheck:
test:
- CMD
- curl
- "-f"
- "http://localhost:9000/minio/health/live"
- '-f'
- 'http://localhost:9000/minio/health/live'
retries: 3
timeout: 5s

init-minio:
  # One-shot job that waits until the MinIO alias can be registered, creates
  # the `kafkamion` bucket and makes it publicly accessible.
  # `--ignore-existing` keeps re-runs quiet when the bucket is already there
  # instead of logging an error from `mc mb`.
  image: minio/mc:latest
  depends_on:
    minio:
      condition: service_healthy
  entrypoint: >
    /bin/sh -c "
    until (mc alias set myminio http://minio:9000 minio password); do echo 'Waiting for MinIO...'; sleep 3; done;
    mc mb --ignore-existing myminio/kafkamion;
    mc anonymous set public myminio/kafkamion;
    echo 'Bucket created and policy set!';
    "

download-hive-plugin:
  # One-shot Alpine container that downloads the two jars mounted into
  # hive-metastore (AWS SDK bundle 1.11.1026 and hadoop-aws 3.3.6) from Maven
  # Central into ./config/hive/plugin on the host. The second download runs
  # only if the first succeeds.
  container_name: download-hive-plugin
  image: alpine:latest
  volumes:
    - ./config/hive/plugin:/jars
  command:
    - /bin/sh
    - '-c'
    - >-
      wget -O /jars/aws-java-sdk-bundle-1.11.1026.jar
      https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.1026/aws-java-sdk-bundle-1.11.1026.jar
      &&
      wget -O /jars/hadoop-aws-3.3.6.jar
      https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.6/hadoop-aws-3.3.6.jar

hive-metastore:
  # Hive 4.0.1 started with SERVICE_NAME=metastore; the Thrift port 9083 is
  # published on the host. It starts only after download-hive-plugin has
  # completed successfully, because the two jars it fetched are bind-mounted
  # into /opt/hive/lib together with hive-site.xml.
  container_name: hive-metastore
  image: apache/hive:4.0.1
  depends_on:
    download-hive-plugin:
      condition: service_completed_successfully
  environment:
    - SERVICE_NAME=metastore
  ports:
    - "9083:9083"
  volumes:
    - ./config/hive/hive-site.xml:/opt/hive/conf/hive-site.xml
    - ./config/hive/plugin/hadoop-aws-3.3.6.jar:/opt/hive/lib/hadoop-aws-3.3.6.jar
    - ./config/hive/plugin/aws-java-sdk-bundle-1.11.1026.jar:/opt/hive/lib/aws-java-sdk-bundle-1.11.1026.jar

# Trino query engine; the catalog directory ./config/trino/catalog is mounted
# at /etc/trino/catalog.
# NOTE(review): this block has two `ports:` keys ("8080:8080" and 8081:8080),
# a `- kafka` dependency although no `kafka` service body is visible, and a
# trino_data volume next to the catalog mount. These look like old and new
# lines interleaved by the diff view - confirm the final content, including
# whether the TRINO_* environment list is still present.
trino:
image: trinodb/trino:latest
ports:
- "8080:8080"
environment:
- TRINO_ENVIRONMENT=production
- TRINO_JVM_MAX_HEAP_SIZE=4GB
- TRINO_JVM_OFF_HEAP_SIZE=4GB
- TRINO_QUERY_MAX_MEMORY=1GB
- TRINO_QUERY_MAX_MEMORY_PER_NODE=1GB
- TRINO_QUERY_MAX_TOTAL_MEMORY=2GB
- TRINO_DISCOVERY_URI=http://trino:8080
container_name: trino
restart: unless-stopped
depends_on:
- kafka
- hive-metastore
ports:
# Host port 8081 maps to container port 8080 (akhq publishes host port 8080).
- 8081:8080
volumes:
- trino_data:/var/lib/trino/data
- ./config/trino/catalog:/etc/trino/catalog

# Top-level named volumes.
# NOTE(review): kafka_volume is not referenced by any visible service, and the
# grafana service definition follows this section directly. Some or all of
# these entries may be pre-change lines kept by the diff view - confirm
# whether this section still exists in the real file.
volumes:
kafka_volume:
minio:
kafka_data:
trino_data:
grafana:
  # Grafana UI on host port 3000 with admin password "admin" and
  # GF_USERS_ALLOW_SIGN_UP set to false. Started after kafka-1.
  image: grafana/grafana:latest
  depends_on:
    - kafka-1
  environment:
    GF_SECURITY_ADMIN_PASSWORD: admin
    # Quoted so the container receives the string "false", as in list form.
    GF_USERS_ALLOW_SIGN_UP: 'false'
  ports:
    - '3000:3000'
2 changes: 1 addition & 1 deletion producer/target/.rustc_info.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"rustc_fingerprint":9706494220011346817,"outputs":{"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.83.0 (90b35a623 2024-11-26)\nbinary: rustc\ncommit-hash: 90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf\ncommit-date: 2024-11-26\nhost: x86_64-unknown-linux-gnu\nrelease: 1.83.0\nLLVM version: 19.1.1\n","stderr":""},"14371922958718593042":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/baptiste/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"15729799797837862367":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/baptiste/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""}},"successes":{}}
{"rustc_fingerprint":9706494220011346817,"outputs":{"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.83.0 (90b35a623 2024-11-26)\nbinary: rustc\ncommit-hash: 90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf\ncommit-date: 2024-11-26\nhost: x86_64-unknown-linux-gnu\nrelease: 1.83.0\nLLVM version: 19.1.1\n","stderr":""},"15729799797837862367":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/baptiste/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\nfmt_debug=\"full\"\noverflow_checks\npanic=\"unwind\"\nproc_macro\nrelocation_model=\"pic\"\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\nub_checks\nunix\n","stderr":""},"14371922958718593042":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/baptiste/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\nt
arget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""}},"successes":{}}