Mirror of https://github.com/roostorg/osprey
github.com/roostorg/osprey
# Named volumes. Most back the Druid node state dirs; metadata_data holds
# the Postgres data directory, druid_shared is mounted by several Druid
# services, and minio_data backs the MinIO object store.
volumes:
  metadata_data: {}
  middle_var: {}
  historical_var: {}
  broker_var: {}
  coordinator_var: {}
  router_var: {}
  druid_shared: {}
  minio_data: {}
11
services:
  # Single-node Kafka in KRaft mode (no ZooKeeper): the one broker is also
  # the controller, so all replication factors / min-ISR are 1.
  kafka:
    image: confluentinc/cp-kafka:7.4.0
    hostname: kafka
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      KAFKA_NODE_ID: 1
      KAFKA_PROCESS_ROLES: "broker,controller"
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka:29093"
      KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
      KAFKA_INTER_BROKER_LISTENER_NAME: "INTERNAL"
      # INTERNAL is what other containers use (kafka:29092); EXTERNAL is
      # published on the host and advertised as localhost:9092.
      KAFKA_LISTENERS: "INTERNAL://kafka:29092,EXTERNAL://0.0.0.0:9092,CONTROLLER://kafka:29093"
      KAFKA_ADVERTISED_LISTENERS: "INTERNAL://kafka:29092,EXTERNAL://localhost:9092"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
      # Pre-generated KRaft cluster id (required when running without ZooKeeper).
      CLUSTER_ID: "P45WxmmWSe2CrdGoeJMcKg"
    healthcheck:
      # Healthy once the broker answers admin requests on the internal listener.
      test: ["CMD", "bash", "-c", "kafka-topics --bootstrap-server kafka:29092 --list"]
      interval: 10s
      timeout: 5s
      retries: 5
44
  # S3-compatible object store used for execution-result storage.
  # NOTE(review): dev-only root credentials are hardcoded here and repeated
  # in the osprey-worker environment — fine locally, never for production.
  minio:
    image: minio/minio:latest
    container_name: minio
    hostname: minio
    ports:
      - "9000:9000" # minio API
      - "9001:9001" # minio Console
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin123
    volumes:
      - minio_data:/data
    command: server --console-address ":9001" /data
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 10s
      timeout: 5s
      retries: 3
63
64 minio-bucket-init:
65 image: minio/mc:latest
66 depends_on:
67 minio:
68 condition: service_healthy
69 entrypoint: ["/bin/sh", "/init-minio-bucket.sh"]
70 volumes:
71 - ./init-minio-bucket.sh:/init-minio-bucket.sh
72 restart: "no"
73
  # One-shot job: pre-creates the osprey topics (idempotent via
  # --if-not-exists) so producers/consumers don't race topic auto-creation,
  # then lists topics for the logs.
  kafka-topic-creator:
    image: confluentinc/cp-kafka:7.4.0
    depends_on:
      kafka:
        condition: service_healthy
    command: >
      bash -c "
      kafka-topics --bootstrap-server kafka:29092 --create --if-not-exists --topic osprey.actions_input --partitions 3 --replication-factor 1 &&
      kafka-topics --bootstrap-server kafka:29092 --create --if-not-exists --topic osprey.execution_results --partitions 3 --replication-factor 1 &&
      kafka-topics --bootstrap-server kafka:29092 --list
      "
85
86 osprey-worker:
87 container_name: osprey-worker
88 hostname: osprey-worker
89 build:
90 context: .
91 dockerfile: osprey_worker/Dockerfile
92 depends_on:
93 kafka:
94 condition: service_healthy
95 kafka-topic-creator:
96 condition: service_completed_successfully
97 bigtable:
98 condition: service_healthy
99 bigtable-initializer:
100 condition: service_completed_successfully
101 minio:
102 condition: service_healthy
103 minio-bucket-init:
104 condition: service_completed_successfully
105 ports:
106 - "5001:5000"
107 command: ["osprey-worker"]
108 environment:
109 - PYTHONPATH=/osprey
110 - PORT=5000
111 - POSTGRES_HOSTS={"osprey_db":"postgresql://osprey:FoolishPassword@postgres:5432/osprey"}
112 - OSPREY_INPUT_STREAM_SOURCE=kafka
113 - OSPREY_STDOUT_OUTPUT_SINK=True
114 - OSPREY_KAFKA_BOOTSTRAP_SERVERS=["kafka:29092"]
115 - OSPREY_KAFKA_INPUT_STREAM_TOPIC=osprey.actions_input
116 # Client ID will default to the machine hostname if it isn't defined
117 - OSPREY_KAFKA_INPUT_STREAM_CLIENT_ID=localhost
118 - OSPREY_KAFKA_OUTPUT_SINK=True
119 - OSPREY_KAFKA_OUTPUT_TOPIC=osprey.execution_results
120 - OSPREY_KAFKA_OUTPUT_CLIENT_ID=localhost
121 - DD_TRACE_ENABLED=False
122 - DD_DOGSTATSD_DISABLE=True
123 - OSPREY_RULES_SINK_NUM_WORKERS=1
124 - BIGTABLE_EMULATOR_HOST=bigtable:8361
125 - OSPREY_EXECUTION_RESULT_STORAGE_BACKEND=minio
126 - OSPREY_MINIO_ENDPOINT=minio:9000
127 - OSPREY_MINIO_ACCESS_KEY=minioadmin
128 - OSPREY_MINIO_SECRET_KEY=minioadmin123
129 - OSPREY_MINIO_SECURE=false
130 - OSPREY_MINIO_EXECUTION_RESULTS_BUCKET=execution-output
131 - SNOWFLAKE_API_ENDPOINT=http://snowflake-id-worker:8088
132 - OSPREY_RULES_PATH=./example_rules
133 volumes:
134 - ./osprey_worker:/osprey/osprey_worker
135 - ./osprey_rpc:/osprey/osprey_rpc
136 - ./example_rules:/osprey/example_rules
137 - ./entrypoint.sh:/osprey/entrypoint.sh
138
  # API server for the UI, built from the same image as the worker and run
  # in Flask debug/development mode. Reads events via druid-broker and
  # metadata via postgres/bigtable (see environment below).
  osprey-ui-api:
    container_name: osprey-ui-api
    build:
      context: .
      dockerfile: osprey_worker/Dockerfile
    # Short-form depends_on: waits only for container start, not health.
    depends_on:
      - osprey-worker
      - druid-broker
      - postgres
      - snowflake-id-worker
      - bigtable
      - bigtable-initializer
    ports:
      - "5004:5004"
    command: ["osprey-ui-api"]
    environment:
      - PYTHONPATH=/osprey
      - PORT=5004
      - DEBUG=true
      - FLASK_DEBUG=1
      - FLASK_ENV=development
      - DRUID_URL=http://druid-broker:8082
      - POSTGRES_HOSTS={"osprey_db":"postgresql://osprey:FoolishPassword@postgres:5432/osprey"}
      - DD_TRACE_ENABLED=False
      - DD_DOGSTATSD_DISABLE=True
      - OSPREY_RULES_PATH=/osprey/example_rules
      - OSPREY_DISABLE_VALIDATION_EXPORTER=true
      - BIGTABLE_EMULATOR_HOST=bigtable:8361
      - SNOWFLAKE_API_ENDPOINT=http://snowflake-id-worker:8088
      - SNOWFLAKE_EPOCH=1420070400000
    volumes:
      - ./osprey_worker:/osprey/osprey_worker
      - ./osprey_rpc:/osprey/osprey_rpc
      - ./example_rules:/osprey/example_rules
173
  # UI dev server (Node/React).
  osprey-ui:
    container_name: osprey-ui
    hostname: osprey-ui
    build:
      context: .
      dockerfile: osprey_ui/Dockerfile
    depends_on:
      - osprey-ui-api
    ports:
      - "5002:5002"
    environment:
      - NODE_ENV=development
      # Base URL used by the browser, hence the host-published API port.
      - REACT_APP_API_BASE_URL=http://localhost:5004
    volumes:
      - ./osprey_ui:/app
      # Anonymous volume shadows /app/node_modules so the bind mount above
      # doesn't clobber the dependencies installed in the image.
      - /app/node_modules
190
191 snowflake-id-worker:
192 hostname: snowflake-id-worker
193 container_name: snowflake-id-worker
194 image: ghcr.io/ayubun/snowflake-id-worker:0
195 ports:
196 - "8088:8088"
197 environment:
198 - WORKER_ID=0
199 - DATA_CENTER_ID=0
200 - EPOCH=1420070400000
201 - PORT=8088
202 restart: unless-stopped
203
  # Cloud Bigtable emulator started via the Cloud SDK image.
  bigtable:
    hostname: bigtable
    container_name: bigtable
    image: gcr.io/google.com/cloudsdktool/cloud-sdk:latest
    ports:
      - "8361:8361"
    command: >
      bash -c "
      gcloud beta emulators bigtable start --host-port=0.0.0.0:8361 --project=osprey-dev
      "
    healthcheck:
      # The gcloud wrapper spawns a cbtemulator process; probe for it.
      test: ["CMD", "bash", "-c", "pgrep -f cbtemulator > /dev/null || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
    restart: unless-stopped
220
221 bigtable-initializer:
222 container_name: bigtable-initializer
223 image: gcr.io/google.com/cloudsdktool/cloud-sdk:latest
224 depends_on:
225 bigtable:
226 condition: service_healthy
227 volumes:
228 - ./init-bigtable.sh:/init-bigtable.sh
229 command: ["/bin/bash", "/init-bigtable.sh"]
230
  # Optional test data generator - run with:
  # docker compose --profile test_data up kafka-test-data-producer -d
  kafka-test-data-producer:
    image: confluentinc/cp-kafka:7.4.0
    hostname: kafka-test-data-producer
    container_name: kafka-test-data-producer
    depends_on:
      kafka:
        condition: service_healthy
      kafka-topic-creator:
        condition: service_completed_successfully
    # Both spellings accepted so either --profile flag works.
    profiles:
      - test_data
      - test-data
    environment:
      KAFKA_TOPIC: "osprey.actions_input"
      KAFKA_BROKER: "kafka:29092"
    volumes:
      - ./example_data:/osprey/example_data
    entrypoint:
      - /bin/bash
    command: ["/osprey/example_data/generate_test_data.sh"]
253
  # Postgres backing the osprey DB; the Druid services also depend on it
  # (their DB config comes from druid/environment).
  postgres:
    hostname: postgres
    container_name: postgres
    image: postgres:18
    ports:
      - "5432:5432"
    volumes:
      # postgres:18 images keep the data directory under /var/lib/postgresql.
      - metadata_data:/var/lib/postgresql
    environment:
      - POSTGRES_PASSWORD=FoolishPassword
      - POSTGRES_USER=osprey
      - POSTGRES_DB=osprey
    healthcheck:
      # $$ escapes compose interpolation so the container shell expands the vars.
      test: pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB
      start_period: 30s
      interval: 10s
      timeout: 10s
      retries: 5
272
  # DRUID, HERE BE DRAGONS
  # ZooKeeper used by the Druid services for coordination.
  # Need 3.5 or later for container nodes
  druid-zookeeper:
    hostname: druid-zookeeper
    container_name: druid-zookeeper
    image: zookeeper:3.5.10
    ports:
      - "2181:2181"
    environment:
      - ZOO_MY_ID=1
283
  # Druid coordinator, published on 8081. Shares /opt/shared with the
  # historical and middlemanager nodes (presumably deep storage — see
  # druid/environment).
  druid-coordinator:
    image: apache/druid:34.0.0
    hostname: druid-coordinator
    container_name: druid-coordinator
    volumes:
      - druid_shared:/opt/shared
      - coordinator_var:/opt/druid/var
    depends_on:
      - druid-zookeeper
      - postgres
    ports:
      - "8081:8081"
    command:
      - coordinator
    env_file:
      - druid/environment
300
301 druid-broker:
302 image: apache/druid:34.0.0
303 container_name: druid-broker
304 hostname: druid-broker
305 volumes:
306 - broker_var:/opt/druid/var
307 depends_on:
308 - druid-zookeeper
309 - postgres
310 - druid-coordinator
311 ports:
312 - "8082:8082"
313 command:
314 - broker
315 env_file:
316 - druid/environment
317
  # Druid historical node, published on 8083; mounts the shared volume
  # alongside coordinator/middlemanager.
  druid-historical:
    image: apache/druid:34.0.0
    container_name: druid-historical
    hostname: druid-historical
    volumes:
      - druid_shared:/opt/shared
      - historical_var:/opt/druid/var
    depends_on:
      - druid-zookeeper
      - postgres
      - druid-coordinator
    ports:
      - "8083:8083"
    command:
      - historical
    env_file:
      - druid/environment
335
  # Druid middleManager, published on 8091; the 8100-8105 range is for the
  # task subprocesses it spawns (assumed to match druid/environment — confirm).
  druid-middlemanager:
    image: apache/druid:34.0.0
    container_name: druid-middlemanager
    hostname: druid-middlemanager
    volumes:
      - druid_shared:/opt/shared
      - middle_var:/opt/druid/var
    depends_on:
      - druid-zookeeper
      - postgres
      - druid-coordinator
    ports:
      - "8091:8091"
      - "8100-8105:8100-8105"
    command:
      - middleManager
    env_file:
      - druid/environment
354
355 druid-router:
356 image: apache/druid:34.0.0
357 container_name: druid-router
358 hostname: druid-router
359 volumes:
360 - router_var:/opt/druid/var
361 depends_on:
362 - druid-zookeeper
363 - postgres
364 - druid-coordinator
365 ports:
366 - "8888:8888"
367 command:
368 - router
369 env_file:
370 - druid/environment
371
  # One-shot job: runs submit-specs.sh (curl image) to submit the specs
  # under ./druid/specs, then exits and is never restarted.
  druid-spec-submitter:
    image: curlimages/curl:latest
    # Start-only dependency (coordinator has no healthcheck), so the script
    # presumably retries until Druid is ready — verify submit-specs.sh.
    depends_on:
      - druid-coordinator
    volumes:
      - ./druid/specs:/specs
    command: ["/bin/sh", "/specs/submit-specs.sh"]
    restart: "no"