···9797 docker compose up -d
9898 ```
9999100100+ or using the wrapper script
101101+102102+ ```bash
103103+ ./start.sh
104104+ ```
This starts the osprey-worker on its own along with all its required dependencies.

Alternatively, you can start Osprey with `osprey-coordinator`; refer to the [Coordinator README](./example_docker_compose/run_osprey_with_coordinator/README.md) for more information.
109109+1001106. (Optional) **Port Forward the UI/UI API:**
101111102112 If you are running the docker compose on a headless machine, you will need to port forward the UI and UI API.
#!/bin/bash
#
# Continuously generate and send test actions to the osprey coordinator via gRPC.
# This mimics the Kafka test data generator but sends directly to the coordinator.
#
# Environment:
#   COORDINATOR_HOST - host:port of the coordinator sync-action API
#                      (default: localhost:19951)

# Fail fast: abort on errors, on unset variables, and on failures anywhere
# in a pipeline (plain 'set -e' misses the latter two).
set -euo pipefail

COORDINATOR_HOST="${COORDINATOR_HOST:-localhost:19951}"

# Check if grpcurl is installed (diagnostics go to stderr, not stdout)
if ! command -v grpcurl &> /dev/null; then
    echo "Error: grpcurl is not installed." >&2
    echo "Install it with: brew install grpcurl (macOS) or go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest" >&2
    exit 1
fi

# Check if jq is installed
if ! command -v jq &> /dev/null; then
    echo "Error: jq is not installed." >&2
    echo "Install it with: brew install jq (macOS)" >&2
    exit 1
fi

# Monotonically increasing counter attached to each sent action.
action_id=1

# Words to randomly generate post content
words=(hello the quick brown fox jumps over lazy dog and cat runs fast)

# Absolute directory containing this script; used to locate template.json.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Produce a pseudo-random user identifier of the form "user_<N>",
# where N is drawn uniformly from 100..9999.
generate_random_user_id() {
    local suffix
    suffix=$(shuf -i 100-9999 -n 1)
    printf 'user_%s\n' "$suffix"
}
# Emit the current UTC time in RFC3339 format, zero-padded to nanoseconds.
generate_timestamp() {
    local fmt='%Y-%m-%dT%H:%M:%S.000000000Z'
    date -u "+${fmt}"
}
# Build a five-word sentence by sampling (with replacement) from the global
# 'words' array; the sentence is terminated with a period.
generate_random_text() {
    local count=${#words[@]}
    local sentence="" i
    for i in 1 2 3 4 5; do
        sentence+="${words[RANDOM % count]}"
        if [[ "$i" -lt 5 ]]; then
            sentence+=" "
        fi
    done
    echo "${sentence}."
}
# Render template.json into a concrete action payload by substituting the
# $text, $timestamp, $user_id, $ip_address and $action_id placeholders.
# Reads the global action_id counter and SCRIPT_DIR; writes JSON to stdout.
generate_action() {
    # Declarations are split from command substitutions so a failing helper
    # is not masked by 'local' (which always returns 0) under 'set -e'.
    local text timestamp user_id ip_address
    text=$(generate_random_text)
    timestamp=$(generate_timestamp)
    user_id=$(generate_random_user_id)
    ip_address="192.168.1.$(shuf -i 1-254 -n 1)"

    # Build a sed argument array and invoke sed directly. The original
    # concatenated a command string and ran it through 'eval', which
    # re-parses the generated values and is an injection hazard if any
    # substitution ever contains shell metacharacters.
    local sed_args=(
        -e "s/\$text/$text/g"
        -e "s/\$timestamp/$timestamp/g"
        -e "s/\$user_id/$user_id/g"
        -e "s/\$ip_address/$ip_address/g"
        -e "s/\$action_id/$action_id/g"
    )
    sed "${sed_args[@]}" "$SCRIPT_DIR/template.json"
}
# Generate one action, convert it from the Kafka envelope format to the
# coordinator's sync-action request format, and submit it with grpcurl.
# Reads and increments the global action_id counter.
send_action() {
    # Declarations are split from command substitutions so that a failing
    # jq invocation is not masked by 'local' (which would otherwise return 0
    # and defeat 'set -e').
    local kafka_format_json action_data timestamp action_name data_payload
    kafka_format_json=$(generate_action)

    # Extract the data object from the Kafka format and convert to coordinator format
    action_data=$(echo "$kafka_format_json" | jq -c '.data')
    timestamp=$(echo "$kafka_format_json" | jq -r '.send_time')
    action_name=$(echo "$action_data" | jq -r '.action_name')
    data_payload=$(echo "$action_data" | jq -c '.data')

    echo "[$action_id] Sending action - Name: $action_name, Timestamp: $timestamp"

    # Build gRPC request format and stream it into the coordinator sync API.
    jq -n \
        --arg action_id "$action_id" \
        --arg action_name "$action_name" \
        --argjson data_payload "$data_payload" \
        --arg timestamp "$timestamp" \
        '{
            action_id: ($action_id | tonumber),
            action_name: $action_name,
            action_data_json: ($data_payload | tostring),
            timestamp: $timestamp
        }' | grpcurl -plaintext -d @ "$COORDINATOR_HOST" \
        osprey.rpc.osprey_coordinator.sync_action.v1.OspreyCoordinatorSyncActionService/ProcessAction

    # Plain arithmetic assignment: unlike '(( action_id++ ))', this can never
    # return a non-zero status and trip 'set -e'.
    action_id=$((action_id + 1))
}
# Signal handler: print a shutdown notice (preceded by a blank line so it
# is separated from any in-flight output) and exit successfully.
cleanup() {
    printf '\n%s\n' "Stopping data generation..."
    exit 0
}
# Ensure Ctrl+C / kill trigger a graceful shutdown via cleanup().
trap cleanup SIGINT SIGTERM

# Main execution: announce the target, then emit one action per second
# until the process is interrupted.
printf '%s\n' "Generating actions every second to Osprey Coordinator at $COORDINATOR_HOST"
printf '%s\n' "Press Ctrl+C to stop..."
printf '\n'

while :; do
    send_action
    sleep 1
done
While the Osprey worker can stand on its own by directly ingesting data from Kafka, the Osprey Coordinator offers an alternative with additional features such as load balancing and synchronous actions.
22+33+## Quick Start
44+55+The easiest way to run Osprey with the Coordinator is using the helper script from the repository root:
66+77+```bash
88+# Start with coordinator
99+./start.sh --with-coordinator
1010+1111+# Start in detached mode
1212+./start.sh --with-coordinator up -d
1313+1414+# Start with test data producer
1515+./start.sh --with-coordinator --profile coordinator_test_data up
1616+```
1717+1818+Or manually using docker compose override files:
1919+2020+```bash
2121+# From the repository root
2222+docker compose -f docker-compose.yaml -f example_docker_compose/run_osprey_with_coordinator/docker-compose.coordinator.yaml up
2323+```
2424+2525+## Overview
2626+2727+The **Osprey Coordinator** is a Rust-based service that acts as a central hub for distributing actions to Osprey Workers for rule evaluation. It provides two primary modes for receiving actions:
2828+2929+1. **Bidirectional gRPC Streaming** - Workers connect to the coordinator via persistent bidirectional streams
3030+2. **Synchronous gRPC API** - External services send actions directly for immediate processing
The coordinator can consume actions from Kafka and/or Pub/Sub, or receive them via gRPC; it manages action distribution across connected workers, handles acknowledgments, and ensures reliable action processing.
3333+3434+## Architecture
3535+3636+### Components
3737+3838+```
3939+┌───────────────────────────────┐
4040+│ Kafka Topics and/or Pubsub │
4141+│ (actions_input) │
4242+└──────────┬────────────────────┘
4343+ │
4444+ ▼
4545+┌─────────────────────────────────────────────┐
4646+│ Osprey Coordinator (Rust) │
4747+│ ┌─────────────────────────────────────┐ │
4848+│ │ Priority Queue │ │
4949+│ │ - Sync Actions (high priority) │ │
5050+│ │ - Async Actions (lower priority) │ │
5151+│ └─────────────────────────────────────┘ │
5252+│ │
5353+│ gRPC Services: │
5454+│ - Bidirectional Stream (port 19950) │
5555+│ - Sync Action API (port 19951) │
5656+└──────────────┬──────────────────────────────┘
5757+ │
5858+ ▼
5959+ ┌──────────────────────┐
6060+ │ Osprey Workers │
6161+ │ (Python) │
6262+ │ - Process rules │
6363+ │ - Send verdicts │
6464+ └──────────────────────┘
6565+```
6666+6767+## Configuration
6868+6969+The coordinator is configured via the `docker-compose.coordinator.yaml` override file in this directory (`example_docker_compose/run_osprey_with_coordinator/`). This file adds the coordinator service and modifies the worker configuration to connect to it.
7070+7171+### Environment Variables
7272+7373+Configure the coordinator via environment variables in `docker-compose.yaml`:
7474+7575+| Variable | Default | Description |
7676+|----------|---------|-------------|
7777+| `OSPREY_COORDINATOR_BIDI_STREAM_PORT` | `19950` | Port for bidirectional streaming |
7878+| `OSPREY_COORDINATOR_SYNC_ACTION_PORT` | `19951` | Port for synchronous action API |
7979+| `SNOWFLAKE_API_ENDPOINT` | `http://snowflake-id-worker:8088` | Snowflake ID service endpoint |
8080+| `ETCD_PEERS` | `http://etcd:2379` | etcd connection string |
8181+| `OSPREY_KAFKA_BOOTSTRAP_SERVERS` | `kafka:29092` | Kafka broker addresses |
8282+| `OSPREY_KAFKA_INPUT_STREAM_TOPIC` | `osprey.actions_input` | Kafka topic to consume |
8383+| `OSPREY_KAFKA_GROUP_ID` | `osprey_coordinator_group` | Kafka consumer group ID |
8484+| `OSPREY_COORDINATOR_CONSUMER_TYPE` | `kafka` | Consumer type: `kafka` or `pubsub` |
8585+| `MAX_TIME_TO_SEND_TO_ASYNC_QUEUE_MS` | `500` | Max time to wait before queuing async actions |
8686+| `MAX_ACKING_RECEIVER_WAIT_TIME_MS` | `60000` | Max time to wait for worker ack/nack |
8787+8888+### Example Configuration
8989+9090+To customize the coordinator, edit `example_docker_compose/run_osprey_with_coordinator/docker-compose.coordinator.yaml`:
9191+9292+```yaml
9393+services:
9494+ osprey-coordinator:
9595+ environment:
9696+ - RUST_LOG=info
9797+ - ETCD_PEERS=http://etcd:2379
9898+ - SNOWFLAKE_API_ENDPOINT=http://snowflake-id-worker:8088
9999+ - OSPREY_COORDINATOR_CONSUMER_TYPE=kafka # or 'pubsub'
100100+ - OSPREY_COORDINATOR_BIDI_STREAM_PORT=19950
101101+ - OSPREY_COORDINATOR_SYNC_ACTION_PORT=19951
102102+```
103103+104104+## Using the Coordinator
105105+106106+**Worker Configuration:**
107107+108108+### Worker Connection
109109+110110+When using `docker-compose.coordinator.yaml`, workers are automatically configured to connect to the coordinator. The override file sets:
111111+112112+```yaml
113113+osprey-worker:
114114+ environment:
115115+ - OSPREY_INPUT_STREAM_SOURCE=osprey_coordinator
116116+ - OSPREY_COORDINATOR_SERVICE_NAME=osprey_coordinator
117117+```
118118+119119+**How It Works:**
120120+121121+1. Worker connects to coordinator on port 19950
122122+2. Worker sends initial connection request with client ID
123123+3. Coordinator sends actions to worker via the bidirectional stream
124124+4. Worker processes actions through rules
125125+5. Worker sends ack/nack with optional verdicts back to coordinator
126126+6. Connection automatically reconnects every 60-120 seconds for load balancing
127127+128128+129129+### Direct Action Submission (Sync API)
130130+131131+External services can submit actions directly to the coordinator for synchronous processing.
132132+133133+**Using grpcurl:**
134134+135135+```bash
136136+# Send a single action for immediate processing
137137+grpcurl -plaintext \
138138+ -d '{
139139+ "action_id": 12345,
140140+ "action_name": "user_login",
141141+ "action_data_json": "{\"user_id\":\"user_123\",\"ip_address\":\"192.168.1.1\"}",
142142+ "timestamp": "2024-11-25T10:30:00.000000000Z"
143143+ }' \
144144+ localhost:19951 \
145145+ osprey.rpc.osprey_coordinator.sync_action.v1.OspreyCoordinatorSyncActionService/ProcessAction
146146+```
Or use the test data producer:
149149+150150+```bash
151151+./start.sh --with-coordinator --profile coordinator_test_data up
152152+```
153153+154154+### Kafka/Pubsub Integration
155155+The coordinator can automatically consume from either Kafka or PubSub (but not both simultaneously). Set `OSPREY_COORDINATOR_CONSUMER_TYPE` to choose:
156156+157157+- `kafka` (default) - Consume from Kafka
158158+- `pubsub` - Consume from Google Cloud PubSub
159159+160160+Configure the appropriate environment variables for your chosen consumer in `example_docker_compose/run_osprey_with_coordinator/docker-compose.coordinator.yaml`:
161161+162162+```yaml
163163+ osprey-coordinator:
164164+ environment:
165165+ # Consumer selection (kafka or pubsub)
166166+ - OSPREY_COORDINATOR_CONSUMER_TYPE=kafka
167167+ # Kafka configuration (when using kafka)
168168+ - OSPREY_KAFKA_BOOTSTRAP_SERVERS=kafka:29092
169169+ - OSPREY_KAFKA_INPUT_STREAM_TOPIC=osprey.actions_input
170170+ - OSPREY_KAFKA_GROUP_ID=osprey_coordinator_group
171171+172172+ # Pubsub
173173+ - OSPREY_COORDINATOR_SERVICE_ACCOUNT
174174+ - PUBSUB_SUBSCRIPTION_PROJECT_ID
175175+ - PUBSUB_SUBSCRIPTION_ID
176176+ - PUBSUB_ENCRYPTION_KEY_URI
177177+ # Optionally
      - PUBSUB_MAX_MESSAGES=5000 # default
      - PUBSUB_MAX_PROCESSING_MESSAGES=5000 # default
180180+181181+ # shared by both Kafka and Pubsub, optional
      - MAX_TIME_TO_SEND_TO_ASYNC_QUEUE_MS=500 # default
      - MAX_ACKING_RECEIVER_WAIT_TIME_MS=60000 # default
184184+```
185185+186186+187187+**Sending Actions via Kafka:**
188188+189189+Use the test data producer:
190190+191191+```bash
192192+# Start the Kafka test data producer
193193+./start.sh --with-coordinator --profile test_data up kafka-test-data-producer -d
194194+```
195195+
//! Consumer backends for the coordinator: Kafka and Pub/Sub ingestion,
//! plus the shared consumer/decoder abstractions they implement.

pub mod kafka;
pub mod message_consumer;
pub mod message_decoder;
pub mod pubsub;

// Re-export the consumer entry points so callers can start either backend
// without naming the submodule paths.
pub use kafka::start_kafka_consumer;
pub use pubsub::start_pubsub_subscriber;