···30303131### Audio output
3232- [x] Built-in SDL audio
3333-- [x] AirPlay (RAOP) — stream to Apple TV, HomePod, Airport Express, shairport-sync
3333+- [x] AirPlay (RAOP) — single or multi-room fan-out to Apple TV, HomePod, Airport Express, shairport-sync
3434- [x] Snapcast (FIFO/pipe) — synchronised multi-room via snapserver
3535- [x] Squeezelite (Slim Protocol + HTTP broadcast) — synchronised multi-room
3636- [x] Chromecast
···228228rockboxd | ffplay -f s16le -ar 44100 -ac 2 -
229229```
230230231231-### AirPlay (RAOP)
231231+### AirPlay (RAOP) — single or multi-room
232232+233233+Single receiver:
232234233235```toml
234236music_dir = "/path/to/Music"
···237239airplay_port = 5000 # optional, default 5000
238240```
239241242242+Multi-room (fan-out to N receivers simultaneously):
243243+244244+```toml
245245+music_dir = "/path/to/Music"
246246+audio_output = "airplay"
247247+248248+[[airplay_receivers]]
249249+host = "192.168.1.50" # living room
250250+port = 5000 # optional, default 5000
251251+252252+[[airplay_receivers]]
253253+host = "192.168.1.51" # bedroom
254254+# port defaults to 5000
255255+```
256256+240257Streams ALAC-encoded audio over RTP to any RAOP-compatible receiver — Apple
241258TV, HomePod, Airport Express, or
242242-[shairport-sync](https://github.com/mikebrady/shairport-sync).
259259+[shairport-sync](https://github.com/mikebrady/shairport-sync). All receivers
260260+share the same `initial_rtptime`, so RTP-level playback synchronisation is
261261+within one frame (~8 ms) across the LAN.
243262244263### Squeezelite (Slim Protocol — multi-room)
245264
+319-212
crates/airplay/README.md
···11# rockbox-airplay — AirPlay PCM Sink
2233This document traces every hop an audio frame takes from the Rockbox C firmware
44-through the `rockbox-airplay` Rust crate to an AirPlay (RAOP) receiver.
44+through the `rockbox-airplay` Rust crate to one or more AirPlay (RAOP)
55+receivers.
5667---
78···18199. [RTP audio stream (`rtp.rs`)](#rtp-audio-stream-rtprs)
192010. [RTCP synchronisation](#rtcp-synchronisation)
202111. [NTP timing responder](#ntp-timing-responder)
2121-12. [Track transitions](#track-transitions)
2222-13. [Configuration](#configuration)
2323-14. [AirPlay 2 probe](#airplay-2-probe)
2424-15. [Gotchas and known limits](#gotchas-and-known-limits)
2222+12. [Multi-room fan-out](#multi-room-fan-out)
2323+13. [Track transitions](#track-transitions)
2424+14. [Configuration](#configuration)
2525+15. [AirPlay 2 probe](#airplay-2-probe)
2626+16. [Gotchas and known limits](#gotchas-and-known-limits)
25272628---
2729···29313032The AirPlay sink lets Rockbox stream audio to any RAOP-compatible receiver —
3133Apple TV, HomePod, Airport Express, or third-party software such as
3232-[shairport-sync](https://github.com/mikebrady/shairport-sync). It implements
3333-**AirPlay 1 (RAOP)** entirely in pure Rust with no external C libraries.
3434+[shairport-sync](https://github.com/mikebrady/shairport-sync). Multiple
3535+receivers can be configured simultaneously for multi-room playback.
3636+3737+The implementation is **AirPlay 1 (RAOP)** in pure Rust with no external C
3838+libraries. AirPlay 2 pairing (HAP SRP6a + x25519 ECDH) is attempted as a
3939+non-fatal probe before falling through to the AirPlay 1 path.
34403541The protocol stack looks like:
36423743```
3838-RTSP/TCP ── session negotiation (ANNOUNCE, SETUP, RECORD, TEARDOWN)
3939-RTP/UDP ── ALAC-encoded audio frames
4040-RTCP/UDP ── synchronisation (NTP send-report) every ~350 ms
4141-UDP ── NTP timing response service
4444+RTSP/TCP ── session negotiation per receiver (ANNOUNCE, SETUP, RECORD, TEARDOWN)
4545+RTP/UDP ── ALAC-encoded audio frames (same frame sent unicast to each receiver)
4646+RTCP/UDP ── synchronisation (NTP send-report) every ~350 ms per receiver
4747+UDP ── shared NTP timing response service (one port, all receivers)
4248```
43494450---
···4652## Layer map
47534854```
4949-┌────────────────────────────────────────────────────────┐
5050-│ Rockbox C firmware (pcm.c, audio thread) │
5151-│ pcm_play_data() → sink.ops.play() │
5252-│ pcm_play_dma_complete_callback() per chunk │
5353-└───────────────────┬────────────────────────────────────┘
5555+┌─────────────────────────────────────────────────────────────┐
5656+│ Rockbox C firmware (pcm.c, audio thread) │
5757+│ pcm_play_data() → sink.ops.play() │
5858+│ pcm_play_dma_complete_callback() per chunk │
5959+└───────────────────┬─────────────────────────────────────────┘
5460 │ raw S16LE stereo PCM chunks
5555-┌───────────────────▼────────────────────────────────────┐
5656-│ firmware/target/hosted/pcm-airplay.c │
5757-│ sink_dma_start() → pcm_airplay_connect() │
5858-│ airplay_thread() → pcm_airplay_write() │
5959-│ sink_dma_stop() → pcm_airplay_stop() │
6060-└───────────────────┬────────────────────────────────────┘
6161+┌───────────────────▼─────────────────────────────────────────┐
6262+│ firmware/target/hosted/pcm-airplay.c │
6363+│ sink_dma_start() → pcm_airplay_connect() │
6464+│ airplay_thread() → pcm_airplay_write() │
6565+│ sink_dma_stop() → pcm_airplay_stop() │
6666+└───────────────────┬─────────────────────────────────────────┘
6167 │ extern "C" FFI
6262-┌───────────────────▼────────────────────────────────────┐
6363-│ crates/airplay/src/lib.rs │
6464-│ AirPlaySession { sender, rtsp, buf, first_frame } │
6565-│ pcm_airplay_connect() — RTSP handshake │
6666-│ pcm_airplay_write() — ALAC frame dispatch │
6767-│ pcm_airplay_stop() — TEARDOWN + session clear │
6868-└───────┬───────────────────────┬────────────────────────┘
6969- │ RTSP/TCP │ ALAC frames
7070-┌───────▼────────────┐ ┌───────▼──────────────────────┐
7171-│ rtsp.rs │ │ alac.rs │
7272-│ RtspClient │ │ encode_frame() │
7373-│ ANNOUNCE / SETUP │ │ BitWriter │
7474-│ RECORD / TEARDOWN │ │ 352 S16LE → 1411-byte frame │
7575-└────────────────────┘ └───────┬──────────────────────┘
7676- │ encoded frames
7777- ┌───────▼──────────────────────┐
7878- │ rtp.rs │
7979- │ RtpSender │
8080- │ send_audio() — RTP/UDP │
8181- │ send_sync() — RTCP │
8282- │ timing_responder() — NTP │
8383- └──────────────────────────────┘
8484- │ UDP packets
8585- ┌───────▼──────────────────────┐
8686- │ AirPlay receiver │
8787- │ (Apple TV, shairport-sync…) │
8888- └──────────────────────────────┘
6868+┌───────────────────▼─────────────────────────────────────────┐
6969+│ crates/airplay/src/lib.rs │
7070+│ AirPlaySession { │
7171+│ receivers: Vec<ReceiverHandle>, │
7272+│ rtsp_clients: Vec<RtspClient>, │
7373+│ timing: TimingSocket, ← shared, one port │
7474+│ pacing: PacingClock, ← shared clock │
7575+│ buf, first_frame, │
7676+│ } │
7777+│ pcm_airplay_connect() — handshake per receiver │
7878+│ pcm_airplay_write() — encode once, fan out │
7979+│ pcm_airplay_stop() — TEARDOWN all + session clear │
8080+└───┬───────────────────────┬─────────────────────────────────┘
8181+ │ RTSP/TCP (per rx) │ ALAC frames
8282+┌───▼────────────┐ ┌───────▼─────────────────────────────────┐
8383+│ rtsp.rs │ │ alac.rs │
8484+│ RtspClient │ │ encode_frame() — called once/frame │
8585+│ ANNOUNCE │ │ BitWriter │
8686+│ SETUP │ │ 352 S16LE → 1411-byte verbatim frame │
8787+│ RECORD │ └───────┬─────────────────────────────────┘
8888+│ SET_PARAMETER │ │ encoded frame (shared reference)
8989+│ TEARDOWN │ ┌───────▼─────────────────────────────────┐
9090+└────────────────┘ │ rtp.rs │
9191+ │ ReceiverHandle (per receiver) │
9292+ │ send_audio_packet() — RTP/UDP │
9393+ │ send_sync() — RTCP │
9494+ │ TimingSocket (shared, one port) │
9595+ │ timing_responder() — NTP thread │
9696+ │ PacingClock (shared) │
9797+ │ pace() — one sleep for all rooms │
9898+ └───────┬─────────────────────────────────┘
9999+ │ UDP packets (fan-out)
100100+ ┌─────────────┼─────────────┐
101101+ ┌──────▼──────┐ ┌────▼──────┐ ┌───▼──────┐
102102+ │ Receiver 1 │ │ Receiver 2│ │ … │
103103+ └─────────────┘ └───────────┘ └──────────┘
89104```
9010591106---
···99114|-------------------|---------------------------------------------------------------------|
100115| `init` | `pthread_mutex_init` (recursive) |
101116| `postinit` | no-op |
102102-| `set_freq` | records `current_sample_rate` from `hw_freq_sampr[freq]` |
117117+| `set_freq` | no-op (sample rate is fixed at 44100 Hz) |
103118| `lock` / `unlock` | `pthread_mutex_lock/unlock` |
104104-| `play` | `sink_dma_start` — connects, spawns `airplay_thread` |
119119+| `play` | `sink_dma_start` — connects all receivers, spawns `airplay_thread` |
105120| `stop` | `sink_dma_stop` — signals thread, joins, calls `pcm_airplay_stop()` |
106121107122`airplay_pcm_sink` is registered at index `PCM_SINK_AIRPLAY = 2` in the
···124139 5. pcm_play_dma_status_callback(STARTED) ← tells audio engine chunk consumed
125140```
126141127127-Unlike the FIFO sink, there is **no explicit real-time pacing** in C. Pacing is
128128-handled inside `rtp.rs` — the RTP sender sleeps to maintain the correct
129129-wall-clock transmission rate based on the RTP timestamp increment.
142142+Real-time pacing is handled inside `PacingClock` in `rtp.rs` — the shared
143143+clock sleeps once per frame, after that frame has been sent to every receiver.
130144131145---
132146133147## FFI boundary
134148135135-`crates/airplay/src/lib.rs` exports three `#[no_mangle] extern "C"` functions:
149149+`crates/airplay/src/lib.rs` exports these `#[no_mangle] extern "C"` functions:
136150137137-| C symbol | Rust function | Purpose |
138138-|------------------------|------------------------|--------------------------------------|
139139-| `pcm_airplay_set_host` | `pcm_airplay_set_host` | Store `HOST` + `PORT` atomics/mutex |
140140-| `pcm_airplay_connect` | `pcm_airplay_connect` | Open RTSP + RTP session (idempotent) |
141141-| `pcm_airplay_write` | `pcm_airplay_write` | Buffer PCM, encode ALAC, send RTP |
142142-| `pcm_airplay_stop` | `pcm_airplay_stop` | Send TEARDOWN, clear session |
151151+| C symbol | Purpose |
152152+|-----------------------------|----------------------------------------------------------|
153153+| `pcm_airplay_set_host` | Set a single receiver (clears any previous list) |
154154+| `pcm_airplay_add_receiver` | Append one receiver to the multi-room list |
155155+| `pcm_airplay_clear_receivers` | Clear the receiver list before re-configuring |
156156+| `pcm_airplay_connect` | Open RTSP + RTP sessions for all configured receivers |
157157+| `pcm_airplay_write` | Buffer PCM, encode ALAC once, fan out to every receiver |
158158+| `pcm_airplay_stop` | Send TEARDOWN to all, clear session |
159159+| `pcm_airplay_close` | Same as stop (called on sink switch) |
143160144144-`HOST` is a `Mutex<Option<String>>` and `PORT` is an `AtomicU16` (default
145145-5000). `SESSION` is a `Mutex<Option<AirPlaySession>>` — the session is
146146-created once and reused across `write` calls for the lifetime of a track.
161161+`SESSION` is a `Mutex<Option<AirPlaySession>>`. `CONFIG` is a
162162+`Mutex<AirPlayConfig>` holding `receivers: Vec<(String, u16)>`.
147163148164### Force-link shim
149165···155171use rockbox_airplay::_link_airplay as _;
156172```
157173158158-where `_link_airplay` is a public no-op function in `lib.rs`. This is enough
159159-to pull the entire crate into the link graph.
174174+where `_link_airplay` is a public no-op function in `lib.rs`.
160175161176---
162177···168183```
169184if SESSION is already Some → return OK immediately (idempotent)
170185171171-1. Probe AirPlay 2 (non-fatal — logs and falls through on failure)
172172-2. RtpSender::bind(host, ports) ← binds three UDP sockets
173173-3. RtspClient::new(host, port) ← opens TCP connection to receiver
174174-4. rtsp.announce(sdp) ← sends SDP describing the ALAC stream
175175-5. rtsp.setup(transport) ← negotiates UDP port numbers
176176-6. rtsp.record() ← starts the session
177177-7. sender.send_initial_sync() ← sends first RTCP sync packet
178178-8. SESSION = Some(AirPlaySession { sender, rtsp, buf: [], first_frame: true })
186186+1. Read receiver list from CONFIG
187187+2. TimingSocket::bind() ← one shared NTP timing port + responder thread
188188+3. Choose shared initial_rtptime ← same value for ALL receivers (sync anchor)
189189+4. For each configured receiver:
190190+ a. connect_one(host, port, initial_rtptime, timing_port)
191191+ ├── Probe AirPlay 2 (non-fatal)
192192+ ├── ReceiverHandle::bind() ← audio_sock + ctrl_sock
193193+ ├── RtspClient::connect() ← TCP to receiver
194194+ ├── rtsp.announce(sdp) ← SDP with ALAC params
195195+ ├── rtsp.setup(ctrl, timing) ← get server UDP ports
196196+ ├── rx.connect(audio, ctrl) ← connect audio_sock
197197+ ├── rtsp.record(seq=0, ts) ← start stream
198198+ └── rtsp.set_parameter_volume(0.0)
199199+ b. On failure: log warning, continue (partial success OK)
200200+5. Abort only if ZERO receivers connected
201201+6. session.send_initial_sync() ← RTCP sync to all receivers
202202+7. SESSION = Some(AirPlaySession { receivers, rtsp_clients, timing, pacing, … })
179203```
180204181181-`pcm_airplay_write(data, len)` appends the incoming PCM bytes to `buf`, then
182182-drains complete 352-sample (1408-byte) frames in a loop:
205205+`pcm_airplay_write(data, len)` accumulates PCM in `buf`, then for each
206206+complete 352-sample frame:
183207184208```rust
185185-while buf.len() >= FRAME_SIZE:
186186- frame_pcm = buf.drain(..FRAME_SIZE)
187187- alac_frame = alac::encode_frame(&frame_pcm)
188188- sender.send_audio(&alac_frame, first_frame)
189189- first_frame = false
209209+alac = encode_frame(&frame_bytes) // encode ONCE
210210+211211+for rx in &mut receivers:
212212+ rx.send_audio_packet(&alac, rtptime, …) // send to EACH receiver
213213+214214+pacing.advance() // increment rtptime + frames_sent
215215+if frames_sent % 44 == 0:
216216+ for rx: rx.send_sync(current_ts, next_ts, false)
217217+218218+pacing.pace() // sleep ONCE for all rooms
190219```
191220192192-`pcm_airplay_stop()` sends RTSP TEARDOWN and sets `SESSION = None`.
221221+`pcm_airplay_stop()` sends RTSP TEARDOWN to every receiver, then sets
222222+`SESSION = None`.
193223194224---
195225196226## RTSP handshake (`rtsp.rs`)
197227198198-`RtspClient` speaks synchronous RTSP over a single TCP connection. The full
199199-exchange for one session is:
228228+`RtspClient` speaks synchronous RTSP over a single TCP connection **per
229229+receiver**. The TCP connection is kept alive in `AirPlaySession.rtsp_clients`
230230+for the duration of the track — dropping it would cause the receiver to detect
231231+EOF and tear down its audio socket.
200232201233### 1. ANNOUNCE
202234···211243m=audio 0 RTP/AVP 96
212244a=rtpmap:96 AppleLossless
213245a=fmtp:96 352 0 16 40 10 14 2 255 0 0 44100
246246+a=min-latency:3528
214247```
215248216216-The `fmtp` parameters encode:
217217-`<frames_per_packet> <version> <bit_depth> <rice_history_mult>
249249+The `fmtp` parameters: `<frames/pkt> <version> <bit_depth> <rice_history_mult>
218250<rice_initial_history> <rice_limit> <channels> <max_run> <max_frame_bytes>
219219-<avg_bit_rate> <sample_rate>`
251251+<avg_bit_rate> <sample_rate>`.
252252+253253+No `a=rsaaeskey` line — encryption is disabled. The receiver sets
254254+`stream.encrypted = 0` and passes frames straight to the ALAC decoder.
220255221256### 2. SETUP
222257223223-Sends a `Transport` header requesting UDP:
258258+Requests UDP transport, advertising our local ctrl and timing ports:
224259225260```
226226-Transport: RTP/AVP/UDP;unicast;interleaved=0-1;
227227- client_port=<audio_port>-<ctrl_port>
261261+Transport: RTP/AVP/UDP;unicast;interleaved=0-1;mode=record;
262262+ control_port=<local_ctrl>;timing_port=<shared_timing>
228263```
229264230230-`interleaved=0-1` is required by many receivers even though the transport is
231231-UDP (not RTSP interleaved). The response carries the server's UDP port pair,
232232-extracted by `parse_port()`.
265265+All receivers are advertised the **same** `timing_port` (the shared
266266+`TimingSocket`). The response carries the server's audio, ctrl, and timing
267267+ports, extracted by `parse_port()`.
233268234269### 3. RECORD
235270236236-Starts the stream. Sends `RTP-Info` with sequence number and RTP timestamp.
271271+Starts the stream. Sends `RTP-Info` with sequence number 0 and the shared
272272+`initial_rtptime`.
237273238274### 4. SET_PARAMETER (volume)
239275240240-Sets playback volume. Sent as a float string in a `text/parameters` body:
241241-`volume: -20.0` (range −144 to 0; 0 is full volume).
276276+Sets playback volume to maximum (0.0 in RAOP's −144…0 range).
242277243278### 5. TEARDOWN
244279245245-Gracefully terminates the session. Called from `pcm_airplay_stop()`.
280280+Gracefully terminates the session. Called per-receiver from
281281+`pcm_airplay_stop()`.
246282247283---
248284249285## ALAC encoding (`alac.rs`)
250286251251-`encode_frame(samples: &[i16])` encodes exactly **352 stereo S16LE samples**
287287+`encode_frame(pcm: &[u8])` encodes exactly **352 stereo S16LE samples**
252288(1408 bytes of PCM) into an ALAC verbatim ("uncompressed escape") frame.
253289254290### Frame format
255291256256-The Hammerton ALAC decoder expects this exact bit layout:
292292+The Hammerton ALAC decoder (used by shairport-sync) expects this exact bit
293293+layout — note there is **no** 4-bit element-instance tag after the channel
294294+field:
257295258296```
259259-Bits Width Field
260260-0–2 3 channels − 1 (= 1 for stereo)
261261-3–6 4 discarded (0)
262262-7–18 12 discarded (0)
263263-19 1 hassize = 0
264264-20–23 4 uncompressed_bytes = 0
265265-24 1 isNotCompressed = 1 ← verbatim frame flag
266266-25+ 32 each sample as big-endian signed 16-bit, left then right
297297+Bits Width Field
298298+0–2 3 channels − 1 (= 1 for stereo)
299299+3–6 4 output_waiting — read and discarded
300300+7–18 12 unknown — read and discarded
301301+19 1 hassize = 0
302302+20–21 2 uncompressed_bytes = 0
303303+22 1 isNotCompressed = 1 ← verbatim frame flag
304304+23+ 32 each sample as big-endian signed 16-bit, left then right
305305+ (352 L/R pairs × 32 bits = 11,264 bits)
267306```
268307269269-Output size = 4 bytes header + 352 × 2 channels × 2 bytes/sample
270270- = **1412 bytes** (rounded up to byte boundary).
308308+Total: 23 header bits + 352 × 32 sample bits = 11,287 bits → **1411 bytes**
309309+(padded to byte boundary with no END tag).
271310272311### BitWriter
273312274274-`BitWriter` accumulates bits MSB-first into a `Vec<u8>`:
313313+`BitWriter` accumulates bits MSB-first into a `[u8; 1411]` buffer:
275314276315```rust
277277-fn write(&mut self, value: u64, nbits: u32)
316316+fn write(&mut self, value: u32, nbits: usize)
278317fn align(&mut self) // zero-pad to next byte boundary
279318```
280319281281-The encoder calls `write` for the 25-bit header fields and then for each
282282-sample (16 bits per channel, interleaved L/R), then `align()` to flush the
283283-final byte.
284284-285320---
286321287322## RTP audio stream (`rtp.rs`)
288323289289-`RtpSender` opens **three UDP sockets** at construction time:
324324+Three types in `rtp.rs` handle the per-receiver and shared concerns:
325325+326326+### `ReceiverHandle` — per receiver
290327291291-| Socket | Direction | Purpose |
292292-|---------------|-------------------------|---------------------|
293293-| `audio_sock` | → receiver audio port | RTP audio frames |
294294-| `ctrl_sock` | ↔ receiver control port | RTCP sync packets |
295295-| `timing_sock` | ↔ receiver timing port | NTP timing exchange |
328328+Owns the two UDP sockets for one AirPlay endpoint:
296329297297-### `send_audio(frame, marker)`
330330+| Socket | Direction | Purpose |
331331+|--------------|-------------------------|---------------------|
332332+| `audio_sock` | → receiver audio port | RTP audio frames |
333333+| `ctrl_sock` | ↔ receiver control port | RTCP sync packets |
298334299299-Builds a 12-byte RTP header:
335335+Also holds `ssrc` (random per receiver) and `seqnum` (wrapping u16).
336336+337337+`send_audio_packet(alac_frame, rtptime, frame_index, first)` builds and sends
338338+one RTP packet — the ALAC frame prefixed by this 12-byte header:
300339301340```
302341 0 1 2 3
303342 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
304343├─┤─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
305305-│V=2│P│X│ CC │M│ PT=96 │ Sequence Number │
306306-├───────────────────────────────┼─────────────────────────────┤
307307-│ Timestamp (RTP clock units) │
308308-├─────────────────────────────────────────────────────────────┤
309309-│ SSRC │
310310-└─────────────────────────────────────────────────────────────┘
344344+│V=2│P│X│ CC │M│ PT=96 │ Sequence Number │
345345+├───────────────────────────────┼───────────────────────────────┤
346346+│ Timestamp (shared rtptime — same for all receivers) │
347347+├────────────────────────────────────────────────────────────────┤
348348+│ SSRC (per-receiver random u32) │
349349+└────────────────────────────────────────────────────────────────┘
311350```
312351313313-- `M` (marker) = 1 on the first frame of a session, 0 thereafter.
314314-- Timestamp increments by **352** per frame (one ALAC frame = 352 samples).
315315-- SSRC is a random 32-bit value chosen at sender creation.
352352+- `M` (marker) = 1 on the first frame of a session only.
353353+- Timestamp is **shared** across all receivers — all rooms decode the same
354354+ logical frame position.
355355+356356+### `TimingSocket` — shared
357357+358358+One UDP socket bound to a random port. All receivers are told this single
359359+port in SETUP. `timing_responder` (a background thread) answers any PT=0xD2
360360+timing request from any source with a PT=0xD3 response containing the current
361361+NTP time.
362362+363363+### `PacingClock` — shared
364364+365365+Tracks `stream_start` (an `Instant`), `frames_sent`, and the current `rtptime`.
366366+After all receivers have been sent a frame, `pace()` sleeps until the frame's
367367+wall-clock deadline:
368368+369369+```rust
370370+let expected = stream_start + frames_sent × FRAME_DURATION_US;
371371+if expected > Instant::now() { thread::sleep(expected - now); }
372372+```
316373317317-**Real-time pacing**: `send_audio` tracks the expected transmission instant
318318-using `Instant` and `frame_count × Duration_per_frame` and calls
319319-`thread::sleep` when the sender is running ahead.
374374+`FRAME_DURATION_US = 352 × 1_000_000 / 44100 ≈ 7982 µs`.
320375321376---
322377323378## RTCP synchronisation
324379325325-`send_sync(first)` sends a 20-byte RTCP NTP Send Report to the control socket
326326-every **44 frames** (~350 ms at 44100 Hz):
380380+`ReceiverHandle::send_sync(current_ts, next_ts, first)` sends a 20-byte RTCP
381381+packet on the ctrl socket every **44 frames** (~350 ms at 44100 Hz):
327382328383```
329329-Byte Field
330330-0 V=2, P=0, RC=0
331331-1 PT=200 (SR) or 0xD4 (first sync)
332332-2–3 length = 4 (words after fixed header)
333333-4–7 SSRC
334334-8–11 NTP timestamp seconds (since 1900-01-01)
335335-12–15 NTP timestamp fraction (2^32 units)
336336-16–19 RTP timestamp (matching the next audio frame's timestamp)
384384+Byte Field
385385+0 0x80 (normal) or 0x90 (first sync, extension bit set)
386386+1 0xD4 (PT=212, Apple proprietary sync)
387387+2–3 0x0007 (length field)
388388+4–7 current RTP timestamp (frame just sent)
389389+8–11 NTP seconds (since 1900-01-01, = UNIX_time + 0x83AA7E80)
390390+12–15 NTP fraction (2^32 units per second)
391391+16–19 next RTP timestamp (next frame to be sent)
337392```
338393339339-`NTP_EPOCH_DELTA = 0x83AA_7E80` converts UNIX time (seconds since 1970) to NTP
340340-time (seconds since 1900).
341341-342342-The first sync packet (`first=true`) uses PT=`0xD4` (not standard SR) — some
343343-receivers require this to accept the initial synchronisation.
394394+`current_ts` and `next_ts` are derived from the shared `PacingClock.rtptime`,
395395+so all receivers receive consistent timestamps.
344396345397---
346398347399## NTP timing responder
348400349349-A background thread (`timing_responder`) listens on `timing_sock` and answers
350350-NTP timing requests from the receiver:
401401+A single background thread (`timing_responder`) listens on the shared
402402+`TimingSocket` and answers NTP timing requests from **all** receivers:
351403352404```
353353-Request PT = 0xD2 (timing request)
405405+Request PT = 0xD2 (timing request, from any receiver)
354406Response PT = 0xD3 (timing response)
355407356356-Response body (32 bytes):
357357- [0–3] SSRC
358358- [4–7] 0 (reference seconds)
359359- [8–11] 0 (reference fraction)
360360- [12–15] received seconds (echoed from request)
361361- [16–19] received fraction (echoed from request)
362362- [20–23] send seconds (current NTP time)
363363- [24–27] send fraction (current NTP time)
408408+Response layout (32 bytes):
409409+ [0] 0x80
410410+ [1] 0xD3
411411+ [2–3] sequence number (copied from request)
412412+ [4–7] padding (zero)
413413+ [8–15] reference NTP (zero)
414414+ [16–23] originate NTP (copied from request bytes [16–23])
415415+ [24–31] receive/transmit NTP (current system time)
364416```
365417366366-Many receivers stall playback if timing responses stop arriving. The thread
367367-runs for the entire duration of the session.
418418+Using one socket for all receivers works because the responder uses
419419+`send_to(src)` to reply to the exact source address of each request.
420420+421421+---
422422+423423+## Multi-room fan-out
424424+425425+The complete per-frame processing path in `AirPlaySession::send_frame()`:
426426+427427+```
428428+1. encode_frame(&pcm) → alac: [u8; 1411] (once, ~5 µs)
429429+2. for rx in receivers:
430430+ rx.send_audio_packet(&alac, …) → UDP send (per receiver, ~1 µs each)
431431+3. pacing.advance() → increment rtptime, frames_sent
432432+4. if frames_sent % 44 == 0:
433433+ for rx in receivers:
434434+ rx.send_sync(…) → RTCP UDP send (per receiver)
435435+5. pacing.pace() → thread::sleep (once, ~7982 µs avg)
436436+```
437437+438438+With N receivers, steps 2 and 4 take O(N) sequential UDP sends (~1–2 µs each).
439439+Even with 10 receivers the added latency (~20 µs) is negligible compared to
440440+the 7982 µs frame budget.
441441+442442+### Sync accuracy
444444+All receivers share the same `initial_rtptime` and are sent each frame within
445445+the same loop iteration (a few microseconds apart). Their playout timestamps
446446+are identical. Actual synchronisation accuracy is bounded by:
447447+- Receiver buffer depth (typically 1–3 s for shairport-sync)
448448+- NTP timing exchange accuracy (usually < 5 ms on LAN)
449449+450450+This gives **AirPlay 1-level sync** — adequate for multi-room on a LAN.
451451+Sample-accurate sync across rooms requires AirPlay 2's clock-anchoring, which
452452+is a different protocol.
453453+454454+### Partial failure
455455+456456+If one receiver fails to connect during `pcm_airplay_connect()`, the error is
457457+logged at `warn` level and the session continues with the remaining receivers.
458458+The session is only aborted when **zero** receivers connect successfully.
368459369460---
370461···372463373464When Rockbox moves to the next track:
374465375375-1. `sink_dma_stop()` is called → `pcm_airplay_stop()` → RTSP TEARDOWN →
376376- `SESSION = None`.
377377-2. `sink_dma_start()` is called for the new track → `pcm_airplay_connect()` →
378378- new RTSP session with fresh RTP sequence/timestamp counters.
466466+1. `sink_dma_stop()` → `pcm_airplay_stop()` → RTSP TEARDOWN on every receiver
467467+ → `SESSION = None`.
468468+2. `sink_dma_start()` → `pcm_airplay_connect()` → new RTSP sessions with
469469+ fresh RTP sequence/timestamp counters and a new random `initial_rtptime`.
379470380471There is a brief gap (TEARDOWN round-trip + new ANNOUNCE/SETUP/RECORD) between
381381-tracks. This is inherent to RAOP and is typically inaudible (<100 ms).
472472+tracks, inherent to RAOP and typically inaudible (< 100 ms).
382473383474---
384475385476## Configuration
386477387387-In `~/.config/rockbox.org/settings.toml`:
478478+### Single receiver (backward-compatible)
388479389480```toml
390481audio_output = "airplay"
391391-airplay_host = "192.168.1.x" # IP of the AirPlay receiver
482482+airplay_host = "192.168.1.50" # IP of the AirPlay receiver
392483airplay_port = 5000 # optional, default 5000
393484```
394485395395-`crates/settings/src/lib.rs:load_settings()` reads these values and calls:
486486+### Multi-room
396487397397-```rust
398398-pcm::airplay_set_host(&host, port);
399399-pcm::switch_sink(PCM_SINK_AIRPLAY);
488488+```toml
489489+audio_output = "airplay"
490490+491491+[[airplay_receivers]]
492492+host = "192.168.1.50"
493493+port = 5000 # optional, default 5000
494494+495495+[[airplay_receivers]]
496496+host = "192.168.1.51"
497497+498498+[[airplay_receivers]]
499499+host = "192.168.1.52"
500500+port = 5001
400501```
401502402402-`airplay_set_host` stores the host in `HOST: Mutex<Option<String>>` and the
403403-port in `PORT: AtomicU16`. These are read by `pcm_airplay_connect()` at the
404404-start of each track.
503503+`airplay_receivers` takes precedence over `airplay_host`/`airplay_port` when
504504+both are present. `crates/settings/src/lib.rs` calls
505505+`pcm_airplay_clear_receivers()` then `pcm_airplay_add_receiver()` for each
506506+entry.
507507+508508+### Runtime control
509509+510510+The Rust FFI also exposes:
511511+512512+```rust
513513+pcm::airplay_set_host("192.168.1.50", 5000); // replace list with one receiver
514514+pcm::airplay_add_receiver("192.168.1.51", 5000); // append to list
515515+pcm::airplay_clear_receivers(); // clear before re-configuring
516516+```
405517406518---
407519408520## AirPlay 2 probe
409521410410-`pcm_airplay_connect()` first attempts an AirPlay 2 handshake (PTP-based). If
411411-it fails (connection refused, or the receiver does not support AirPlay 2) the
412412-error is logged at `tracing::debug!` level and the function falls through to the
413413-AirPlay 1 / RAOP path. This makes the probe transparent to the user.
522522+`connect_one()` first attempts an AirPlay 2 handshake (HAP-based). If it
523523+fails the error is logged at `tracing::debug!` and the function falls through
524524+to the AirPlay 1 / RAOP path. This makes the probe transparent to the user.
414525415415-The AirPlay 2 path uses the cryptographic dependencies declared in
416416-`Cargo.toml`:
526526+The AirPlay 2 path uses:
417527418528```toml
419419-x25519-dalek # key exchange
420420-ed25519-dalek # signature
421421-chacha20poly1305 # AEAD encryption
529529+x25519-dalek # ephemeral key exchange (PAIR-VERIFY)
530530+ed25519-dalek # long-term identity signature
531531+chacha20poly1305 # AEAD encryption of the identity payload
422532sha2, hkdf, hmac # key derivation
423423-num-bigint # SRP big-integer arithmetic
533533+num-bigint # SRP 3072-bit big-integer arithmetic (PAIR-SETUP)
424534```
425535426536None of these are needed for the AirPlay 1 code path.
···429539430540## Gotchas and known limits
431541432432-### 1. Only one simultaneous receiver
433433-434434-The `SESSION` mutex holds a single `AirPlaySession`. Sending to multiple
435435-AirPlay devices simultaneously is not supported. For multi-room output use
436436-the Squeezelite sink with multiple clients, or run multiple rockboxd instances.
437437-438438-### 2. Receiver must be on the local network
542542+### 1. Receiver must be reachable via UDP
439543440440-RAOP uses UDP with no NAT traversal. The receiver must be directly reachable
441441-at the configured IP. Multicast discovery (mDNS/Bonjour) is not implemented —
442442-you must supply the IP manually.
544544+RAOP uses UDP with no NAT traversal. Every configured receiver must be
545545+directly reachable at its IP from the machine running rockboxd. Multicast
546546+discovery (mDNS/Bonjour) is not implemented — supply the IP manually.
443547444444-### 3. `interleaved=0-1` in Transport header
548548+### 2. `interleaved=0-1` in Transport header
445549446550Even though the transport is plain UDP, most receivers require the
447447-`interleaved=0-1` parameter in the SETUP `Transport` header. Omitting it causes
448448-the receiver to ignore the `RECORD` command silently.
551551+`interleaved=0-1` parameter in the SETUP `Transport` header. Omitting it
552552+causes the receiver to silently ignore the `RECORD` command.
449553450450-### 4. Verbatim ALAC only (no compression)
554554+### 3. Verbatim ALAC only (no compression)
451555452556`alac.rs` only implements the verbatim escape frame (`isNotCompressed=1`).
453557Bitrate is fixed at `sample_rate × 4 bytes/s = 176,400 bytes/s` at 44.1 kHz.
454454-This is fine for LAN streaming but wasteful compared to the compressed ALAC
455455-path.
558558+This is fine for LAN streaming, but the bitrate is higher than compressed ALAC.
559559+560560+### 4. Fixed 44100 Hz sample rate
456561457457-### 5. Fixed 44100 Hz sample rate
562562+The SDP and ALAC frame size constants are hard-coded for 44100 Hz. Playback
563563+of 48 kHz or 96 kHz tracks is not tested.
564564+565565+### 5. Multi-room sync is LAN-quality, not sample-accurate
458566459459-The RTSP SDP and ALAC frame size constants are hard-coded for 44100 Hz.
460460-Playback of 48 kHz or 96 kHz tracks is not tested and may produce incorrect
461461-pitch or receiver errors.
567567+See [Sync accuracy](#sync-accuracy). AirPlay 2-level clock anchoring is not
568568+implemented.
462569463570### 6. Logging uses `tracing`, never `println!`
464571465465-All diagnostic output is routed through the `tracing` crate. To see the full
466466-AirPlay negotiation:
572572+All diagnostic output is routed through the `tracing` crate:
467573468574```sh
469469-RUST_LOG=rockbox_airplay=debug rockboxd
575575+RUST_LOG=rockbox_airplay=debug rockboxd # full protocol trace
576576+RUST_LOG=info rockboxd # lifecycle events only
470577```
471578472472-Never add `println!` or `eprintln!` — those bypass the log filter and pollute
473473-stdout, breaking FIFO/pipe mode.
579579+Never add `println!` or `eprintln!` — those bypass the log filter and can
580580+corrupt the stdout PCM stream in FIFO mode.
+191-84
crates/airplay/src/lib.rs
···77#[doc(hidden)]
88pub fn _link_airplay() {}
991010-use alac::{encode_frame, PCM_BYTES_PER_FRAME};
1111-use rtp::RtpSender;
1010+use alac::{encode_frame, FRAME_SAMPLES, PCM_BYTES_PER_FRAME};
1111+use rtp::{PacingClock, ReceiverHandle, TimingSocket};
1212use rtsp::RtspClient;
13131414use std::ffi::CStr;
1515use std::os::raw::{c_char, c_int, c_ushort};
1616use std::sync::Mutex;
17171818+// ---------------------------------------------------------------------------
1919+// Global state
2020+// ---------------------------------------------------------------------------
2121+1822static SESSION: Mutex<Option<AirPlaySession>> = Mutex::new(None);
19232024struct AirPlaySession {
2121- sender: RtpSender,
2222- rtsp: RtspClient,
2525+ receivers: Vec<ReceiverHandle>,
2626+ timing: TimingSocket,
2727+ rtsp_clients: Vec<RtspClient>,
2328 buf: Vec<u8>,
2429 first_frame: bool,
3030+ pacing: PacingClock,
2531}
26323333+impl AirPlaySession {
3434+ /// Encode one ALAC frame and fan it out to every connected receiver.
3535+ fn send_frame(&mut self, frame_bytes: &[u8; PCM_BYTES_PER_FRAME], first: bool) {
3636+ let alac = encode_frame(frame_bytes);
3737+ let rtptime = self.pacing.rtptime;
3838+ let frame_index = self.pacing.frames_sent;
3939+4040+ for rx in &mut self.receivers {
4141+ rx.send_audio_packet(&alac, rtptime, frame_index, first);
4242+ }
4343+4444+ self.pacing.advance();
4545+4646+ // RTCP NTP sync every ~44 frames (~0.35 s)
4747+ if self.pacing.frames_sent % 44 == 0 {
4848+ let current_ts = self.pacing.rtptime.wrapping_sub(FRAME_SAMPLES as u32);
4949+ let next_ts = self.pacing.rtptime;
5050+ for rx in &self.receivers {
5151+ rx.send_sync(current_ts, next_ts, false);
5252+ }
5353+ }
5454+5555+ // Pace once for all receivers
5656+ self.pacing.pace();
5757+ }
5858+5959+ fn send_initial_sync(&self) {
6060+ let ts = self.pacing.initial_rtptime;
6161+ for rx in &self.receivers {
6262+ rx.send_sync(ts, ts, true);
6363+ }
6464+ tracing::debug!(
6565+ "sent initial sync ts={} to {} receiver(s)",
6666+ ts,
6767+ self.receivers.len()
6868+ );
6969+ }
7070+}
7171+7272+// ---------------------------------------------------------------------------
7373+// Config
7474+// ---------------------------------------------------------------------------
7575+2776static CONFIG: Mutex<AirPlayConfig> = Mutex::new(AirPlayConfig {
2828- host: None,
2929- port: 5000,
7777+ receivers: Vec::new(),
3078});
31793280struct AirPlayConfig {
3333- host: Option<String>,
3434- port: u16,
8181+ receivers: Vec<(String, u16)>,
3582}
36833737-// Safety: the raw pointer in host is only touched inside the mutex
8484+// SAFETY: the only field is a Vec<(String, u16)>, which is already Send, so this impl is sound (and redundant).
3885unsafe impl Send for AirPlayConfig {}
39868787+// ---------------------------------------------------------------------------
8888+// FFI — configuration
8989+// ---------------------------------------------------------------------------
9090+9191+/// Set a single AirPlay receiver, replacing any previously configured list.
9292+/// Kept for backward compatibility with existing C callers and settings.
4093#[no_mangle]
4194pub extern "C" fn pcm_airplay_set_host(host: *const c_char, port: c_ushort) {
4295 if host.is_null() {
···4699 .to_string_lossy()
47100 .into_owned();
48101 let mut cfg = CONFIG.lock().unwrap();
4949- cfg.host = Some(s);
5050- cfg.port = port;
102102+ cfg.receivers.clear();
103103+ cfg.receivers.push((s, port));
104104+}
105105+106106+/// Append one receiver to the multi-room list.
107107+#[no_mangle]
108108+pub extern "C" fn pcm_airplay_add_receiver(host: *const c_char, port: c_ushort) {
109109+ if host.is_null() {
110110+ return;
111111+ }
112112+ let s = unsafe { CStr::from_ptr(host) }
113113+ .to_string_lossy()
114114+ .into_owned();
115115+ let mut cfg = CONFIG.lock().unwrap();
116116+ cfg.receivers.push((s, port));
117117+}
118118+119119+/// Clear the receiver list (call before re-configuring).
120120+#[no_mangle]
121121+pub extern "C" fn pcm_airplay_clear_receivers() {
122122+ CONFIG.lock().unwrap().receivers.clear();
51123}
52124125125+// ---------------------------------------------------------------------------
126126+// FFI — session lifecycle
127127+// ---------------------------------------------------------------------------
128128+53129#[no_mangle]
54130pub extern "C" fn pcm_airplay_connect() -> c_int {
5555- // Already connected — don't redo the RTSP handshake for every DMA chunk.
56131 if SESSION.lock().unwrap().is_some() {
5757- return 0;
132132+ return 0; // idempotent
58133 }
591346060- let cfg = CONFIG.lock().unwrap();
6161- let host = match cfg.host.clone() {
6262- Some(h) => h,
6363- None => {
6464- tracing::error!("pcm_airplay_connect: no host configured");
135135+ let targets = {
136136+ let cfg = CONFIG.lock().unwrap();
137137+ if cfg.receivers.is_empty() {
138138+ tracing::error!("pcm_airplay_connect: no receivers configured");
65139 return -1;
66140 }
141141+ cfg.receivers.clone()
67142 };
6868- let port = cfg.port;
6969- drop(cfg);
701437171- let local_ip = local_ip_for(&host).unwrap_or_else(|| "127.0.0.1".to_string());
7272- tracing::info!("connecting to {}:{} (local_ip={})", host, port, local_ip);
7373-7474- // Attempt AirPlay 2 pairing (PAIR-VERIFY / PAIR-SETUP).
7575- // Failure here is non-fatal — many AirPlay 1 receivers don't have the endpoint.
7676- match airplay2::connect(&host, port, None) {
7777- Ok(()) => tracing::info!("AirPlay 2 handshake complete"),
7878- Err(e) => tracing::debug!("AirPlay 2 handshake skipped ({}), using AirPlay 1", e),
7979- }
8080-8181- let session_token: u64 = rand::random();
8282- let ssrc: u32 = rand::random();
144144+ // All receivers share the same initial_rtptime for RTP-level synchronisation.
83145 let initial_rtptime: u32 = rand::random();
841468585- // Bind all UDP sockets first so we know the local ports before SETUP.
8686- let mut sender = match RtpSender::bind(ssrc, initial_rtptime) {
8787- Ok(s) => s,
147147+ // Bind the shared timing socket first; every receiver advertises the same port.
148148+ let timing = match TimingSocket::bind() {
149149+ Ok(t) => t,
88150 Err(e) => {
8989- tracing::error!("bind failed: {}", e);
151151+ tracing::error!("timing socket bind failed: {}", e);
90152 return -1;
91153 }
92154 };
9393- let local_ctrl_port = sender.local_ctrl_port;
9494- let local_timing_port = sender.local_timing_port;
155155+ let local_timing_port = timing.local_port;
156156+157157+ let mut receivers: Vec<ReceiverHandle> = Vec::new();
158158+ let mut rtsp_clients: Vec<RtspClient> = Vec::new();
159159+ let mut connected = 0usize;
951609696- let mut rtsp = match RtspClient::connect(&host, port, session_token) {
9797- Ok(c) => c,
9898- Err(e) => {
9999- tracing::error!("RTSP TCP connect failed: {}", e);
100100- return -1;
161161+ for (host, port) in &targets {
162162+ match connect_one(host, *port, initial_rtptime, local_timing_port) {
163163+ Ok((rx, rtsp)) => {
164164+ tracing::info!("connected to {}:{}", host, port);
165165+ receivers.push(rx);
166166+ rtsp_clients.push(rtsp);
167167+ connected += 1;
168168+ }
169169+ Err(e) => tracing::warn!("failed to connect to {}:{}: {}", host, port, e),
101170 }
102102- };
171171+ }
103172104104- if let Err(e) = rtsp.announce(&local_ip, &host) {
105105- tracing::error!("ANNOUNCE failed: {}", e);
173173+ if connected == 0 {
174174+ tracing::error!("could not connect to any AirPlay receiver");
106175 return -1;
107176 }
108177109109- let (server_audio, server_ctrl, _server_timing) =
110110- match rtsp.setup(local_ctrl_port, local_timing_port) {
111111- Ok(ports) => ports,
112112- Err(e) => {
113113- tracing::error!("SETUP failed: {}", e);
114114- return -1;
115115- }
116116- };
178178+ let pacing = PacingClock::new(initial_rtptime);
179179+ let session = AirPlaySession {
180180+ receivers,
181181+ timing,
182182+ rtsp_clients,
183183+ buf: Vec::with_capacity(PCM_BYTES_PER_FRAME * 4),
184184+ first_frame: true,
185185+ pacing,
186186+ };
187187+188188+ session.send_initial_sync();
189189+ tracing::info!(
190190+ "session established: {}/{} receiver(s) connected",
191191+ connected,
192192+ targets.len()
193193+ );
194194+195195+ *SESSION.lock().unwrap() = Some(session);
196196+ 0
197197+}
117198118118- if let Err(e) = sender.connect_server(&host, server_audio, server_ctrl) {
119119- tracing::error!("connect_server failed: {}", e);
120120- return -1;
199199+/// Connect to a single AirPlay receiver. Returns `(ReceiverHandle, RtspClient)` on success.
200200+fn connect_one(
201201+ host: &str,
202202+ port: u16,
203203+ initial_rtptime: u32,
204204+ local_timing_port: u16,
205205+) -> std::io::Result<(ReceiverHandle, RtspClient)> {
206206+ let local_ip = local_ip_for(host).unwrap_or_else(|| "127.0.0.1".to_string());
207207+208208+ // Attempt AirPlay 2 pairing (non-fatal fallback to AirPlay 1).
209209+ match airplay2::connect(host, port, None) {
210210+ Ok(()) => tracing::info!("AirPlay 2 handshake complete for {}:{}", host, port),
211211+ Err(e) => tracing::debug!(
212212+ "AirPlay 2 skipped for {}:{} ({}), using AirPlay 1",
213213+ host,
214214+ port,
215215+ e
216216+ ),
121217 }
122218123123- if let Err(e) = rtsp.record(0, initial_rtptime) {
124124- tracing::error!("RECORD failed: {}", e);
125125- return -1;
126126- }
219219+ let session_token: u64 = rand::random();
220220+221221+ let mut rx = ReceiverHandle::bind()?;
222222+ let local_ctrl_port = rx.local_ctrl_port;
223223+224224+ let mut rtsp = RtspClient::connect(host, port, session_token)?;
225225+ rtsp.announce(&local_ip, host)?;
226226+227227+ let (server_audio, server_ctrl, _server_timing) =
228228+ rtsp.setup(local_ctrl_port, local_timing_port)?;
229229+230230+ rx.connect(host, server_audio, server_ctrl)?;
231231+ rtsp.record(0, initial_rtptime)?;
127232128233 // Set volume to maximum; RAOP range: -144.0 (mute) to 0.0 (full).
129234 if let Err(e) = rtsp.set_parameter_volume(0.0) {
130130- tracing::warn!("SET_PARAMETER volume failed (non-fatal): {}", e);
235235+ tracing::warn!(
236236+ "SET_PARAMETER volume failed for {}:{} (non-fatal): {}",
237237+ host,
238238+ port,
239239+ e
240240+ );
131241 }
132242133133- sender.send_initial_sync();
134134- tracing::info!(
135135- "session established — sending audio to {}:{}",
136136- host,
137137- server_audio
138138- );
139139-140140- let mut guard = SESSION.lock().unwrap();
141141- *guard = Some(AirPlaySession {
142142- sender,
143143- rtsp,
144144- buf: Vec::with_capacity(PCM_BYTES_PER_FRAME * 4),
145145- first_frame: true,
146146- });
147147-148148- 0
243243+ Ok((rx, rtsp))
149244}
150245151151-/// Write raw S16LE stereo PCM. Buffers into 352-sample frames, encodes ALAC, sends RTP.
246246+/// Write raw S16LE stereo PCM. Buffers into 352-sample frames, encodes ALAC,
247247+/// fans out to every connected receiver, then paces once.
152248#[no_mangle]
153249pub extern "C" fn pcm_airplay_write(data: *const u8, len: usize) -> c_int {
154250 if data.is_null() || len == 0 {
···166262 };
167263168264 if session.first_frame {
169169- tracing::debug!("first write: {} bytes", len);
265265+ tracing::debug!(
266266+ "first write: {} bytes, {} receiver(s)",
267267+ len,
268268+ session.receivers.len()
269269+ );
170270 }
171271172272 session.buf.extend_from_slice(input);
···176276 session.buf[..PCM_BYTES_PER_FRAME].try_into().unwrap();
177277 session.buf.drain(..PCM_BYTES_PER_FRAME);
178278179179- let alac = encode_frame(&frame_bytes);
180279 let first = session.first_frame;
181280 session.first_frame = false;
182182- session.sender.send_audio(&alac, first);
281281+ session.send_frame(&frame_bytes, first);
183282 }
184283185284 0
···189288pub extern "C" fn pcm_airplay_stop() {
190289 let mut guard = SESSION.lock().unwrap();
191290 if let Some(ref mut session) = *guard {
192192- let _ = session.rtsp.teardown();
291291+ for rtsp in &mut session.rtsp_clients {
292292+ let _ = rtsp.teardown();
293293+ }
193294 }
194295 *guard = None;
195296}
···198299pub extern "C" fn pcm_airplay_close() {
199300 let mut guard = SESSION.lock().unwrap();
200301 if let Some(ref mut session) = *guard {
201201- let _ = session.rtsp.teardown();
302302+ for rtsp in &mut session.rtsp_clients {
303303+ let _ = rtsp.teardown();
304304+ }
202305 }
203306 *guard = None;
204307}
308308+309309+// ---------------------------------------------------------------------------
310310+// Helpers
311311+// ---------------------------------------------------------------------------
205312206313fn local_ip_for(remote: &str) -> Option<String> {
207314 use std::net::UdpSocket;
+84-108
crates/airplay/src/rtp.rs
···1111const NTP_EPOCH_DELTA: u32 = 0x83AA_7E80;
12121313// Duration of one ALAC frame at 44100 Hz
1414-const FRAME_DURATION_US: u64 = FRAME_SAMPLES as u64 * 1_000_000 / 44100; // ~7982 µs
1414+pub const FRAME_DURATION_US: u64 = FRAME_SAMPLES as u64 * 1_000_000 / 44100; // ~7982 µs
15151616-pub struct RtpSender {
1616+/// Per-receiver UDP state. One of these per AirPlay endpoint.
1717+pub struct ReceiverHandle {
1718 audio_sock: UdpSocket,
1819 ctrl_sock: UdpSocket,
1920 server_ctrl_addr: std::net::SocketAddr,
2020- ssrc: u32,
2121- seqnum: u16,
2222- rtptime: u32,
2323- initial_rtptime: u32,
2424- frames_sent: u64,
2521 pub local_ctrl_port: u16,
2626- pub local_timing_port: u16,
2727- stream_start: Option<Instant>,
2828- // kept alive so the OS port stays open; responder thread holds the other Arc
2929- _timing_sock: Arc<UdpSocket>,
2222+ pub ssrc: u32,
2323+ pub seqnum: u16,
3024}
31253232-impl RtpSender {
3333- /// Bind all local UDP sockets. `connect_server()` must be called after SETUP
3434- /// once the server's ports are known.
3535- pub fn bind(ssrc: u32, initial_rtptime: u32) -> std::io::Result<Self> {
2626+impl ReceiverHandle {
2727+ /// Bind local audio and ctrl sockets. Call `connect()` after SETUP.
2828+ pub fn bind() -> std::io::Result<Self> {
3629 let audio_sock = UdpSocket::bind("0.0.0.0:0")?;
3730 let ctrl_sock = UdpSocket::bind("0.0.0.0:0")?;
3831 let local_ctrl_port = ctrl_sock.local_addr()?.port();
3939- let timing_sock = Arc::new(UdpSocket::bind("0.0.0.0:0")?);
4040- let local_timing_port = timing_sock.local_addr()?.port();
4141-4242- // Respond to NTP timing requests from the receiver so it can synchronise
4343- // and actually start playing. Without this, the timing port gets ICMP
4444- // unreachable replies and many receivers stall indefinitely.
4545- let timing_thread = Arc::clone(&timing_sock);
4646- thread::spawn(move || timing_responder(timing_thread));
4747-3232+ let ssrc: u32 = rand::random();
4833 let server_ctrl_addr = "0.0.0.0:0".parse().unwrap();
4949- tracing::debug!(
5050- "local ctrl_port={} timing_port={}",
5151- local_ctrl_port,
5252- local_timing_port
5353- );
5454-5534 Ok(Self {
5635 audio_sock,
5736 ctrl_sock,
5837 server_ctrl_addr,
3838+ local_ctrl_port,
5939 ssrc,
6040 seqnum: 0,
6161- rtptime: initial_rtptime,
6262- initial_rtptime,
6363- frames_sent: 0,
6464- local_ctrl_port,
6565- local_timing_port,
6666- stream_start: None,
6767- _timing_sock: timing_sock,
6841 })
6942 }
70437144 /// Connect the audio socket to the server's RTP port and record the ctrl addr.
7272- pub fn connect_server(
7373- &mut self,
7474- host: &str,
7575- audio_port: u16,
7676- ctrl_port: u16,
7777- ) -> std::io::Result<()> {
4545+ pub fn connect(&mut self, host: &str, audio_port: u16, ctrl_port: u16) -> std::io::Result<()> {
7846 tracing::debug!("connecting audio → {}:{}", host, audio_port);
7947 self.audio_sock
8048 .connect(format!("{}:{}", host, audio_port))?;
···8452 Ok(())
8553 }
86548787- pub fn send_audio(&mut self, alac_frame: &[u8; ALAC_FRAME_BYTES], first: bool) {
8888- let start = *self.stream_start.get_or_insert_with(Instant::now);
8989-5555+ /// Build and send one RTP audio packet. Increments seqnum.
5656+ pub fn send_audio_packet(
5757+ &mut self,
5858+ alac_frame: &[u8; ALAC_FRAME_BYTES],
5959+ rtptime: u32,
6060+ frame_index: u64,
6161+ first: bool,
6262+ ) {
9063 let mut pkt = [0u8; RTP_PACKET_BYTES];
9164 pkt[0] = 0x80;
9265 pkt[1] = if first { 0x60 | 0x80 } else { 0x60 }; // M=1 on first, PT=96
9366 pkt[2] = (self.seqnum >> 8) as u8;
9467 pkt[3] = self.seqnum as u8;
9595- pkt[4] = (self.rtptime >> 24) as u8;
9696- pkt[5] = (self.rtptime >> 16) as u8;
9797- pkt[6] = (self.rtptime >> 8) as u8;
9898- pkt[7] = self.rtptime as u8;
6868+ pkt[4] = (rtptime >> 24) as u8;
6969+ pkt[5] = (rtptime >> 16) as u8;
7070+ pkt[6] = (rtptime >> 8) as u8;
7171+ pkt[7] = rtptime as u8;
9972 pkt[8] = (self.ssrc >> 24) as u8;
10073 pkt[9] = (self.ssrc >> 16) as u8;
10174 pkt[10] = (self.ssrc >> 8) as u8;
···1047710578 match self.audio_sock.send(&pkt) {
10679 Ok(_) => {
107107- if self.frames_sent < 5 {
8080+ if frame_index < 5 {
10881 tracing::debug!(
10982 "sent frame {} ts={} seq={} first={}",
110110- self.frames_sent,
111111- self.rtptime,
8383+ frame_index,
8484+ rtptime,
11285 self.seqnum,
11386 first
11487 );
11588 }
11689 }
117117- Err(e) => tracing::warn!("send error on frame {}: {}", self.frames_sent, e),
9090+ Err(e) => tracing::warn!("send error on frame {}: {}", frame_index, e),
11891 }
119119-12092 self.seqnum = self.seqnum.wrapping_add(1);
121121- self.rtptime = self.rtptime.wrapping_add(FRAME_SAMPLES as u32);
122122- self.frames_sent += 1;
123123-124124- // RTCP NTP sync every ~44 frames (~0.35 s)
125125- if self.frames_sent % 44 == 0 {
126126- self.send_sync(false);
127127- }
128128-129129- // Real-time pacing — sleep until the frame's playout deadline.
130130- let expected = start + Duration::from_micros(self.frames_sent * FRAME_DURATION_US);
131131- let now = Instant::now();
132132- if expected > now {
133133- std::thread::sleep(expected - now);
134134- }
13593 }
13694137137- fn send_sync(&self, first: bool) {
9595+ /// Send an RTCP NTP sync packet on the ctrl socket.
9696+ pub fn send_sync(&self, current_ts: u32, next_ts: u32, first: bool) {
13897 let now = SystemTime::now()
13998 .duration_since(UNIX_EPOCH)
14099 .unwrap_or_default();
141100 let ntp_sec = now.as_secs() as u32 + NTP_EPOCH_DELTA;
142101 let ntp_frac = ((now.subsec_nanos() as u64 * (1u64 << 32)) / 1_000_000_000) as u32;
143143-144144- // "current" timestamp = frame we just sent (rtptime was already incremented)
145145- let current_ts = self.rtptime.wrapping_sub(FRAME_SAMPLES as u32);
146146- // "next" timestamp = self.rtptime (next frame to be sent)
147147- let next_ts = self.rtptime;
148102149103 let mut pkt = [0u8; 20];
150104 pkt[0] = if first { 0x90 } else { 0x80 };
···170124171125 let _ = self.ctrl_sock.send_to(&pkt, self.server_ctrl_addr);
172126 }
127127+}
173128174174- pub fn send_initial_sync(&self) {
175175- // At startup no frames have been sent yet; use initial_rtptime for both
176176- // "current" and "next" so we don't send a backwards-wrapped timestamp.
177177- let now = SystemTime::now()
178178- .duration_since(UNIX_EPOCH)
179179- .unwrap_or_default();
180180- let ntp_sec = now.as_secs() as u32 + NTP_EPOCH_DELTA;
181181- let ntp_frac = ((now.subsec_nanos() as u64 * (1u64 << 32)) / 1_000_000_000) as u32;
182182- let ts = self.initial_rtptime;
129129+/// Shared NTP timing socket. One instance serves all receivers — the responder
130130+/// doesn't care which receiver the request came from, it just replies in place.
131131+pub struct TimingSocket {
132132+ pub local_port: u16,
133133+ // kept alive so the OS port stays open; responder thread holds the other Arc
134134+ _sock: Arc<UdpSocket>,
135135+}
183136184184- let mut pkt = [0u8; 20];
185185- pkt[0] = 0x90; // first sync: extension bit set
186186- pkt[1] = 0xd4;
187187- pkt[2] = 0x00;
188188- pkt[3] = 0x07;
189189- pkt[4] = (ts >> 24) as u8;
190190- pkt[5] = (ts >> 16) as u8;
191191- pkt[6] = (ts >> 8) as u8;
192192- pkt[7] = ts as u8;
193193- pkt[8] = (ntp_sec >> 24) as u8;
194194- pkt[9] = (ntp_sec >> 16) as u8;
195195- pkt[10] = (ntp_sec >> 8) as u8;
196196- pkt[11] = ntp_sec as u8;
197197- pkt[12] = (ntp_frac >> 24) as u8;
198198- pkt[13] = (ntp_frac >> 16) as u8;
199199- pkt[14] = (ntp_frac >> 8) as u8;
200200- pkt[15] = ntp_frac as u8;
201201- pkt[16] = (ts >> 24) as u8;
202202- pkt[17] = (ts >> 16) as u8;
203203- pkt[18] = (ts >> 8) as u8;
204204- pkt[19] = ts as u8;
137137+impl TimingSocket {
138138+ pub fn bind() -> std::io::Result<Self> {
139139+ let sock = Arc::new(UdpSocket::bind("0.0.0.0:0")?);
140140+ let local_port = sock.local_addr()?.port();
141141+ let thread_sock = Arc::clone(&sock);
142142+ thread::spawn(move || timing_responder(thread_sock));
143143+ tracing::debug!("timing responder bound on port {}", local_port);
144144+ Ok(Self {
145145+ local_port,
146146+ _sock: sock,
147147+ })
148148+ }
149149+}
205150206206- let _ = self.ctrl_sock.send_to(&pkt, self.server_ctrl_addr);
207207- tracing::debug!("sent initial sync ts={}", ts);
151151+/// Pacing state shared across all receivers.
152152+pub struct PacingClock {
153153+ pub stream_start: Option<Instant>,
154154+ pub frames_sent: u64,
155155+ pub rtptime: u32,
156156+ pub initial_rtptime: u32,
157157+}
158158+159159+impl PacingClock {
160160+ pub fn new(initial_rtptime: u32) -> Self {
161161+ Self {
162162+ stream_start: None,
163163+ frames_sent: 0,
164164+ rtptime: initial_rtptime,
165165+ initial_rtptime,
166166+ }
208167 }
209168210210- pub fn reset_clock(&mut self) {
169169+ /// Advance after sending one frame to all receivers.
170170+ pub fn advance(&mut self) {
171171+ self.rtptime = self.rtptime.wrapping_add(FRAME_SAMPLES as u32);
172172+ self.frames_sent += 1;
173173+ }
174174+175175+ /// Sleep until the current frame's real-time deadline.
176176+ pub fn pace(&mut self) {
177177+ let start = *self.stream_start.get_or_insert_with(Instant::now);
178178+ let expected = start + Duration::from_micros(self.frames_sent * FRAME_DURATION_US);
179179+ let now = Instant::now();
180180+ if expected > now {
181181+ std::thread::sleep(expected - now);
182182+ }
183183+ }
184184+185185+ pub fn reset(&mut self) {
211186 self.stream_start = None;
212187 self.frames_sent = 0;
188188+ self.rtptime = self.initial_rtptime;
213189 }
214190}
215191