Sync reading position from Moon Reader app to Bookhive atproto records
atproto bookhive ereader moonreader
3
fork

Configure Feed

Select the types of activity you want to include in your feed.

Harden before going public

Five fixes from the pre-publish review:

1. PROPFIND XML: escape `<D:displayname>` values (bookhive filenames and
local file names). Unescaped `&`/`<` would break the response XML for
any book with those chars in its title, and authenticated clients
could poison directory listings by PUT-ing crafted filenames.
2. Passthrough path check: replace str.startswith with is_relative_to so
a sibling directory sharing a prefix with $PASSTHROUGH_ROOT can't be
reached via `../sibling`.
3. Cover-download SSRF guard: require https and reject localhost names and
any non-global IP literal (RFC1918 / loopback / link-local) before fetching
a URL returned by the bookhive catalog. Hostnames are not resolved, so
DNS rebinding is not covered.
4. Dockerfile: run as UID 10001 (non-root).
5. docker-compose: bind port to 127.0.0.1 by default, with a comment
explaining when to switch to 0.0.0.0.

New tests/test_security_guards.py covers the three code-level fixes (items 1-3).
Full suite: 68 passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+146 -6
+7
Dockerfile
··· 8 8 COPY src ./src 9 9 RUN uv sync --no-dev 10 10 11 + # Run as a fixed non-root UID. If the host's ./data volume is owned by a 12 + # different UID, override at runtime with `user: "<uid>:<gid>"` in compose. 13 + RUN groupadd --gid 10001 spacebee \ 14 + && useradd --uid 10001 --gid 10001 --no-create-home spacebee \ 15 + && chown -R spacebee:spacebee /app 16 + USER spacebee 17 + 11 18 ENV PATH="/app/.venv/bin:$PATH" \ 12 19 PASSTHROUGH_ROOT=/data/passthrough \ 13 20 LOG_LEVEL=INFO
+5 -1
docker-compose.yml
··· 6 6 container_name: spacebee 7 7 restart: unless-stopped 8 8 ports: 9 - - "8080:8080" 9 + # Bind to loopback only — spacebee speaks plaintext Basic auth over 10 + # WebDAV; put an HTTPS reverse proxy in front. If you're running the 11 + # proxy on another host, change this to "8080:8080" and make sure 12 + # the proxy is the only thing reachable from outside. 13 + - "127.0.0.1:8080:8080" 10 14 volumes: 11 15 - ./data:/data 12 16 environment:
+5 -2
src/spacebee/adapters/webdav/moonreader.py
··· 12 12 from datetime import UTC, datetime 13 13 from email.utils import format_datetime 14 14 from urllib.parse import quote 15 + from xml.sax.saxutils import escape as xml_escape 15 16 16 17 from spacebee.atproto import bookhive 17 18 from spacebee.atproto.client import ATProtoClient ··· 68 69 resourcetype = "<D:collection/>" if is_collection else "" 69 70 content_type = "" if is_collection else "<D:getcontenttype>text/plain</D:getcontenttype>" 70 71 length = "" if is_collection else f"<D:getcontentlength>{content_length}</D:getcontentlength>" 71 - etag_xml = f"<D:getetag>{etag}</D:getetag>" if etag else "" 72 + # etag is our own sha1 hex output (always safe), but defensively escape 73 + # in case the helper contract changes. 74 + etag_xml = f"<D:getetag>{xml_escape(etag)}</D:getetag>" if etag else "" 72 75 dn = display_name or href.rsplit("/", 1)[-1] or href 73 76 return ( 74 77 "<D:response>" ··· 76 79 "<D:propstat>" 77 80 "<D:prop>" 78 81 f"<D:resourcetype>{resourcetype}</D:resourcetype>" 79 - f"<D:displayname>{dn}</D:displayname>" 82 + f"<D:displayname>{xml_escape(dn)}</D:displayname>" 80 83 f"<D:getlastmodified>{last_modified_http}</D:getlastmodified>" 81 84 f"{length}" 82 85 f"{content_type}"
+5 -3
src/spacebee/adapters/webdav/passthrough.py
··· 13 13 from email.utils import format_datetime 14 14 from pathlib import Path 15 15 from urllib.parse import quote 16 + from xml.sax.saxutils import escape as xml_escape 16 17 17 18 log = logging.getLogger(__name__) 18 19 ··· 32 33 """Resolve a WebDAV path to a local Path, refusing escapes.""" 33 34 rel = path.lstrip("/") 34 35 candidate = (self._root / rel).resolve() 35 - # Directory traversal guard 36 - if not str(candidate).startswith(str(self._root)): 36 + # Use is_relative_to (not str.startswith) so sibling paths that share 37 + # a prefix with _root can't sneak through: /data/pass vs /data/password. 38 + if candidate != self._root and not candidate.is_relative_to(self._root): 37 39 raise PermissionError(f"path escapes passthrough root: {path!r}") 38 40 return candidate 39 41 ··· 120 122 "<D:propstat>" 121 123 "<D:prop>" 122 124 f"<D:resourcetype>{resourcetype}</D:resourcetype>" 123 - f"<D:displayname>{local.name or 'root'}</D:displayname>" 125 + f"<D:displayname>{xml_escape(local.name or 'root')}</D:displayname>" 124 126 f"<D:getlastmodified>{lastmod}</D:getlastmodified>" 125 127 f"{size_xml}{ct_xml}" 126 128 "</D:prop>"
+29
src/spacebee/atproto/bookhive.py
··· 12 12 13 13 from __future__ import annotations 14 14 15 + import ipaddress 15 16 import logging 16 17 import re 17 18 import time 18 19 from dataclasses import dataclass 19 20 from datetime import UTC, datetime 21 + from urllib.parse import urlparse 20 22 21 23 import httpx 22 24 ··· 289 291 ) 290 292 291 293 294 + def _is_safe_cover_url(url: str) -> bool: 295 + """Reject cover URLs that could be used for SSRF. 296 + 297 + The URL is supplied by bookhive.buzz's catalog search, which is trusted 298 + but not under our control. Limit blast radius by requiring https + a 299 + public-looking host. Doesn't guard against DNS-rebinding, but blocks 300 + the obvious `http://169.254.169.254/...` / `http://localhost/...` cases. 301 + """ 302 + try: 303 + parsed = urlparse(url) 304 + except ValueError: 305 + return False 306 + if parsed.scheme != "https" or not parsed.hostname: 307 + return False 308 + host = parsed.hostname.lower() 309 + if host in ("localhost", "localhost.localdomain") or host.endswith(".localhost"): 310 + return False 311 + try: 312 + ip = ipaddress.ip_address(host) 313 + except ValueError: 314 + return True # hostname, not a bare IP — accept 315 + return ip.is_global 316 + 317 + 292 318 async def upload_cover(client: ATProtoClient, cover_url: str) -> dict | None: 293 319 """Download a cover image URL and upload as a PDS blob. Returns blob ref.""" 320 + if not _is_safe_cover_url(cover_url): 321 + log.warning("Refusing to fetch cover from unsafe URL: %s", cover_url) 322 + return None 294 323 try: 295 324 img = await client.http.get(cover_url) 296 325 img.raise_for_status()
+95
tests/test_security_guards.py
··· 1 + """Guard-rail unit tests added during the pre-publish security review. 2 + 3 + Each test pins down one specific hardening from the review: 4 + 1. PROPFIND displayname escapes XML-special chars 5 + 2. Passthrough refuses sibling-prefix directory escapes 6 + 3. upload_cover rejects non-https / private-IP / localhost URLs 7 + """ 8 + 9 + from __future__ import annotations 10 + 11 + import pytest 12 + 13 + from spacebee.adapters.webdav import moonreader, passthrough 14 + from spacebee.atproto.bookhive import _is_safe_cover_url 15 + 16 + # 1. XML escaping in PROPFIND response ------------------------------------- 17 + 18 + def test_propfind_displayname_escapes_special_chars(): 19 + xml = moonreader._response_xml( 20 + href="/Books/.Moon+/Cache/Rosencrantz & Guildenstern.epub.po", 21 + is_collection=False, 22 + last_modified_http="Thu, 01 Jan 1970 00:00:00 GMT", 23 + content_length=0, 24 + etag='"abc"', 25 + display_name="Rosencrantz & Guildenstern <script>.epub.po", 26 + ) 27 + assert "&amp;" in xml 28 + assert "&lt;script&gt;" in xml 29 + # The raw '<' must not appear as the start of a tag we didn't write. 30 + assert "<script>" not in xml 31 + 32 + 33 + def test_passthrough_entry_xml_escapes_local_name(tmp_path): 34 + sneaky = tmp_path / "R&D <plan>.txt" 35 + sneaky.write_text("hello") 36 + xml = passthrough._entry_xml(sneaky, "/R&D <plan>.txt", tmp_path) 37 + assert "&amp;" in xml 38 + assert "&lt;plan&gt;" in xml 39 + assert "<plan>" not in xml 40 + 41 + 42 + # 2. 
Path-traversal guard -------------------------------------------------- 43 + 44 + def test_passthrough_refuses_parent_escape(tmp_path): 45 + root = tmp_path / "pass" 46 + root.mkdir() 47 + p = passthrough.Passthrough(str(root)) 48 + with pytest.raises(PermissionError): 49 + p._local("/../etc/passwd") 50 + 51 + 52 + def test_passthrough_refuses_sibling_prefix_escape(tmp_path): 53 + # Regression for the str.startswith antipattern — a sibling like 54 + # `/tmp/.../pass_evil` must not pass when root is `/tmp/.../pass`. 55 + root = tmp_path / "pass" 56 + root.mkdir() 57 + (tmp_path / "passerby").mkdir() 58 + p = passthrough.Passthrough(str(root)) 59 + with pytest.raises(PermissionError): 60 + p._local("/../passerby/secret") 61 + 62 + 63 + # 3. Cover-URL SSRF guard -------------------------------------------------- 64 + 65 + @pytest.mark.parametrize( 66 + "url", 67 + [ 68 + "https://covers.bookhive.buzz/img/abc.jpg", 69 + "https://images.example.com/covers/12345", 70 + ], 71 + ) 72 + def test_safe_cover_urls_accepted(url): 73 + assert _is_safe_cover_url(url) 74 + 75 + 76 + @pytest.mark.parametrize( 77 + "url", 78 + [ 79 + "http://covers.bookhive.buzz/img/abc.jpg", # plaintext 80 + "https://localhost/foo", # localhost hostname 81 + "https://localhost.localdomain/x", # localhost alias 82 + "https://foo.localhost/x", # .localhost suffix 83 + "https://127.0.0.1/x", # loopback IPv4 84 + "https://[::1]/x", # loopback IPv6 85 + "https://169.254.169.254/latest/meta-data/", # AWS IMDS 86 + "https://10.0.0.5/x", # RFC1918 87 + "https://192.168.1.1/x", # RFC1918 88 + "file:///etc/passwd", # not http(s) 89 + "gopher://example.com/", # not http(s) 90 + "", # empty 91 + "not a url at all", # malformed 92 + ], 93 + ) 94 + def test_unsafe_cover_urls_rejected(url): 95 + assert not _is_safe_cover_url(url)