my prefect server setup prefect-metrics.waow.tech
python orchestration
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix: use wrangler for CF Pages deploy instead of raw API

The site has Pages Functions (functions/ dir) that wrangler compiles into
a _worker.bundle. The raw Direct Upload API doesn't bundle functions,
so deploying without wrangler causes 500 errors on the live site.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+37 -85
+37 -85
flows/atlas.py
··· 2 2 Rebuild the atlas (2D semantic map) and deploy to Cloudflare Pages. 3 3 4 4 Clones leaflet-search, runs the build-atlas script (UMAP + HDBSCAN), 5 - then deploys the site to Cloudflare Pages via the Direct Upload API. 5 + then deploys the site to Cloudflare Pages via wrangler. 6 6 7 7 Requires: 8 8 - Secret block "tpuf-token" (turbopuffer API key) 9 9 - Secret block "cloudflare-api-token" (Pages edit permission) 10 10 """ 11 11 12 - import hashlib 13 - import json 14 12 import os 15 13 import subprocess 16 14 import tempfile 17 15 from pathlib import Path 18 16 19 - import httpx 20 17 from prefect import flow, get_run_logger, task 21 18 from prefect.blocks.system import Secret 22 19 23 20 REPO_URL = "https://github.com/zzstoatzz/leaflet-search.git" 24 - CF_API = "https://api.cloudflare.com/client/v4" 25 - CF_ACCOUNT_ID = "8feb33b5fb57ce2bc093bc6f4141f40a" 26 21 CF_PROJECT = "leaflet-search" 27 22 28 23 ··· 63 58 64 59 @task 65 60 def deploy_to_pages(site_dir: Path, api_token: str) -> str: 66 - """Deploy site/ to Cloudflare Pages via Direct Upload API.""" 67 - logger = get_run_logger() 68 - headers = {"Authorization": f"Bearer {api_token}"} 61 + """Deploy site/ to Cloudflare Pages via wrangler. 69 62 70 - # build manifest: path -> truncated SHA-256 71 - manifest: dict[str, str] = {} 72 - content_by_hash: dict[str, bytes] = {} 63 + Uses wrangler because the site has Pages Functions (functions/ dir) 64 + that must be compiled into a _worker.bundle. The raw Direct Upload API 65 + doesn't handle function bundling, and deploying without it causes 500s. 66 + """ 67 + logger = get_run_logger() 68 + env = {**os.environ, "CLOUDFLARE_API_TOKEN": api_token} 73 69 74 - for path in sorted(site_dir.rglob("*")): 75 - if path.is_dir() or path.name.startswith("."): 76 - continue 77 - content = path.read_bytes() 78 - h = hashlib.sha256(content).hexdigest()[:32] 79 - rel = "/" + str(path.relative_to(site_dir)) 80 - manifest[rel] = h 81 - content_by_hash[h] = content 82 - 83 - logger.info(f"deploying {len(manifest)} files") 84 - 85 - # create deployment — CF expects multipart/form-data (-F fields in curl) 86 - # httpx: files={(name, (None, value))} sends multipart form fields 87 - resp = httpx.post( 88 - f"{CF_API}/accounts/{CF_ACCOUNT_ID}/pages/projects/{CF_PROJECT}/deployments", 89 - headers=headers, 90 - files={ 91 - "manifest": (None, json.dumps(manifest), "application/json"), 92 - "branch": (None, "main"), 93 - }, 94 - timeout=60, 70 + # install node + wrangler if not already available 71 + subprocess.run( 72 + ["bash", "-c", 73 + "command -v npx >/dev/null 2>&1 || " 74 + "(apt-get update -qq && apt-get install -y -qq nodejs npm >/dev/null 2>&1)"], 75 + env=env, capture_output=True, timeout=120, 95 76 ) 96 - if not resp.is_success: 97 - logger.error(f"create deployment failed ({resp.status_code}): {resp.text[:500]}") 98 - resp.raise_for_status() 99 - body = resp.json() 100 - deployment = body["result"] 101 - logger.info(f"deployment {deployment['id']} created, keys: {list(deployment.keys())}") 102 - jwt = deployment.get("jwt") or body.get("jwt") 103 - if not jwt: 104 - logger.error(f"no jwt in response: {json.dumps(body, indent=2)[:1000]}") 105 - raise RuntimeError("no jwt in deployment response") 106 - 107 - # check which files need uploading 108 - jwt_headers = {"Authorization": f"Bearer {jwt}"} 109 - resp = httpx.post( 110 - f"{CF_API}/pages/assets/check-missing", 111 - headers=jwt_headers, 112 - json={"hashes": list(content_by_hash.keys())}, 113 - timeout=30, 77 + subprocess.run( 78 + ["npm", "install", "--global", "wrangler"], 79 + env=env, capture_output=True, text=True, timeout=120, 114 80 ) 115 - if not resp.is_success: 116 - logger.error(f"check-missing failed ({resp.status_code}): {resp.text[:500]}") 117 - resp.raise_for_status() 118 - missing = set(resp.json()) 119 - logger.info(f"{len(missing)} files to upload ({len(manifest) - len(missing)} cached)") 120 81 121 - # upload missing files 122 - if missing: 123 - batch: list[tuple[str, tuple[str, bytes, str]]] = [] 124 - for h in missing: 125 - batch.append((h, ("blob", content_by_hash[h], "application/octet-stream"))) 126 - if len(batch) >= 50: 127 - r = httpx.post( 128 - f"{CF_API}/pages/assets/upload", 129 - headers=jwt_headers, 130 - files=batch, 131 - timeout=120, 132 - ) 133 - if not r.is_success: 134 - logger.error(f"upload failed ({r.status_code}): {r.text[:500]}") 135 - r.raise_for_status() 136 - batch = [] 137 - if batch: 138 - r = httpx.post( 139 - f"{CF_API}/pages/assets/upload", 140 - headers=jwt_headers, 141 - files=batch, 142 - timeout=120, 143 - ) 144 - if not r.is_success: 145 - logger.error(f"upload failed ({r.status_code}): {r.text[:500]}") 146 - r.raise_for_status() 82 + result = subprocess.run( 83 + ["wrangler", "pages", "deploy", ".", 84 + f"--project-name={CF_PROJECT}", "--branch=main", "--commit-dirty=true"], 85 + cwd=str(site_dir), 86 + env=env, 87 + capture_output=True, 88 + text=True, 89 + timeout=180, 90 + ) 91 + for line in result.stdout.strip().splitlines(): 92 + logger.info(line) 93 + if result.returncode != 0: 94 + logger.error(result.stderr) 95 + raise RuntimeError(f"wrangler deploy failed:\n{result.stderr}") 147 96 148 - url = deployment.get("url", "") 149 - logger.info(f"deployed: {url}") 150 - return url 97 + # extract deployment URL from wrangler output 98 + for line in reversed(result.stdout.strip().splitlines()): 99 + if "https://" in line: 100 + url = line.split("https://", 1)[1].split()[0] 101 + return f"https://{url}" 102 + return "" 151 103 152 104 153 105 @flow(name="rebuild-atlas", log_prints=True)