Code and data for arewedecentralizedyet.online and related projects
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add a --now argument that enables us to pretend we're running in the past

+37 -9
+37 -9
data-processing/fedi-nodeinfo/parse-nodeinfo.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import argparse
 import sys
 import os
 import json
@@ -210,20 +211,45 @@
 
     return hostname, software_name, software_version, users_total, active_month, protocols, protocols_str
 
+def _parse_now(value: Optional[str]) -> "datetime.datetime":
+    import datetime
+
+    if value is None:
+        return datetime.datetime.now(datetime.timezone.utc)
+    iso_value = value.strip()
+    if iso_value.endswith("Z"):
+        iso_value = iso_value[:-1] + "+00:00"
+    try:
+        parsed = datetime.datetime.fromisoformat(iso_value)
+    except ValueError as exc:
+        raise argparse.ArgumentTypeError(f"Invalid --now value: {value}") from exc
+    if parsed.tzinfo is None:
+        return parsed.replace(tzinfo=datetime.timezone.utc)
+    return parsed.astimezone(datetime.timezone.utc)
+
+
 def main() -> None:
     import datetime
 
-    if len(sys.argv) < 3 or len(sys.argv) > 4:
-        print(f"Usage: {sys.argv[0]} nodeinfo_dir output.csv [max_age_days=30]", file=sys.stderr)
-        sys.exit(1)
+    parser = argparse.ArgumentParser(
+        description="Parse nodeinfo JSON into a CSV snapshot.",
+    )
+    parser.add_argument("nodeinfo_dir")
+    parser.add_argument("output_csv")
+    parser.add_argument("max_age_days", nargs="?", type=int, default=30)
+    parser.add_argument(
+        "--now",
+        type=_parse_now,
+        help="ISO-8601 datetime to use as the current time (UTC if naive).",
+    )
+    args = parser.parse_args()
 
-    nodeinfo_dir = sys.argv[1]
-    output_csv = sys.argv[2]
-    max_age_days = int(sys.argv[3]) if len(sys.argv) == 4 else 30
+    nodeinfo_dir = args.nodeinfo_dir
+    output_csv = args.output_csv
+    max_age_days = args.max_age_days
 
     cutoff = datetime.timedelta(days=max_age_days)
-    # Make 'now' timezone-aware (UTC)
-    now = datetime.datetime.now(datetime.timezone.utc)
+    now = args.now or datetime.datetime.now(datetime.timezone.utc)
 
     hostname_dirs = [
         os.path.join(nodeinfo_dir, d)
@@ -256,7 +282,9 @@
         if not candidates:
             continue
 
-        candidates_recent = [c for c in candidates if now - c[0] <= cutoff]
+        candidates_recent = [
+            c for c in candidates if c[0] <= now and now - c[0] <= cutoff
+        ]
         if not candidates_recent:
             continue
 