···11+# SPDX-License-Identifier: AGPL-3.0-only
22+# Copyright (c) 2026 sol pbc
33+44+"""Tests for dirty_days() utility."""
55+66+import time
77+88+import think.utils
99+from think.utils import dirty_days
def test_dirty_days_fixture(monkeypatch):
    """Fixture day 20250101 has stream.updated without daily.updated, so it is dirty."""
    # Clear the module-level journal path cache and point JOURNAL_PATH at the
    # checked-in fixture journal before querying.
    monkeypatch.setattr(think.utils, "_journal_path_cache", None)
    monkeypatch.setenv("JOURNAL_PATH", "tests/fixtures/journal")
    assert "20250101" in dirty_days()
def test_dirty_days_exclude(monkeypatch):
    """A day passed via ``exclude`` must be absent from the result."""
    # Clear the module-level journal path cache and point JOURNAL_PATH at the
    # checked-in fixture journal before querying.
    monkeypatch.setattr(think.utils, "_journal_path_cache", None)
    monkeypatch.setenv("JOURNAL_PATH", "tests/fixtures/journal")
    skipped_day = "20250101"
    assert skipped_day not in dirty_days(exclude={skipped_day})
def test_dirty_days_clean(tmp_path, monkeypatch):
    """Day with daily.updated newer than stream.updated is not dirty."""
    import os  # function-scope: nothing else in this test module needs os

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    # Clear the journal path cache exactly as the fixture-based tests in this
    # module do; presumably a path cached by an earlier test would otherwise
    # be used instead of tmp_path, and the value cached here would leak out.
    monkeypatch.setattr(think.utils, "_journal_path_cache", None)

    health_dir = tmp_path / "20260101" / "health"
    health_dir.mkdir(parents=True)
    stream_marker = health_dir / "stream.updated"
    daily_marker = health_dir / "daily.updated"
    stream_marker.touch()
    daily_marker.touch()

    # Pin the mtimes explicitly rather than sleeping between touches: the
    # ordering is then guaranteed regardless of filesystem timestamp
    # resolution, and the test does not wait.
    now = time.time()
    os.utime(stream_marker, (now - 60, now - 60))
    os.utime(daily_marker, (now, now))

    assert dirty_days() == []
def test_dirty_days_no_stream(tmp_path, monkeypatch):
    """Day without stream.updated is not dirty (no stream data)."""
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    # Clear the journal path cache exactly as the fixture-based tests in this
    # module do; presumably a path cached by an earlier test would otherwise
    # be used instead of tmp_path, and the value cached here would leak out.
    monkeypatch.setattr(think.utils, "_journal_path_cache", None)
    # A bare day directory: no health/ subdirectory and no markers at all.
    (tmp_path / "20260101").mkdir()
    assert dirty_days() == []
+12
think/dream.py
···14411441 if args.activity and not args.facet:
14421442 parser.error("--activity requires --facet")
1443144314441444+ # Auto-enable refresh for dirty days (full daily runs only)
14451445+ if not args.refresh and not args.segment and not args.segments:
14461446+ health_dir = day_dir / "health"
14471447+ stream_marker = health_dir / "stream.updated"
14481448+ daily_marker = health_dir / "daily.updated"
14491449+ if stream_marker.is_file() and (
14501450+ not daily_marker.is_file()
14511451+ or stream_marker.stat().st_mtime > daily_marker.stat().st_mtime
14521452+ ):
14531453+ args.refresh = True
14541454+ logging.info("Day %s has pending stream data, enabling refresh", day)
14551455+14441456 if args.activity and not args.day:
14451457 parser.error("--activity requires --day")
14461458
+35
think/utils.py
···212212 return days
def _marker_mtime(path: str) -> float | None:
    """Return the mtime of the marker at *path*, or None if it is not a regular file.

    One ``os.stat`` call stands in for an ``os.path.isfile`` check followed by
    ``os.path.getmtime``, so a marker that is removed or replaced between the
    two steps cannot raise out of the scan.
    """
    import stat  # function-scope: only this helper needs the stat module

    try:
        info = os.stat(path)
    except (OSError, ValueError):
        # Same failure set that os.path.isfile() treats as "not a file".
        return None
    return info.st_mtime if stat.S_ISREG(info.st_mode) else None


def dirty_days(exclude: set[str] | None = None) -> list[str]:
    """Return journal days with pending stream data not yet processed daily.

    A day is "dirty" when it has a ``health/stream.updated`` marker that is
    newer than its ``health/daily.updated`` marker (or daily.updated is missing).
    Days without ``stream.updated`` are skipped entirely. Markers with equal
    modification times are not considered dirty.

    Parameters
    ----------
    exclude : set of str, optional
        Day strings (YYYYMMDD) to skip.

    Returns
    -------
    list of str
        Sorted list of dirty day strings.
    """
    skipped = exclude or ()
    dirty: list[str] = []
    for name, path in day_dirs().items():
        if name in skipped:
            continue
        health = os.path.join(path, "health")
        stream_mtime = _marker_mtime(os.path.join(health, "stream.updated"))
        if stream_mtime is None:
            # No stream marker: nothing was streamed for this day.
            continue
        daily_mtime = _marker_mtime(os.path.join(health, "daily.updated"))
        # Dirty when the daily marker is absent or strictly older than the stream marker.
        if daily_mtime is None or stream_mtime > daily_mtime:
            dirty.append(name)
    dirty.sort()
    return dirty
248248+249249+215250def segment_path(day: str, segment: str, stream: str) -> Path:
216251 """Return absolute path for a segment directory within a stream.
217252