"""Integration tests for scripts/wiki-hygiene.py. Uses the tmp_wiki fixture so tests never touch the real wiki. """ from __future__ import annotations from datetime import date, timedelta from pathlib import Path from typing import Any import pytest from conftest import make_conversation, make_page, make_staging_page # --------------------------------------------------------------------------- # Backfill last_verified # --------------------------------------------------------------------------- class TestBackfill: def test_sets_last_verified_from_last_compiled( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/foo.md", last_compiled="2026-01-15") # Strip last_verified from the fixture-built file text = path.read_text() text = text.replace("last_verified: 2026-04-01\n", "") path.write_text(text) changes = wiki_hygiene.backfill_last_verified() assert len(changes) == 1 assert changes[0][1] == "last_compiled" reparsed = wiki_hygiene.parse_page(path) assert reparsed.frontmatter["last_verified"] == "2026-01-15" def test_skips_pages_already_verified( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/done.md", last_verified="2026-04-01") changes = wiki_hygiene.backfill_last_verified() assert changes == [] def test_dry_run_does_not_write( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/foo.md", last_compiled="2026-01-15") text = path.read_text().replace("last_verified: 2026-04-01\n", "") path.write_text(text) changes = wiki_hygiene.backfill_last_verified(dry_run=True) assert len(changes) == 1 reparsed = wiki_hygiene.parse_page(path) assert "last_verified" not in reparsed.frontmatter # --------------------------------------------------------------------------- # Confidence decay math # --------------------------------------------------------------------------- class TestConfidenceDecay: def test_recent_page_unchanged(self, wiki_hygiene: Any) -> None: recent = wiki_hygiene.today() - timedelta(days=30) assert wiki_hygiene.expected_confidence("high", recent, False) == "high" def test_six_months_decays_high_to_medium(self, wiki_hygiene: Any) -> None: old = wiki_hygiene.today() - timedelta(days=200) assert wiki_hygiene.expected_confidence("high", old, False) == "medium" def test_nine_months_decays_medium_to_low(self, wiki_hygiene: Any) -> None: old = wiki_hygiene.today() - timedelta(days=280) assert wiki_hygiene.expected_confidence("medium", old, False) == "low" def test_twelve_months_decays_to_stale(self, wiki_hygiene: Any) -> None: old = wiki_hygiene.today() - timedelta(days=400) assert wiki_hygiene.expected_confidence("high", old, False) == "stale" def test_superseded_is_always_stale(self, wiki_hygiene: Any) -> None: recent = wiki_hygiene.today() - timedelta(days=1) assert wiki_hygiene.expected_confidence("high", recent, True) == "stale" def test_none_date_leaves_confidence_alone(self, wiki_hygiene: Any) -> None: assert wiki_hygiene.expected_confidence("medium", None, False) == "medium" def test_bump_confidence_ladder(self, wiki_hygiene: Any) -> None: assert wiki_hygiene.bump_confidence("stale") == "low" assert wiki_hygiene.bump_confidence("low") == "medium" assert wiki_hygiene.bump_confidence("medium") == "high" assert wiki_hygiene.bump_confidence("high") == "high" # --------------------------------------------------------------------------- # Frontmatter repair # --------------------------------------------------------------------------- class TestFrontmatterRepair: def test_adds_missing_confidence( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = tmp_wiki / "patterns" / "no-conf.md" path.write_text( "---\ntitle: No Confidence\ntype: pattern\n" "last_compiled: 2026-04-01\nlast_verified: 2026-04-01\n---\n" "# Body\n\nSubstantive content here for testing purposes.\n" ) changes = wiki_hygiene.repair_frontmatter() assert any("confidence" in fields for _, fields in changes) reparsed = wiki_hygiene.parse_page(path) assert reparsed.frontmatter["confidence"] == "medium" def test_fixes_invalid_confidence( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/bad-conf.md", confidence="wat") changes = wiki_hygiene.repair_frontmatter() assert any(p == path for p, _ in changes) reparsed = wiki_hygiene.parse_page(path) assert reparsed.frontmatter["confidence"] == "medium" def test_leaves_valid_pages_alone( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/good.md") changes = wiki_hygiene.repair_frontmatter() assert changes == [] # --------------------------------------------------------------------------- # Archive and restore round-trip # --------------------------------------------------------------------------- class TestArchiveRestore: def test_archive_moves_file_and_updates_frontmatter( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/doomed.md") page = wiki_hygiene.parse_page(path) wiki_hygiene.archive_page(page, "test archive") assert not path.exists() archived = tmp_wiki / "archive" / "patterns" / "doomed.md" assert archived.exists() reparsed = wiki_hygiene.parse_page(archived) assert reparsed.frontmatter["archived_reason"] == "test archive" assert reparsed.frontmatter["original_path"] == "patterns/doomed.md" assert reparsed.frontmatter["confidence"] == "stale" def test_restore_reverses_archive( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: original = make_page(tmp_wiki, "patterns/zombie.md") page = wiki_hygiene.parse_page(original) wiki_hygiene.archive_page(page, "test") archived = tmp_wiki / "archive" / "patterns" / "zombie.md" archived_page = wiki_hygiene.parse_page(archived) wiki_hygiene.restore_page(archived_page) assert original.exists() assert not archived.exists() reparsed = wiki_hygiene.parse_page(original) assert reparsed.frontmatter["confidence"] == "medium" assert "archived_date" not in reparsed.frontmatter assert "archived_reason" not in reparsed.frontmatter assert "original_path" not in reparsed.frontmatter def test_archive_rejects_non_live_pages( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: # Page outside the live content dirs — should refuse to archive weird = tmp_wiki / "raw" / "weird.md" weird.parent.mkdir(parents=True, exist_ok=True) weird.write_text("---\ntitle: Weird\n---\nBody\n") page = wiki_hygiene.parse_page(weird) result = wiki_hygiene.archive_page(page, "test") assert result is None def test_archive_dry_run_does_not_move( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/safe.md") page = wiki_hygiene.parse_page(path) wiki_hygiene.archive_page(page, "test", dry_run=True) assert path.exists() assert not (tmp_wiki / "archive" / "patterns" / "safe.md").exists() # --------------------------------------------------------------------------- # Orphan detection # --------------------------------------------------------------------------- class TestOrphanDetection: def test_finds_orphan_page(self, wiki_hygiene: Any, tmp_wiki: Path) -> None: make_page(tmp_wiki, "patterns/lonely.md") orphans = wiki_hygiene.find_orphan_pages() assert len(orphans) == 1 assert orphans[0].path.stem == "lonely" def test_page_referenced_in_index_is_not_orphan( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/linked.md") idx = tmp_wiki / "index.md" idx.write_text(idx.read_text() + "- [Linked](patterns/linked.md) — desc\n") orphans = wiki_hygiene.find_orphan_pages() assert not any(p.path.stem == "linked" for p in orphans) def test_page_referenced_in_related_is_not_orphan( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/referenced.md") make_page( tmp_wiki, "patterns/referencer.md", related=["patterns/referenced.md"], ) orphans = wiki_hygiene.find_orphan_pages() stems = {p.path.stem for p in orphans} assert "referenced" not in stems def test_fix_orphan_adds_to_index( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/orphan.md", title="Orphan Test") page = wiki_hygiene.parse_page(path) wiki_hygiene.fix_orphan_page(page) idx_text = (tmp_wiki / "index.md").read_text() assert "patterns/orphan.md" in idx_text # --------------------------------------------------------------------------- # Broken cross-references # --------------------------------------------------------------------------- class TestBrokenCrossRefs: def test_detects_broken_link(self, wiki_hygiene: Any, tmp_wiki: Path) -> None: make_page( tmp_wiki, "patterns/source.md", body="See [nonexistent](patterns/does-not-exist.md) for details.\n", ) broken = wiki_hygiene.find_broken_cross_refs() assert len(broken) == 1 target, bad, suggested = broken[0] assert bad == "patterns/does-not-exist.md" def test_fuzzy_match_finds_near_miss( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/health-endpoint.md") make_page( tmp_wiki, "patterns/source.md", body="See [H](patterns/health-endpoints.md) — typo.\n", ) broken = wiki_hygiene.find_broken_cross_refs() assert len(broken) >= 1 _, bad, suggested = broken[0] assert suggested == "patterns/health-endpoint.md" def test_fix_broken_xref(self, wiki_hygiene: Any, tmp_wiki: Path) -> None: make_page(tmp_wiki, "patterns/health-endpoint.md") src = make_page( tmp_wiki, "patterns/source.md", body="See [H](patterns/health-endpoints.md).\n", ) broken = wiki_hygiene.find_broken_cross_refs() for target, bad, suggested in broken: wiki_hygiene.fix_broken_cross_ref(target, bad, suggested) text = src.read_text() assert "patterns/health-endpoints.md" not in text assert "patterns/health-endpoint.md" in text def test_archived_link_triggers_restore( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: # Page in archive, referenced by a live page make_page( tmp_wiki, "archive/patterns/ghost.md", confidence="stale", extra_fm={ "archived_date": "2026-01-01", "archived_reason": "test", "original_path": "patterns/ghost.md", }, ) make_page( tmp_wiki, "patterns/caller.md", body="See [ghost](patterns/ghost.md).\n", ) broken = wiki_hygiene.find_broken_cross_refs() assert len(broken) >= 1 for target, bad, suggested in broken: if suggested and suggested.startswith("__RESTORE__"): wiki_hygiene.fix_broken_cross_ref(target, bad, suggested) # After restore, ghost should be live again assert (tmp_wiki / "patterns" / "ghost.md").exists() # --------------------------------------------------------------------------- # Index drift # --------------------------------------------------------------------------- class TestIndexDrift: def test_finds_page_missing_from_index( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/missing.md") missing, stale = wiki_hygiene.find_index_drift() assert "patterns/missing.md" in missing assert stale == [] def test_finds_stale_index_entry( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: idx = tmp_wiki / "index.md" idx.write_text( idx.read_text() + "- [Ghost](patterns/ghost.md) — page that no longer exists\n" ) missing, stale = wiki_hygiene.find_index_drift() assert "patterns/ghost.md" in stale def test_fix_adds_missing_and_removes_stale( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/new.md") idx = tmp_wiki / "index.md" idx.write_text( idx.read_text() + "- [Gone](patterns/gone.md) — deleted page\n" ) missing, stale = wiki_hygiene.find_index_drift() wiki_hygiene.fix_index_drift(missing, stale) idx_text = idx.read_text() assert "patterns/new.md" in idx_text assert "patterns/gone.md" not in idx_text # --------------------------------------------------------------------------- # Empty stubs # --------------------------------------------------------------------------- class TestEmptyStubs: def test_flags_small_body(self, wiki_hygiene: Any, tmp_wiki: Path) -> None: make_page(tmp_wiki, "patterns/stub.md", body="# Stub\n\nShort.\n") stubs = wiki_hygiene.find_empty_stubs() assert len(stubs) == 1 assert stubs[0].path.stem == "stub" def test_ignores_substantive_pages( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: body = "# Full\n\n" + ("This is substantive content. " * 20) + "\n" make_page(tmp_wiki, "patterns/full.md", body=body) stubs = wiki_hygiene.find_empty_stubs() assert stubs == [] # --------------------------------------------------------------------------- # Conversation refresh signals # --------------------------------------------------------------------------- class TestConversationRefreshSignals: def test_picks_up_related_link( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/hot.md", last_verified="2026-01-01") make_conversation( tmp_wiki, "test", "2026-04-11-abc.md", date="2026-04-11", related=["patterns/hot.md"], ) refs = wiki_hygiene.scan_conversation_references() assert "patterns/hot.md" in refs assert refs["patterns/hot.md"] == date(2026, 4, 11) def test_apply_refresh_updates_last_verified( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/hot.md", last_verified="2026-01-01") make_conversation( tmp_wiki, "test", "2026-04-11-abc.md", date="2026-04-11", related=["patterns/hot.md"], ) refs = wiki_hygiene.scan_conversation_references() changes = wiki_hygiene.apply_refresh_signals(refs) assert len(changes) == 1 reparsed = wiki_hygiene.parse_page(path) assert reparsed.frontmatter["last_verified"] == "2026-04-11" def test_bumps_low_confidence_to_medium( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page( tmp_wiki, "patterns/reviving.md", confidence="low", last_verified="2026-01-01", ) make_conversation( tmp_wiki, "test", "2026-04-11-ref.md", date="2026-04-11", related=["patterns/reviving.md"], ) refs = wiki_hygiene.scan_conversation_references() wiki_hygiene.apply_refresh_signals(refs) reparsed = wiki_hygiene.parse_page(path) assert reparsed.frontmatter["confidence"] == "medium" # --------------------------------------------------------------------------- # Auto-restore # --------------------------------------------------------------------------- class TestAutoRestore: def test_restores_page_referenced_in_conversation( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: # Archive a page path = make_page(tmp_wiki, "patterns/returning.md") page = wiki_hygiene.parse_page(path) wiki_hygiene.archive_page(page, "aging out") assert (tmp_wiki / "archive" / "patterns" / "returning.md").exists() # Reference it in a conversation make_conversation( tmp_wiki, "test", "2026-04-12-ref.md", related=["patterns/returning.md"], ) # Auto-restore restored = wiki_hygiene.auto_restore_archived() assert len(restored) == 1 assert (tmp_wiki / "patterns" / "returning.md").exists() assert not (tmp_wiki / "archive" / "patterns" / "returning.md").exists() # --------------------------------------------------------------------------- # Staging / archive index sync # --------------------------------------------------------------------------- class TestIndexSync: def test_staging_sync_regenerates_index( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_staging_page(tmp_wiki, "patterns/pending.md") changed = wiki_hygiene.sync_staging_index() assert changed is True text = (tmp_wiki / "staging" / "index.md").read_text() assert "pending.md" in text def test_staging_sync_idempotent( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_staging_page(tmp_wiki, "patterns/pending.md") wiki_hygiene.sync_staging_index() changed_second = wiki_hygiene.sync_staging_index() assert changed_second is False def test_archive_sync_regenerates_index( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page( tmp_wiki, "archive/patterns/old.md", confidence="stale", extra_fm={ "archived_date": "2026-01-01", "archived_reason": "test", "original_path": "patterns/old.md", }, ) changed = wiki_hygiene.sync_archive_index() assert changed is True text = (tmp_wiki / "archive" / "index.md").read_text() assert "old" in text.lower() # --------------------------------------------------------------------------- # State drift detection # --------------------------------------------------------------------------- class TestStateDrift: def test_detects_missing_raw_file( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: import json state = { "harvested_urls": { "https://example.com": { "raw_file": "raw/harvested/missing.md", "wiki_pages": [], } } } (tmp_wiki / ".harvest-state.json").write_text(json.dumps(state)) issues = wiki_hygiene.find_state_drift() assert any("missing.md" in i for i in issues) def test_empty_state_has_no_drift( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: # Fixture already creates an empty .harvest-state.json issues = wiki_hygiene.find_state_drift() assert issues == [] # --------------------------------------------------------------------------- # Hygiene state file # --------------------------------------------------------------------------- class TestHygieneState: def test_load_returns_defaults_when_missing( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: state = wiki_hygiene.load_hygiene_state() assert state["last_quick_run"] is None assert state["pages_checked"] == {} def test_save_and_reload( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: state = wiki_hygiene.load_hygiene_state() state["last_quick_run"] = "2026-04-12T00:00:00Z" wiki_hygiene.save_hygiene_state(state) reloaded = wiki_hygiene.load_hygiene_state() assert reloaded["last_quick_run"] == "2026-04-12T00:00:00Z" def test_mark_page_checked_stores_hash( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/tracked.md") page = wiki_hygiene.parse_page(path) state = wiki_hygiene.load_hygiene_state() wiki_hygiene.mark_page_checked(state, page, "quick") entry = state["pages_checked"]["patterns/tracked.md"] assert entry["content_hash"].startswith("sha256:") assert "last_checked_quick" in entry def test_page_changed_since_detects_body_change( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: path = make_page(tmp_wiki, "patterns/mutable.md", body="# One\n\nOne body.\n") page = wiki_hygiene.parse_page(path) state = wiki_hygiene.load_hygiene_state() wiki_hygiene.mark_page_checked(state, page, "quick") assert not wiki_hygiene.page_changed_since(state, page, "quick") # Mutate the body path.write_text(path.read_text().replace("One body", "Two body")) new_page = wiki_hygiene.parse_page(path) assert wiki_hygiene.page_changed_since(state, new_page, "quick") # --------------------------------------------------------------------------- # Full quick-hygiene run end-to-end (dry-run, idempotent) # --------------------------------------------------------------------------- class TestRunQuickHygiene: def test_empty_wiki_produces_empty_report( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: report = wiki_hygiene.run_quick_hygiene(dry_run=True) assert report.backfilled == [] assert report.archived == [] def test_real_run_is_idempotent( self, wiki_hygiene: Any, tmp_wiki: Path ) -> None: make_page(tmp_wiki, "patterns/one.md") make_page(tmp_wiki, "patterns/two.md") report1 = wiki_hygiene.run_quick_hygiene() # Second run should have 0 work report2 = wiki_hygiene.run_quick_hygiene() assert report2.backfilled == [] assert report2.decayed == [] assert report2.archived == [] assert report2.frontmatter_fixes == []