Initial commit — memex
A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
This commit is contained in:
616
tests/test_wiki_hygiene.py
Normal file
616
tests/test_wiki_hygiene.py
Normal file
@@ -0,0 +1,616 @@
|
||||
"""Integration tests for scripts/wiki-hygiene.py.
|
||||
|
||||
Uses the tmp_wiki fixture so tests never touch the real wiki.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from conftest import make_conversation, make_page, make_staging_page
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backfill last_verified
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBackfill:
    """Checks for ``backfill_last_verified``."""

    def test_sets_last_verified_from_last_compiled(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page without ``last_verified`` is backfilled from ``last_compiled``."""
        page_file = make_page(
            tmp_wiki, "patterns/foo.md", last_compiled="2026-01-15"
        )
        # Drop the last_verified line that the fixture helper wrote.
        page_file.write_text(
            page_file.read_text().replace("last_verified: 2026-04-01\n", "")
        )

        results = wiki_hygiene.backfill_last_verified()
        assert len(results) == 1
        assert results[0][1] == "last_compiled"

        after = wiki_hygiene.parse_page(page_file)
        assert after.frontmatter["last_verified"] == "2026-01-15"

    def test_skips_pages_already_verified(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page that already carries ``last_verified`` yields no changes."""
        make_page(tmp_wiki, "patterns/done.md", last_verified="2026-04-01")
        results = wiki_hygiene.backfill_last_verified()
        assert results == []

    def test_dry_run_does_not_write(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """With ``dry_run=True`` the change is reported but the file is untouched."""
        page_file = make_page(
            tmp_wiki, "patterns/foo.md", last_compiled="2026-01-15"
        )
        stripped = page_file.read_text().replace("last_verified: 2026-04-01\n", "")
        page_file.write_text(stripped)

        results = wiki_hygiene.backfill_last_verified(dry_run=True)
        assert len(results) == 1

        after = wiki_hygiene.parse_page(page_file)
        assert "last_verified" not in after.frontmatter
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Confidence decay math
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfidenceDecay:
    """Checks for ``expected_confidence`` and ``bump_confidence``."""

    @staticmethod
    def _days_ago(wiki_hygiene: Any, days: int) -> Any:
        """Return ``wiki_hygiene.today()`` shifted back by *days* days."""
        return wiki_hygiene.today() - timedelta(days=days)

    def test_recent_page_unchanged(self, wiki_hygiene: Any) -> None:
        """A page verified 30 days ago stays at ``high``."""
        when = self._days_ago(wiki_hygiene, 30)
        assert wiki_hygiene.expected_confidence("high", when, False) == "high"

    def test_six_months_decays_high_to_medium(self, wiki_hygiene: Any) -> None:
        """At 200 days, ``high`` is expected to drop to ``medium``."""
        when = self._days_ago(wiki_hygiene, 200)
        assert wiki_hygiene.expected_confidence("high", when, False) == "medium"

    def test_nine_months_decays_medium_to_low(self, wiki_hygiene: Any) -> None:
        """At 280 days, ``medium`` is expected to drop to ``low``."""
        when = self._days_ago(wiki_hygiene, 280)
        assert wiki_hygiene.expected_confidence("medium", when, False) == "low"

    def test_twelve_months_decays_to_stale(self, wiki_hygiene: Any) -> None:
        """At 400 days, ``high`` is expected to become ``stale``."""
        when = self._days_ago(wiki_hygiene, 400)
        assert wiki_hygiene.expected_confidence("high", when, False) == "stale"

    def test_superseded_is_always_stale(self, wiki_hygiene: Any) -> None:
        """The superseded flag forces ``stale`` even for a one-day-old page."""
        when = self._days_ago(wiki_hygiene, 1)
        assert wiki_hygiene.expected_confidence("high", when, True) == "stale"

    def test_none_date_leaves_confidence_alone(self, wiki_hygiene: Any) -> None:
        """Without a date the current confidence is returned as-is."""
        assert wiki_hygiene.expected_confidence("medium", None, False) == "medium"

    def test_bump_confidence_ladder(self, wiki_hygiene: Any) -> None:
        """Each level bumps one rung up; ``high`` stays at ``high``."""
        ladder = (
            ("stale", "low"),
            ("low", "medium"),
            ("medium", "high"),
            ("high", "high"),
        )
        for current, bumped in ladder:
            assert wiki_hygiene.bump_confidence(current) == bumped
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontmatter repair
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFrontmatterRepair:
    """Checks for ``repair_frontmatter``."""

    def test_adds_missing_confidence(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page with no ``confidence`` field is repaired to ``medium``."""
        # Hand-written page: the frontmatter deliberately omits `confidence`.
        page_text = (
            "---\ntitle: No Confidence\ntype: pattern\n"
            "last_compiled: 2026-04-01\nlast_verified: 2026-04-01\n---\n"
            "# Body\n\nSubstantive content here for testing purposes.\n"
        )
        page_file = tmp_wiki / "patterns" / "no-conf.md"
        page_file.write_text(page_text)

        repairs = wiki_hygiene.repair_frontmatter()
        assert any("confidence" in fields for _, fields in repairs)

        after = wiki_hygiene.parse_page(page_file)
        assert after.frontmatter["confidence"] == "medium"

    def test_fixes_invalid_confidence(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """An unrecognised ``confidence`` value is replaced with ``medium``."""
        page_file = make_page(tmp_wiki, "patterns/bad-conf.md", confidence="wat")

        repairs = wiki_hygiene.repair_frontmatter()
        assert any(p == page_file for p, _ in repairs)

        after = wiki_hygiene.parse_page(page_file)
        assert after.frontmatter["confidence"] == "medium"

    def test_leaves_valid_pages_alone(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page built with the helper's defaults needs no repair."""
        make_page(tmp_wiki, "patterns/good.md")
        repairs = wiki_hygiene.repair_frontmatter()
        assert repairs == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Archive and restore round-trip
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestArchiveRestore:
    """Round-trip checks for ``archive_page`` and ``restore_page``."""

    def test_archive_moves_file_and_updates_frontmatter(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Archiving moves the file under ``archive/`` and stamps its frontmatter."""
        live_file = make_page(tmp_wiki, "patterns/doomed.md")
        wiki_hygiene.archive_page(wiki_hygiene.parse_page(live_file), "test archive")

        archive_file = tmp_wiki / "archive" / "patterns" / "doomed.md"
        assert not live_file.exists()
        assert archive_file.exists()

        fm = wiki_hygiene.parse_page(archive_file).frontmatter
        assert fm["archived_reason"] == "test archive"
        assert fm["original_path"] == "patterns/doomed.md"
        assert fm["confidence"] == "stale"

    def test_restore_reverses_archive(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Restoring puts the file back and clears the archive-only fields."""
        live_file = make_page(tmp_wiki, "patterns/zombie.md")
        wiki_hygiene.archive_page(wiki_hygiene.parse_page(live_file), "test")

        archive_file = tmp_wiki / "archive" / "patterns" / "zombie.md"
        wiki_hygiene.restore_page(wiki_hygiene.parse_page(archive_file))

        assert live_file.exists()
        assert not archive_file.exists()

        fm = wiki_hygiene.parse_page(live_file).frontmatter
        assert fm["confidence"] == "medium"
        for archive_only_key in ("archived_date", "archived_reason", "original_path"):
            assert archive_only_key not in fm

    def test_archive_rejects_non_live_pages(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page outside the live content dirs is refused (returns ``None``)."""
        stray_file = tmp_wiki / "raw" / "weird.md"
        stray_file.parent.mkdir(parents=True, exist_ok=True)
        stray_file.write_text("---\ntitle: Weird\n---\nBody\n")

        stray_page = wiki_hygiene.parse_page(stray_file)
        outcome = wiki_hygiene.archive_page(stray_page, "test")
        assert outcome is None

    def test_archive_dry_run_does_not_move(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """With ``dry_run=True`` the file stays put and nothing lands in archive."""
        live_file = make_page(tmp_wiki, "patterns/safe.md")
        wiki_hygiene.archive_page(
            wiki_hygiene.parse_page(live_file), "test", dry_run=True
        )

        would_be_archive = tmp_wiki / "archive" / "patterns" / "safe.md"
        assert live_file.exists()
        assert not would_be_archive.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orphan detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOrphanDetection:
    """Checks for ``find_orphan_pages`` and ``fix_orphan_page``."""

    def test_finds_orphan_page(self, wiki_hygiene: Any, tmp_wiki: Path) -> None:
        """A page nothing links to is the single reported orphan."""
        make_page(tmp_wiki, "patterns/lonely.md")
        found = wiki_hygiene.find_orphan_pages()
        assert len(found) == 1
        assert found[0].path.stem == "lonely"

    def test_page_referenced_in_index_is_not_orphan(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page listed in ``index.md`` is not reported as an orphan."""
        make_page(tmp_wiki, "patterns/linked.md")
        index_file = tmp_wiki / "index.md"
        existing = index_file.read_text()
        index_file.write_text(existing + "- [Linked](patterns/linked.md) — desc\n")

        found = wiki_hygiene.find_orphan_pages()
        assert not any(page.path.stem == "linked" for page in found)

    def test_page_referenced_in_related_is_not_orphan(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page named in another page's ``related`` list is not an orphan."""
        make_page(tmp_wiki, "patterns/referenced.md")
        make_page(
            tmp_wiki, "patterns/referencer.md", related=["patterns/referenced.md"]
        )

        orphan_stems = {page.path.stem for page in wiki_hygiene.find_orphan_pages()}
        assert "referenced" not in orphan_stems

    def test_fix_orphan_adds_to_index(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Fixing an orphan makes its path appear in ``index.md``."""
        orphan_file = make_page(tmp_wiki, "patterns/orphan.md", title="Orphan Test")
        wiki_hygiene.fix_orphan_page(wiki_hygiene.parse_page(orphan_file))

        index_text = (tmp_wiki / "index.md").read_text()
        assert "patterns/orphan.md" in index_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Broken cross-references
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBrokenCrossRefs:
    """Checks for ``find_broken_cross_refs`` and ``fix_broken_cross_ref``.

    Each entry returned by ``find_broken_cross_refs`` is unpacked here as a
    3-tuple ``(target, bad, suggested)``.
    """

    def test_detects_broken_link(self, wiki_hygiene: Any, tmp_wiki: Path) -> None:
        """A link to a non-existent page is reported with its bad path."""
        make_page(
            tmp_wiki,
            "patterns/source.md",
            body="See [nonexistent](patterns/does-not-exist.md) for details.\n",
        )
        broken = wiki_hygiene.find_broken_cross_refs()
        assert len(broken) == 1
        # Only the bad link text matters here; ignore the other two fields.
        _, bad, _ = broken[0]
        assert bad == "patterns/does-not-exist.md"

    def test_fuzzy_match_finds_near_miss(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A near-miss path gets the existing page as its suggestion."""
        make_page(tmp_wiki, "patterns/health-endpoint.md")
        make_page(
            tmp_wiki,
            "patterns/source.md",
            body="See [H](patterns/health-endpoints.md) — typo.\n",
        )
        broken = wiki_hygiene.find_broken_cross_refs()
        assert len(broken) >= 1
        # Only the suggested replacement is under test.
        _, _, suggested = broken[0]
        assert suggested == "patterns/health-endpoint.md"

    def test_fix_broken_xref(self, wiki_hygiene: Any, tmp_wiki: Path) -> None:
        """Applying the fix rewrites the bad link to the suggested path."""
        make_page(tmp_wiki, "patterns/health-endpoint.md")
        src = make_page(
            tmp_wiki,
            "patterns/source.md",
            body="See [H](patterns/health-endpoints.md).\n",
        )
        for target, bad, suggested in wiki_hygiene.find_broken_cross_refs():
            wiki_hygiene.fix_broken_cross_ref(target, bad, suggested)

        text = src.read_text()
        assert "patterns/health-endpoints.md" not in text
        assert "patterns/health-endpoint.md" in text

    def test_archived_link_triggers_restore(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A live link to an archived page leads to that page being restored."""
        # Page in archive, referenced by a live page
        make_page(
            tmp_wiki,
            "archive/patterns/ghost.md",
            confidence="stale",
            extra_fm={
                "archived_date": "2026-01-01",
                "archived_reason": "test",
                "original_path": "patterns/ghost.md",
            },
        )
        make_page(
            tmp_wiki,
            "patterns/caller.md",
            body="See [ghost](patterns/ghost.md).\n",
        )
        broken = wiki_hygiene.find_broken_cross_refs()
        assert len(broken) >= 1
        for target, bad, suggested in broken:
            # Only act on suggestions carrying the restore marker prefix.
            if suggested and suggested.startswith("__RESTORE__"):
                wiki_hygiene.fix_broken_cross_ref(target, bad, suggested)

        # After restore, ghost should be live again
        assert (tmp_wiki / "patterns" / "ghost.md").exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Index drift
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIndexDrift:
    """Checks for ``find_index_drift`` and ``fix_index_drift``."""

    def test_finds_page_missing_from_index(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page absent from ``index.md`` is listed as missing; nothing is stale."""
        make_page(tmp_wiki, "patterns/missing.md")
        not_indexed, dangling = wiki_hygiene.find_index_drift()
        assert "patterns/missing.md" in not_indexed
        assert dangling == []

    def test_finds_stale_index_entry(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """An index entry pointing at a non-existent page is listed as stale."""
        index_file = tmp_wiki / "index.md"
        ghost_entry = "- [Ghost](patterns/ghost.md) — page that no longer exists\n"
        index_file.write_text(index_file.read_text() + ghost_entry)

        _not_indexed, dangling = wiki_hygiene.find_index_drift()
        assert "patterns/ghost.md" in dangling

    def test_fix_adds_missing_and_removes_stale(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Fixing drift adds the unlisted page and drops the dangling entry."""
        make_page(tmp_wiki, "patterns/new.md")
        index_file = tmp_wiki / "index.md"
        gone_entry = "- [Gone](patterns/gone.md) — deleted page\n"
        index_file.write_text(index_file.read_text() + gone_entry)

        not_indexed, dangling = wiki_hygiene.find_index_drift()
        wiki_hygiene.fix_index_drift(not_indexed, dangling)

        index_text = index_file.read_text()
        assert "patterns/new.md" in index_text
        assert "patterns/gone.md" not in index_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Empty stubs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEmptyStubs:
    """Checks for ``find_empty_stubs``."""

    def test_flags_small_body(self, wiki_hygiene: Any, tmp_wiki: Path) -> None:
        """A page with a very short body is the single reported stub."""
        make_page(tmp_wiki, "patterns/stub.md", body="# Stub\n\nShort.\n")
        found = wiki_hygiene.find_empty_stubs()
        assert len(found) == 1
        assert found[0].path.stem == "stub"

    def test_ignores_substantive_pages(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A page with a long body is not reported as a stub."""
        filler = "This is substantive content. " * 20
        make_page(tmp_wiki, "patterns/full.md", body="# Full\n\n" + filler + "\n")
        found = wiki_hygiene.find_empty_stubs()
        assert found == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Conversation refresh signals
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConversationRefreshSignals:
    """Checks for ``scan_conversation_references`` and ``apply_refresh_signals``."""

    def test_picks_up_related_link(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A conversation's ``related`` entry maps the page to the conversation date."""
        make_page(tmp_wiki, "patterns/hot.md", last_verified="2026-01-01")
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-11-abc.md",
            date="2026-04-11",
            related=["patterns/hot.md"],
        )

        seen = wiki_hygiene.scan_conversation_references()
        assert "patterns/hot.md" in seen
        assert seen["patterns/hot.md"] == date(2026, 4, 11)

    def test_apply_refresh_updates_last_verified(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Applying the signals moves ``last_verified`` to the conversation date."""
        page_file = make_page(
            tmp_wiki, "patterns/hot.md", last_verified="2026-01-01"
        )
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-11-abc.md",
            date="2026-04-11",
            related=["patterns/hot.md"],
        )

        seen = wiki_hygiene.scan_conversation_references()
        applied = wiki_hygiene.apply_refresh_signals(seen)
        assert len(applied) == 1

        after = wiki_hygiene.parse_page(page_file)
        assert after.frontmatter["last_verified"] == "2026-04-11"

    def test_bumps_low_confidence_to_medium(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A ``low`` page referenced by a later conversation ends up ``medium``."""
        page_file = make_page(
            tmp_wiki,
            "patterns/reviving.md",
            confidence="low",
            last_verified="2026-01-01",
        )
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-11-ref.md",
            date="2026-04-11",
            related=["patterns/reviving.md"],
        )

        wiki_hygiene.apply_refresh_signals(
            wiki_hygiene.scan_conversation_references()
        )

        after = wiki_hygiene.parse_page(page_file)
        assert after.frontmatter["confidence"] == "medium"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-restore
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAutoRestore:
    """Checks for ``auto_restore_archived``."""

    def test_restores_page_referenced_in_conversation(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """An archived page named in a conversation is moved back to its live path."""
        live_file = tmp_wiki / "patterns" / "returning.md"
        archive_file = tmp_wiki / "archive" / "patterns" / "returning.md"

        # Step 1: create the page and archive it.
        created = make_page(tmp_wiki, "patterns/returning.md")
        wiki_hygiene.archive_page(wiki_hygiene.parse_page(created), "aging out")
        assert archive_file.exists()

        # Step 2: a conversation lists the page in its `related` entries.
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-12-ref.md",
            related=["patterns/returning.md"],
        )

        # Step 3: auto-restore should bring exactly that page back.
        brought_back = wiki_hygiene.auto_restore_archived()
        assert len(brought_back) == 1
        assert live_file.exists()
        assert not archive_file.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Staging / archive index sync
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIndexSync:
    """Checks for ``sync_staging_index`` and ``sync_archive_index``."""

    def test_staging_sync_regenerates_index(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Syncing reports a change and the staging index mentions the page."""
        make_staging_page(tmp_wiki, "patterns/pending.md")

        did_change = wiki_hygiene.sync_staging_index()
        assert did_change is True

        staging_index = tmp_wiki / "staging" / "index.md"
        assert "pending.md" in staging_index.read_text()

    def test_staging_sync_idempotent(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A second sync with no new pages reports no change."""
        make_staging_page(tmp_wiki, "patterns/pending.md")
        wiki_hygiene.sync_staging_index()

        second_result = wiki_hygiene.sync_staging_index()
        assert second_result is False

    def test_archive_sync_regenerates_index(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Syncing reports a change and the archive index mentions the page."""
        archive_fields = {
            "archived_date": "2026-01-01",
            "archived_reason": "test",
            "original_path": "patterns/old.md",
        }
        make_page(
            tmp_wiki,
            "archive/patterns/old.md",
            confidence="stale",
            extra_fm=archive_fields,
        )

        did_change = wiki_hygiene.sync_archive_index()
        assert did_change is True

        archive_index = tmp_wiki / "archive" / "index.md"
        assert "old" in archive_index.read_text().lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State drift detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStateDrift:
    """Checks for ``find_state_drift``."""

    def test_detects_missing_raw_file(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A harvested URL whose ``raw_file`` does not exist is reported."""
        import json

        # State entry pointing at a raw file that is never created.
        url_entry = {
            "raw_file": "raw/harvested/missing.md",
            "wiki_pages": [],
        }
        harvest_state = {"harvested_urls": {"https://example.com": url_entry}}
        state_file = tmp_wiki / ".harvest-state.json"
        state_file.write_text(json.dumps(harvest_state))

        problems = wiki_hygiene.find_state_drift()
        assert any("missing.md" in problem for problem in problems)

    def test_empty_state_has_no_drift(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """With the fixture's untouched state file, no drift is reported."""
        # Fixture already creates an empty .harvest-state.json
        problems = wiki_hygiene.find_state_drift()
        assert problems == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hygiene state file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHygieneState:
    """Checks for the hygiene state helpers (load/save/mark/changed-since)."""

    def test_load_returns_defaults_when_missing(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Loading with no saved state yields the default values."""
        loaded = wiki_hygiene.load_hygiene_state()
        assert loaded["last_quick_run"] is None
        assert loaded["pages_checked"] == {}

    def test_save_and_reload(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A value written via save is returned by the next load."""
        stamp = "2026-04-12T00:00:00Z"
        current = wiki_hygiene.load_hygiene_state()
        current["last_quick_run"] = stamp
        wiki_hygiene.save_hygiene_state(current)

        assert wiki_hygiene.load_hygiene_state()["last_quick_run"] == stamp

    def test_mark_page_checked_stores_hash(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """Marking a page records a ``sha256:`` hash and a quick-check entry."""
        page_file = make_page(tmp_wiki, "patterns/tracked.md")
        tracked = wiki_hygiene.parse_page(page_file)
        current = wiki_hygiene.load_hygiene_state()

        wiki_hygiene.mark_page_checked(current, tracked, "quick")

        record = current["pages_checked"]["patterns/tracked.md"]
        assert record["content_hash"].startswith("sha256:")
        assert "last_checked_quick" in record

    def test_page_changed_since_detects_body_change(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """An edited body flips ``page_changed_since`` from false to true."""
        page_file = make_page(
            tmp_wiki, "patterns/mutable.md", body="# One\n\nOne body.\n"
        )
        before = wiki_hygiene.parse_page(page_file)
        current = wiki_hygiene.load_hygiene_state()
        wiki_hygiene.mark_page_checked(current, before, "quick")

        # Unmodified page: no change since it was marked.
        assert not wiki_hygiene.page_changed_since(current, before, "quick")

        # Edit the body on disk, re-parse, and expect a detected change.
        edited_text = page_file.read_text().replace("One body", "Two body")
        page_file.write_text(edited_text)
        after = wiki_hygiene.parse_page(page_file)
        assert wiki_hygiene.page_changed_since(current, after, "quick")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full quick-hygiene run end-to-end (dry-run, idempotent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRunQuickHygiene:
    """End-to-end checks for ``run_quick_hygiene``."""

    def test_empty_wiki_produces_empty_report(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A dry run over a wiki with no added pages backfills and archives nothing."""
        report = wiki_hygiene.run_quick_hygiene(dry_run=True)
        assert report.backfilled == []
        assert report.archived == []

    def test_real_run_is_idempotent(
        self, wiki_hygiene: Any, tmp_wiki: Path
    ) -> None:
        """A second real run right after the first has no work left to do."""
        make_page(tmp_wiki, "patterns/one.md")
        make_page(tmp_wiki, "patterns/two.md")

        # First pass does whatever work is pending; its report is not
        # inspected, so it is deliberately not bound to a name.
        wiki_hygiene.run_quick_hygiene()

        # Second run should have 0 work
        second_report = wiki_hygiene.run_quick_hygiene()
        assert second_report.backfilled == []
        assert second_report.decayed == []
        assert second_report.archived == []
        assert second_report.frontmatter_fixes == []
|
||||
Reference in New Issue
Block a user