#!/usr/bin/env python3 """Automated wiki hygiene — quick (no LLM) and full (LLM) modes. Implements Plan 02 (staleness & archive) and Plan 04 (automated hygiene). Quick mode checks (daily, no LLM): - Backfill missing last_verified - Refresh last_verified from conversation references - Auto-restore archived pages referenced again - Confidence decay per thresholds - Archive stale and superseded pages - Frontmatter repair (missing required fields) - Orphan pages (no inbound links) - Broken cross-references (with fuzzy-match fix) - Main index drift (missing/orphan entries) - Empty stubs (report-only) - State file drift (report-only) - Staging/archive index resync Full mode checks (weekly, LLM-powered, extends quick): - Missing cross-references (haiku) - Duplicate coverage (sonnet) - Contradictions (sonnet, report-only) - Technology lifecycle (haiku) Usage: python3 scripts/wiki-hygiene.py # Quick mode (default) python3 scripts/wiki-hygiene.py --quick # Explicit quick python3 scripts/wiki-hygiene.py --full # Full mode (quick + LLM) python3 scripts/wiki-hygiene.py --dry-run # Show what would change python3 scripts/wiki-hygiene.py --check-only # Report only, no auto-fixes python3 scripts/wiki-hygiene.py --backfill # Backfill last_verified only python3 scripts/wiki-hygiene.py --scan-refs # Refresh from conversation refs only python3 scripts/wiki-hygiene.py --archive PATH # Manually archive a page python3 scripts/wiki-hygiene.py --restore PATH # Manually restore an archived page """ from __future__ import annotations import argparse import difflib import json import re import subprocess import sys from dataclasses import dataclass, field from datetime import date, datetime, timezone from pathlib import Path from typing import Any sys.path.insert(0, str(Path(__file__).parent)) from wiki_lib import ( # noqa: E402 ARCHIVE_DIR, ARCHIVE_INDEX, CONVERSATIONS_DIR, HARVEST_STATE_FILE, INDEX_FILE, LIVE_CONTENT_DIRS, REPORTS_DIR, STAGING_DIR, STAGING_INDEX, WIKI_DIR, WikiPage, iter_archived_pages, iter_live_pages, iter_staging_pages, page_content_hash, parse_date, parse_page, today, write_page, ) sys.stdout.reconfigure(line_buffering=True) sys.stderr.reconfigure(line_buffering=True) # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- HYGIENE_STATE_FILE = WIKI_DIR / ".hygiene-state.json" MINE_STATE_FILE = WIKI_DIR / ".mine-state.json" # Decay thresholds in days since last_verified DECAY_HIGH_TO_MEDIUM = 180 DECAY_MEDIUM_TO_LOW = 270 DECAY_LOW_TO_STALE = 365 CONFIDENCE_ORDER = ["stale", "low", "medium", "high"] VALID_CONFIDENCE = {"high", "medium", "low", "stale"} VALID_TYPES = {"pattern", "decision", "environment", "concept"} EMPTY_STUB_THRESHOLD = 100 # body chars below which a page is a stub # Required fields per type — missing → auto-fix REQUIRED_FIELDS = ["title", "type", "confidence", "last_compiled", "last_verified"] # LLM call defaults CLAUDE_TIMEOUT = 300 CLAUDE_HAIKU = "haiku" CLAUDE_SONNET = "sonnet" # Tech version patterns for lifecycle check VERSION_REGEX = re.compile( r"\b(?:Node(?:\.js)?|Python|Docker|PostgreSQL|MySQL|Redis|Next\.js|NestJS)\s+(\d+(?:\.\d+)?)", re.IGNORECASE, ) # --------------------------------------------------------------------------- # Hygiene state (.hygiene-state.json) # --------------------------------------------------------------------------- def load_hygiene_state() -> dict[str, Any]: if HYGIENE_STATE_FILE.exists(): try: with open(HYGIENE_STATE_FILE) as f: return json.load(f) except (OSError, json.JSONDecodeError): pass return { "last_quick_run": None, "last_full_run": None, "pages_checked": {}, "deferred_issues": [], } def save_hygiene_state(state: dict[str, Any]) -> None: tmp = HYGIENE_STATE_FILE.with_suffix(".json.tmp") with open(tmp, "w") as f: json.dump(state, f, indent=2, sort_keys=True) tmp.replace(HYGIENE_STATE_FILE) def mark_page_checked(state: dict[str, Any], page: WikiPage, mode: str) -> None: rel = str(page.path.relative_to(WIKI_DIR)) entry = state.setdefault("pages_checked", {}).setdefault(rel, {}) now = datetime.now(timezone.utc).isoformat() if mode == "quick": entry["last_checked_quick"] = now elif mode == "full": entry["last_checked_full"] = now entry["content_hash"] = page_content_hash(page) def page_changed_since(state: dict[str, Any], page: WikiPage, mode: str) -> bool: rel = str(page.path.relative_to(WIKI_DIR)) entry = state.get("pages_checked", {}).get(rel, {}) stored_hash = entry.get("content_hash") if not stored_hash: return True return stored_hash != page_content_hash(page) def is_deferred(state: dict[str, Any], issue_type: str, pages: list[str]) -> bool: sorted_pages = sorted(pages) for issue in state.get("deferred_issues", []): if issue.get("type") == issue_type and sorted(issue.get("pages", [])) == sorted_pages: return True return False # --------------------------------------------------------------------------- # Date / git helpers # --------------------------------------------------------------------------- def git_first_commit_date(path: Path) -> date | None: try: result = subprocess.run( ["git", "-C", str(WIKI_DIR), "log", "--diff-filter=A", "--format=%cs", "--", str(path.relative_to(WIKI_DIR))], capture_output=True, text=True, timeout=10, ) if result.returncode == 0 and result.stdout.strip(): return parse_date(result.stdout.strip().splitlines()[-1]) except (subprocess.TimeoutExpired, OSError): pass return None def file_mtime_date(path: Path) -> date: return datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc).date() # --------------------------------------------------------------------------- # Backfill last_verified # --------------------------------------------------------------------------- def backfill_last_verified(dry_run: bool = False) -> list[tuple[Path, str, date]]: changes: list[tuple[Path, str, date]] = [] for page in iter_live_pages(): if "last_verified" in page.frontmatter and parse_date(page.frontmatter["last_verified"]): continue source = "mtime" d = parse_date(page.frontmatter.get("last_compiled")) if d: source = "last_compiled" else: d = git_first_commit_date(page.path) if d: source = "git" else: d = file_mtime_date(page.path) changes.append((page.path, source, d)) if not dry_run: page.frontmatter["last_verified"] = d.isoformat() write_page(page) return changes # --------------------------------------------------------------------------- # Frontmatter repair # --------------------------------------------------------------------------- def repair_frontmatter(dry_run: bool = False) -> list[tuple[Path, list[str]]]: """Add missing required fields with sensible defaults. Returns list of (page, fields_fixed).""" changes: list[tuple[Path, list[str]]] = [] for page in iter_live_pages(): fixes: list[str] = [] fm = page.frontmatter if "title" not in fm: fm["title"] = page.path.stem.replace("-", " ").title() fixes.append("title") if "type" not in fm or fm["type"] not in VALID_TYPES: inferred = page.path.parent.name.rstrip("s") if inferred in VALID_TYPES: fm["type"] = inferred fixes.append("type") if "confidence" not in fm or str(fm.get("confidence")) not in VALID_CONFIDENCE: fm["confidence"] = "medium" fixes.append("confidence") if "last_compiled" not in fm or not parse_date(fm.get("last_compiled")): d = git_first_commit_date(page.path) or file_mtime_date(page.path) fm["last_compiled"] = d.isoformat() fixes.append("last_compiled") if "last_verified" not in fm or not parse_date(fm.get("last_verified")): fm["last_verified"] = fm.get("last_compiled") or today().isoformat() fixes.append("last_verified") if "sources" not in fm: fm["sources"] = [] fixes.append("sources") if "related" not in fm: fm["related"] = [] fixes.append("related") if fixes: changes.append((page.path, fixes)) if not dry_run: write_page(page) return changes # --------------------------------------------------------------------------- # Confidence decay # --------------------------------------------------------------------------- def expected_confidence(current: str, last_verified: date | None, is_superseded: bool) -> str: if is_superseded: return "stale" if not last_verified: return current elapsed = (today() - last_verified).days if elapsed >= DECAY_LOW_TO_STALE: return "stale" if elapsed >= DECAY_MEDIUM_TO_LOW: return _min_confidence(current, "low") if elapsed >= DECAY_HIGH_TO_MEDIUM: return _min_confidence(current, "medium") return current def _min_confidence(a: str, b: str) -> str: order = {c: i for i, c in enumerate(CONFIDENCE_ORDER)} ai = order.get(a, len(CONFIDENCE_ORDER)) bi = order.get(b, len(CONFIDENCE_ORDER)) return CONFIDENCE_ORDER[min(ai, bi)] def bump_confidence(current: str) -> str: idx = CONFIDENCE_ORDER.index(current) if current in CONFIDENCE_ORDER else 0 return CONFIDENCE_ORDER[min(idx + 1, len(CONFIDENCE_ORDER) - 1)] # --------------------------------------------------------------------------- # Archive / Restore # --------------------------------------------------------------------------- def archive_page(page: WikiPage, reason: str, dry_run: bool = False) -> Path | None: rel = page.path.relative_to(WIKI_DIR) parts = rel.parts if len(parts) < 2 or parts[0] not in LIVE_CONTENT_DIRS: print(f" [warn] cannot archive {rel} — not a live content page", file=sys.stderr) return None dest = ARCHIVE_DIR / rel original_path = str(rel) if dry_run: print(f" [dry-run] archive {rel} → {dest.relative_to(WIKI_DIR)} ({reason})") return dest dest.parent.mkdir(parents=True, exist_ok=True) page.frontmatter["archived_date"] = today().isoformat() page.frontmatter["archived_reason"] = reason page.frontmatter["original_path"] = original_path page.frontmatter["confidence"] = "stale" page.path.rename(dest) page.path = dest write_page(page) _remove_from_main_index(original_path) _append_to_archive_index(dest, original_path, reason) _rewrite_cross_references(original_path, f"archive/{original_path}") return dest def restore_page(page: WikiPage, dry_run: bool = False) -> Path | None: original_path = page.frontmatter.get("original_path") if not original_path: rel = page.path.relative_to(ARCHIVE_DIR) original_path = str(rel) dest = WIKI_DIR / original_path if dry_run: print(f" [dry-run] restore {page.path.relative_to(WIKI_DIR)} → {original_path}") return dest dest.parent.mkdir(parents=True, exist_ok=True) for key in ("archived_date", "archived_reason", "original_path"): page.frontmatter.pop(key, None) page.frontmatter["confidence"] = "medium" page.frontmatter["last_verified"] = today().isoformat() old = page.path page.path.rename(dest) page.path = dest write_page(page) _remove_from_archive_index(str(old.relative_to(ARCHIVE_DIR))) _rewrite_cross_references(f"archive/{original_path}", original_path) return dest # --------------------------------------------------------------------------- # Index I/O # --------------------------------------------------------------------------- def _remove_from_main_index(original_path: str) -> None: if not INDEX_FILE.exists(): return text = INDEX_FILE.read_text() lines = text.splitlines(keepends=True) pattern = re.compile(rf"^- \[.+\]\({re.escape(original_path)}\) ") new_lines = [line for line in lines if not pattern.match(line)] if len(new_lines) != len(lines): INDEX_FILE.write_text("".join(new_lines)) def _append_to_archive_index(archived_path: Path, original_path: str, reason: str) -> None: ARCHIVE_INDEX.parent.mkdir(parents=True, exist_ok=True) if not ARCHIVE_INDEX.exists(): ARCHIVE_INDEX.write_text(_default_archive_index()) text = ARCHIVE_INDEX.read_text() name = archived_path.stem.replace("-", " ").title() rel_in_archive = archived_path.relative_to(ARCHIVE_DIR) row = f"| [{name}]({rel_in_archive}) | {original_path} | {today().isoformat()} | {reason} |\n" text = text.replace("| _(none yet)_ | | | |\n", "") if row.strip() in text: return ARCHIVE_INDEX.write_text(text.rstrip() + "\n" + row) def _remove_from_archive_index(rel_in_archive: str) -> None: if not ARCHIVE_INDEX.exists(): return text = ARCHIVE_INDEX.read_text() pattern = re.compile(rf"^\|\s*\[.+\]\({re.escape(rel_in_archive)}\).*\n", re.MULTILINE) new_text = pattern.sub("", text) if new_text != text: ARCHIVE_INDEX.write_text(new_text) def _default_archive_index() -> str: return ( "# Archived Wiki Pages\n\n" "Pages archived due to staleness or obsolescence.\n\n" "## Archived Pages\n\n" "| Page | Original Location | Archived | Reason |\n" "|------|-------------------|----------|--------|\n" ) def _add_to_main_index(rel_path: str, title: str, summary: str = "") -> None: if not INDEX_FILE.exists(): return text = INDEX_FILE.read_text() if f"]({rel_path})" in text: return entry = f"- [{title}]({rel_path})" if summary: entry += f" — {summary}" entry += "\n" ptype = rel_path.split("/")[0] section_headers = { "patterns": "## Patterns", "decisions": "## Decisions", "concepts": "## Concepts", "environments": "## Environments", } header = section_headers.get(ptype) if header and header in text: idx = text.find(header) next_header = text.find("\n## ", idx + len(header)) if next_header == -1: next_header = len(text) section = text[idx:next_header] last_nl = section.rfind("\n", 0, len(section) - 1) + 1 INDEX_FILE.write_text(text[: idx + last_nl] + entry + text[idx + last_nl :]) else: INDEX_FILE.write_text(text.rstrip() + "\n" + entry) # --------------------------------------------------------------------------- # Cross-reference rewriting # --------------------------------------------------------------------------- def _rewrite_cross_references(old_path: str, new_path: str) -> int: targets: list[Path] = [INDEX_FILE] for sub in LIVE_CONTENT_DIRS: targets.extend((WIKI_DIR / sub).glob("*.md")) if STAGING_DIR.exists(): for sub in LIVE_CONTENT_DIRS: targets.extend((STAGING_DIR / sub).glob("*.md")) if ARCHIVE_DIR.exists(): for sub in LIVE_CONTENT_DIRS: targets.extend((ARCHIVE_DIR / sub).glob("*.md")) count = 0 old_esc = re.escape(old_path) link_patterns = [ (re.compile(rf"\]\({old_esc}\)"), f"]({new_path})"), (re.compile(rf"\]\(\.\./{old_esc}\)"), f"](../{new_path})"), ] related_patterns = [ (re.compile(rf"^(\s*-\s*){old_esc}$", re.MULTILINE), rf"\g<1>{new_path}"), ] for target in targets: if not target.exists(): continue try: text = target.read_text() except OSError: continue new_text = text for pat, repl in link_patterns + related_patterns: new_text = pat.sub(repl, new_text) if new_text != text: target.write_text(new_text) count += 1 return count # --------------------------------------------------------------------------- # Conversation refresh signals # --------------------------------------------------------------------------- def scan_conversation_references() -> dict[str, date]: refs: dict[str, date] = {} if not CONVERSATIONS_DIR.exists(): return refs page_link_pattern = re.compile( r"(?:patterns|decisions|concepts|environments)/[\w\-]+\.md" ) for project_dir in CONVERSATIONS_DIR.iterdir(): if not project_dir.is_dir(): continue for md in project_dir.glob("*.md"): page = parse_page(md) if not page: continue if page.frontmatter.get("status") != "summarized": continue conv_date = parse_date(page.frontmatter.get("date")) if not conv_date: continue related = page.frontmatter.get("related") or [] if isinstance(related, list): for ref in related: m = page_link_pattern.search(str(ref)) if m: path = m.group(0) if path not in refs or conv_date > refs[path]: refs[path] = conv_date for m in page_link_pattern.finditer(page.body): path = m.group(0) if path not in refs or conv_date > refs[path]: refs[path] = conv_date return refs def apply_refresh_signals(refs: dict[str, date], dry_run: bool = False) -> list[tuple[Path, str, str, date]]: changes: list[tuple[Path, str, str, date]] = [] for page in iter_live_pages(): rel = str(page.path.relative_to(WIKI_DIR)) ref_date = refs.get(rel) if not ref_date: continue current_verified = parse_date(page.frontmatter.get("last_verified")) if current_verified and current_verified >= ref_date: continue old_conf = str(page.frontmatter.get("confidence", "medium")) new_conf = bump_confidence(old_conf) if old_conf in ("low", "medium") else old_conf changes.append((page.path, old_conf, new_conf, ref_date)) if not dry_run: page.frontmatter["last_verified"] = ref_date.isoformat() if new_conf != old_conf: page.frontmatter["confidence"] = new_conf write_page(page) return changes # --------------------------------------------------------------------------- # Auto-restoration # --------------------------------------------------------------------------- def auto_restore_archived(dry_run: bool = False) -> list[Path]: restored: list[Path] = [] archived = { str(p.path.relative_to(ARCHIVE_DIR)): p for p in iter_archived_pages() if p.path.name != "index.md" } if not archived: return restored referenced: set[str] = set() scan_targets: list[Path] = [INDEX_FILE] for sub in LIVE_CONTENT_DIRS: scan_targets.extend((WIKI_DIR / sub).glob("*.md")) if CONVERSATIONS_DIR.exists(): for project_dir in CONVERSATIONS_DIR.iterdir(): if project_dir.is_dir(): scan_targets.extend(project_dir.glob("*.md")) for t in scan_targets: try: text = t.read_text() except OSError: continue for rel_archive in archived: if rel_archive in text or f"archive/{rel_archive}" in text: referenced.add(rel_archive) for rel_archive, page in archived.items(): if rel_archive in referenced: restored_path = restore_page(page, dry_run=dry_run) if restored_path: restored.append(restored_path) return restored # --------------------------------------------------------------------------- # Orphan detection # --------------------------------------------------------------------------- def find_orphan_pages() -> list[WikiPage]: """Pages with no inbound link from index.md or any other wiki page.""" all_pages = iter_live_pages() all_text = [] if INDEX_FILE.exists(): all_text.append(INDEX_FILE.read_text()) for p in all_pages: all_text.append(p.path.read_text()) combined = "\n".join(all_text) orphans: list[WikiPage] = [] for page in all_pages: rel = str(page.path.relative_to(WIKI_DIR)) # A page that only appears in its own file isn't linked own_count = page.path.read_text().count(rel) total = combined.count(rel) if total - own_count == 0: orphans.append(page) return orphans def fix_orphan_page(page: WikiPage, dry_run: bool = False) -> bool: """Add the page to index.md under its section. Returns True if fixed.""" rel = str(page.path.relative_to(WIKI_DIR)) title = str(page.frontmatter.get("title", page.path.stem)) # Use first non-heading non-empty body line as summary summary = "" for line in page.body.strip().splitlines(): line = line.strip() if not line or line.startswith("#"): continue summary = line[:120] break if dry_run: print(f" [dry-run] add orphan to index: {rel}") return True _add_to_main_index(rel, title, summary) return True # --------------------------------------------------------------------------- # Broken cross-references # --------------------------------------------------------------------------- LINK_REGEX = re.compile(r"\]\(((?:patterns|decisions|concepts|environments|archive)/[\w\-/]+\.md)\)") RELATED_LINE_REGEX = re.compile(r"^\s*-\s*((?:patterns|decisions|concepts|environments)/[\w\-]+\.md)\s*$", re.MULTILINE) def find_broken_cross_refs() -> list[tuple[Path, str, str | None]]: """Return list of (page_path, bad_link, suggested_fix_or_None). `archived_paths` is keyed by the page's *original* live path (relative to ARCHIVE_DIR, not WIKI_DIR) so we can directly check whether a broken live link corresponds to an archived file at the same subpath. """ results: list[tuple[Path, str, str | None]] = [] live_names = {str(p.path.relative_to(WIKI_DIR)) for p in iter_live_pages()} archived_paths = {str(p.path.relative_to(ARCHIVE_DIR)) for p in iter_archived_pages()} scan: list[Path] = [INDEX_FILE] for sub in LIVE_CONTENT_DIRS: scan.extend((WIKI_DIR / sub).glob("*.md")) for target in scan: try: text = target.read_text() except OSError: continue seen: set[str] = set() for link in LINK_REGEX.findall(text): if link in seen: continue seen.add(link) if link in live_names: continue if link in archived_paths: # Reference to archive → trigger restore results.append((target, link, f"__RESTORE__:{link}")) continue # Fuzzy match suggestion = fuzzy_find_page(link, live_names) results.append((target, link, suggestion)) # Also bare references in `related:` for m in RELATED_LINE_REGEX.finditer(text): link = m.group(1) if link in seen or link in live_names: continue seen.add(link) if link in archived_paths: results.append((target, link, f"__RESTORE__:{link}")) continue results.append((target, link, fuzzy_find_page(link, live_names))) return results def fuzzy_find_page(bad_link: str, candidates: set[str]) -> str | None: """Use difflib to find the closest valid page path.""" matches = difflib.get_close_matches(bad_link, list(candidates), n=1, cutoff=0.75) return matches[0] if matches else None def fix_broken_cross_ref(target: Path, bad_link: str, suggested: str, dry_run: bool = False) -> bool: if suggested.startswith("__RESTORE__:"): archived_rel = suggested.split(":", 1)[1] archived_page = parse_page(ARCHIVE_DIR / archived_rel) if archived_page and not dry_run: restore_page(archived_page) return True if dry_run: print(f" [dry-run] fix {target.relative_to(WIKI_DIR)}: {bad_link} → {suggested}") return True text = target.read_text() new_text = text.replace(f"]({bad_link})", f"]({suggested})") new_text = re.sub( rf"^(\s*-\s*){re.escape(bad_link)}$", rf"\g<1>{suggested}", new_text, flags=re.MULTILINE, ) if new_text != text: target.write_text(new_text) return True # --------------------------------------------------------------------------- # Index drift # --------------------------------------------------------------------------- def find_index_drift() -> tuple[list[str], list[str]]: """Return (missing_from_index, stale_index_entries).""" disk_pages = {str(p.path.relative_to(WIKI_DIR)) for p in iter_live_pages()} indexed: set[str] = set() if INDEX_FILE.exists(): for link in LINK_REGEX.findall(INDEX_FILE.read_text()): indexed.add(link) missing = sorted(disk_pages - indexed) stale = sorted(indexed - disk_pages - {p for p in indexed if p.startswith("archive/")}) return missing, stale def fix_index_drift(missing: list[str], stale: list[str], dry_run: bool = False) -> None: for rel in missing: page = parse_page(WIKI_DIR / rel) if not page: continue title = str(page.frontmatter.get("title", page.path.stem)) summary = "" for line in page.body.strip().splitlines(): line = line.strip() if line and not line.startswith("#"): summary = line[:120] break if dry_run: print(f" [dry-run] add to index: {rel}") else: _add_to_main_index(rel, title, summary) for rel in stale: if dry_run: print(f" [dry-run] remove from index: {rel}") else: _remove_from_main_index(rel) # --------------------------------------------------------------------------- # Empty stubs # --------------------------------------------------------------------------- def find_empty_stubs() -> list[WikiPage]: stubs: list[WikiPage] = [] for page in iter_live_pages(): body_text = re.sub(r"^#+\s+.*$", "", page.body, flags=re.MULTILINE).strip() if len(body_text) < EMPTY_STUB_THRESHOLD: stubs.append(page) return stubs # --------------------------------------------------------------------------- # State drift # --------------------------------------------------------------------------- def find_state_drift() -> list[str]: issues: list[str] = [] # .mine-state.json → output_file existence if MINE_STATE_FILE.exists(): try: mine = json.load(open(MINE_STATE_FILE)) for sid, info in mine.get("sessions", {}).items(): out = info.get("output_file") if out: out_path = WIKI_DIR / out if not out_path.exists(): issues.append(f"mine: session {sid[:8]} references missing {out}") except (OSError, json.JSONDecodeError) as e: issues.append(f"mine: could not parse .mine-state.json ({e})") # .harvest-state.json → raw_file / wiki_pages existence if HARVEST_STATE_FILE.exists(): try: harvest = json.load(open(HARVEST_STATE_FILE)) for url, info in harvest.get("harvested_urls", {}).items(): raw = info.get("raw_file") if raw and not (WIKI_DIR / raw).exists(): issues.append(f"harvest: {url[:60]} → missing raw file {raw}") for wiki_page in info.get("wiki_pages", []): if wiki_page and not (WIKI_DIR / wiki_page).exists(): issues.append(f"harvest: {url[:60]} → missing wiki page {wiki_page}") except (OSError, json.JSONDecodeError) as e: issues.append(f"harvest: could not parse .harvest-state.json ({e})") # .hygiene-state.json → pages_checked existence if HYGIENE_STATE_FILE.exists(): try: h = json.load(open(HYGIENE_STATE_FILE)) for rel in h.get("pages_checked", {}): if not (WIKI_DIR / rel).exists() and not (ARCHIVE_DIR / rel).exists(): issues.append(f"hygiene: pages_checked references missing {rel}") except (OSError, json.JSONDecodeError) as e: issues.append(f"hygiene: could not parse .hygiene-state.json ({e})") return issues # --------------------------------------------------------------------------- # Staging / archive index sync # --------------------------------------------------------------------------- def sync_staging_index(dry_run: bool = False) -> bool: """Regenerate staging/index.md from disk. Returns True if any change was needed.""" if not STAGING_DIR.exists(): return False pending = [p for p in iter_staging_pages() if p.path.name != "index.md"] expected = _build_staging_index(pending) if STAGING_INDEX.exists(): current = STAGING_INDEX.read_text() if current == expected: return False if dry_run: print(" [dry-run] staging/index.md would be regenerated") return True STAGING_DIR.mkdir(parents=True, exist_ok=True) STAGING_INDEX.write_text(expected) return True def _build_staging_index(pending: list[WikiPage]) -> str: lines = [ "# Staging — Pending Wiki Content", "", "Content awaiting human review. These pages were generated by automated scripts", "and need approval before joining the live wiki.", "", "**Review options**:", "- Browse in Obsidian and move files manually (then run `scripts/wiki-staging.py --sync`)", "- Run `python3 scripts/wiki-staging.py --list` for a summary", "- Start a Claude session: \"let's review what's in staging\"", "", f"**{len(pending)} pending item(s)** as of {today().isoformat()}", "", "## Pending Items", "", ] if not pending: lines.append("_No pending items._") else: lines.append("| Page | Type | Source | Staged | Target |") lines.append("|------|------|--------|--------|--------|") for p in pending: fm = p.frontmatter title = fm.get("title", p.path.stem) rel = str(p.path.relative_to(STAGING_DIR)) ptype = fm.get("type", "unknown") staged_by = fm.get("staged_by", "unknown") staged = fm.get("staged_date", "—") target = fm.get("target_path", rel) lines.append(f"| [{title}]({rel}) | {ptype} | {staged_by} | {staged} | `{target}` |") return "\n".join(lines) + "\n" def sync_archive_index(dry_run: bool = False) -> bool: """Rebuild archive/index.md from disk if out of sync. Returns True if changed.""" if not ARCHIVE_DIR.exists(): return False archived = [p for p in iter_archived_pages() if p.path.name != "index.md"] expected = _build_archive_index(archived) if ARCHIVE_INDEX.exists(): if ARCHIVE_INDEX.read_text() == expected: return False if dry_run: print(" [dry-run] archive/index.md would be regenerated") return True ARCHIVE_INDEX.write_text(expected) return True def _build_archive_index(archived: list[WikiPage]) -> str: lines = [ "# Archived Wiki Pages", "", "Pages archived due to staleness or obsolescence. Excluded from default", "wiki searches but available via `qmd search \"topic\" -c wiki-archive`.", "", "## Archived Pages", "", "| Page | Original Location | Archived | Reason |", "|------|-------------------|----------|--------|", ] if not archived: lines.append("| _(none yet)_ | | | |") else: for p in archived: fm = p.frontmatter name = p.path.stem.replace("-", " ").title() rel = str(p.path.relative_to(ARCHIVE_DIR)) original = fm.get("original_path", rel) archived_date = fm.get("archived_date", "—") reason = fm.get("archived_reason", "—") lines.append(f"| [{name}]({rel}) | {original} | {archived_date} | {reason} |") return "\n".join(lines) + "\n" # --------------------------------------------------------------------------- # LLM helpers (full mode) # --------------------------------------------------------------------------- def call_claude(prompt: str, model: str = CLAUDE_HAIKU) -> str | None: try: result = subprocess.run( ["claude", "-p", "--model", model, "--output-format", "text", prompt], capture_output=True, text=True, timeout=CLAUDE_TIMEOUT, ) except FileNotFoundError: print(" [warn] claude CLI not found", file=sys.stderr) return None except subprocess.TimeoutExpired: print(" [warn] claude -p timed out", file=sys.stderr) return None if result.returncode != 0: print(f" [warn] claude -p failed: {result.stderr.strip()[:200]}", file=sys.stderr) return None return result.stdout.strip() def _extract_json(text: str) -> Any: match = re.search(r"(\{.*\}|\[.*\])", text, re.DOTALL) if not match: return None try: return json.loads(match.group(0)) except json.JSONDecodeError: return None def _page_header_snippet(page: WikiPage) -> str: """Short representation of a page for LLM prompts: rel path + title + first paragraph.""" rel = str(page.path.relative_to(WIKI_DIR)) title = str(page.frontmatter.get("title", page.path.stem)) first_para = "" for line in page.body.strip().splitlines(): line = line.strip() if line and not line.startswith("#"): first_para = line[:400] break return f"`{rel}` — {title}\n{first_para}" def find_missing_cross_refs_llm(pages: list[WikiPage]) -> list[tuple[Path, list[str]]]: """For each page, ask haiku which other pages it should link to. Returns list of (page_path, suggested_rel_paths). """ if not pages: return [] # Use index.md as the catalog of candidates try: index_text = INDEX_FILE.read_text()[:10_000] except OSError: return [] results: list[tuple[Path, list[str]]] = [] # Batch 5 pages per call for i in range(0, len(pages), 5): batch = pages[i : i + 5] batch_text = "\n\n".join( f"### PAGE {n+1}: {str(p.path.relative_to(WIKI_DIR))}\n" f"title: {p.frontmatter.get('title', p.path.stem)}\n" f"current related: {p.frontmatter.get('related', [])}\n" f"first paragraph:\n{_page_header_snippet(p)}" for n, p in enumerate(batch) ) prompt = ( "You are reviewing wiki pages for missing cross-references. For each PAGE below, " "identify OTHER wiki pages it should link to but currently doesn't. Only suggest " "pages listed in the INDEX. Be conservative — only suggest strong topical matches.\n\n" "Emit a single JSON object mapping the page's relative path to an array of relative " "paths it should link to. Omit pages with no suggestions. No prose.\n\n" f"### INDEX\n{index_text}\n\n" f"### PAGES TO REVIEW\n{batch_text}\n" ) raw = call_claude(prompt, model=CLAUDE_HAIKU) if not raw: continue data = _extract_json(raw) if not isinstance(data, dict): continue for p in batch: rel = str(p.path.relative_to(WIKI_DIR)) suggestions = data.get(rel) if isinstance(suggestions, list) and suggestions: # Filter out pages already in related existing = set(str(x) for x in (p.frontmatter.get("related") or [])) new = [s for s in suggestions if s not in existing and s != rel] if new: results.append((p.path, new)) return results def find_duplicates_llm(pages: list[WikiPage]) -> list[tuple[Path, Path, str]]: """First pass (no LLM) groups by keyword overlap; second pass (sonnet) confirms duplicates. Returns list of (weaker_path, stronger_path, reason). """ if len(pages) < 2: return [] # Group pages by type by_type: dict[str, list[WikiPage]] = {} for p in pages: t = str(p.frontmatter.get("type", "")) by_type.setdefault(t, []).append(p) candidates: list[tuple[WikiPage, WikiPage]] = [] for type_pages in by_type.values(): for i, a in enumerate(type_pages): a_words = _title_keywords(a) for b in type_pages[i + 1 :]: overlap = a_words & _title_keywords(b) if len(overlap) >= 2: candidates.append((a, b)) results: list[tuple[Path, Path, str]] = [] for a, b in candidates[:10]: # cap to control LLM cost prompt = ( "Are these two wiki pages duplicates (substantially the same topic)?\n\n" f"### PAGE A: {a.path.relative_to(WIKI_DIR)}\n{a.body[:3000]}\n\n" f"### PAGE B: {b.path.relative_to(WIKI_DIR)}\n{b.body[:3000]}\n\n" "Emit a single JSON object: " '{\"duplicate\": true|false, \"stronger\": \"A\"|\"B\", \"reason\": \"...\"}. ' "No prose." ) raw = call_claude(prompt, model=CLAUDE_SONNET) data = _extract_json(raw or "") if isinstance(data, dict) and data.get("duplicate"): stronger = data.get("stronger", "A") reason = str(data.get("reason", "")) if stronger == "A": results.append((b.path, a.path, reason)) else: results.append((a.path, b.path, reason)) return results def _title_keywords(page: WikiPage) -> set[str]: title = str(page.frontmatter.get("title", page.path.stem)).lower() return {w for w in re.split(r"[^a-z0-9]+", title) if len(w) > 3} def find_contradictions_llm(pages: list[WikiPage]) -> list[tuple[Path, Path, str]]: """Report-only — pair up related pages and ask sonnet to find conflicting claims.""" # Focus on decisions/ and patterns/ focus = [p for p in pages if str(p.frontmatter.get("type")) in ("decision", "pattern")] if len(focus) < 2: return [] # Build candidate pairs from shared related: links by_path = {str(p.path.relative_to(WIKI_DIR)): p for p in focus} candidates: list[tuple[WikiPage, WikiPage]] = [] seen_pairs: set[tuple[str, str]] = set() for p in focus: related = p.frontmatter.get("related") or [] if not isinstance(related, list): continue for rel_link in related: other = by_path.get(str(rel_link)) if not other: continue key = tuple(sorted([str(p.path), str(other.path)])) if key in seen_pairs: continue seen_pairs.add(key) candidates.append((p, other)) results: list[tuple[Path, Path, str]] = [] for a, b in candidates[:8]: # cap prompt = ( "Compare these two wiki pages for contradictions in their claims or recommendations. " "Only flag genuine contradictions, not complementary content.\n\n" f"### PAGE A: {a.path.relative_to(WIKI_DIR)}\n{a.body[:3000]}\n\n" f"### PAGE B: {b.path.relative_to(WIKI_DIR)}\n{b.body[:3000]}\n\n" "Emit a single JSON object: " '{\"contradiction\": true|false, \"description\": \"...\"}. No prose.' ) raw = call_claude(prompt, model=CLAUDE_SONNET) data = _extract_json(raw or "") if isinstance(data, dict) and data.get("contradiction"): results.append((a.path, b.path, str(data.get("description", "")))) return results def find_tech_lifecycle_issues() -> list[tuple[Path, str]]: """Flag pages mentioning outdated versions when newer ones appear in recent conversations.""" page_versions: dict[Path, dict[str, str]] = {} for page in iter_live_pages(): versions = {} for m in VERSION_REGEX.finditer(page.body): tool = m.group(0).split()[0].lower() versions[tool] = m.group(1) if versions: page_versions[page.path] = versions if not CONVERSATIONS_DIR.exists(): return [] # Scan recent conversations (last 90 days) recent_versions: dict[str, str] = {} cutoff = today() - __import__("datetime").timedelta(days=90) for project_dir in CONVERSATIONS_DIR.iterdir(): if not project_dir.is_dir(): continue for md in project_dir.glob("*.md"): page = parse_page(md) if not page: continue d = parse_date(page.frontmatter.get("date")) if not d or d < cutoff: continue for m in VERSION_REGEX.finditer(page.body): tool = m.group(0).split()[0].lower() ver = m.group(1) if tool not in recent_versions or _version_gt(ver, recent_versions[tool]): recent_versions[tool] = ver results: list[tuple[Path, str]] = [] for path, versions in page_versions.items(): for tool, page_ver in versions.items(): recent = recent_versions.get(tool) if recent and _version_gt(recent, page_ver): results.append((path, f"{tool} {page_ver} in page; {recent} in recent conversations")) break # one flag per page is enough return results def _version_gt(a: str, b: str) -> bool: try: ap = [int(x) for x in a.split(".")] bp = [int(x) for x in b.split(".")] return ap > bp except ValueError: return False # --------------------------------------------------------------------------- # Reports # --------------------------------------------------------------------------- @dataclass class HygieneReport: # Quick-mode fields backfilled: list[tuple[Path, str, date]] = field(default_factory=list) refreshed: list[tuple[Path, str, str, date]] = field(default_factory=list) decayed: list[tuple[Path, str, str]] = field(default_factory=list) archived: list[tuple[Path, str]] = field(default_factory=list) restored: list[Path] = field(default_factory=list) frontmatter_fixes: list[tuple[Path, list[str]]] = field(default_factory=list) orphans_fixed: list[Path] = field(default_factory=list) orphans_unfixed: list[Path] = field(default_factory=list) xrefs_fixed: list[tuple[Path, str, str]] = field(default_factory=list) xrefs_unfixed: list[tuple[Path, str]] = field(default_factory=list) index_drift_added: list[str] = field(default_factory=list) index_drift_removed: list[str] = field(default_factory=list) staging_synced: bool = False archive_synced: bool = False # Report-only empty_stubs: list[Path] = field(default_factory=list) state_drift: list[str] = field(default_factory=list) # Full-mode fields missing_xrefs: list[tuple[Path, list[str]]] = field(default_factory=list) duplicates: list[tuple[Path, Path, str]] = field(default_factory=list) contradictions: list[tuple[Path, Path, str]] = field(default_factory=list) tech_lifecycle: list[tuple[Path, str]] = field(default_factory=list) # --------------------------------------------------------------------------- # Hygiene orchestrator # --------------------------------------------------------------------------- def run_quick_hygiene(dry_run: bool = False, check_only: bool = False) -> HygieneReport: report = HygieneReport() apply = not (dry_run or check_only) print("[quick] backfilling missing last_verified") report.backfilled = backfill_last_verified(dry_run=not apply) print("[quick] scanning conversation references") refs = scan_conversation_references() report.refreshed = apply_refresh_signals(refs, dry_run=not apply) print("[quick] auto-restoring archived pages referenced again") report.restored = auto_restore_archived(dry_run=not apply) print("[quick] repairing frontmatter") report.frontmatter_fixes = repair_frontmatter(dry_run=not apply) print("[quick] applying confidence decay") for page in iter_live_pages(): current = str(page.frontmatter.get("confidence", "medium")) last_verified = parse_date(page.frontmatter.get("last_verified")) is_superseded = bool(re.search(r"superseded by", str(page.frontmatter.get("status", "")), re.IGNORECASE)) expected = expected_confidence(current, last_verified, is_superseded) if expected != current: report.decayed.append((page.path, current, expected)) if apply: page.frontmatter["confidence"] = expected write_page(page) print("[quick] archiving stale and superseded pages") for page in iter_live_pages(): conf = str(page.frontmatter.get("confidence", "medium")) status_val = str(page.frontmatter.get("status", "")) is_superseded = bool(re.search(r"superseded by", status_val, re.IGNORECASE)) last_verified = parse_date(page.frontmatter.get("last_verified")) if is_superseded: reason = "Explicitly superseded" if apply: archive_page(page, reason) report.archived.append((page.path, reason)) continue if conf == "stale": days = (today() - last_verified).days if last_verified else -1 reason = f"Confidence decayed to stale — no references in {days} days" if apply: archive_page(page, reason) report.archived.append((page.path, reason)) print("[quick] checking index drift") missing, stale_entries = find_index_drift() report.index_drift_added = missing report.index_drift_removed = stale_entries if apply and (missing or stale_entries): fix_index_drift(missing, stale_entries) print("[quick] checking for orphan pages") orphans = find_orphan_pages() for o in orphans: if apply: fix_orphan_page(o) report.orphans_fixed.append(o.path) else: report.orphans_unfixed.append(o.path) print("[quick] checking for broken cross-references") broken = find_broken_cross_refs() for target, bad, suggested in broken: if suggested is None: report.xrefs_unfixed.append((target, bad)) else: if apply: fix_broken_cross_ref(target, bad, suggested) report.xrefs_fixed.append((target, bad, suggested)) print("[quick] checking for empty stubs") report.empty_stubs = [p.path for p in find_empty_stubs()] print("[quick] checking state drift") report.state_drift = find_state_drift() print("[quick] syncing staging/archive indexes") report.staging_synced = sync_staging_index(dry_run=not apply) report.archive_synced = sync_archive_index(dry_run=not apply) # Update hygiene state if apply: state = load_hygiene_state() state["last_quick_run"] = datetime.now(timezone.utc).isoformat() for page in iter_live_pages(): mark_page_checked(state, page, "quick") save_hygiene_state(state) return report def run_full_hygiene(dry_run: bool = False, check_only: bool = False) -> HygieneReport: """Quick hygiene + LLM-powered checks.""" print("[full] running quick hygiene first") report = run_quick_hygiene(dry_run=dry_run, check_only=check_only) apply = not (dry_run or check_only) # Only check pages that changed since last full run state = load_hygiene_state() all_pages = iter_live_pages() changed_pages = [p for p in all_pages if page_changed_since(state, p, "full")] print(f"[full] {len(changed_pages)}/{len(all_pages)} pages changed since last full run") print("[full] checking missing cross-references (haiku)") report.missing_xrefs = find_missing_cross_refs_llm(changed_pages) if apply: for path, suggestions in report.missing_xrefs: page = parse_page(path) if not page: continue existing = list(page.frontmatter.get("related") or []) for s in suggestions: if s not in existing: existing.append(s) page.frontmatter["related"] = existing write_page(page) print("[full] checking for duplicate coverage (sonnet)") report.duplicates = find_duplicates_llm(all_pages) if apply: for weaker, stronger, reason in report.duplicates: wp = parse_page(weaker) if wp: archive_page(wp, f"Merged into {stronger.relative_to(WIKI_DIR)} — {reason}") print("[full] checking for contradictions (sonnet) — report-only") report.contradictions = find_contradictions_llm(all_pages) print("[full] checking technology lifecycle") report.tech_lifecycle = find_tech_lifecycle_issues() if apply: state["last_full_run"] = datetime.now(timezone.utc).isoformat() for page in iter_live_pages(): mark_page_checked(state, page, "full") save_hygiene_state(state) return report # --------------------------------------------------------------------------- # Report writers # --------------------------------------------------------------------------- def write_fixed_report(report: HygieneReport, mode: str, dry_run: bool) -> Path: REPORTS_DIR.mkdir(parents=True, exist_ok=True) suffix = "-dry-run" if dry_run else "-fixed" path = REPORTS_DIR / f"hygiene-{today().isoformat()}{suffix}.md" lines = [ f"# Hygiene Report — Auto-Fixed ({today().isoformat()})", "", f"Mode: {mode}{' (dry-run)' if dry_run else ''}", "", "## Summary", "", f"- Backfilled last_verified: {len(report.backfilled)}", f"- Refreshed from conversations: {len(report.refreshed)}", f"- Frontmatter repairs: {len(report.frontmatter_fixes)}", f"- Orphans linked: {len(report.orphans_fixed)}", f"- Broken xrefs fixed: {len(report.xrefs_fixed)}", f"- Index drift — added: {len(report.index_drift_added)}, removed: {len(report.index_drift_removed)}", f"- Decayed: {len(report.decayed)}", f"- Archived: {len(report.archived)}", f"- Restored: {len(report.restored)}", f"- Staging index resynced: {report.staging_synced}", f"- Archive index resynced: {report.archive_synced}", ] if mode == "full": lines.extend([ f"- Missing xrefs added: {len(report.missing_xrefs)}", f"- Duplicates merged: {len(report.duplicates)}", ]) lines.append("") def _section(title: str, rows: list[str]) -> None: if not rows: return lines.append(f"## {title}") lines.append("") lines.extend(rows) lines.append("") _section( "Backfilled last_verified", [f"- `{p.relative_to(WIKI_DIR)}` ← {src} ({d.isoformat()})" for p, src, d in report.backfilled], ) _section( "Refreshed from conversations", [ f"- `{p.relative_to(WIKI_DIR)}` confidence {old} → {new} (ref {d.isoformat()})" for p, old, new, d in report.refreshed ], ) _section( "Frontmatter repairs", [f"- `{p.relative_to(WIKI_DIR)}` — added: {', '.join(fields)}" for p, fields in report.frontmatter_fixes], ) _section( "Orphans linked", [f"- `{p.relative_to(WIKI_DIR)}`" for p in report.orphans_fixed], ) _section( "Broken xrefs fixed", [f"- `{t.relative_to(WIKI_DIR)}` {bad} → {new}" for t, bad, new in report.xrefs_fixed], ) _section( "Index drift — added", [f"- `{p}`" for p in report.index_drift_added], ) _section( "Index drift — removed", [f"- `{p}`" for p in report.index_drift_removed], ) _section( "Confidence decayed", [f"- `{p.relative_to(WIKI_DIR)}` {old} → {new}" for p, old, new in report.decayed], ) _section( "Archived", [f"- `{p.relative_to(WIKI_DIR)}` — {reason}" for p, reason in report.archived], ) _section( "Restored", [f"- `{p.relative_to(WIKI_DIR)}`" for p in report.restored], ) if mode == "full": _section( "Missing xrefs added", [ f"- `{p.relative_to(WIKI_DIR)}` ← added: {', '.join(s)}" for p, s in report.missing_xrefs ], ) _section( "Duplicates merged", [ f"- `{w.relative_to(WIKI_DIR)}` → merged into `{s.relative_to(WIKI_DIR)}` ({r})" for w, s, r in report.duplicates ], ) path.write_text("\n".join(lines) + "\n") return path def write_needs_review_report(report: HygieneReport, mode: str) -> Path | None: """Write needs-review report if there's anything to review. Returns path or None.""" items: list[str] = [] if report.orphans_unfixed: items.append("## Orphan pages (no inbound links)") items.append("") items.extend(f"- `{p.relative_to(WIKI_DIR)}`" for p in report.orphans_unfixed) items.append("") if report.xrefs_unfixed: items.append("## Broken cross-references (no fuzzy match)") items.append("") items.extend( f"- `{t.relative_to(WIKI_DIR)}` → missing link `{bad}`" for t, bad in report.xrefs_unfixed ) items.append("") if report.empty_stubs: items.append("## Empty stubs (body < 100 chars)") items.append("") items.extend(f"- `{p.relative_to(WIKI_DIR)}`" for p in report.empty_stubs) items.append("") if report.state_drift: items.append("## State file drift") items.append("") items.extend(f"- {msg}" for msg in report.state_drift) items.append("") if mode == "full": if report.contradictions: items.append("## Contradictions (LLM-detected — human judgment required)") items.append("") for a, b, desc in report.contradictions: items.append(f"### `{a.relative_to(WIKI_DIR)}` vs `{b.relative_to(WIKI_DIR)}`") items.append("") items.append(desc) items.append("") if report.tech_lifecycle: items.append("## Technology lifecycle flags") items.append("") items.extend( f"- `{p.relative_to(WIKI_DIR)}` — {note}" for p, note in report.tech_lifecycle ) items.append("") if not items: return None REPORTS_DIR.mkdir(parents=True, exist_ok=True) path = REPORTS_DIR / f"hygiene-{today().isoformat()}-needs-review.md" header = [ f"# Hygiene Report — Needs Review ({today().isoformat()})", "", f"Mode: {mode}", f"Items requiring attention: {sum(1 for line in items if line.startswith(('## ', '### ')))}", "", ] path.write_text("\n".join(header + items) + "\n") return path # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def main() -> int: parser = argparse.ArgumentParser(description="Wiki hygiene — quick and full modes") mode = parser.add_mutually_exclusive_group() mode.add_argument("--quick", action="store_true", help="Run the quick hygiene loop (default)") mode.add_argument("--full", action="store_true", help="Run full hygiene (quick + LLM checks)") mode.add_argument("--backfill", action="store_true", help="Only run the last_verified backfill") mode.add_argument("--scan-refs", action="store_true", help="Only apply conversation refresh signals") mode.add_argument("--archive", metavar="PATH", help="Manually archive a live page") mode.add_argument("--restore", metavar="PATH", help="Manually restore an archived page") parser.add_argument("--dry-run", action="store_true", help="Show what would change without writing") parser.add_argument("--check-only", action="store_true", help="Report issues without auto-fixing") parser.add_argument("--reason", default="Manual archive", help="Reason for --archive") args = parser.parse_args() if args.backfill: changes = backfill_last_verified(dry_run=args.dry_run) for p, src, d in changes: print(f" {p.relative_to(WIKI_DIR)} ← {src} ({d.isoformat()})") print(f"\n{len(changes)} page(s) backfilled") return 0 if args.scan_refs: refs = scan_conversation_references() print(f"Found references to {len(refs)} wiki page(s)") changes = apply_refresh_signals(refs, dry_run=args.dry_run) for p, old, new, d in changes: print(f" {p.relative_to(WIKI_DIR)} {old}→{new} ({d.isoformat()})") print(f"\n{len(changes)} page(s) refreshed") return 0 if args.archive: path = Path(args.archive) if not path.is_absolute(): path = WIKI_DIR / path page = parse_page(path) if not page: print(f"Cannot parse page: {path}", file=sys.stderr) return 1 archive_page(page, args.reason, dry_run=args.dry_run) return 0 if args.restore: path = Path(args.restore) if not path.is_absolute(): path = WIKI_DIR / path page = parse_page(path) if not page: print(f"Cannot parse page: {path}", file=sys.stderr) return 1 restore_page(page, dry_run=args.dry_run) return 0 # Default: quick or full hygiene loop mode_name = "full" if args.full else "quick" if args.full: report = run_full_hygiene(dry_run=args.dry_run, check_only=args.check_only) else: report = run_quick_hygiene(dry_run=args.dry_run, check_only=args.check_only) fixed_path = write_fixed_report(report, mode_name, args.dry_run) review_path = write_needs_review_report(report, mode_name) print(f"\nFixed report: {fixed_path.relative_to(WIKI_DIR)}") if review_path: print(f"Needs-review report: {review_path.relative_to(WIKI_DIR)}") else: print("No items need human review.") return 0 if __name__ == "__main__": sys.exit(main())