A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene. Includes: - 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library) - Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE - Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup - 171-test pytest suite (cross-platform, runs in ~1.3s) - MIT licensed
301 lines
9.1 KiB
Python
301 lines
9.1 KiB
Python
"""Shared test fixtures for the wiki pipeline test suite.
|
|
|
|
All tests run against a disposable `tmp_wiki` directory — no test ever
|
|
touches the real ~/projects/wiki. Cross-platform: uses pathlib, no
|
|
platform-specific paths, and runs on both macOS and Linux/WSL.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import importlib.util
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Module loading helpers
|
|
# ---------------------------------------------------------------------------
|
|
#
|
|
# The wiki scripts use hyphenated filenames (wiki-hygiene.py etc.) which
|
|
# can't be imported via normal `import` syntax. These helpers load a script
|
|
# file as a module object so tests can exercise its functions directly.
|
|
|
|
|
|
def _load_script_module(name: str, path: Path) -> Any:
    """Execute the script file at *path* as a fresh module registered as *name*.

    Any cached module called *name*, and any cached ``wiki_lib``, is dropped
    from ``sys.modules`` before loading, so nothing from an earlier test is
    reused. The new module is returned after its top-level code has run.
    """
    # Evict stale entries so WIKI_DIR env changes take effect between tests.
    for stale in (name, "wiki_lib"):
        sys.modules.pop(stale, None)

    # Intra-script imports (wiki_lib) are resolved through sys.path, so the
    # scripts/ directory must be on it; add it at the front, but only once.
    scripts_dir = str(SCRIPTS_DIR)
    if scripts_dir not in sys.path:
        sys.path.insert(0, scripts_dir)

    module_spec = importlib.util.spec_from_file_location(name, path)
    assert module_spec is not None and module_spec.loader is not None
    module = importlib.util.module_from_spec(module_spec)
    # Register the module before executing it so it is already visible under
    # *name* while its own top-level code runs.
    sys.modules[name] = module
    module_spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# tmp_wiki fixture — builds a realistic wiki tree under a tmp path
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def tmp_wiki(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Build a throwaway wiki tree under ``tmp_path`` and return its root.

    The tree contains the four page categories (live, plus mirrors under
    ``staging/`` and ``archive/``), ``raw/harvested``, ``conversations``,
    ``reports``, a skeleton ``index.md`` and an empty ``.harvest-state.json``.
    WIKI_DIR is set to the root for the duration of the test.
    """
    root = tmp_path / "wiki"
    root.mkdir()

    # Every page category exists live, under staging/, and under archive/.
    for category in ("patterns", "decisions", "concepts", "environments"):
        for parent in (root, root / "staging", root / "archive"):
            (parent / category).mkdir(parents=True)

    # Remaining directories that are not per-category.
    for parts in (("raw", "harvested"), ("conversations",), ("reports",)):
        root.joinpath(*parts).mkdir(parents=True)

    # Minimal index.md: a title followed by one empty section per category.
    section_titles = ("Patterns", "Decisions", "Concepts", "Environments")
    index_text = "# Wiki Index\n\n" + "".join(
        f"## {section}\n\n" for section in section_titles
    )
    (root / "index.md").write_text(index_text)

    # Harvest state starts empty: four URL buckets and no recorded run.
    empty_state: dict[str, Any] = {
        bucket: {}
        for bucket in ("harvested_urls", "skipped_urls", "failed_urls", "rejected_urls")
    }
    empty_state["last_run"] = None
    (root / ".harvest-state.json").write_text(json.dumps(empty_state))

    # Point all scripts at this tmp wiki.
    monkeypatch.setenv("WIKI_DIR", str(root))

    return root
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sample page factories
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _fm_list_lines(key: str, items: Any) -> list[str]:
    """Render *items* as a frontmatter block list, or ``key: []`` when empty."""
    if not items:
        return [f"{key}: []"]
    return [f"{key}:", *(f" - {item}" for item in items)]


def make_page(
    wiki: Path,
    rel_path: str,
    *,
    title: str | None = None,
    ptype: str | None = None,
    confidence: str = "high",
    last_compiled: str = "2026-04-01",
    last_verified: str = "2026-04-01",
    origin: str = "manual",
    sources: list[str] | None = None,
    related: list[str] | None = None,
    body: str = "# Content\n\nA substantive page with real content so it is not a stub.\n",
    extra_fm: dict[str, Any] | None = None,
) -> Path:
    """Write a wiki page with all required frontmatter fields and return its path.

    Args:
        wiki: Root directory of the wiki the page is written into.
        rel_path: Page location relative to *wiki*; missing parent
            directories are created.
        title: Page title. Defaults to the file stem with hyphens turned
            into spaces, title-cased.
        ptype: Page type. Defaults to the parent directory name with one
            trailing "s" removed (``patterns`` -> ``pattern``).
        confidence, last_compiled, last_verified, origin: Written verbatim
            into the frontmatter.
        sources, related: Written as block lists; ``None`` or an empty list
            is written as ``key: []``.
        body: Text placed after the closing ``---`` line.
        extra_fm: Additional frontmatter entries appended after ``related``.
            ``list`` values are rendered like ``sources``; anything else is
            written as ``key: value``.

    Returns:
        The path of the file that was written.
    """
    path = wiki / rel_path
    path.parent.mkdir(parents=True, exist_ok=True)

    if title is None:
        title = path.stem.replace("-", " ").title()
    if ptype is None:
        # Drop exactly one plural "s". str.rstrip("s") would strip *every*
        # trailing "s" and mangle names such as "class" into "cla".
        parent_name = path.parent.name
        ptype = parent_name[:-1] if parent_name.endswith("s") else parent_name

    fm_lines = [
        "---",
        f"title: {title}",
        f"type: {ptype}",
        f"confidence: {confidence}",
        f"origin: {origin}",
        f"last_compiled: {last_compiled}",
        f"last_verified: {last_verified}",
    ]
    # sources/related are always emitted; a missing value is an empty list.
    fm_lines.extend(_fm_list_lines("sources", sources))
    fm_lines.extend(_fm_list_lines("related", related))

    for key, value in (extra_fm or {}).items():
        if isinstance(value, list):
            fm_lines.extend(_fm_list_lines(key, value))
        else:
            fm_lines.append(f"{key}: {value}")
    fm_lines.append("---")

    path.write_text("\n".join(fm_lines) + "\n" + body)
    return path
|
|
|
|
|
|
def make_conversation(
    wiki: Path,
    project: str,
    filename: str,
    *,
    date: str = "2026-04-10",
    status: str = "summarized",
    messages: int = 100,
    related: list[str] | None = None,
    body: str = "## Summary\n\nTest conversation summary.\n",
) -> Path:
    """Create ``conversations/<project>/<filename>`` in the tmp wiki.

    The file gets conversation frontmatter (title, type, project, date,
    status, messages and, when *related* is non-empty, a ``related`` list)
    followed by *body*. The project directory is created if needed.
    Returns the path of the written file.
    """
    folder = wiki.joinpath("conversations", project)
    folder.mkdir(parents=True, exist_ok=True)
    target = folder / filename

    # Scalar frontmatter entries, in the order they appear in the file.
    scalar_fields = {
        "title": f"Test Conversation {filename}",
        "type": "conversation",
        "project": project,
        "date": date,
        "status": status,
        "messages": messages,
    }
    header = ["---"]
    header += [f"{key}: {value}" for key, value in scalar_fields.items()]
    # The related block is omitted entirely when there is nothing to list.
    if related:
        header += ["related:", *(f" - {link}" for link in related)]
    header.append("---")

    target.write_text("\n".join([*header, body]))
    return target
|
|
|
|
|
|
def make_staging_page(
    wiki: Path,
    rel_under_staging: str,
    *,
    title: str = "Pending Page",
    ptype: str = "pattern",
    staged_by: str = "wiki-harvest",
    staged_date: str = "2026-04-10",
    modifies: str | None = None,
    target_path: str | None = None,
    body: str = "# Pending\n\nStaged content body.\n",
) -> Path:
    """Create a pending page at ``staging/<rel_under_staging>`` and return its path.

    The frontmatter marks the page as ``status: pending`` with fixed
    ``confidence: medium`` and ``origin: automated``. ``target_path``
    defaults to *rel_under_staging*; a ``modifies`` line is written only
    when *modifies* is truthy. Missing parent directories are created.
    """
    staged_file = wiki / "staging" / rel_under_staging
    staged_file.parent.mkdir(parents=True, exist_ok=True)

    destination = rel_under_staging if target_path is None else target_path
    # Zero or one "modifies" line, spliced in right after target_path.
    modifies_line = [f"modifies: {modifies}"] if modifies else []

    header = [
        "---",
        f"title: {title}",
        f"type: {ptype}",
        "confidence: medium",
        "origin: automated",
        "status: pending",
        f"staged_date: {staged_date}",
        f"staged_by: {staged_by}",
        f"target_path: {destination}",
        *modifies_line,
        "compilation_notes: test note",
        "last_verified: 2026-04-10",
        "---",
    ]

    staged_file.write_text("\n".join([*header, body]))
    return staged_file
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Module fixtures — each loads the corresponding script as a module
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def wiki_lib(tmp_wiki: Path) -> Any:
    """Return ``wiki_lib.py`` loaded as a fresh module.

    Requesting ``tmp_wiki`` first means WIKI_DIR already points at the
    disposable wiki when the module's top-level code runs.
    """
    lib_path = SCRIPTS_DIR / "wiki_lib.py"
    return _load_script_module("wiki_lib", lib_path)
|
|
|
|
|
|
@pytest.fixture
def wiki_hygiene(tmp_wiki: Path) -> Any:
    """Return ``wiki-hygiene.py`` loaded as a fresh ``wiki_hygiene`` module.

    ``wiki_lib.py`` is loaded first, then the hygiene script; only the
    hygiene module is returned.
    """
    lib_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-hygiene.py"
    _load_script_module("wiki_lib", lib_path)
    return _load_script_module("wiki_hygiene", script_path)
|
|
|
|
|
|
@pytest.fixture
def wiki_staging(tmp_wiki: Path) -> Any:
    """Return ``wiki-staging.py`` loaded as a fresh ``wiki_staging`` module.

    ``wiki_lib.py`` is loaded first, then the staging script; only the
    staging module is returned.
    """
    lib_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-staging.py"
    _load_script_module("wiki_lib", lib_path)
    return _load_script_module("wiki_staging", script_path)
|
|
|
|
|
|
@pytest.fixture
def wiki_harvest(tmp_wiki: Path) -> Any:
    """Return ``wiki-harvest.py`` loaded as a fresh ``wiki_harvest`` module.

    ``wiki_lib.py`` is loaded first, then the harvest script; only the
    harvest module is returned.
    """
    lib_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-harvest.py"
    _load_script_module("wiki_lib", lib_path)
    return _load_script_module("wiki_harvest", script_path)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subprocess helper — runs a script as if from the CLI, with WIKI_DIR set
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def run_script(tmp_wiki: Path):
    """Return a function that runs a script via subprocess with WIKI_DIR set.

    The returned callable has the signature
    ``_run(script_rel, *args, timeout=60)``:

    * ``script_rel`` is resolved against ``SCRIPTS_DIR``. Files ending in
      ``.py`` are run with the Python interpreter that is running the test
      suite; everything else is run with ``bash``.
    * ``args`` are passed to the script unchanged.
    * ``timeout`` is forwarded to ``subprocess.run``.

    The child inherits the current environment with WIKI_DIR overridden to
    the tmp wiki. Output is captured as text and the
    ``subprocess.CompletedProcess`` is returned; a non-zero exit status does
    not raise.
    """
    import subprocess

    # Run Python scripts with the same interpreter as the tests, so the child
    # sees the same virtualenv and packages. A bare "python3" is resolved
    # from PATH and may be a different interpreter or missing entirely; it is
    # kept only as a fallback for when sys.executable is unavailable.
    python = sys.executable or "python3"

    def _run(script_rel: str, *args: str, timeout: int = 60) -> subprocess.CompletedProcess:
        script = SCRIPTS_DIR / script_rel
        launcher = python if script.suffix == ".py" else "bash"

        # Copy the environment so the override never leaks into this process.
        env = os.environ.copy()
        env["WIKI_DIR"] = str(tmp_wiki)

        return subprocess.run(
            [launcher, str(script), *args],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )

    return _run
|