Files
memex/tests/conftest.py
Eric Turner ee54a2f5d4 Initial commit — memex
A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
2026-04-12 21:16:02 -06:00

301 lines
9.1 KiB
Python

"""Shared test fixtures for the wiki pipeline test suite.
All tests run against a disposable `tmp_wiki` directory — no test ever
touches the real ~/projects/wiki. Cross-platform: uses pathlib, no
platform-specific paths, and runs on both macOS and Linux/WSL.
"""
from __future__ import annotations
import importlib
import importlib.util
import json
import os
import sys
from pathlib import Path
from typing import Any
import pytest
SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
# ---------------------------------------------------------------------------
# Module loading helpers
# ---------------------------------------------------------------------------
#
# The wiki scripts use hyphenated filenames (wiki-hygiene.py etc.) which
# can't be imported via normal `import` syntax. These helpers load a script
# file as a module object so tests can exercise its functions directly.
def _load_script_module(name: str, path: Path) -> Any:
    """Execute a script file and return it as a freshly loaded module.

    Args:
        name: Key under which the module is registered in ``sys.modules``.
        path: Location of the script file to execute.

    Returns:
        The module object produced by executing ``path``.

    Raises:
        ImportError: If no import spec (or loader) can be built for ``path``.
        Exception: Anything the script itself raises while executing. The
            partially initialised module is removed from ``sys.modules``
            before the exception propagates, so a failed load cannot leak
            into later tests.
    """
    # Drop cached copies so WIKI_DIR env changes take effect between tests.
    # wiki_lib is always evicted as well, so any later `import wiki_lib`
    # executes it again.
    for stale in (name, "wiki_lib"):
        sys.modules.pop(stale, None)

    # scripts/ must be on sys.path so intra-script imports (wiki_lib) work.
    scripts_str = str(SCRIPTS_DIR)
    if scripts_str not in sys.path:
        sys.path.insert(0, scripts_str)

    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {name!r} from {path}")

    mod = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be found by name while its
    # own body is still running.
    sys.modules[name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Never leave a half-executed module behind for the next test.
        sys.modules.pop(name, None)
        raise
    return mod
# ---------------------------------------------------------------------------
# tmp_wiki fixture — builds a realistic wiki tree under a tmp path
# ---------------------------------------------------------------------------
@pytest.fixture
def tmp_wiki(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Build a throwaway wiki tree and point WIKI_DIR at it.

    Under pytest's ``tmp_path`` this creates the category directories (live,
    staging and archive copies), ``raw/harvested``, ``conversations`` and
    ``reports``, a skeleton ``index.md`` and an empty ``.harvest-state.json``.
    The root is then exported through the WIKI_DIR environment variable.

    Returns:
        Path to the root of the temporary wiki.
    """
    root = tmp_path / "wiki"
    root.mkdir()

    # Every category exists live, under staging/ and under archive/.
    for category in ("patterns", "decisions", "concepts", "environments"):
        (root / category).mkdir()
        for parent in ("staging", "archive"):
            (root / parent / category).mkdir(parents=True)

    # Remaining working directories.
    (root / "raw" / "harvested").mkdir(parents=True)
    for leaf in ("conversations", "reports"):
        (root / leaf).mkdir()

    # Skeleton index: one empty section per category.
    index_text = (
        "# Wiki Index\n\n"
        "## Patterns\n\n"
        "## Decisions\n\n"
        "## Concepts\n\n"
        "## Environments\n\n"
    )
    (root / "index.md").write_text(index_text)

    # Harvest state starts with nothing recorded and no previous run.
    empty_state = {
        "harvested_urls": {},
        "skipped_urls": {},
        "failed_urls": {},
        "rejected_urls": {},
        "last_run": None,
    }
    (root / ".harvest-state.json").write_text(json.dumps(empty_state))

    # Scripts resolve their paths from this variable.
    monkeypatch.setenv("WIKI_DIR", str(root))
    return root
# ---------------------------------------------------------------------------
# Sample page factories
# ---------------------------------------------------------------------------
def make_page(
wiki: Path,
rel_path: str,
*,
title: str | None = None,
ptype: str | None = None,
confidence: str = "high",
last_compiled: str = "2026-04-01",
last_verified: str = "2026-04-01",
origin: str = "manual",
sources: list[str] | None = None,
related: list[str] | None = None,
body: str = "# Content\n\nA substantive page with real content so it is not a stub.\n",
extra_fm: dict[str, Any] | None = None,
) -> Path:
"""Write a well-formed wiki page with all required frontmatter fields."""
if sources is None:
sources = []
if related is None:
related = []
"""Write a page to the tmp wiki and return its path."""
path = wiki / rel_path
path.parent.mkdir(parents=True, exist_ok=True)
if title is None:
title = path.stem.replace("-", " ").title()
if ptype is None:
ptype = path.parent.name.rstrip("s")
fm_lines = [
"---",
f"title: {title}",
f"type: {ptype}",
f"confidence: {confidence}",
f"origin: {origin}",
f"last_compiled: {last_compiled}",
f"last_verified: {last_verified}",
]
if sources is not None:
if sources:
fm_lines.append("sources:")
fm_lines.extend(f" - {s}" for s in sources)
else:
fm_lines.append("sources: []")
if related is not None:
if related:
fm_lines.append("related:")
fm_lines.extend(f" - {r}" for r in related)
else:
fm_lines.append("related: []")
if extra_fm:
for k, v in extra_fm.items():
if isinstance(v, list):
if v:
fm_lines.append(f"{k}:")
fm_lines.extend(f" - {item}" for item in v)
else:
fm_lines.append(f"{k}: []")
else:
fm_lines.append(f"{k}: {v}")
fm_lines.append("---")
path.write_text("\n".join(fm_lines) + "\n" + body)
return path
def make_conversation(
wiki: Path,
project: str,
filename: str,
*,
date: str = "2026-04-10",
status: str = "summarized",
messages: int = 100,
related: list[str] | None = None,
body: str = "## Summary\n\nTest conversation summary.\n",
) -> Path:
"""Write a conversation file to the tmp wiki."""
proj_dir = wiki / "conversations" / project
proj_dir.mkdir(parents=True, exist_ok=True)
path = proj_dir / filename
fm_lines = [
"---",
f"title: Test Conversation {filename}",
"type: conversation",
f"project: {project}",
f"date: {date}",
f"status: {status}",
f"messages: {messages}",
]
if related:
fm_lines.append("related:")
fm_lines.extend(f" - {r}" for r in related)
fm_lines.append("---")
path.write_text("\n".join(fm_lines) + "\n" + body)
return path
def make_staging_page(
wiki: Path,
rel_under_staging: str,
*,
title: str = "Pending Page",
ptype: str = "pattern",
staged_by: str = "wiki-harvest",
staged_date: str = "2026-04-10",
modifies: str | None = None,
target_path: str | None = None,
body: str = "# Pending\n\nStaged content body.\n",
) -> Path:
path = wiki / "staging" / rel_under_staging
path.parent.mkdir(parents=True, exist_ok=True)
if target_path is None:
target_path = rel_under_staging
fm_lines = [
"---",
f"title: {title}",
f"type: {ptype}",
"confidence: medium",
"origin: automated",
"status: pending",
f"staged_date: {staged_date}",
f"staged_by: {staged_by}",
f"target_path: {target_path}",
]
if modifies:
fm_lines.append(f"modifies: {modifies}")
fm_lines.append("compilation_notes: test note")
fm_lines.append("last_verified: 2026-04-10")
fm_lines.append("---")
path.write_text("\n".join(fm_lines) + "\n" + body)
return path
# ---------------------------------------------------------------------------
# Module fixtures — each loads the corresponding script as a module
# ---------------------------------------------------------------------------
@pytest.fixture
def wiki_lib(tmp_wiki: Path) -> Any:
    """Provide a freshly executed wiki_lib module for the temporary wiki.

    Depending on ``tmp_wiki`` means WIKI_DIR is already set when the module
    body runs.
    """
    library_path = SCRIPTS_DIR / "wiki_lib.py"
    return _load_script_module("wiki_lib", library_path)
@pytest.fixture
def wiki_hygiene(tmp_wiki: Path) -> Any:
    """Provide wiki-hygiene.py as a freshly executed module for the temporary wiki.

    wiki_lib is loaded first, then the script itself under the importable
    name ``wiki_hygiene``.
    """
    library_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-hygiene.py"
    # NOTE(review): _load_script_module evicts any cached wiki_lib on every
    # call, so this preload is dropped again before the script body runs —
    # confirm whether it is still needed.
    _load_script_module("wiki_lib", library_path)
    return _load_script_module("wiki_hygiene", script_path)
@pytest.fixture
def wiki_staging(tmp_wiki: Path) -> Any:
    """Provide wiki-staging.py as a freshly executed module for the temporary wiki.

    wiki_lib is loaded first, then the script itself under the importable
    name ``wiki_staging``.
    """
    library_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-staging.py"
    # NOTE(review): _load_script_module evicts any cached wiki_lib on every
    # call, so this preload is dropped again before the script body runs —
    # confirm whether it is still needed.
    _load_script_module("wiki_lib", library_path)
    return _load_script_module("wiki_staging", script_path)
@pytest.fixture
def wiki_harvest(tmp_wiki: Path) -> Any:
    """Provide wiki-harvest.py as a freshly executed module for the temporary wiki.

    wiki_lib is loaded first, then the script itself under the importable
    name ``wiki_harvest``.
    """
    library_path = SCRIPTS_DIR / "wiki_lib.py"
    script_path = SCRIPTS_DIR / "wiki-harvest.py"
    # NOTE(review): _load_script_module evicts any cached wiki_lib on every
    # call, so this preload is dropped again before the script body runs —
    # confirm whether it is still needed.
    _load_script_module("wiki_lib", library_path)
    return _load_script_module("wiki_harvest", script_path)
# ---------------------------------------------------------------------------
# Subprocess helper — runs a script as if from the CLI, with WIKI_DIR set
# ---------------------------------------------------------------------------
@pytest.fixture
def run_script(tmp_wiki: Path):
    """Return a callable that runs a pipeline script as a CLI subprocess.

    The returned function has the signature
    ``_run(script_rel, *args, timeout=60)``:

    * ``script_rel`` is resolved relative to SCRIPTS_DIR. ``.py`` files are
      run with the interpreter that is running the test suite; anything else
      is handed to ``bash``.
    * ``args`` are passed through as command-line arguments.
    * ``timeout`` is the limit in seconds given to ``subprocess.run``, which
      raises ``subprocess.TimeoutExpired`` when it is exceeded.

    The child inherits the current environment with WIKI_DIR set to the
    temporary wiki. Output is captured as text and the exit status is not
    checked, so callers inspect ``returncode``/``stdout``/``stderr`` on the
    returned ``CompletedProcess`` themselves.
    """
    import subprocess

    # Run scripts with the interpreter executing pytest so they see the same
    # virtualenv and installed packages; a bare "python3" is whatever PATH
    # happens to resolve to (and may not exist at all). Fall back to it only
    # if the interpreter path is unavailable.
    python = sys.executable or "python3"

    def _run(script_rel: str, *args: str, timeout: int = 60) -> subprocess.CompletedProcess:
        script = SCRIPTS_DIR / script_rel
        launcher = python if script.suffix == ".py" else "bash"
        env = os.environ.copy()
        env["WIKI_DIR"] = str(tmp_wiki)
        return subprocess.run(
            [launcher, str(script), *args],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )

    return _run