Initial commit — memex
A compounding LLM-maintained knowledge wiki. Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's mempalace, with an automation layer on top for conversation mining, URL harvesting, human-in-the-loop staging, staleness decay, and hygiene. Includes: - 11 pipeline scripts (extract, summarize, index, harvest, stage, hygiene, maintain, sync, + shared library) - Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE - Example CLAUDE.md files (wiki schema + global instructions) tuned for the three-collection qmd setup - 171-test pytest suite (cross-platform, runs in ~1.3s) - MIT licensed
This commit is contained in:
300
tests/conftest.py
Normal file
300
tests/conftest.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""Shared test fixtures for the wiki pipeline test suite.
|
||||
|
||||
All tests run against a disposable `tmp_wiki` directory — no test ever
|
||||
touches the real ~/projects/wiki. Cross-platform: uses pathlib, no
|
||||
platform-specific paths, and runs on both macOS and Linux/WSL.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module loading helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# The wiki scripts use hyphenated filenames (wiki-hygiene.py etc.) which
|
||||
# can't be imported via normal `import` syntax. These helpers load a script
|
||||
# file as a module object so tests can exercise its functions directly.
|
||||
|
||||
|
||||
def _load_script_module(name: str, path: Path) -> Any:
    """Import the script file at *path* as a module registered under *name*.

    Cached copies of *name* and of ``wiki_lib`` are evicted from
    ``sys.modules`` beforehand so that a changed WIKI_DIR environment
    variable is picked up on the next import.

    Args:
        name: Key under which the module is stored in ``sys.modules``.
        path: Location of the Python script to execute as a module.

    Returns:
        The freshly executed module object.
    """
    # Evict stale copies; a missing key is simply ignored.
    for stale in (name, "wiki_lib"):
        sys.modules.pop(stale, None)

    # Scripts import wiki_lib as a sibling, so scripts/ must be importable.
    scripts_entry = str(SCRIPTS_DIR)
    if scripts_entry not in sys.path:
        sys.path.insert(0, scripts_entry)

    spec = importlib.util.spec_from_file_location(name, path)
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be found while it runs.
    sys.modules[name] = module
    spec.loader.exec_module(module)
    return module
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# tmp_wiki fixture — builds a realistic wiki tree under a tmp path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_wiki(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Build a throwaway wiki tree under ``tmp_path`` and point WIKI_DIR at it.

    The tree contains the four page categories (each also mirrored under
    ``staging/`` and ``archive/``), ``raw/harvested``, ``conversations``,
    ``reports``, a skeleton ``index.md`` and an empty ``.harvest-state.json``.

    Returns:
        Path to the root of the temporary wiki.
    """
    root = tmp_path / "wiki"
    root.mkdir()

    categories = ("patterns", "decisions", "concepts", "environments")

    # Live, staging and archive copies of every category directory.
    for category in categories:
        (root / category).mkdir()
        for area in ("staging", "archive"):
            (root / area / category).mkdir(parents=True)
    (root / "raw" / "harvested").mkdir(parents=True)
    for plain_dir in ("conversations", "reports"):
        (root / plain_dir).mkdir()

    # Skeleton index: a title followed by one empty section per category.
    index_text = "# Wiki Index\n\n" + "".join(
        f"## {category.title()}\n\n" for category in categories
    )
    (root / "index.md").write_text(index_text)

    # Harvest state starts with empty URL buckets and no recorded run.
    harvest_state = {
        bucket: {}
        for bucket in (
            "harvested_urls",
            "skipped_urls",
            "failed_urls",
            "rejected_urls",
        )
    }
    harvest_state["last_run"] = None
    (root / ".harvest-state.json").write_text(json.dumps(harvest_state))

    # Modules loaded after this point read WIKI_DIR from the environment.
    monkeypatch.setenv("WIKI_DIR", str(root))

    return root
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sample page factories
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fm_list_lines(key: str, items: list[Any]) -> list[str]:
    """Render a list-valued frontmatter field as one or more lines."""
    if not items:
        return [f"{key}: []"]
    return [f"{key}:", *(f" - {item}" for item in items)]


def make_page(
    wiki: Path,
    rel_path: str,
    *,
    title: str | None = None,
    ptype: str | None = None,
    confidence: str = "high",
    last_compiled: str = "2026-04-01",
    last_verified: str = "2026-04-01",
    origin: str = "manual",
    sources: list[str] | None = None,
    related: list[str] | None = None,
    body: str = "# Content\n\nA substantive page with real content so it is not a stub.\n",
    extra_fm: dict[str, Any] | None = None,
) -> Path:
    """Write a well-formed wiki page with all required frontmatter fields.

    Args:
        wiki: Root directory of the wiki the page is written into.
        rel_path: Page location relative to ``wiki``; missing parent
            directories are created.
        title: Page title. Defaults to the file stem with hyphens replaced
            by spaces and title-cased.
        ptype: Page type. Defaults to the parent directory name with
            trailing ``s`` characters stripped (``patterns`` -> ``pattern``).
        confidence: Value of the ``confidence`` field.
        last_compiled: Value of the ``last_compiled`` field.
        last_verified: Value of the ``last_verified`` field.
        origin: Value of the ``origin`` field.
        sources: Entries for the ``sources`` list; ``None`` is treated as
            an empty list, which is written as ``sources: []``.
        related: Entries for the ``related`` list; ``None`` is treated as
            an empty list, which is written as ``related: []``.
        body: Text written after the closing frontmatter delimiter.
        extra_fm: Additional frontmatter fields appended after ``related``.
            List values are rendered like ``sources``; anything else is
            written as ``key: value``.

    Returns:
        Path of the page that was written.
    """
    path = wiki / rel_path
    path.parent.mkdir(parents=True, exist_ok=True)

    if title is None:
        title = path.stem.replace("-", " ").title()
    if ptype is None:
        ptype = path.parent.name.rstrip("s")

    fm_lines = [
        "---",
        f"title: {title}",
        f"type: {ptype}",
        f"confidence: {confidence}",
        f"origin: {origin}",
        f"last_compiled: {last_compiled}",
        f"last_verified: {last_verified}",
    ]
    # sources/related are always emitted; None simply means "empty list".
    fm_lines.extend(_fm_list_lines("sources", sources or []))
    fm_lines.extend(_fm_list_lines("related", related or []))

    for key, value in (extra_fm or {}).items():
        if isinstance(value, list):
            fm_lines.extend(_fm_list_lines(key, value))
        else:
            fm_lines.append(f"{key}: {value}")
    fm_lines.append("---")

    path.write_text("\n".join(fm_lines) + "\n" + body)
    return path
|
||||
|
||||
|
||||
def make_conversation(
    wiki: Path,
    project: str,
    filename: str,
    *,
    date: str = "2026-04-10",
    status: str = "summarized",
    messages: int = 100,
    related: list[str] | None = None,
    body: str = "## Summary\n\nTest conversation summary.\n",
) -> Path:
    """Create a conversation file under ``conversations/<project>/``.

    The project directory is created if missing. A ``related:`` list is
    written only when ``related`` is non-empty.

    Returns:
        Path of the conversation file that was written.
    """
    project_dir = wiki / "conversations" / project
    project_dir.mkdir(parents=True, exist_ok=True)
    target = project_dir / filename

    scalar_fields = (
        ("title", f"Test Conversation {filename}"),
        ("type", "conversation"),
        ("project", project),
        ("date", date),
        ("status", status),
        ("messages", messages),
    )

    header = ["---"]
    header.extend(f"{key}: {value}" for key, value in scalar_fields)
    if related:
        header += ["related:", *(f" - {entry}" for entry in related)]
    header.append("---")

    target.write_text("\n".join(header) + "\n" + body)
    return target
|
||||
|
||||
|
||||
def make_staging_page(
    wiki: Path,
    rel_under_staging: str,
    *,
    title: str = "Pending Page",
    ptype: str = "pattern",
    staged_by: str = "wiki-harvest",
    staged_date: str = "2026-04-10",
    modifies: str | None = None,
    target_path: str | None = None,
    body: str = "# Pending\n\nStaged content body.\n",
) -> Path:
    """Create a pending page under ``staging/`` with staging frontmatter.

    ``target_path`` falls back to ``rel_under_staging`` when not given, and
    a ``modifies:`` field is written only when ``modifies`` is truthy.

    Returns:
        Path of the staged page that was written.
    """
    staged_file = wiki / "staging" / rel_under_staging
    staged_file.parent.mkdir(parents=True, exist_ok=True)

    effective_target = rel_under_staging if target_path is None else target_path

    leading = [
        "---",
        f"title: {title}",
        f"type: {ptype}",
        "confidence: medium",
        "origin: automated",
        "status: pending",
        f"staged_date: {staged_date}",
        f"staged_by: {staged_by}",
        f"target_path: {effective_target}",
    ]
    optional = [f"modifies: {modifies}"] if modifies else []
    trailing = [
        "compilation_notes: test note",
        "last_verified: 2026-04-10",
        "---",
    ]

    frontmatter = "\n".join(leading + optional + trailing)
    staged_file.write_text(frontmatter + "\n" + body)
    return staged_file
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module fixtures — each loads the corresponding script as a module
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def wiki_lib(tmp_wiki: Path) -> Any:
    """Provide a freshly loaded ``wiki_lib`` module.

    Requesting ``tmp_wiki`` first means WIKI_DIR already points at the
    temporary wiki when the module is executed.
    """
    lib_path = SCRIPTS_DIR / "wiki_lib.py"
    return _load_script_module("wiki_lib", lib_path)
|
||||
|
||||
|
||||
@pytest.fixture
def wiki_hygiene(tmp_wiki: Path) -> Any:
    """Provide a freshly loaded ``wiki-hygiene.py`` module.

    ``wiki_lib`` is loaded beforehand, then the hygiene script itself.
    """
    # NOTE(review): _load_script_module evicts "wiki_lib" from sys.modules on
    # every call, so the script's own import re-imports it; confirm whether
    # this preload is still required.
    _load_script_module("wiki_lib", SCRIPTS_DIR / "wiki_lib.py")
    hygiene_module = _load_script_module(
        "wiki_hygiene", SCRIPTS_DIR / "wiki-hygiene.py"
    )
    return hygiene_module
|
||||
|
||||
|
||||
@pytest.fixture
def wiki_staging(tmp_wiki: Path) -> Any:
    """Provide a freshly loaded ``wiki-staging.py`` module.

    ``wiki_lib`` is loaded beforehand, then the staging script itself.
    """
    _load_script_module("wiki_lib", SCRIPTS_DIR / "wiki_lib.py")
    staging_module = _load_script_module(
        "wiki_staging", SCRIPTS_DIR / "wiki-staging.py"
    )
    return staging_module
|
||||
|
||||
|
||||
@pytest.fixture
def wiki_harvest(tmp_wiki: Path) -> Any:
    """Provide a freshly loaded ``wiki-harvest.py`` module.

    ``wiki_lib`` is loaded beforehand, then the harvest script itself.
    """
    _load_script_module("wiki_lib", SCRIPTS_DIR / "wiki_lib.py")
    harvest_module = _load_script_module(
        "wiki_harvest", SCRIPTS_DIR / "wiki-harvest.py"
    )
    return harvest_module
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subprocess helper — runs a script as if from the CLI, with WIKI_DIR set
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def run_script(tmp_wiki: Path):
    """Return a function that runs a script via subprocess with WIKI_DIR set.

    The returned callable has the signature
    ``_run(script_rel, *args, timeout=60)``:

    * ``script_rel`` is resolved relative to ``SCRIPTS_DIR``.
    * ``.py`` scripts are run with the interpreter executing the test
      suite; anything else is run with ``bash``.
    * The child inherits the current environment with ``WIKI_DIR``
      overridden to the temporary wiki.
    * stdout and stderr are captured as text, and the
      ``subprocess.CompletedProcess`` is returned. A non-zero exit status
      does not raise; ``subprocess.TimeoutExpired`` propagates on timeout.
    """
    import subprocess

    def _run(script_rel: str, *args: str, timeout: int = 60) -> subprocess.CompletedProcess:
        script = SCRIPTS_DIR / script_rel
        if script.suffix == ".py":
            # Run with the same interpreter as the tests so the child sees the
            # same virtualenv and packages; a bare "python3" may resolve to a
            # different installation or be missing from PATH. Fall back to it
            # only if sys.executable is unavailable.
            launcher = sys.executable or "python3"
        else:
            launcher = "bash"

        env = os.environ.copy()
        env["WIKI_DIR"] = str(tmp_wiki)
        return subprocess.run(
            [launcher, str(script), *args],
            capture_output=True,
            text=True,
            timeout=timeout,
            env=env,
        )

    return _run
|
||||
Reference in New Issue
Block a user