Initial commit — memex

A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
This commit is contained in:
Eric Turner
2026-04-12 21:16:02 -06:00
commit ee54a2f5d4
31 changed files with 10792 additions and 0 deletions

View File

@@ -0,0 +1,121 @@
"""Smoke + integration tests for the conversation mining pipeline.
These scripts interact with external systems (Claude Code sessions dir,
claude CLI), so tests focus on CLI parsing, dry-run behavior, and error
handling rather than exercising the full extraction/summarization path.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
# ---------------------------------------------------------------------------
# extract-sessions.py
# ---------------------------------------------------------------------------
class TestExtractSessions:
    """CLI smoke tests for ``extract-sessions.py``.

    Each test invokes the script through the ``run_script`` fixture and
    inspects the ``returncode``, ``stdout`` and ``stderr`` of the result.
    """

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0 and lists the ``--project`` and ``--dry-run`` flags."""
        result = run_script("extract-sessions.py", "--help")
        assert result.returncode == 0
        assert "--project" in result.stdout
        assert "--dry-run" in result.stdout

    def test_dry_run_with_empty_sessions_dir(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A dry run for a project code that has no sessions exits 0."""
        # Redirecting the sessions directory via an environment variable is not
        # currently supported — the script reads ~/.claude/projects directly.
        # Instead, use --project with a code that has no sessions to verify a
        # clean exit.
        # NOTE(review): ``tmp_wiki`` is not referenced in the body; it is kept
        # because it presumably prepares the wiki the script runs against —
        # confirm in conftest.
        result = run_script("extract-sessions.py", "--dry-run", "--project", "nonexistent")
        assert result.returncode == 0

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unknown flag yields a non-zero exit and an error message on stderr."""
        result = run_script("extract-sessions.py", "--bogus-flag")
        assert result.returncode != 0
        # Normalize once; either wording is accepted as the rejection message.
        stderr = result.stderr.lower()
        assert "error" in stderr or "unrecognized" in stderr
# ---------------------------------------------------------------------------
# summarize-conversations.py
# ---------------------------------------------------------------------------
class TestSummarizeConversations:
    """CLI smoke tests for ``summarize-conversations.py``.

    All invocations go through the ``run_script`` fixture; the dry-run tests
    also request ``tmp_wiki``.
    """

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0 and documents the three main flags."""
        proc = run_script("summarize-conversations.py", "--help")
        assert proc.returncode == 0
        for flag in ("--claude", "--dry-run", "--project"):
            assert flag in proc.stdout

    def test_dry_run_empty_conversations(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A dry run with no conversations created by the test exits 0."""
        proc = run_script("summarize-conversations.py", "--claude", "--dry-run")
        assert proc.returncode == 0

    def test_dry_run_with_extracted_conversation(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A dry run reports a conversation that is still in ``extracted`` state."""
        from conftest import make_conversation

        conversation_name = "2026-04-10-abc.md"
        make_conversation(
            tmp_wiki,
            "general",
            conversation_name,
            status="extracted",  # Not yet summarized
            messages=50,
        )

        proc = run_script("summarize-conversations.py", "--claude", "--dry-run")
        assert proc.returncode == 0

        # The output should either name the file or report that one
        # conversation would be processed.
        accepted_markers = (conversation_name, "1 conversation")
        assert any(marker in proc.stdout for marker in accepted_markers)
# ---------------------------------------------------------------------------
# update-conversation-index.py
# ---------------------------------------------------------------------------
class TestUpdateConversationIndex:
    """Smoke and integration tests for ``update-conversation-index.py``.

    The script is run through the ``run_script`` fixture; the index it is
    expected to produce is read from ``<tmp_wiki>/conversations/index.md``.
    """

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0."""
        result = run_script("update-conversation-index.py", "--help")
        assert result.returncode == 0

    def test_runs_on_empty_conversations_dir(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """Running with no conversations created by the test exits 0."""
        result = run_script("update-conversation-index.py")
        # Should not crash even with no conversations
        assert result.returncode == 0

    def test_builds_index_from_conversations(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """Two summarized conversations both appear in the generated index."""
        from conftest import make_conversation

        make_conversation(
            tmp_wiki,
            "general",
            "2026-04-10-one.md",
            status="summarized",
        )
        make_conversation(
            tmp_wiki,
            "general",
            "2026-04-11-two.md",
            status="summarized",
        )
        result = run_script("update-conversation-index.py")
        assert result.returncode == 0

        idx = tmp_wiki / "conversations" / "index.md"
        assert idx.exists()
        # Decode explicitly so the check does not depend on the platform's
        # locale encoding; errors="replace" keeps unrelated non-UTF-8 bytes
        # from aborting the test, since only ASCII file names are searched for.
        text = idx.read_text(encoding="utf-8", errors="replace")
        # Matching the short suffix also matches the full dated file name
        # ("2026-04-10-one.md" contains "one.md"), so one check covers both
        # ways the index might reference a conversation.
        assert "one.md" in text
        assert "two.md" in text