Initial commit — memex

A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
This commit is contained in:
Eric Turner
2026-04-12 21:16:02 -06:00
commit ee54a2f5d4
31 changed files with 10792 additions and 0 deletions

209
tests/test_shell_scripts.py Normal file
View File

@@ -0,0 +1,209 @@
"""Smoke tests for the bash scripts.
Bash scripts are harder to unit-test in isolation — these tests verify
CLI parsing, help text, and dry-run/safe flags work correctly and that
scripts exit cleanly in all the no-op paths.
Cross-platform note: tests invoke scripts via `bash` explicitly, so they
work on both macOS (default /bin/bash) and Linux/WSL. They avoid anything
that requires external state (network, git, LLM).
"""
from __future__ import annotations
import os
import subprocess
from pathlib import Path
from typing import Any
import pytest
from conftest import make_conversation, make_page, make_staging_page
# ---------------------------------------------------------------------------
# wiki-maintain.sh
# ---------------------------------------------------------------------------
class TestWikiMaintainSh:
    """Smoke tests for the ``wiki-maintain.sh`` command-line interface.

    Each test invokes the script through the ``run_script`` fixture and
    inspects the completed process's ``returncode``, ``stdout`` and
    ``stderr``.

    NOTE(review): ``run_script`` and ``tmp_wiki`` are defined in conftest;
    these tests presumably rely on ``run_script`` pointing the script at the
    ``tmp_wiki`` tree -- confirm against the fixture definitions.
    """

    def test_help_flag(self, run_script) -> None:
        """``--help`` exits 0 and mentions usage plus the main mode flags."""
        result = run_script("wiki-maintain.sh", "--help")
        assert result.returncode == 0
        # A case-insensitive match already covers the literal "Usage:" form,
        # so a single check is enough.
        assert "usage:" in result.stdout.lower()
        assert "--full" in result.stdout
        assert "--harvest-only" in result.stdout
        assert "--hygiene-only" in result.stdout

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unrecognised flag fails and is reported on stderr."""
        result = run_script("wiki-maintain.sh", "--bogus")
        assert result.returncode != 0
        assert "Unknown option" in result.stderr

    def test_harvest_only_and_hygiene_only_conflict(self, run_script) -> None:
        """``--harvest-only`` combined with ``--hygiene-only`` is rejected."""
        result = run_script(
            "wiki-maintain.sh", "--harvest-only", "--hygiene-only"
        )
        assert result.returncode != 0
        assert "mutually exclusive" in result.stderr

    def test_hygiene_only_dry_run_completes(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A hygiene-only dry run over a one-page wiki runs to completion."""
        make_page(tmp_wiki, "patterns/one.md")
        result = run_script(
            "wiki-maintain.sh", "--hygiene-only", "--dry-run", "--no-reindex"
        )
        assert result.returncode == 0
        assert "Phase 2: Hygiene checks" in result.stdout
        assert "finished" in result.stdout

    def test_phase_1_skipped_in_hygiene_only(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """``--hygiene-only`` reports phase 1 (URL harvesting) as skipped."""
        result = run_script(
            "wiki-maintain.sh", "--hygiene-only", "--dry-run", "--no-reindex"
        )
        assert result.returncode == 0
        assert "Phase 1: URL harvesting (skipped)" in result.stdout

    def test_phase_3_skipped_in_dry_run(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """``--dry-run`` reports phase 3 (qmd reindex) as skipped."""
        make_page(tmp_wiki, "patterns/one.md")
        result = run_script(
            "wiki-maintain.sh", "--hygiene-only", "--dry-run"
        )
        # Check the exit status like the sibling dry-run tests do; otherwise
        # a crash after the phase 3 banner would go unnoticed.
        assert result.returncode == 0
        assert "Phase 3: qmd reindex (skipped)" in result.stdout

    def test_harvest_only_dry_run_completes(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A harvest-only dry run succeeds and reports phase 2 as skipped."""
        # Add a summarized conversation so harvest has something to scan
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-10-test.md",
            status="summarized",
            body="See https://docs.python.org/3/library/os.html for details.\n",
        )
        result = run_script(
            "wiki-maintain.sh",
            "--harvest-only",
            "--dry-run",
            "--no-compile",
            "--no-reindex",
        )
        assert result.returncode == 0
        assert "Phase 2: Hygiene checks (skipped)" in result.stdout
# ---------------------------------------------------------------------------
# wiki-sync.sh
# ---------------------------------------------------------------------------
class TestWikiSyncSh:
    """Smoke test for ``wiki-sync.sh``."""

    def test_status_on_non_git_dir_exits_cleanly(self, run_script) -> None:
        """``wiki-sync.sh --status`` outside a git repo must fail gracefully.

        The wiki directory used by the test is not a git repository, so the
        git commands the script runs are expected to fail. The exit code is
        deliberately not checked: the script only has to terminate within
        the timeout and print something useful to stdout or stderr.

        NOTE(review): the test relies on ``run_script`` targeting the
        non-git ``tmp_wiki`` tree even though ``tmp_wiki`` is not requested
        here -- confirm against conftest.
        """
        completed = run_script("wiki-sync.sh", "--status", timeout=30)

        # The script produced output on at least one stream (it did not hang
        # or die silently).
        assert completed.stdout or completed.stderr

        # Either the status banner was printed or git's complaint surfaced.
        assert (
            "Wiki Sync Status" in completed.stdout
            or "not a git" in completed.stderr.lower()
        )
# ---------------------------------------------------------------------------
# mine-conversations.sh
# ---------------------------------------------------------------------------
class TestMineConversationsSh:
    """Smoke tests for the ``mine-conversations.sh`` command-line interface."""

    def test_extract_only_dry_run(self, run_script, tmp_wiki: Path) -> None:
        """``--extract-only --dry-run`` finishes successfully without an LLM."""
        argv = ("mine-conversations.sh", "--extract-only", "--dry-run")
        completed = run_script(*argv, timeout=30)
        assert completed.returncode == 0

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unrecognised flag makes the script exit with a non-zero status."""
        completed = run_script("mine-conversations.sh", "--bogus-flag")
        assert completed.returncode != 0
# ---------------------------------------------------------------------------
# Cross-platform sanity — scripts use portable bash syntax
# ---------------------------------------------------------------------------
class TestBashPortability:
    """Static sanity checks on the shell scripts.

    Verifies three things per script: a portable ``#!/usr/bin/env bash``
    shebang, the presence of ``set -euo pipefail``, and a clean
    ``bash -n`` parse. These checks do not detect every construct that
    breaks on macOS /bin/bash 3.2; ``bash -n`` only parses with whichever
    ``bash`` is first on PATH.
    """

    # Scripts covered by every check in this class.
    _SCRIPTS = ["wiki-maintain.sh", "mine-conversations.sh", "wiki-sync.sh"]

    @staticmethod
    def _script_path(script: str) -> Path:
        """Return the path of *script* in the ``scripts`` dir beside ``tests``."""
        return Path(__file__).parent.parent / "scripts" / script

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_shebang_is_env_bash(self, script: str) -> None:
        """All shell scripts should use `#!/usr/bin/env bash` for portability."""
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        lines = self._script_path(script).read_text(encoding="utf-8").splitlines()
        # An empty script should fail the assertion below with a readable
        # message rather than raise IndexError.
        first_line = lines[0] if lines else ""
        assert first_line == "#!/usr/bin/env bash", (
            f"{script} has shebang {first_line!r}, expected #!/usr/bin/env bash"
        )

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_uses_strict_mode(self, script: str) -> None:
        """All shell scripts should use `set -euo pipefail` for safe defaults."""
        text = self._script_path(script).read_text(encoding="utf-8")
        assert "set -euo pipefail" in text, f"{script} missing strict mode"

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_bash_syntax_check(self, script: str) -> None:
        """bash -n does a syntax-only parse and catches obvious errors."""
        result = subprocess.run(
            ["bash", "-n", str(self._script_path(script))],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert result.returncode == 0, f"{script} has bash syntax errors: {result.stderr}"
# ---------------------------------------------------------------------------
# Python script syntax check (smoke)
# ---------------------------------------------------------------------------
class TestPythonSyntax:
    """Byte-compile each pipeline script to catch syntax errors early."""

    @pytest.mark.parametrize(
        "script",
        [
            "wiki_lib.py",
            "wiki-harvest.py",
            "wiki-staging.py",
            "wiki-hygiene.py",
            "extract-sessions.py",
            "summarize-conversations.py",
            "update-conversation-index.py",
        ],
    )
    def test_py_compile(self, script: str, tmp_path: Path) -> None:
        """py_compile catches syntax errors without executing the module.

        The bytecode is written under pytest's ``tmp_path`` so the test does
        not create ``__pycache__`` entries inside the repository's ``scripts``
        directory and still works on a read-only checkout.
        """
        import py_compile

        path = Path(__file__).parent.parent / "scripts" / script
        # py_compile.compile raises PyCompileError on a syntax error because
        # doraise=True; on success it returns the path of the bytecode file.
        py_compile.compile(
            str(path),
            cfile=str(tmp_path / f"{script}c"),
            doraise=True,
        )