Files
memex/tests/test_wiki_lib.py
Eric Turner ee54a2f5d4 Initial commit — memex
A compounding LLM-maintained knowledge wiki.

Synthesis of Andrej Karpathy's persistent-wiki gist and milla-jovovich's
mempalace, with an automation layer on top for conversation mining, URL
harvesting, human-in-the-loop staging, staleness decay, and hygiene.

Includes:
- 11 pipeline scripts (extract, summarize, index, harvest, stage,
  hygiene, maintain, sync, + shared library)
- Full docs: README, SETUP, ARCHITECTURE, DESIGN-RATIONALE, CUSTOMIZE
- Example CLAUDE.md files (wiki schema + global instructions) tuned for
  the three-collection qmd setup
- 171-test pytest suite (cross-platform, runs in ~1.3s)
- MIT licensed
2026-04-12 21:16:02 -06:00

315 lines
12 KiB
Python

"""Unit tests for scripts/wiki_lib.py — the shared frontmatter library."""
from __future__ import annotations
from datetime import date
from pathlib import Path
from typing import Any
import pytest
from conftest import make_page, make_staging_page
# ---------------------------------------------------------------------------
# parse_yaml_lite
# ---------------------------------------------------------------------------
class TestParseYamlLite:
    """Checks for ``wiki_lib.parse_yaml_lite``.

    Each test hands a small YAML snippet to the parser and compares the
    returned mapping with the expected Python values. ``wiki_lib`` is the
    library module under test, injected as a pytest fixture.
    """

    def test_simple_key_value(self, wiki_lib: Any) -> None:
        """Two plain ``key: value`` lines yield a two-entry mapping of strings."""
        text = "title: Hello\ntype: pattern\n"
        parsed = wiki_lib.parse_yaml_lite(text)
        assert parsed == {"title": "Hello", "type": "pattern"}

    def test_quoted_values_are_stripped(self, wiki_lib: Any) -> None:
        """Double- and single-quoted scalars come back without their quotes."""
        text = 'title: "Hello"\nother: \'World\'\n'
        parsed = wiki_lib.parse_yaml_lite(text)
        assert parsed["title"] == "Hello"
        assert parsed["other"] == "World"

    def test_inline_list(self, wiki_lib: Any) -> None:
        """A bracketed ``[a, b, c]`` value is returned as a list of strings."""
        parsed = wiki_lib.parse_yaml_lite("tags: [a, b, c]\n")
        tags = parsed["tags"]
        assert tags == ["a", "b", "c"]

    def test_empty_inline_list(self, wiki_lib: Any) -> None:
        """``[]`` is returned as an empty list."""
        parsed = wiki_lib.parse_yaml_lite("sources: []\n")
        sources = parsed["sources"]
        assert sources == []

    def test_block_list(self, wiki_lib: Any) -> None:
        """Dash-prefixed lines under a bare key are collected into a list."""
        text = "related:\n - foo.md\n - bar.md\n - baz.md\n"
        parsed = wiki_lib.parse_yaml_lite(text)
        assert parsed["related"] == ["foo.md", "bar.md", "baz.md"]

    def test_mixed_keys(self, wiki_lib: Any) -> None:
        """Scalars before and after a block list are all parsed correctly."""
        pieces = (
            "title: Mixed\n",
            "type: pattern\n",
            "related:\n",
            " - one.md\n",
            " - two.md\n",
            "confidence: high\n",
        )
        parsed = wiki_lib.parse_yaml_lite("".join(pieces))
        assert parsed["title"] == "Mixed"
        assert parsed["related"] == ["one.md", "two.md"]
        assert parsed["confidence"] == "high"

    def test_empty_value(self, wiki_lib: Any) -> None:
        """A key followed only by whitespace maps to the empty string."""
        parsed = wiki_lib.parse_yaml_lite("empty: \n")
        assert parsed["empty"] == ""

    def test_comment_lines_ignored(self, wiki_lib: Any) -> None:
        """A ``#`` line contributes nothing to the parsed mapping."""
        text = "# this is a comment\ntitle: X\n"
        parsed = wiki_lib.parse_yaml_lite(text)
        assert parsed == {"title": "X"}

    def test_blank_lines_ignored(self, wiki_lib: Any) -> None:
        """Blank lines before, between and after keys are skipped."""
        text = "\ntitle: X\n\ntype: pattern\n\n"
        parsed = wiki_lib.parse_yaml_lite(text)
        assert parsed == {"title": "X", "type": "pattern"}
# ---------------------------------------------------------------------------
# parse_page
# ---------------------------------------------------------------------------
class TestParsePage:
    """Tests for ``wiki_lib.parse_page``.

    Covers a well-formed page, the three inputs for which ``parse_page`` is
    expected to return ``None`` (no frontmatter, missing file, unterminated
    frontmatter), and exact preservation of the page body.

    Fixtures:
        wiki_lib: the library module under test.
        tmp_wiki: root directory of a throwaway wiki. Two tests write files
            straight into ``tmp_wiki / "patterns"``, so the fixture is
            assumed to have created that directory — TODO confirm in
            conftest.
    """

    def test_parses_valid_page(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """A page created by ``make_page`` parses into frontmatter and body."""
        path = make_page(tmp_wiki, "patterns/foo.md", title="Foo", confidence="high")
        page = wiki_lib.parse_page(path)
        assert page is not None
        assert page.frontmatter["title"] == "Foo"
        assert page.frontmatter["confidence"] == "high"
        # No body is passed to make_page above, so this relies on its default
        # body containing "# Content" — presumably defined in conftest.
        assert "# Content" in page.body

    def test_returns_none_without_frontmatter(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """A file with no leading ``---`` block is rejected with ``None``."""
        path = tmp_wiki / "patterns" / "no-fm.md"
        # Explicit encoding keeps the bytes on disk independent of the
        # platform's locale default.
        path.write_text("# Just a body\n\nNo frontmatter.\n", encoding="utf-8")
        assert wiki_lib.parse_page(path) is None

    def test_returns_none_for_missing_file(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """A path that does not exist yields ``None`` rather than raising."""
        assert wiki_lib.parse_page(tmp_wiki / "nonexistent.md") is None

    def test_returns_none_for_truncated_frontmatter(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """Frontmatter that opens with ``---`` but never closes yields ``None``."""
        path = tmp_wiki / "patterns" / "broken.md"
        path.write_text("---\ntitle: Broken\n# never closed\n", encoding="utf-8")
        assert wiki_lib.parse_page(path) is None

    def test_preserves_body_exactly(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """The parsed body equals the body text handed to ``make_page``."""
        body = "# Heading\n\nLine 1\nLine 2\n\n## Sub\n\nMore.\n"
        path = make_page(tmp_wiki, "patterns/body.md", body=body)
        page = wiki_lib.parse_page(path)
        # Guard first so a parse failure shows up as a clear assertion
        # failure instead of an AttributeError on ``None.body``.
        assert page is not None
        assert page.body == body
# ---------------------------------------------------------------------------
# serialize_frontmatter
# ---------------------------------------------------------------------------
class TestSerializeFrontmatter:
    """Checks for ``wiki_lib.serialize_frontmatter``: key order and list layout."""

    def test_preferred_key_order(self, wiki_lib: Any) -> None:
        """Known keys are emitted in the preferred order, not insertion order."""
        frontmatter = {
            "related": ["a.md"],
            "sources": ["raw/x.md"],
            "title": "T",
            "confidence": "high",
            "type": "pattern",
        }
        text = wiki_lib.serialize_frontmatter(frontmatter)
        lines = text.split("\n")
        # The first three output lines must be title, type, confidence,
        # ahead of sources/related.
        for position, prefix in enumerate(("title:", "type:", "confidence:")):
            assert lines[position].startswith(prefix)
        assert "sources:" in text
        assert "related:" in text
        # sources must come before related (both are in PREFERRED_KEY_ORDER)
        sources_at = text.index("sources:")
        related_at = text.index("related:")
        assert sources_at < related_at

    def test_list_formatted_as_block(self, wiki_lib: Any) -> None:
        """A non-empty list is written as a dash-prefixed block under its key."""
        frontmatter = {"title": "T", "related": ["one.md", "two.md"]}
        text = wiki_lib.serialize_frontmatter(frontmatter)
        # NOTE(review): the list-item indentation is pinned by this literal;
        # confirm it matches what the serializer actually emits.
        expected_block = "related:\n - one.md\n - two.md"
        assert expected_block in text

    def test_empty_list(self, wiki_lib: Any) -> None:
        """An empty list is written inline as ``[]``."""
        frontmatter = {"title": "T", "sources": []}
        text = wiki_lib.serialize_frontmatter(frontmatter)
        assert "sources: []" in text

    def test_unknown_keys_appear_alphabetically_at_end(self, wiki_lib: Any) -> None:
        """Keys outside the preferred order are sorted alphabetically."""
        frontmatter = {"title": "T", "type": "pattern", "zoo": "z", "alpha": "a"}
        text = wiki_lib.serialize_frontmatter(frontmatter)
        # alpha should come before zoo (alphabetical)
        alpha_at = text.index("alpha:")
        zoo_at = text.index("zoo:")
        assert alpha_at < zoo_at
# ---------------------------------------------------------------------------
# Round-trip: parse_page → write_page → parse_page
# ---------------------------------------------------------------------------
class TestRoundTrip:
    """Round-trip tests: ``parse_page`` → ``write_page`` → ``parse_page``.

    ``write_page`` receives only the page object, and the file is then
    re-parsed from the original ``path``; this assumes ``write_page`` writes
    back to the page's own path — TODO confirm against ``wiki_lib``.

    Fixtures:
        wiki_lib: the library module under test.
        tmp_wiki: root directory of a throwaway wiki.
    """

    def test_round_trip_preserves_core_fields(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """Title, list fields and body survive an unmodified write and re-parse."""
        path = make_page(
            tmp_wiki,
            "patterns/rt.md",
            title="Round Trip",
            sources=["raw/a.md", "raw/b.md"],
            related=["patterns/other.md"],
        )
        page1 = wiki_lib.parse_page(path)
        # Guard each parse so a failure is reported as a clear assertion
        # instead of an AttributeError on ``None`` further down.
        assert page1 is not None
        wiki_lib.write_page(page1)
        page2 = wiki_lib.parse_page(path)
        assert page2 is not None
        assert page2.frontmatter["title"] == "Round Trip"
        assert page2.frontmatter["sources"] == ["raw/a.md", "raw/b.md"]
        assert page2.frontmatter["related"] == ["patterns/other.md"]
        assert page2.body == page1.body

    def test_round_trip_preserves_mutation(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """A frontmatter value changed in memory is what a re-parse reads back."""
        path = make_page(tmp_wiki, "patterns/rt.md", confidence="high")
        page = wiki_lib.parse_page(path)
        assert page is not None
        page.frontmatter["confidence"] = "low"
        wiki_lib.write_page(page)
        page2 = wiki_lib.parse_page(path)
        assert page2 is not None
        assert page2.frontmatter["confidence"] == "low"
# ---------------------------------------------------------------------------
# parse_date
# ---------------------------------------------------------------------------
class TestParseDate:
    """Checks for ``wiki_lib.parse_date`` across valid, empty and invalid input."""

    def test_iso_format(self, wiki_lib: Any) -> None:
        """A ``YYYY-MM-DD`` string parses to the matching ``date``."""
        expected = date(2026, 4, 10)
        assert wiki_lib.parse_date("2026-04-10") == expected

    def test_empty_string_returns_none(self, wiki_lib: Any) -> None:
        """The empty string yields ``None``."""
        parsed = wiki_lib.parse_date("")
        assert parsed is None

    def test_none_returns_none(self, wiki_lib: Any) -> None:
        """``None`` in, ``None`` out."""
        parsed = wiki_lib.parse_date(None)
        assert parsed is None

    def test_invalid_format_returns_none(self, wiki_lib: Any) -> None:
        """Free text, slash-separated and month-first strings all yield ``None``."""
        for bad_value in ("not-a-date", "2026/04/10", "04-10-2026"):
            assert wiki_lib.parse_date(bad_value) is None

    def test_date_object_passthrough(self, wiki_lib: Any) -> None:
        """A ``date`` instance is returned as an equal ``date``."""
        original = date(2026, 4, 10)
        assert wiki_lib.parse_date(original) == original
# ---------------------------------------------------------------------------
# page_content_hash
# ---------------------------------------------------------------------------
class TestPageContentHash:
    """Tests for ``wiki_lib.page_content_hash``.

    Verifies that the hash is stable for the same page, carries a
    ``sha256:`` prefix, differs for different bodies, and is unchanged by a
    frontmatter-only edit.

    Fixtures:
        wiki_lib: the library module under test.
        tmp_wiki: root directory of a throwaway wiki.
    """

    def test_deterministic(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """Hashing the same parsed page twice gives the same prefixed digest."""
        path = make_page(tmp_wiki, "patterns/h.md", body="# Same body\n\nLine.\n")
        page = wiki_lib.parse_page(path)
        # Guard so a parse failure is not reported as a hashing failure.
        assert page is not None
        h1 = wiki_lib.page_content_hash(page)
        h2 = wiki_lib.page_content_hash(page)
        assert h1 == h2
        assert h1.startswith("sha256:")

    def test_different_bodies_yield_different_hashes(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """Two pages with different bodies hash to different values."""
        p1 = make_page(tmp_wiki, "patterns/a.md", body="# A\n\nAlpha.\n")
        p2 = make_page(tmp_wiki, "patterns/b.md", body="# B\n\nBeta.\n")
        page_a = wiki_lib.parse_page(p1)
        page_b = wiki_lib.parse_page(p2)
        assert page_a is not None
        assert page_b is not None
        h1 = wiki_lib.page_content_hash(page_a)
        h2 = wiki_lib.page_content_hash(page_b)
        assert h1 != h2

    def test_frontmatter_changes_dont_change_hash(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """Hash is body-only so mechanical frontmatter fixes don't churn it."""
        path = make_page(tmp_wiki, "patterns/f.md", confidence="high")
        page = wiki_lib.parse_page(path)
        assert page is not None
        h1 = wiki_lib.page_content_hash(page)
        # Change only frontmatter, persist it, and hash the re-parsed page.
        page.frontmatter["confidence"] = "medium"
        wiki_lib.write_page(page)
        page2 = wiki_lib.parse_page(path)
        assert page2 is not None
        h2 = wiki_lib.page_content_hash(page2)
        assert h1 == h2
# ---------------------------------------------------------------------------
# Iterators
# ---------------------------------------------------------------------------
class TestIterators:
    """Tests for the ``wiki_lib`` page iterators.

    Covers ``iter_live_pages``, ``iter_staging_pages`` and
    ``iter_archived_pages``. The iterators are called without arguments, so
    they are assumed to locate the wiki through module-level configuration
    set up by the ``tmp_wiki`` fixture — TODO confirm in conftest.

    Fixtures:
        wiki_lib: the library module under test.
        tmp_wiki: root directory of a throwaway wiki.
    """

    def test_iter_live_pages_finds_all_types(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """Pages under all four live directories are returned."""
        for rel_path in (
            "patterns/p1.md",
            "patterns/p2.md",
            "decisions/d1.md",
            "concepts/c1.md",
            "environments/e1.md",
        ):
            make_page(tmp_wiki, rel_path)
        pages = wiki_lib.iter_live_pages()
        assert len(pages) == 5
        stems = {p.path.stem for p in pages}
        assert stems == {"p1", "p2", "d1", "c1", "e1"}

    def test_iter_live_pages_empty_wiki(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """With no pages created, the result is an empty list."""
        assert wiki_lib.iter_live_pages() == []

    def test_iter_staging_pages(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """Staging pages are found and each carries ``status: pending``."""
        make_staging_page(tmp_wiki, "patterns/s1.md")
        make_staging_page(tmp_wiki, "decisions/s2.md", ptype="decision")
        pages = wiki_lib.iter_staging_pages()
        assert len(pages) == 2
        # Neither call above passes a status, so "pending" presumably comes
        # from make_staging_page's defaults.
        assert all(p.frontmatter.get("status") == "pending" for p in pages)

    def test_iter_archived_pages(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """A page under ``archive/`` is returned with its extra frontmatter."""
        make_page(
            tmp_wiki,
            "archive/patterns/old.md",
            confidence="stale",
            extra_fm={
                "archived_date": "2026-01-01",
                "archived_reason": "test",
                "original_path": "patterns/old.md",
            },
        )
        pages = wiki_lib.iter_archived_pages()
        assert len(pages) == 1
        assert pages[0].frontmatter["archived_reason"] == "test"

    def test_iter_skips_malformed_pages(
        self, wiki_lib: Any, tmp_wiki: Path
    ) -> None:
        """A file without frontmatter is left out of the live-page listing."""
        make_page(tmp_wiki, "patterns/good.md")
        # Explicit encoding keeps the bytes on disk independent of the
        # platform's locale default.
        (tmp_wiki / "patterns" / "no-fm.md").write_text(
            "# Just a body\n", encoding="utf-8"
        )
        pages = wiki_lib.iter_live_pages()
        assert len(pages) == 1
        assert pages[0].path.stem == "good"
# ---------------------------------------------------------------------------
# WIKI_DIR env var override
# ---------------------------------------------------------------------------
class TestWikiDirEnvVar:
    """Checks that ``wiki_lib``'s path constants follow the test wiki root."""

    def test_honors_env_var(self, wiki_lib: Any, tmp_wiki: Path) -> None:
        """The tmp_wiki fixture sets WIKI_DIR — verify wiki_lib picks it up."""
        expected_paths = {
            "WIKI_DIR": tmp_wiki,
            "STAGING_DIR": tmp_wiki / "staging",
            "ARCHIVE_DIR": tmp_wiki / "archive",
            "INDEX_FILE": tmp_wiki / "index.md",
        }
        # The root is checked first, then each path derived from it.
        for attr_name, expected in expected_paths.items():
            assert getattr(wiki_lib, attr_name) == expected