"""Unit + integration tests for scripts/wiki-distill.py. Mocks claude -p; no real LLM calls during tests. """ from __future__ import annotations import json from datetime import date, timedelta from pathlib import Path from typing import Any import pytest from conftest import make_conversation # --------------------------------------------------------------------------- # wiki_lib hall parsing helpers # --------------------------------------------------------------------------- class TestParseConversationHalls: def _make_conv_with_halls(self, tmp_wiki: Path, body: str) -> Path: return make_conversation( tmp_wiki, "test", "2026-04-12-halls.md", status="summarized", body=body, ) def test_extracts_fact_bullets(self, wiki_lib: Any, tmp_wiki: Path) -> None: body = ( "## Summary\n\nsome summary text.\n\n" "## Decisions (hall: fact)\n\n" "- First decision made\n" "- Second decision\n\n" "## Other section\n\nunrelated.\n" ) path = self._make_conv_with_halls(tmp_wiki, body) page = wiki_lib.parse_page(path) halls = wiki_lib.parse_conversation_halls(page) assert "fact" in halls assert halls["fact"] == ["First decision made", "Second decision"] def test_extracts_multiple_hall_types( self, wiki_lib: Any, tmp_wiki: Path ) -> None: body = ( "## Decisions (hall: fact)\n\n- A\n- B\n\n" "## Discoveries (hall: discovery)\n\n- root cause X\n\n" "## Advice (hall: advice)\n\n- try Y\n- consider Z\n" ) path = self._make_conv_with_halls(tmp_wiki, body) page = wiki_lib.parse_page(path) halls = wiki_lib.parse_conversation_halls(page) assert halls["fact"] == ["A", "B"] assert halls["discovery"] == ["root cause X"] assert halls["advice"] == ["try Y", "consider Z"] def test_ignores_sections_without_hall_marker( self, wiki_lib: Any, tmp_wiki: Path ) -> None: body = ( "## Summary\n\n- not a hall bullet\n\n" "## Decisions (hall: fact)\n\n- real bullet\n" ) path = self._make_conv_with_halls(tmp_wiki, body) page = wiki_lib.parse_page(path) halls = wiki_lib.parse_conversation_halls(page) assert halls == {"fact": ["real bullet"]} def test_flattens_multiline_bullets( self, wiki_lib: Any, tmp_wiki: Path ) -> None: body = ( "## Decisions (hall: fact)\n\n" "- A bullet that goes on\n and continues here\n" "- Second bullet\n" ) path = self._make_conv_with_halls(tmp_wiki, body) page = wiki_lib.parse_page(path) halls = wiki_lib.parse_conversation_halls(page) # The simple regex captures each "- " line separately; continuation # lines are not part of the bullet. This matches the current behavior. assert halls["fact"][0].startswith("A bullet") assert "Second bullet" in halls["fact"] def test_empty_body_returns_empty( self, wiki_lib: Any, tmp_wiki: Path ) -> None: path = self._make_conv_with_halls(tmp_wiki, "## Summary\n\ntext.\n") page = wiki_lib.parse_page(path) assert wiki_lib.parse_conversation_halls(page) == {} def test_high_signal_halls_filters_out_preference_event_tooling( self, wiki_lib: Any, tmp_wiki: Path ) -> None: body = ( "## Decisions (hall: fact)\n- f\n" "## Preferences (hall: preference)\n- p\n" "## Events (hall: event)\n- e\n" "## Tooling (hall: tooling)\n- t\n" "## Advice (hall: advice)\n- a\n" ) path = self._make_conv_with_halls(tmp_wiki, body) page = wiki_lib.parse_page(path) halls = wiki_lib.high_signal_halls(page) assert set(halls.keys()) == {"fact", "advice"} # --------------------------------------------------------------------------- # wiki-distill.py module fixture # --------------------------------------------------------------------------- @pytest.fixture def wiki_distill(tmp_wiki: Path) -> Any: from conftest import SCRIPTS_DIR, _load_script_module _load_script_module("wiki_lib", SCRIPTS_DIR / "wiki_lib.py") return _load_script_module("wiki_distill", SCRIPTS_DIR / "wiki-distill.py") # --------------------------------------------------------------------------- # Topic rollup logic # --------------------------------------------------------------------------- class TestTopicRollup: def _make_summarized_conv( self, tmp_wiki: Path, project: str, filename: str, conv_date: str, topics: list[str], fact_bullets: list[str] | None = None, ) -> Path: fact_section = "" if fact_bullets: fact_section = "## Decisions (hall: fact)\n\n" + "\n".join( f"- {b}" for b in fact_bullets ) return make_conversation( tmp_wiki, project, filename, date=conv_date, status="summarized", related=[f"topic:{t}" for t in []], body=f"## Summary\n\ntest.\n\n{fact_section}\n", ) def test_extract_topics_from_today_only( self, wiki_distill: Any, tmp_wiki: Path ) -> None: today_date = wiki_distill.today() yesterday = today_date - timedelta(days=1) # Today's conversation with topics _write_conv_with_topics( tmp_wiki, "test", "today.md", date_str=today_date.isoformat(), topics=["alpha", "beta"], ) # Yesterday's conversation — should be excluded at lookback=0 _write_conv_with_topics( tmp_wiki, "test", "yesterday.md", date_str=yesterday.isoformat(), topics=["gamma"], ) all_convs = wiki_distill.iter_summarized_conversations() topics = wiki_distill.extract_topics_from_today(all_convs, today_date, 0) assert topics == {"alpha", "beta"} def test_extract_topics_with_lookback( self, wiki_distill: Any, tmp_wiki: Path ) -> None: today_date = wiki_distill.today() day3 = today_date - timedelta(days=3) day10 = today_date - timedelta(days=10) _write_conv_with_topics( tmp_wiki, "test", "today.md", date_str=today_date.isoformat(), topics=["a"], ) _write_conv_with_topics( tmp_wiki, "test", "day3.md", date_str=day3.isoformat(), topics=["b"], ) _write_conv_with_topics( tmp_wiki, "test", "day10.md", date_str=day10.isoformat(), topics=["c"], ) all_convs = wiki_distill.iter_summarized_conversations() topics_7 = wiki_distill.extract_topics_from_today(all_convs, today_date, 7) assert topics_7 == {"a", "b"} # day10 excluded by 7-day lookback def test_rollup_by_topic_across_history( self, wiki_distill: Any, tmp_wiki: Path ) -> None: today_date = wiki_distill.today() # Three conversations all tagged with "shared-topic", different dates _write_conv_with_topics( tmp_wiki, "test", "a.md", date_str=today_date.isoformat(), topics=["shared-topic"], ) _write_conv_with_topics( tmp_wiki, "test", "b.md", date_str=(today_date - timedelta(days=30)).isoformat(), topics=["shared-topic", "other"], ) _write_conv_with_topics( tmp_wiki, "test", "c.md", date_str=(today_date - timedelta(days=90)).isoformat(), topics=["shared-topic"], ) # One unrelated _write_conv_with_topics( tmp_wiki, "test", "d.md", date_str=today_date.isoformat(), topics=["unrelated"], ) all_convs = wiki_distill.iter_summarized_conversations() rollup = wiki_distill.rollup_conversations_by_topic( "shared-topic", all_convs ) assert len(rollup) == 3 stems = [c.path.stem for c in rollup] # Most recent first assert stems[0] == "a" def _write_conv_with_topics( tmp_wiki: Path, project: str, filename: str, *, date_str: str, topics: list[str], ) -> Path: """Helper — write a summarized conversation with topic frontmatter.""" proj_dir = tmp_wiki / "conversations" / project proj_dir.mkdir(parents=True, exist_ok=True) path = proj_dir / filename topic_yaml = "topics: [" + ", ".join(topics) + "]" content = ( f"---\n" f"title: Test Conv\n" f"type: conversation\n" f"project: {project}\n" f"date: {date_str}\n" f"status: summarized\n" f"messages: 50\n" f"{topic_yaml}\n" f"---\n" f"## Summary\n\ntest.\n\n" f"## Decisions (hall: fact)\n\n" f"- Fact one for these topics\n" f"- Fact two\n" ) path.write_text(content) return path # --------------------------------------------------------------------------- # Topic group building # --------------------------------------------------------------------------- class TestTopicGroupBuild: def test_counts_total_bullets( self, wiki_distill: Any, tmp_wiki: Path ) -> None: _write_conv_with_topics( tmp_wiki, "test", "one.md", date_str="2026-04-12", topics=["foo"], ) all_convs = wiki_distill.iter_summarized_conversations() rollup = wiki_distill.rollup_conversations_by_topic("foo", all_convs) group = wiki_distill.build_topic_group("foo", rollup) assert group.topic == "foo" assert group.total_bullets == 2 # the helper writes 2 fact bullets def test_format_for_llm_includes_topic_and_sections( self, wiki_distill: Any, tmp_wiki: Path ) -> None: _write_conv_with_topics( tmp_wiki, "test", "one.md", date_str="2026-04-12", topics=["bar"], ) all_convs = wiki_distill.iter_summarized_conversations() rollup = wiki_distill.rollup_conversations_by_topic("bar", all_convs) group = wiki_distill.build_topic_group("bar", rollup) text = wiki_distill.format_topic_group_for_llm(group) assert "# Topic: bar" in text assert "Fact one" in text assert "Decisions:" in text # --------------------------------------------------------------------------- # State management # --------------------------------------------------------------------------- class TestDistillState: def test_load_returns_defaults( self, wiki_distill: Any, tmp_wiki: Path ) -> None: state = wiki_distill.load_state() assert state["processed_convs"] == {} assert state["processed_topics"] == {} assert state["first_run_complete"] is False def test_save_and_reload( self, wiki_distill: Any, tmp_wiki: Path ) -> None: state = wiki_distill.load_state() state["first_run_complete"] = True state["processed_topics"]["foo"] = {"distilled_date": "2026-04-12"} wiki_distill.save_state(state) reloaded = wiki_distill.load_state() assert reloaded["first_run_complete"] is True assert "foo" in reloaded["processed_topics"] def test_conv_needs_distill_first_time( self, wiki_distill: Any, tmp_wiki: Path ) -> None: path = _write_conv_with_topics( tmp_wiki, "test", "fresh.md", date_str="2026-04-12", topics=["x"], ) conv = wiki_distill.parse_page(path) state = wiki_distill.load_state() assert wiki_distill.conv_needs_distill(state, conv) is True def test_conv_needs_distill_detects_content_change( self, wiki_distill: Any, tmp_wiki: Path ) -> None: path = _write_conv_with_topics( tmp_wiki, "test", "mut.md", date_str="2026-04-12", topics=["x"], ) conv = wiki_distill.parse_page(path) state = wiki_distill.load_state() wiki_distill.mark_conv_distilled(state, conv, ["staging/patterns/x.md"]) assert wiki_distill.conv_needs_distill(state, conv) is False # Mutate the body text = path.read_text() path.write_text(text + "\n- Another bullet\n") conv2 = wiki_distill.parse_page(path) assert wiki_distill.conv_needs_distill(state, conv2) is True def test_conv_needs_distill_detects_new_topic( self, wiki_distill: Any, tmp_wiki: Path ) -> None: path = _write_conv_with_topics( tmp_wiki, "test", "new-topic.md", date_str="2026-04-12", topics=["original"], ) conv = wiki_distill.parse_page(path) state = wiki_distill.load_state() wiki_distill.mark_conv_distilled(state, conv, []) assert wiki_distill.conv_needs_distill(state, conv) is False # Rewrite with a new topic added _write_conv_with_topics( tmp_wiki, "test", "new-topic.md", date_str="2026-04-12", topics=["original", "freshly-added"], ) conv2 = wiki_distill.parse_page(path) assert wiki_distill.conv_needs_distill(state, conv2) is True # --------------------------------------------------------------------------- # CLI smoke tests (no real LLM calls — uses --dry-run) # --------------------------------------------------------------------------- class TestDistillCli: def test_help_flag(self, run_script) -> None: result = run_script("wiki-distill.py", "--help") assert result.returncode == 0 assert "--first-run" in result.stdout assert "--topic" in result.stdout assert "--dry-run" in result.stdout def test_dry_run_empty_wiki(self, run_script, tmp_wiki: Path) -> None: result = run_script("wiki-distill.py", "--dry-run", "--first-run") assert result.returncode == 0 def test_dry_run_with_topic_rollup( self, run_script, tmp_wiki: Path ) -> None: _write_conv_with_topics( tmp_wiki, "test", "convA.md", date_str="2026-04-12", topics=["rollup-test"], ) _write_conv_with_topics( tmp_wiki, "test", "convB.md", date_str="2026-04-11", topics=["rollup-test"], ) result = run_script( "wiki-distill.py", "--dry-run", "--first-run", ) assert result.returncode == 0 # Should mention the rollup topic assert "rollup-test" in result.stdout def test_topic_flag_narrow_mode( self, run_script, tmp_wiki: Path ) -> None: _write_conv_with_topics( tmp_wiki, "test", "a.md", date_str="2026-04-12", topics=["explicit-topic"], ) result = run_script( "wiki-distill.py", "--dry-run", "--topic", "explicit-topic", ) assert result.returncode == 0 assert "Explicit topic mode" in result.stdout assert "explicit-topic" in result.stdout def test_too_thin_topic_is_skipped( self, run_script, tmp_wiki: Path, wiki_distill: Any ) -> None: # Write a conversation with only ONE hall bullet on this topic proj_dir = tmp_wiki / "conversations" / "test" proj_dir.mkdir(parents=True, exist_ok=True) (proj_dir / "thin.md").write_text( "---\n" "title: Thin\n" "type: conversation\n" "project: test\n" "date: 2026-04-12\n" "status: summarized\n" "messages: 5\n" "topics: [thin-topic]\n" "---\n" "## Summary\n\n\n" "## Decisions (hall: fact)\n\n" "- Single bullet\n" ) result = run_script( "wiki-distill.py", "--dry-run", "--topic", "thin-topic", ) assert result.returncode == 0 assert "too-thin" in result.stdout or "too-thin" in result.stderr