# memex/tests/test_conversation_pipeline.py

"""Smoke + integration tests for the conversation mining pipeline.

These scripts interact with external systems (Claude Code sessions dir,
claude CLI), so tests focus on CLI parsing, dry-run behavior, and error
handling rather than exercising the full extraction/summarization path.
"""

from __future__ import annotations

import json
from pathlib import Path

import pytest


# ---------------------------------------------------------------------------
# extract-sessions.py
# ---------------------------------------------------------------------------


class TestExtractSessions:
    """CLI-level checks for ``extract-sessions.py`` (help, dry run, bad flags)."""

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0 and advertises the ``--project`` and ``--dry-run`` flags."""
        outcome = run_script("extract-sessions.py", "--help")
        assert outcome.returncode == 0
        for flag in ("--project", "--dry-run"):
            assert flag in outcome.stdout

    def test_dry_run_with_empty_sessions_dir(
        self, run_script, tmp_wiki: Path, tmp_path: Path, monkeypatch
    ) -> None:
        """A dry run restricted to a project code without sessions exits 0."""
        # Redirecting the sessions directory through an environment variable
        # such as CLAUDE_PROJECTS_DIR is not currently supported: the script
        # reads ~/.claude/projects directly. A project code that has no
        # sessions is used instead to exercise the "nothing to do" path.
        cli_args = ("--dry-run", "--project", "nonexistent")
        outcome = run_script("extract-sessions.py", *cli_args)
        assert outcome.returncode == 0

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unknown flag yields a non-zero exit and a complaint on stderr."""
        outcome = run_script("extract-sessions.py", "--bogus-flag")
        assert outcome.returncode != 0
        # Match case-insensitively on either word the CLI may use.
        complaint = outcome.stderr.lower()
        assert any(word in complaint for word in ("error", "unrecognized"))


# ---------------------------------------------------------------------------
# summarize-conversations.py
# ---------------------------------------------------------------------------


class TestSummarizeConversations:
    """CLI-level checks for ``summarize-conversations.py`` in dry-run mode."""

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0 and lists ``--claude``, ``--dry-run`` and ``--project``."""
        outcome = run_script("summarize-conversations.py", "--help")
        assert outcome.returncode == 0
        for flag in ("--claude", "--dry-run", "--project"):
            assert flag in outcome.stdout

    def test_dry_run_empty_conversations(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """With no conversation files created, a dry run still exits 0."""
        cli_args = ("--claude", "--dry-run")
        outcome = run_script("summarize-conversations.py", *cli_args)
        assert outcome.returncode == 0

    def test_dry_run_with_extracted_conversation(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A dry run reports a conversation that is extracted but not summarized."""
        from conftest import make_conversation

        pending_name = "2026-04-10-abc.md"
        # status="extracted" marks the conversation as not yet summarized.
        make_conversation(
            tmp_wiki,
            "general",
            pending_name,
            status="extracted",
            messages=50,
        )

        cli_args = ("--claude", "--dry-run")
        outcome = run_script("summarize-conversations.py", *cli_args)
        assert outcome.returncode == 0

        # Either the file name or a count of pending work is acceptable output.
        report = outcome.stdout
        assert any(marker in report for marker in (pending_name, "1 conversation"))


# ---------------------------------------------------------------------------
# update-conversation-index.py
# ---------------------------------------------------------------------------


class TestUpdateConversationIndex:
    """CLI-level checks for ``update-conversation-index.py``."""

    def test_help_exits_clean(self, run_script) -> None:
        """``--help`` exits 0."""
        outcome = run_script("update-conversation-index.py", "--help")
        assert outcome.returncode == 0

    def test_runs_on_empty_conversations_dir(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """Running with no conversations created must not crash (exit 0)."""
        outcome = run_script("update-conversation-index.py")
        assert outcome.returncode == 0

    def test_builds_index_from_conversations(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """Two summarized conversations both appear in ``conversations/index.md``."""
        from conftest import make_conversation

        # (full file name, shortest suffix accepted in the index text)
        expected = (
            ("2026-04-10-one.md", "one.md"),
            ("2026-04-11-two.md", "two.md"),
        )
        for full_name, _ in expected:
            make_conversation(
                tmp_wiki,
                "general",
                full_name,
                status="summarized",
            )

        outcome = run_script("update-conversation-index.py")
        assert outcome.returncode == 0

        index_path = tmp_wiki / "conversations" / "index.md"
        assert index_path.exists()

        contents = index_path.read_text()
        for full_name, short_name in expected:
            assert full_name in contents or short_name in contents