# memex/tests/test_shell_scripts.py

"""Smoke tests for the bash scripts.

Bash scripts are harder to unit-test in isolation — these tests verify
CLI parsing, help text, and dry-run/safe flags work correctly and that
scripts exit cleanly in all the no-op paths.

Cross-platform note: tests invoke scripts via `bash` explicitly, so they
work on both macOS (default /bin/bash) and Linux/WSL. They avoid anything
that requires external state (network, git, LLM).
"""

from __future__ import annotations

import os
import subprocess
from pathlib import Path
from typing import Any

import pytest

from conftest import make_conversation, make_page, make_staging_page


# ---------------------------------------------------------------------------
# wiki-maintain.sh
# ---------------------------------------------------------------------------


class TestWikiMaintainSh:
    """Smoke tests for the wiki-maintain.sh command-line interface.

    Every test drives the script through the ``run_script`` fixture and
    inspects only the exit status and captured output.
    """

    # Script under test, and the flag set shared by the hygiene dry-run cases.
    _SCRIPT = "wiki-maintain.sh"
    _HYGIENE_DRY_RUN = ("--hygiene-only", "--dry-run", "--no-reindex")

    def test_help_flag(self, run_script) -> None:
        """``--help`` exits 0 and its output lists the mode flags."""
        proc = run_script(self._SCRIPT, "--help")
        assert proc.returncode == 0
        # The case-insensitive match also accepts the capitalised "Usage:".
        assert "usage:" in proc.stdout.lower()
        for flag in ("--full", "--harvest-only", "--hygiene-only"):
            assert flag in proc.stdout

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unrecognised flag yields a non-zero exit and a stderr message."""
        proc = run_script(self._SCRIPT, "--bogus")
        assert proc.returncode != 0
        assert "Unknown option" in proc.stderr

    def test_harvest_only_and_hygiene_only_conflict(self, run_script) -> None:
        """Passing both ``--harvest-only`` and ``--hygiene-only`` is rejected."""
        conflicting = ("--harvest-only", "--hygiene-only")
        proc = run_script(self._SCRIPT, *conflicting)
        assert proc.returncode != 0
        assert "mutually exclusive" in proc.stderr

    def test_hygiene_only_dry_run_completes(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A hygiene-only dry run over a one-page wiki runs to completion."""
        make_page(tmp_wiki, "patterns/one.md")
        proc = run_script(self._SCRIPT, *self._HYGIENE_DRY_RUN)
        assert proc.returncode == 0
        for marker in ("Phase 2: Hygiene checks", "finished"):
            assert marker in proc.stdout

    def test_phase_1_skipped_in_hygiene_only(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """Hygiene-only mode reports the harvesting phase as skipped."""
        proc = run_script(self._SCRIPT, *self._HYGIENE_DRY_RUN)
        assert proc.returncode == 0
        assert "Phase 1: URL harvesting (skipped)" in proc.stdout

    def test_phase_3_skipped_in_dry_run(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """With ``--dry-run`` alone the reindex phase is reported as skipped."""
        make_page(tmp_wiki, "patterns/one.md")
        # Deliberately no --no-reindex here: the dry run itself must skip it.
        proc = run_script(self._SCRIPT, "--hygiene-only", "--dry-run")
        assert "Phase 3: qmd reindex (skipped)" in proc.stdout

    def test_harvest_only_dry_run_completes(
        self, run_script, tmp_wiki: Path
    ) -> None:
        """A harvest-only dry run exits 0 and reports hygiene as skipped."""
        # Seed one summarized conversation containing a URL so the harvest
        # phase has something to scan.
        make_conversation(
            tmp_wiki,
            "test",
            "2026-04-10-test.md",
            status="summarized",
            body="See https://docs.python.org/3/library/os.html for details.\n",
        )
        harvest_flags = (
            "--harvest-only",
            "--dry-run",
            "--no-compile",
            "--no-reindex",
        )
        proc = run_script(self._SCRIPT, *harvest_flags)
        assert proc.returncode == 0
        assert "Phase 2: Hygiene checks (skipped)" in proc.stdout


# ---------------------------------------------------------------------------
# wiki-sync.sh
# ---------------------------------------------------------------------------


class TestWikiSyncSh:
    """Smoke tests for wiki-sync.sh."""

    def test_status_on_non_git_dir_exits_cleanly(self, run_script) -> None:
        """Run ``wiki-sync.sh --status`` where the wiki is not a git checkout.

        The premise is that the tmp_wiki fixture is not a git repository, so
        the git commands inside the script fail. The exit code is deliberately
        not checked: the test only requires that the script returns (the call
        passes ``timeout=30``) and that it either printed its status banner on
        stdout or reported the non-git condition on stderr.
        """
        proc = run_script("wiki-sync.sh", "--status", timeout=30)
        # Reaching this line means the script exited rather than hung; it
        # must also have said something on at least one stream.
        assert proc.stdout or proc.stderr
        assert (
            "Wiki Sync Status" in proc.stdout
            or "not a git" in proc.stderr.lower()
        )


# ---------------------------------------------------------------------------
# mine-conversations.sh
# ---------------------------------------------------------------------------


class TestMineConversationsSh:
    """Smoke tests for the mine-conversations.sh command-line interface."""

    _SCRIPT = "mine-conversations.sh"

    def test_extract_only_dry_run(self, run_script, tmp_wiki: Path) -> None:
        """``--extract-only --dry-run`` exits 0 (intended to need no LLM)."""
        flags = ("--extract-only", "--dry-run")
        proc = run_script(self._SCRIPT, *flags, timeout=30)
        assert proc.returncode == 0

    def test_rejects_unknown_flag(self, run_script) -> None:
        """An unrecognised flag makes the script exit non-zero."""
        proc = run_script(self._SCRIPT, "--bogus-flag")
        assert proc.returncode != 0


# ---------------------------------------------------------------------------
# Cross-platform sanity — scripts use portable bash syntax
# ---------------------------------------------------------------------------


class TestBashPortability:
    """Static sanity checks on the shell scripts.

    Each test is parametrized over ``_SCRIPTS`` and verifies one property of
    the script file: its shebang line, the presence of ``set -euo pipefail``,
    and a clean ``bash -n`` parse. The syntax check uses whichever ``bash`` is
    first on PATH, so it only proves the script parses under that version.
    """

    # Single source of truth for the scripts under test, so the three
    # parametrized checks cannot drift apart.
    _SCRIPTS = ("wiki-maintain.sh", "mine-conversations.sh", "wiki-sync.sh")

    @staticmethod
    def _script_path(script: str) -> Path:
        """Return the path of *script* in the ``scripts`` dir beside ``tests``."""
        return Path(__file__).parent.parent / "scripts" / script

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_shebang_is_env_bash(self, script: str) -> None:
        """All shell scripts should use `#!/usr/bin/env bash` for portability."""
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale default encoding.
        lines = self._script_path(script).read_text(encoding="utf-8").splitlines()
        # An empty file should fail the assertion below with a readable
        # message rather than raise IndexError.
        first_line = lines[0] if lines else ""
        assert first_line == "#!/usr/bin/env bash", (
            f"{script} has shebang {first_line!r}, expected #!/usr/bin/env bash"
        )

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_uses_strict_mode(self, script: str) -> None:
        """All shell scripts should use `set -euo pipefail` for safe defaults."""
        text = self._script_path(script).read_text(encoding="utf-8")
        # Plain substring search: this checks the text is present anywhere in
        # the file, not that it is executed.
        assert "set -euo pipefail" in text, f"{script} missing strict mode"

    @pytest.mark.parametrize("script", _SCRIPTS)
    def test_bash_syntax_check(self, script: str) -> None:
        """bash -n does a syntax-only parse and catches obvious errors."""
        result = subprocess.run(
            ["bash", "-n", str(self._script_path(script))],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert result.returncode == 0, f"{script} has bash syntax errors: {result.stderr}"


# ---------------------------------------------------------------------------
# Python script syntax check (smoke)
# ---------------------------------------------------------------------------


class TestPythonSyntax:
    """Smoke-check that each Python script under ``scripts/`` byte-compiles."""

    @pytest.mark.parametrize(
        "script",
        [
            "wiki_lib.py",
            "wiki-harvest.py",
            "wiki-staging.py",
            "wiki-hygiene.py",
            "extract-sessions.py",
            "summarize-conversations.py",
            "update-conversation-index.py",
        ],
    )
    def test_py_compile(self, script: str) -> None:
        """py_compile catches syntax errors without executing the module.

        Raises:
            py_compile.PyCompileError: if *script* fails to compile
                (``doraise=True``), which pytest reports as a test failure.
        """
        import py_compile
        import tempfile

        path = Path(__file__).parent.parent / "scripts" / script
        # Without an explicit cfile, py_compile writes the bytecode into
        # scripts/__pycache__. Send it to a throwaway directory instead so the
        # test leaves no artifacts in the source tree and still works when the
        # checkout is read-only.
        with tempfile.TemporaryDirectory() as scratch_dir:
            py_compile.compile(
                str(path),
                cfile=str(Path(scratch_dir) / f"{path.stem}.pyc"),
                doraise=True,
            )