memex/scripts/mine-conversations.sh

#!/usr/bin/env bash
set -euo pipefail

# mine-conversations.sh — Top-level orchestrator for conversation mining pipeline
#
# Chains: Extract (Python) → Summarize (llama.cpp) → Index (Python)
#
# Usage:
#   mine-conversations.sh                    # Full pipeline
#   mine-conversations.sh --extract-only     # Phase A only (no LLM)
#   mine-conversations.sh --summarize-only   # Phase B only (requires llama-server)
#   mine-conversations.sh --index-only       # Phase C only
#   mine-conversations.sh --project mc       # Filter to one project
#   mine-conversations.sh --dry-run          # Show what would be done

# Work out where this script lives before anything else: sibling scripts and
# the log file are resolved against that directory, not against WIKI_DIR.
SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# A non-empty WIKI_DIR from the environment is honoured as-is; otherwise it
# defaults to the parent directory of SCRIPTS_DIR.
if [[ -z "${WIKI_DIR:-}" ]]; then
    WIKI_DIR="$(dirname "${SCRIPTS_DIR}")"
fi

# Every run appends to this one log file next to the scripts.
LOG_FILE="${SCRIPTS_DIR}/.mine.log"

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------

# Defaults: run all three phases, no project filter, not a dry run.
EXTRACT=true
SUMMARIZE=true
INDEX=true
PROJECT=""
DRY_RUN=""
EXTRA_ARGS=()

# parse_args [ARG...]
#
# Translates command-line arguments into the globals declared above.
#   --extract-only / --summarize-only / --index-only
#                     disable the other two phases
#   --project NAME    store NAME in PROJECT
#   --dry-run         set DRY_RUN to "--dry-run"
#   anything else     appended verbatim to EXTRA_ARGS
#
# Exits with status 2 and a message on stderr when --project is given without
# a value; previously this surfaced as an opaque "unbound variable" failure
# from `set -u`.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --extract-only)
                SUMMARIZE=false
                INDEX=false
                shift
                ;;
            --summarize-only)
                EXTRACT=false
                INDEX=false
                shift
                ;;
            --index-only)
                EXTRACT=false
                SUMMARIZE=false
                shift
                ;;
            --project)
                # Guard before touching $2 so the user gets a usable error.
                if [[ $# -lt 2 ]]; then
                    printf 'Error: --project requires a value\n' >&2
                    exit 2
                fi
                PROJECT="$2"
                shift 2
                ;;
            --dry-run)
                DRY_RUN="--dry-run"
                shift
                ;;
            *)
                # Unrecognised arguments are collected untouched.
                EXTRA_ARGS+=("$1")
                shift
                ;;
        esac
    done
}

parse_args "$@"

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# log MESSAGE...
#
# Prefixes the arguments (joined as "$*") with a "[YYYY-MM-DD HH:MM:SS]"
# timestamp, writes the line to stdout and appends the same line to LOG_FILE.
log() {
    local stamp line
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    line="[${stamp}] $*"
    printf '%s\n' "${line}" | tee -a "${LOG_FILE}"
}

# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

# Make sure the wiki's scripts directory exists before any phase runs.
mkdir -p "${WIKI_DIR}/scripts"

# run_phase SCRIPT [ARG...]
#
# Runs the Python script SCRIPT from SCRIPTS_DIR with the given arguments.
# stderr is merged into stdout and the combined stream is shown on the
# terminal and appended to LOG_FILE. The function returns the pipeline's
# status; with `set -euo pipefail` in effect a failing script aborts the run.
run_phase() {
    local script="$1"
    shift
    python3 "${SCRIPTS_DIR}/${script}" "$@" 2>&1 | tee -a "${LOG_FILE}"
}

log "=== Conversation mining started ==="

# Phases A and B accept the same optional filter / dry-run switches.
filter_args=()
if [[ -n "${PROJECT}" ]]; then
    filter_args+=(--project "${PROJECT}")
fi
if [[ -n "${DRY_RUN}" ]]; then
    filter_args+=(--dry-run)
fi

# NOTE: ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty.
# A plain "${arr[@]}" on an empty array is an "unbound variable" error under
# `set -u` on bash releases older than 4.4, and these arrays are empty in the
# common no-flags invocation.

# Phase A: Extract
if [[ "${EXTRACT}" == true ]]; then
    log "Phase A: Extracting sessions..."
    run_phase extract-sessions.py \
        ${filter_args[@]+"${filter_args[@]}"} \
        ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
fi

# Phase B: Summarize
if [[ "${SUMMARIZE}" == true ]]; then
    log "Phase B: Summarizing conversations..."
    run_phase summarize-conversations.py \
        ${filter_args[@]+"${filter_args[@]}"} \
        ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"}
fi

# Phase C: Index
if [[ "${INDEX}" == true ]]; then
    log "Phase C: Updating index and context..."
    # --reindex is passed only on real runs; a dry run invokes the index
    # script with no arguments. PROJECT and EXTRA_ARGS are not forwarded here.
    index_args=()
    if [[ -z "${DRY_RUN}" ]]; then
        index_args+=(--reindex)
    fi
    run_phase update-conversation-index.py \
        ${index_args[@]+"${index_args[@]}"}
fi

log "=== Conversation mining complete ==="