#!/usr/bin/env bash
set -euo pipefail

# mine-conversations.sh — Top-level orchestrator for conversation mining pipeline
#
# Chains: Extract (Python) → Summarize (llama.cpp) → Index (Python)
#
# Usage:
#   mine-conversations.sh                    # Full pipeline
#   mine-conversations.sh --extract-only     # Phase A only (no LLM)
#   mine-conversations.sh --summarize-only   # Phase B only (requires llama-server)
#   mine-conversations.sh --index-only       # Phase C only
#   mine-conversations.sh --project mc       # Filter to one project
#   mine-conversations.sh --dry-run          # Show what would be done
#
# Any argument not listed above is forwarded unchanged to the extract and
# summarize scripts (it is NOT forwarded to the index script).
#
# Environment:
#   WIKI_DIR   Optional. Defaults to the parent directory of this script.

# Resolve script location first so sibling scripts are found regardless of WIKI_DIR
SCRIPTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WIKI_DIR="${WIKI_DIR:-$(dirname "${SCRIPTS_DIR}")}"
LOG_FILE="${SCRIPTS_DIR}/.mine.log"

# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------

EXTRACT=true
SUMMARIZE=true
INDEX=true
PROJECT=""
DRY_RUN=""
EXTRA_ARGS=()

while [[ $# -gt 0 ]]; do
  case "$1" in
    --extract-only)
      SUMMARIZE=false
      INDEX=false
      shift
      ;;
    --summarize-only)
      EXTRACT=false
      INDEX=false
      shift
      ;;
    --index-only)
      EXTRACT=false
      SUMMARIZE=false
      shift
      ;;
    --project)
      # Without this guard, "$2" trips `set -u` and the user only sees
      # "unbound variable" instead of a usable message.
      if [[ $# -lt 2 ]]; then
        printf 'error: --project requires an argument\n' >&2
        exit 2
      fi
      PROJECT="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN="--dry-run"
      shift
      ;;
    *)
      # Unknown arguments are collected and passed through to phases A and B.
      EXTRA_ARGS+=("$1")
      shift
      ;;
  esac
done

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Print a timestamped message to stdout and append it to LOG_FILE.
# Arguments: $* - message text
log() {
  local msg
  msg="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  printf '%s\n' "${msg}" | tee -a "${LOG_FILE}"
}

# Run a command with stderr merged into stdout, echoing the output and
# appending it to LOG_FILE. With `pipefail` active, a failing command makes
# the pipeline (and therefore this function) fail, which aborts the script
# under `set -e`.
# Arguments: $@ - command and its arguments (must be non-empty)
run_logged() {
  "$@" 2>&1 | tee -a "${LOG_FILE}"
}

# Append the --project / --dry-run flags (when set) and any pass-through
# arguments to the global `cmd` array used by phases A and B.
# Globals: PROJECT, DRY_RUN, EXTRA_ARGS (read); cmd (appended to)
append_filter_args() {
  if [[ -n "${PROJECT}" ]]; then
    cmd+=(--project "${PROJECT}")
  fi
  if [[ -n "${DRY_RUN}" ]]; then
    cmd+=(--dry-run)
  fi
  # The ${arr[@]+...} form keeps an empty array from raising "unbound
  # variable" under `set -u` on bash releases older than 4.4.
  cmd+=(${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"})
}

# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

# NOTE(review): LOG_FILE lives under SCRIPTS_DIR, which is the same directory
# as this one only when WIKI_DIR is not overridden — confirm this is intended.
mkdir -p "${WIKI_DIR}/scripts"

log "=== Conversation mining started ==="

# Phase A: Extract
if [[ "${EXTRACT}" == true ]]; then
  log "Phase A: Extracting sessions..."
  cmd=(python3 "${SCRIPTS_DIR}/extract-sessions.py")
  append_filter_args
  run_logged "${cmd[@]}"
fi

# Phase B: Summarize
if [[ "${SUMMARIZE}" == true ]]; then
  log "Phase B: Summarizing conversations..."
  cmd=(python3 "${SCRIPTS_DIR}/summarize-conversations.py")
  append_filter_args
  run_logged "${cmd[@]}"
fi

# Phase C: Index
if [[ "${INDEX}" == true ]]; then
  log "Phase C: Updating index and context..."
  cmd=(python3 "${SCRIPTS_DIR}/update-conversation-index.py")
  # In dry-run mode the index script is still invoked, just without --reindex.
  if [[ -z "${DRY_RUN}" ]]; then
    cmd+=(--reindex)
  fi
  run_logged "${cmd[@]}"
fi

log "=== Conversation mining complete ==="